/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
*/ 100 static const char *target_parse_constraint(TCGArgConstraint *ct, 101 const char *ct_str, TCGType type); 102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 103 intptr_t arg2); 104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 105 static void tcg_out_movi(TCGContext *s, TCGType type, 106 TCGReg ret, tcg_target_long arg); 107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 108 const int *const_args); 109 #if TCG_TARGET_MAYBE_vec 110 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 111 unsigned vece, const TCGArg *args, 112 const int *const_args); 113 #else 114 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 115 unsigned vece, const TCGArg *args, 116 const int *const_args) 117 { 118 g_assert_not_reached(); 119 } 120 #endif 121 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 122 intptr_t arg2); 123 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 124 TCGReg base, intptr_t ofs); 125 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 126 static int tcg_target_const_match(tcg_target_long val, TCGType type, 127 const TCGArgConstraint *arg_ct); 128 #ifdef TCG_TARGET_NEED_LDST_LABELS 129 static bool tcg_out_ldst_finalize(TCGContext *s); 130 #endif 131 132 #define TCG_HIGHWATER 1024 133 134 static TCGContext **tcg_ctxs; 135 static unsigned int n_tcg_ctxs; 136 TCGv_env cpu_env = 0; 137 138 struct tcg_region_tree { 139 QemuMutex lock; 140 GTree *tree; 141 /* padding to avoid false sharing is computed at run-time */ 142 }; 143 144 /* 145 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 146 * dynamically allocate from as demand dictates. Given appropriate region 147 * sizing, this minimizes flushes even when some TCG threads generate a lot 148 * more code than others. 149 */ 150 struct tcg_region_state { 151 QemuMutex lock; 152 153 /* fields set at init time */ 154 void *start; 155 void *start_aligned; 156 void *end; 157 size_t n; 158 size_t size; /* size of one region */ 159 size_t stride; /* .size + guard size */ 160 161 /* fields protected by the lock */ 162 size_t current; /* current region index */ 163 size_t agg_size_full; /* aggregate size of full regions */ 164 }; 165 166 static struct tcg_region_state region; 167 /* 168 * This is an array of struct tcg_region_tree's, with padding. 169 * We use void * to simplify the computation of region_trees[i]; each 170 * struct is found every tree_size bytes. 
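 *
 * As an illustrative sketch (mirroring tcg_region_trees_init() and
 * tc_ptr_to_region_tree() below), the tree covering region i is reached as:
 *
 *     struct tcg_region_tree *rt = region_trees + i * tree_size;
 *
 * with tree_size rounded up to the host dcache line size, so that trees used
 * by different threads never share a cache line.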
171 */ 172 static void *region_trees; 173 static size_t tree_size; 174 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 175 static TCGRegSet tcg_target_call_clobber_regs; 176 177 #if TCG_TARGET_INSN_UNIT_SIZE == 1 178 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 179 { 180 *s->code_ptr++ = v; 181 } 182 183 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 184 uint8_t v) 185 { 186 *p = v; 187 } 188 #endif 189 190 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 191 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 192 { 193 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 194 *s->code_ptr++ = v; 195 } else { 196 tcg_insn_unit *p = s->code_ptr; 197 memcpy(p, &v, sizeof(v)); 198 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 199 } 200 } 201 202 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 203 uint16_t v) 204 { 205 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 206 *p = v; 207 } else { 208 memcpy(p, &v, sizeof(v)); 209 } 210 } 211 #endif 212 213 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 214 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 215 { 216 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 217 *s->code_ptr++ = v; 218 } else { 219 tcg_insn_unit *p = s->code_ptr; 220 memcpy(p, &v, sizeof(v)); 221 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 222 } 223 } 224 225 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 226 uint32_t v) 227 { 228 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 229 *p = v; 230 } else { 231 memcpy(p, &v, sizeof(v)); 232 } 233 } 234 #endif 235 236 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 237 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 238 { 239 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 240 *s->code_ptr++ = v; 241 } else { 242 tcg_insn_unit *p = s->code_ptr; 243 memcpy(p, &v, sizeof(v)); 244 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 245 } 246 } 247 248 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 249 uint64_t v) 250 { 251 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 252 *p = v; 253 } else { 254 memcpy(p, &v, sizeof(v)); 255 } 256 } 257 #endif 258 259 /* label relocation processing */ 260 261 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 262 TCGLabel *l, intptr_t addend) 263 { 264 TCGRelocation *r; 265 266 if (l->has_value) { 267 /* FIXME: This may break relocations on RISC targets that 268 modify instruction fields in place. The caller may not have 269 written the initial value. 
*/ 270 patch_reloc(code_ptr, type, l->u.value, addend); 271 } else { 272 /* add a new relocation entry */ 273 r = tcg_malloc(sizeof(TCGRelocation)); 274 r->type = type; 275 r->ptr = code_ptr; 276 r->addend = addend; 277 r->next = l->u.first_reloc; 278 l->u.first_reloc = r; 279 } 280 } 281 282 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 283 { 284 intptr_t value = (intptr_t)ptr; 285 TCGRelocation *r; 286 287 tcg_debug_assert(!l->has_value); 288 289 for (r = l->u.first_reloc; r != NULL; r = r->next) { 290 patch_reloc(r->ptr, r->type, value, r->addend); 291 } 292 293 l->has_value = 1; 294 l->u.value_ptr = ptr; 295 } 296 297 TCGLabel *gen_new_label(void) 298 { 299 TCGContext *s = tcg_ctx; 300 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 301 302 *l = (TCGLabel){ 303 .id = s->nb_labels++ 304 }; 305 306 return l; 307 } 308 309 #include "tcg-target.inc.c" 310 311 /* compare a pointer @ptr and a tb_tc @s */ 312 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) 313 { 314 if (ptr >= s->ptr + s->size) { 315 return 1; 316 } else if (ptr < s->ptr) { 317 return -1; 318 } 319 return 0; 320 } 321 322 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp) 323 { 324 const struct tb_tc *a = ap; 325 const struct tb_tc *b = bp; 326 327 /* 328 * When both sizes are set, we know this isn't a lookup. 329 * This is the most likely case: every TB must be inserted; lookups 330 * are a lot less frequent. 331 */ 332 if (likely(a->size && b->size)) { 333 if (a->ptr > b->ptr) { 334 return 1; 335 } else if (a->ptr < b->ptr) { 336 return -1; 337 } 338 /* a->ptr == b->ptr should happen only on deletions */ 339 g_assert(a->size == b->size); 340 return 0; 341 } 342 /* 343 * All lookups have either .size field set to 0. 344 * From the glib sources we see that @ap is always the lookup key. However 345 * the docs provide no guarantee, so we just mark this case as likely. 346 */ 347 if (likely(a->size == 0)) { 348 return ptr_cmp_tb_tc(a->ptr, b); 349 } 350 return ptr_cmp_tb_tc(b->ptr, a); 351 } 352 353 static void tcg_region_trees_init(void) 354 { 355 size_t i; 356 357 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize); 358 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size); 359 for (i = 0; i < region.n; i++) { 360 struct tcg_region_tree *rt = region_trees + i * tree_size; 361 362 qemu_mutex_init(&rt->lock); 363 rt->tree = g_tree_new(tb_tc_cmp); 364 } 365 } 366 367 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p) 368 { 369 size_t region_idx; 370 371 if (p < region.start_aligned) { 372 region_idx = 0; 373 } else { 374 ptrdiff_t offset = p - region.start_aligned; 375 376 if (offset > region.stride * (region.n - 1)) { 377 region_idx = region.n - 1; 378 } else { 379 region_idx = offset / region.stride; 380 } 381 } 382 return region_trees + region_idx * tree_size; 383 } 384 385 void tcg_tb_insert(TranslationBlock *tb) 386 { 387 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 388 389 qemu_mutex_lock(&rt->lock); 390 g_tree_insert(rt->tree, &tb->tc, tb); 391 qemu_mutex_unlock(&rt->lock); 392 } 393 394 void tcg_tb_remove(TranslationBlock *tb) 395 { 396 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 397 398 qemu_mutex_lock(&rt->lock); 399 g_tree_remove(rt->tree, &tb->tc); 400 qemu_mutex_unlock(&rt->lock); 401 } 402 403 /* 404 * Find the TB 'tb' such that 405 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size 406 * Return NULL if not found. 
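 *
 * An illustrative use (sketch only; 'retaddr' is a hypothetical variable
 * holding a host address inside generated code, e.g. a helper's return
 * address):
 *
 *     TranslationBlock *tb = tcg_tb_lookup(retaddr);
 *     if (tb == NULL) {
 *         ... retaddr does not point into any translated block ...
 *     }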
407 */ 408 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) 409 { 410 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr); 411 TranslationBlock *tb; 412 struct tb_tc s = { .ptr = (void *)tc_ptr }; 413 414 qemu_mutex_lock(&rt->lock); 415 tb = g_tree_lookup(rt->tree, &s); 416 qemu_mutex_unlock(&rt->lock); 417 return tb; 418 } 419 420 static void tcg_region_tree_lock_all(void) 421 { 422 size_t i; 423 424 for (i = 0; i < region.n; i++) { 425 struct tcg_region_tree *rt = region_trees + i * tree_size; 426 427 qemu_mutex_lock(&rt->lock); 428 } 429 } 430 431 static void tcg_region_tree_unlock_all(void) 432 { 433 size_t i; 434 435 for (i = 0; i < region.n; i++) { 436 struct tcg_region_tree *rt = region_trees + i * tree_size; 437 438 qemu_mutex_unlock(&rt->lock); 439 } 440 } 441 442 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) 443 { 444 size_t i; 445 446 tcg_region_tree_lock_all(); 447 for (i = 0; i < region.n; i++) { 448 struct tcg_region_tree *rt = region_trees + i * tree_size; 449 450 g_tree_foreach(rt->tree, func, user_data); 451 } 452 tcg_region_tree_unlock_all(); 453 } 454 455 size_t tcg_nb_tbs(void) 456 { 457 size_t nb_tbs = 0; 458 size_t i; 459 460 tcg_region_tree_lock_all(); 461 for (i = 0; i < region.n; i++) { 462 struct tcg_region_tree *rt = region_trees + i * tree_size; 463 464 nb_tbs += g_tree_nnodes(rt->tree); 465 } 466 tcg_region_tree_unlock_all(); 467 return nb_tbs; 468 } 469 470 static void tcg_region_tree_reset_all(void) 471 { 472 size_t i; 473 474 tcg_region_tree_lock_all(); 475 for (i = 0; i < region.n; i++) { 476 struct tcg_region_tree *rt = region_trees + i * tree_size; 477 478 /* Increment the refcount first so that destroy acts as a reset */ 479 g_tree_ref(rt->tree); 480 g_tree_destroy(rt->tree); 481 } 482 tcg_region_tree_unlock_all(); 483 } 484 485 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 486 { 487 void *start, *end; 488 489 start = region.start_aligned + curr_region * region.stride; 490 end = start + region.size; 491 492 if (curr_region == 0) { 493 start = region.start; 494 } 495 if (curr_region == region.n - 1) { 496 end = region.end; 497 } 498 499 *pstart = start; 500 *pend = end; 501 } 502 503 static void tcg_region_assign(TCGContext *s, size_t curr_region) 504 { 505 void *start, *end; 506 507 tcg_region_bounds(curr_region, &start, &end); 508 509 s->code_gen_buffer = start; 510 s->code_gen_ptr = start; 511 s->code_gen_buffer_size = end - start; 512 s->code_gen_highwater = end - TCG_HIGHWATER; 513 } 514 515 static bool tcg_region_alloc__locked(TCGContext *s) 516 { 517 if (region.current == region.n) { 518 return true; 519 } 520 tcg_region_assign(s, region.current); 521 region.current++; 522 return false; 523 } 524 525 /* 526 * Request a new region once the one in use has filled up. 527 * Returns true on error. 528 */ 529 static bool tcg_region_alloc(TCGContext *s) 530 { 531 bool err; 532 /* read the region size now; alloc__locked will overwrite it on success */ 533 size_t size_full = s->code_gen_buffer_size; 534 535 qemu_mutex_lock(®ion.lock); 536 err = tcg_region_alloc__locked(s); 537 if (!err) { 538 region.agg_size_full += size_full - TCG_HIGHWATER; 539 } 540 qemu_mutex_unlock(®ion.lock); 541 return err; 542 } 543 544 /* 545 * Perform a context's first region allocation. 546 * This function does _not_ increment region.agg_size_full. 
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
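 *
 * Illustrative layout (sketch only, not to scale) as set up by
 * tcg_region_init() below, for n_regions == 3:
 *
 *   start   start_aligned
 *   v       v
 *   |== region 0 =========|G|== region 1 ==|G|== region 2 + leftover ==|G|
 *
 * Each 'G' is a single guard page made inaccessible with qemu_mprotect_none();
 * region 0 also covers the unaligned prefix between start and start_aligned,
 * and the last region absorbs the pages left over by the division.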
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.
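       Globals copied from tcg_init_ctx still have mem_base pointing into
       tcg_init_ctx.temps[]; redirect each one to the corresponding slot in
       this context's own temps[] array.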
*/ 730 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 731 if (tcg_init_ctx.temps[i].mem_base) { 732 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 733 tcg_debug_assert(b >= 0 && b < n); 734 s->temps[i].mem_base = &s->temps[b]; 735 } 736 } 737 738 /* Claim an entry in tcg_ctxs */ 739 n = atomic_fetch_inc(&n_tcg_ctxs); 740 g_assert(n < max_cpus); 741 atomic_set(&tcg_ctxs[n], s); 742 743 tcg_ctx = s; 744 qemu_mutex_lock(®ion.lock); 745 err = tcg_region_initial_alloc__locked(tcg_ctx); 746 g_assert(!err); 747 qemu_mutex_unlock(®ion.lock); 748 } 749 #endif /* !CONFIG_USER_ONLY */ 750 751 /* 752 * Returns the size (in bytes) of all translated code (i.e. from all regions) 753 * currently in the cache. 754 * See also: tcg_code_capacity() 755 * Do not confuse with tcg_current_code_size(); that one applies to a single 756 * TCG context. 757 */ 758 size_t tcg_code_size(void) 759 { 760 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 761 unsigned int i; 762 size_t total; 763 764 qemu_mutex_lock(®ion.lock); 765 total = region.agg_size_full; 766 for (i = 0; i < n_ctxs; i++) { 767 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 768 size_t size; 769 770 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 771 g_assert(size <= s->code_gen_buffer_size); 772 total += size; 773 } 774 qemu_mutex_unlock(®ion.lock); 775 return total; 776 } 777 778 /* 779 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 780 * regions. 781 * See also: tcg_code_size() 782 */ 783 size_t tcg_code_capacity(void) 784 { 785 size_t guard_size, capacity; 786 787 /* no need for synchronization; these variables are set at init time */ 788 guard_size = region.stride - region.size; 789 capacity = region.end + guard_size - region.start; 790 capacity -= region.n * (guard_size + TCG_HIGHWATER); 791 return capacity; 792 } 793 794 /* pool based memory allocation */ 795 void *tcg_malloc_internal(TCGContext *s, int size) 796 { 797 TCGPool *p; 798 int pool_size; 799 800 if (size > TCG_POOL_CHUNK_SIZE) { 801 /* big malloc: insert a new pool (XXX: could optimize) */ 802 p = g_malloc(sizeof(TCGPool) + size); 803 p->size = size; 804 p->next = s->pool_first_large; 805 s->pool_first_large = p; 806 return p->data; 807 } else { 808 p = s->pool_current; 809 if (!p) { 810 p = s->pool_first; 811 if (!p) 812 goto new_pool; 813 } else { 814 if (!p->next) { 815 new_pool: 816 pool_size = TCG_POOL_CHUNK_SIZE; 817 p = g_malloc(sizeof(TCGPool) + pool_size); 818 p->size = pool_size; 819 p->next = NULL; 820 if (s->pool_current) 821 s->pool_current->next = p; 822 else 823 s->pool_first = p; 824 } else { 825 p = p->next; 826 } 827 } 828 } 829 s->pool_current = p; 830 s->pool_cur = p->data + size; 831 s->pool_end = p->data + p->size; 832 return p->data; 833 } 834 835 void tcg_pool_reset(TCGContext *s) 836 { 837 TCGPool *p, *t; 838 for (p = s->pool_first_large; p; p = t) { 839 t = p->next; 840 g_free(p); 841 } 842 s->pool_first_large = NULL; 843 s->pool_cur = s->pool_end = NULL; 844 s->pool_current = NULL; 845 } 846 847 typedef struct TCGHelperInfo { 848 void *func; 849 const char *name; 850 unsigned flags; 851 unsigned sizemask; 852 } TCGHelperInfo; 853 854 #include "exec/helper-proto.h" 855 856 static const TCGHelperInfo all_helpers[] = { 857 #include "exec/helper-tcg.h" 858 }; 859 static GHashTable *helper_table; 860 861 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 862 static void process_op_defs(TCGContext *s); 863 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, 
TCGType type, 864 TCGReg reg, const char *name); 865 866 void tcg_context_init(TCGContext *s) 867 { 868 int op, total_args, n, i; 869 TCGOpDef *def; 870 TCGArgConstraint *args_ct; 871 int *sorted_args; 872 TCGTemp *ts; 873 874 memset(s, 0, sizeof(*s)); 875 s->nb_globals = 0; 876 877 /* Count total number of arguments and allocate the corresponding 878 space */ 879 total_args = 0; 880 for(op = 0; op < NB_OPS; op++) { 881 def = &tcg_op_defs[op]; 882 n = def->nb_iargs + def->nb_oargs; 883 total_args += n; 884 } 885 886 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 887 sorted_args = g_malloc(sizeof(int) * total_args); 888 889 for(op = 0; op < NB_OPS; op++) { 890 def = &tcg_op_defs[op]; 891 def->args_ct = args_ct; 892 def->sorted_args = sorted_args; 893 n = def->nb_iargs + def->nb_oargs; 894 sorted_args += n; 895 args_ct += n; 896 } 897 898 /* Register helpers. */ 899 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 900 helper_table = g_hash_table_new(NULL, NULL); 901 902 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 903 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 904 (gpointer)&all_helpers[i]); 905 } 906 907 tcg_target_init(s); 908 process_op_defs(s); 909 910 /* Reverse the order of the saved registers, assuming they're all at 911 the start of tcg_target_reg_alloc_order. */ 912 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 913 int r = tcg_target_reg_alloc_order[n]; 914 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 915 break; 916 } 917 } 918 for (i = 0; i < n; ++i) { 919 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 920 } 921 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 922 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 923 } 924 925 tcg_ctx = s; 926 /* 927 * In user-mode we simply share the init context among threads, since we 928 * use a single region. See the documentation tcg_region_init() for the 929 * reasoning behind this. 930 * In softmmu we will have at most max_cpus TCG threads. 931 */ 932 #ifdef CONFIG_USER_ONLY 933 tcg_ctxs = &tcg_ctx; 934 n_tcg_ctxs = 1; 935 #else 936 tcg_ctxs = g_new(TCGContext *, max_cpus); 937 #endif 938 939 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 940 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 941 cpu_env = temp_tcgv_ptr(ts); 942 } 943 944 /* 945 * Allocate TBs right before their corresponding translated code, making 946 * sure that TBs and code are on different cache lines. 947 */ 948 TranslationBlock *tcg_tb_alloc(TCGContext *s) 949 { 950 uintptr_t align = qemu_icache_linesize; 951 TranslationBlock *tb; 952 void *next; 953 954 retry: 955 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 956 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 957 958 if (unlikely(next > s->code_gen_highwater)) { 959 if (tcg_region_alloc(s)) { 960 return NULL; 961 } 962 goto retry; 963 } 964 atomic_set(&s->code_gen_ptr, next); 965 s->data_gen_ptr = NULL; 966 return tb; 967 } 968 969 void tcg_prologue_init(TCGContext *s) 970 { 971 size_t prologue_size, total_size; 972 void *buf0, *buf1; 973 974 /* Put the prologue at the beginning of code_gen_buffer. */ 975 buf0 = s->code_gen_buffer; 976 total_size = s->code_gen_buffer_size; 977 s->code_ptr = buf0; 978 s->code_buf = buf0; 979 s->data_gen_ptr = NULL; 980 s->code_gen_prologue = buf0; 981 982 /* Compute a high-water mark, at which we voluntarily flush the buffer 983 and start over. 
The size here is arbitrary, significantly larger 984 than we expect the code generation for any one opcode to require. */ 985 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 986 987 #ifdef TCG_TARGET_NEED_POOL_LABELS 988 s->pool_labels = NULL; 989 #endif 990 991 /* Generate the prologue. */ 992 tcg_target_qemu_prologue(s); 993 994 #ifdef TCG_TARGET_NEED_POOL_LABELS 995 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 996 { 997 bool ok = tcg_out_pool_finalize(s); 998 tcg_debug_assert(ok); 999 } 1000 #endif 1001 1002 buf1 = s->code_ptr; 1003 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 1004 1005 /* Deduct the prologue from the buffer. */ 1006 prologue_size = tcg_current_code_size(s); 1007 s->code_gen_ptr = buf1; 1008 s->code_gen_buffer = buf1; 1009 s->code_buf = buf1; 1010 total_size -= prologue_size; 1011 s->code_gen_buffer_size = total_size; 1012 1013 tcg_register_jit(s->code_gen_buffer, total_size); 1014 1015 #ifdef DEBUG_DISAS 1016 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1017 qemu_log_lock(); 1018 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 1019 if (s->data_gen_ptr) { 1020 size_t code_size = s->data_gen_ptr - buf0; 1021 size_t data_size = prologue_size - code_size; 1022 size_t i; 1023 1024 log_disas(buf0, code_size); 1025 1026 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1027 if (sizeof(tcg_target_ulong) == 8) { 1028 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1029 (uintptr_t)s->data_gen_ptr + i, 1030 *(uint64_t *)(s->data_gen_ptr + i)); 1031 } else { 1032 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 1033 (uintptr_t)s->data_gen_ptr + i, 1034 *(uint32_t *)(s->data_gen_ptr + i)); 1035 } 1036 } 1037 } else { 1038 log_disas(buf0, prologue_size); 1039 } 1040 qemu_log("\n"); 1041 qemu_log_flush(); 1042 qemu_log_unlock(); 1043 } 1044 #endif 1045 1046 /* Assert that goto_ptr is implemented completely. */ 1047 if (TCG_TARGET_HAS_goto_ptr) { 1048 tcg_debug_assert(s->code_gen_epilogue != NULL); 1049 } 1050 } 1051 1052 void tcg_func_start(TCGContext *s) 1053 { 1054 tcg_pool_reset(s); 1055 s->nb_temps = s->nb_globals; 1056 1057 /* No temps have been previously allocated for size or locality. 
*/ 1058 memset(s->free_temps, 0, sizeof(s->free_temps)); 1059 1060 s->nb_ops = 0; 1061 s->nb_labels = 0; 1062 s->current_frame_offset = s->frame_start; 1063 1064 #ifdef CONFIG_DEBUG_TCG 1065 s->goto_tb_issue_mask = 0; 1066 #endif 1067 1068 QTAILQ_INIT(&s->ops); 1069 QTAILQ_INIT(&s->free_ops); 1070 } 1071 1072 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 1073 { 1074 int n = s->nb_temps++; 1075 tcg_debug_assert(n < TCG_MAX_TEMPS); 1076 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1077 } 1078 1079 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 1080 { 1081 TCGTemp *ts; 1082 1083 tcg_debug_assert(s->nb_globals == s->nb_temps); 1084 s->nb_globals++; 1085 ts = tcg_temp_alloc(s); 1086 ts->temp_global = 1; 1087 1088 return ts; 1089 } 1090 1091 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1092 TCGReg reg, const char *name) 1093 { 1094 TCGTemp *ts; 1095 1096 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1097 tcg_abort(); 1098 } 1099 1100 ts = tcg_global_alloc(s); 1101 ts->base_type = type; 1102 ts->type = type; 1103 ts->fixed_reg = 1; 1104 ts->reg = reg; 1105 ts->name = name; 1106 tcg_regset_set_reg(s->reserved_regs, reg); 1107 1108 return ts; 1109 } 1110 1111 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1112 { 1113 s->frame_start = start; 1114 s->frame_end = start + size; 1115 s->frame_temp 1116 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1117 } 1118 1119 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1120 intptr_t offset, const char *name) 1121 { 1122 TCGContext *s = tcg_ctx; 1123 TCGTemp *base_ts = tcgv_ptr_temp(base); 1124 TCGTemp *ts = tcg_global_alloc(s); 1125 int indirect_reg = 0, bigendian = 0; 1126 #ifdef HOST_WORDS_BIGENDIAN 1127 bigendian = 1; 1128 #endif 1129 1130 if (!base_ts->fixed_reg) { 1131 /* We do not support double-indirect registers. */ 1132 tcg_debug_assert(!base_ts->indirect_reg); 1133 base_ts->indirect_base = 1; 1134 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1135 ? 2 : 1); 1136 indirect_reg = 1; 1137 } 1138 1139 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1140 TCGTemp *ts2 = tcg_global_alloc(s); 1141 char buf[64]; 1142 1143 ts->base_type = TCG_TYPE_I64; 1144 ts->type = TCG_TYPE_I32; 1145 ts->indirect_reg = indirect_reg; 1146 ts->mem_allocated = 1; 1147 ts->mem_base = base_ts; 1148 ts->mem_offset = offset + bigendian * 4; 1149 pstrcpy(buf, sizeof(buf), name); 1150 pstrcat(buf, sizeof(buf), "_0"); 1151 ts->name = strdup(buf); 1152 1153 tcg_debug_assert(ts2 == ts + 1); 1154 ts2->base_type = TCG_TYPE_I64; 1155 ts2->type = TCG_TYPE_I32; 1156 ts2->indirect_reg = indirect_reg; 1157 ts2->mem_allocated = 1; 1158 ts2->mem_base = base_ts; 1159 ts2->mem_offset = offset + (1 - bigendian) * 4; 1160 pstrcpy(buf, sizeof(buf), name); 1161 pstrcat(buf, sizeof(buf), "_1"); 1162 ts2->name = strdup(buf); 1163 } else { 1164 ts->base_type = type; 1165 ts->type = type; 1166 ts->indirect_reg = indirect_reg; 1167 ts->mem_allocated = 1; 1168 ts->mem_base = base_ts; 1169 ts->mem_offset = offset; 1170 ts->name = name; 1171 } 1172 return ts; 1173 } 1174 1175 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) 1176 { 1177 TCGContext *s = tcg_ctx; 1178 TCGTemp *ts; 1179 int idx, k; 1180 1181 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 1182 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 1183 if (idx < TCG_MAX_TEMPS) { 1184 /* There is already an available temp with the right type. 
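           (free_temps is indexed by 'k' above: the base type, offset by
           TCG_TYPE_COUNT when a local temp rather than a normal temp is
           requested.)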
*/ 1185 clear_bit(idx, s->free_temps[k].l); 1186 1187 ts = &s->temps[idx]; 1188 ts->temp_allocated = 1; 1189 tcg_debug_assert(ts->base_type == type); 1190 tcg_debug_assert(ts->temp_local == temp_local); 1191 } else { 1192 ts = tcg_temp_alloc(s); 1193 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1194 TCGTemp *ts2 = tcg_temp_alloc(s); 1195 1196 ts->base_type = type; 1197 ts->type = TCG_TYPE_I32; 1198 ts->temp_allocated = 1; 1199 ts->temp_local = temp_local; 1200 1201 tcg_debug_assert(ts2 == ts + 1); 1202 ts2->base_type = TCG_TYPE_I64; 1203 ts2->type = TCG_TYPE_I32; 1204 ts2->temp_allocated = 1; 1205 ts2->temp_local = temp_local; 1206 } else { 1207 ts->base_type = type; 1208 ts->type = type; 1209 ts->temp_allocated = 1; 1210 ts->temp_local = temp_local; 1211 } 1212 } 1213 1214 #if defined(CONFIG_DEBUG_TCG) 1215 s->temps_in_use++; 1216 #endif 1217 return ts; 1218 } 1219 1220 TCGv_vec tcg_temp_new_vec(TCGType type) 1221 { 1222 TCGTemp *t; 1223 1224 #ifdef CONFIG_DEBUG_TCG 1225 switch (type) { 1226 case TCG_TYPE_V64: 1227 assert(TCG_TARGET_HAS_v64); 1228 break; 1229 case TCG_TYPE_V128: 1230 assert(TCG_TARGET_HAS_v128); 1231 break; 1232 case TCG_TYPE_V256: 1233 assert(TCG_TARGET_HAS_v256); 1234 break; 1235 default: 1236 g_assert_not_reached(); 1237 } 1238 #endif 1239 1240 t = tcg_temp_new_internal(type, 0); 1241 return temp_tcgv_vec(t); 1242 } 1243 1244 /* Create a new temp of the same type as an existing temp. */ 1245 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1246 { 1247 TCGTemp *t = tcgv_vec_temp(match); 1248 1249 tcg_debug_assert(t->temp_allocated != 0); 1250 1251 t = tcg_temp_new_internal(t->base_type, 0); 1252 return temp_tcgv_vec(t); 1253 } 1254 1255 void tcg_temp_free_internal(TCGTemp *ts) 1256 { 1257 TCGContext *s = tcg_ctx; 1258 int k, idx; 1259 1260 #if defined(CONFIG_DEBUG_TCG) 1261 s->temps_in_use--; 1262 if (s->temps_in_use < 0) { 1263 fprintf(stderr, "More temporaries freed than allocated!\n"); 1264 } 1265 #endif 1266 1267 tcg_debug_assert(ts->temp_global == 0); 1268 tcg_debug_assert(ts->temp_allocated != 0); 1269 ts->temp_allocated = 0; 1270 1271 idx = temp_idx(ts); 1272 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 1273 set_bit(idx, s->free_temps[k].l); 1274 } 1275 1276 TCGv_i32 tcg_const_i32(int32_t val) 1277 { 1278 TCGv_i32 t0; 1279 t0 = tcg_temp_new_i32(); 1280 tcg_gen_movi_i32(t0, val); 1281 return t0; 1282 } 1283 1284 TCGv_i64 tcg_const_i64(int64_t val) 1285 { 1286 TCGv_i64 t0; 1287 t0 = tcg_temp_new_i64(); 1288 tcg_gen_movi_i64(t0, val); 1289 return t0; 1290 } 1291 1292 TCGv_i32 tcg_const_local_i32(int32_t val) 1293 { 1294 TCGv_i32 t0; 1295 t0 = tcg_temp_local_new_i32(); 1296 tcg_gen_movi_i32(t0, val); 1297 return t0; 1298 } 1299 1300 TCGv_i64 tcg_const_local_i64(int64_t val) 1301 { 1302 TCGv_i64 t0; 1303 t0 = tcg_temp_local_new_i64(); 1304 tcg_gen_movi_i64(t0, val); 1305 return t0; 1306 } 1307 1308 #if defined(CONFIG_DEBUG_TCG) 1309 void tcg_clear_temp_count(void) 1310 { 1311 TCGContext *s = tcg_ctx; 1312 s->temps_in_use = 0; 1313 } 1314 1315 int tcg_check_temp_count(void) 1316 { 1317 TCGContext *s = tcg_ctx; 1318 if (s->temps_in_use) { 1319 /* Clear the count so that we don't give another 1320 * warning immediately next time around. 1321 */ 1322 s->temps_in_use = 0; 1323 return 1; 1324 } 1325 return 0; 1326 } 1327 #endif 1328 1329 /* Return true if OP may appear in the opcode stream. 1330 Test the runtime variable that controls each opcode. 
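
   For instance (an illustrative sketch, not a specific existing caller),
   code emitting an optional opcode could guard it at runtime:

       if (tcg_op_supported(INDEX_op_ctpop_i32)) {
           ... emit INDEX_op_ctpop_i32 directly ...
       } else {
           ... expand the population count from always-available ops ...
       }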
*/ 1331 bool tcg_op_supported(TCGOpcode op) 1332 { 1333 const bool have_vec 1334 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1335 1336 switch (op) { 1337 case INDEX_op_discard: 1338 case INDEX_op_set_label: 1339 case INDEX_op_call: 1340 case INDEX_op_br: 1341 case INDEX_op_mb: 1342 case INDEX_op_insn_start: 1343 case INDEX_op_exit_tb: 1344 case INDEX_op_goto_tb: 1345 case INDEX_op_qemu_ld_i32: 1346 case INDEX_op_qemu_st_i32: 1347 case INDEX_op_qemu_ld_i64: 1348 case INDEX_op_qemu_st_i64: 1349 return true; 1350 1351 case INDEX_op_goto_ptr: 1352 return TCG_TARGET_HAS_goto_ptr; 1353 1354 case INDEX_op_mov_i32: 1355 case INDEX_op_movi_i32: 1356 case INDEX_op_setcond_i32: 1357 case INDEX_op_brcond_i32: 1358 case INDEX_op_ld8u_i32: 1359 case INDEX_op_ld8s_i32: 1360 case INDEX_op_ld16u_i32: 1361 case INDEX_op_ld16s_i32: 1362 case INDEX_op_ld_i32: 1363 case INDEX_op_st8_i32: 1364 case INDEX_op_st16_i32: 1365 case INDEX_op_st_i32: 1366 case INDEX_op_add_i32: 1367 case INDEX_op_sub_i32: 1368 case INDEX_op_mul_i32: 1369 case INDEX_op_and_i32: 1370 case INDEX_op_or_i32: 1371 case INDEX_op_xor_i32: 1372 case INDEX_op_shl_i32: 1373 case INDEX_op_shr_i32: 1374 case INDEX_op_sar_i32: 1375 return true; 1376 1377 case INDEX_op_movcond_i32: 1378 return TCG_TARGET_HAS_movcond_i32; 1379 case INDEX_op_div_i32: 1380 case INDEX_op_divu_i32: 1381 return TCG_TARGET_HAS_div_i32; 1382 case INDEX_op_rem_i32: 1383 case INDEX_op_remu_i32: 1384 return TCG_TARGET_HAS_rem_i32; 1385 case INDEX_op_div2_i32: 1386 case INDEX_op_divu2_i32: 1387 return TCG_TARGET_HAS_div2_i32; 1388 case INDEX_op_rotl_i32: 1389 case INDEX_op_rotr_i32: 1390 return TCG_TARGET_HAS_rot_i32; 1391 case INDEX_op_deposit_i32: 1392 return TCG_TARGET_HAS_deposit_i32; 1393 case INDEX_op_extract_i32: 1394 return TCG_TARGET_HAS_extract_i32; 1395 case INDEX_op_sextract_i32: 1396 return TCG_TARGET_HAS_sextract_i32; 1397 case INDEX_op_add2_i32: 1398 return TCG_TARGET_HAS_add2_i32; 1399 case INDEX_op_sub2_i32: 1400 return TCG_TARGET_HAS_sub2_i32; 1401 case INDEX_op_mulu2_i32: 1402 return TCG_TARGET_HAS_mulu2_i32; 1403 case INDEX_op_muls2_i32: 1404 return TCG_TARGET_HAS_muls2_i32; 1405 case INDEX_op_muluh_i32: 1406 return TCG_TARGET_HAS_muluh_i32; 1407 case INDEX_op_mulsh_i32: 1408 return TCG_TARGET_HAS_mulsh_i32; 1409 case INDEX_op_ext8s_i32: 1410 return TCG_TARGET_HAS_ext8s_i32; 1411 case INDEX_op_ext16s_i32: 1412 return TCG_TARGET_HAS_ext16s_i32; 1413 case INDEX_op_ext8u_i32: 1414 return TCG_TARGET_HAS_ext8u_i32; 1415 case INDEX_op_ext16u_i32: 1416 return TCG_TARGET_HAS_ext16u_i32; 1417 case INDEX_op_bswap16_i32: 1418 return TCG_TARGET_HAS_bswap16_i32; 1419 case INDEX_op_bswap32_i32: 1420 return TCG_TARGET_HAS_bswap32_i32; 1421 case INDEX_op_not_i32: 1422 return TCG_TARGET_HAS_not_i32; 1423 case INDEX_op_neg_i32: 1424 return TCG_TARGET_HAS_neg_i32; 1425 case INDEX_op_andc_i32: 1426 return TCG_TARGET_HAS_andc_i32; 1427 case INDEX_op_orc_i32: 1428 return TCG_TARGET_HAS_orc_i32; 1429 case INDEX_op_eqv_i32: 1430 return TCG_TARGET_HAS_eqv_i32; 1431 case INDEX_op_nand_i32: 1432 return TCG_TARGET_HAS_nand_i32; 1433 case INDEX_op_nor_i32: 1434 return TCG_TARGET_HAS_nor_i32; 1435 case INDEX_op_clz_i32: 1436 return TCG_TARGET_HAS_clz_i32; 1437 case INDEX_op_ctz_i32: 1438 return TCG_TARGET_HAS_ctz_i32; 1439 case INDEX_op_ctpop_i32: 1440 return TCG_TARGET_HAS_ctpop_i32; 1441 1442 case INDEX_op_brcond2_i32: 1443 case INDEX_op_setcond2_i32: 1444 return TCG_TARGET_REG_BITS == 32; 1445 1446 case INDEX_op_mov_i64: 1447 case INDEX_op_movi_i64: 1448 case 
INDEX_op_setcond_i64: 1449 case INDEX_op_brcond_i64: 1450 case INDEX_op_ld8u_i64: 1451 case INDEX_op_ld8s_i64: 1452 case INDEX_op_ld16u_i64: 1453 case INDEX_op_ld16s_i64: 1454 case INDEX_op_ld32u_i64: 1455 case INDEX_op_ld32s_i64: 1456 case INDEX_op_ld_i64: 1457 case INDEX_op_st8_i64: 1458 case INDEX_op_st16_i64: 1459 case INDEX_op_st32_i64: 1460 case INDEX_op_st_i64: 1461 case INDEX_op_add_i64: 1462 case INDEX_op_sub_i64: 1463 case INDEX_op_mul_i64: 1464 case INDEX_op_and_i64: 1465 case INDEX_op_or_i64: 1466 case INDEX_op_xor_i64: 1467 case INDEX_op_shl_i64: 1468 case INDEX_op_shr_i64: 1469 case INDEX_op_sar_i64: 1470 case INDEX_op_ext_i32_i64: 1471 case INDEX_op_extu_i32_i64: 1472 return TCG_TARGET_REG_BITS == 64; 1473 1474 case INDEX_op_movcond_i64: 1475 return TCG_TARGET_HAS_movcond_i64; 1476 case INDEX_op_div_i64: 1477 case INDEX_op_divu_i64: 1478 return TCG_TARGET_HAS_div_i64; 1479 case INDEX_op_rem_i64: 1480 case INDEX_op_remu_i64: 1481 return TCG_TARGET_HAS_rem_i64; 1482 case INDEX_op_div2_i64: 1483 case INDEX_op_divu2_i64: 1484 return TCG_TARGET_HAS_div2_i64; 1485 case INDEX_op_rotl_i64: 1486 case INDEX_op_rotr_i64: 1487 return TCG_TARGET_HAS_rot_i64; 1488 case INDEX_op_deposit_i64: 1489 return TCG_TARGET_HAS_deposit_i64; 1490 case INDEX_op_extract_i64: 1491 return TCG_TARGET_HAS_extract_i64; 1492 case INDEX_op_sextract_i64: 1493 return TCG_TARGET_HAS_sextract_i64; 1494 case INDEX_op_extrl_i64_i32: 1495 return TCG_TARGET_HAS_extrl_i64_i32; 1496 case INDEX_op_extrh_i64_i32: 1497 return TCG_TARGET_HAS_extrh_i64_i32; 1498 case INDEX_op_ext8s_i64: 1499 return TCG_TARGET_HAS_ext8s_i64; 1500 case INDEX_op_ext16s_i64: 1501 return TCG_TARGET_HAS_ext16s_i64; 1502 case INDEX_op_ext32s_i64: 1503 return TCG_TARGET_HAS_ext32s_i64; 1504 case INDEX_op_ext8u_i64: 1505 return TCG_TARGET_HAS_ext8u_i64; 1506 case INDEX_op_ext16u_i64: 1507 return TCG_TARGET_HAS_ext16u_i64; 1508 case INDEX_op_ext32u_i64: 1509 return TCG_TARGET_HAS_ext32u_i64; 1510 case INDEX_op_bswap16_i64: 1511 return TCG_TARGET_HAS_bswap16_i64; 1512 case INDEX_op_bswap32_i64: 1513 return TCG_TARGET_HAS_bswap32_i64; 1514 case INDEX_op_bswap64_i64: 1515 return TCG_TARGET_HAS_bswap64_i64; 1516 case INDEX_op_not_i64: 1517 return TCG_TARGET_HAS_not_i64; 1518 case INDEX_op_neg_i64: 1519 return TCG_TARGET_HAS_neg_i64; 1520 case INDEX_op_andc_i64: 1521 return TCG_TARGET_HAS_andc_i64; 1522 case INDEX_op_orc_i64: 1523 return TCG_TARGET_HAS_orc_i64; 1524 case INDEX_op_eqv_i64: 1525 return TCG_TARGET_HAS_eqv_i64; 1526 case INDEX_op_nand_i64: 1527 return TCG_TARGET_HAS_nand_i64; 1528 case INDEX_op_nor_i64: 1529 return TCG_TARGET_HAS_nor_i64; 1530 case INDEX_op_clz_i64: 1531 return TCG_TARGET_HAS_clz_i64; 1532 case INDEX_op_ctz_i64: 1533 return TCG_TARGET_HAS_ctz_i64; 1534 case INDEX_op_ctpop_i64: 1535 return TCG_TARGET_HAS_ctpop_i64; 1536 case INDEX_op_add2_i64: 1537 return TCG_TARGET_HAS_add2_i64; 1538 case INDEX_op_sub2_i64: 1539 return TCG_TARGET_HAS_sub2_i64; 1540 case INDEX_op_mulu2_i64: 1541 return TCG_TARGET_HAS_mulu2_i64; 1542 case INDEX_op_muls2_i64: 1543 return TCG_TARGET_HAS_muls2_i64; 1544 case INDEX_op_muluh_i64: 1545 return TCG_TARGET_HAS_muluh_i64; 1546 case INDEX_op_mulsh_i64: 1547 return TCG_TARGET_HAS_mulsh_i64; 1548 1549 case INDEX_op_mov_vec: 1550 case INDEX_op_dup_vec: 1551 case INDEX_op_dupi_vec: 1552 case INDEX_op_ld_vec: 1553 case INDEX_op_st_vec: 1554 case INDEX_op_add_vec: 1555 case INDEX_op_sub_vec: 1556 case INDEX_op_and_vec: 1557 case INDEX_op_or_vec: 1558 case INDEX_op_xor_vec: 1559 case INDEX_op_cmp_vec: 1560 
return have_vec; 1561 case INDEX_op_dup2_vec: 1562 return have_vec && TCG_TARGET_REG_BITS == 32; 1563 case INDEX_op_not_vec: 1564 return have_vec && TCG_TARGET_HAS_not_vec; 1565 case INDEX_op_neg_vec: 1566 return have_vec && TCG_TARGET_HAS_neg_vec; 1567 case INDEX_op_andc_vec: 1568 return have_vec && TCG_TARGET_HAS_andc_vec; 1569 case INDEX_op_orc_vec: 1570 return have_vec && TCG_TARGET_HAS_orc_vec; 1571 case INDEX_op_mul_vec: 1572 return have_vec && TCG_TARGET_HAS_mul_vec; 1573 case INDEX_op_shli_vec: 1574 case INDEX_op_shri_vec: 1575 case INDEX_op_sari_vec: 1576 return have_vec && TCG_TARGET_HAS_shi_vec; 1577 case INDEX_op_shls_vec: 1578 case INDEX_op_shrs_vec: 1579 case INDEX_op_sars_vec: 1580 return have_vec && TCG_TARGET_HAS_shs_vec; 1581 case INDEX_op_shlv_vec: 1582 case INDEX_op_shrv_vec: 1583 case INDEX_op_sarv_vec: 1584 return have_vec && TCG_TARGET_HAS_shv_vec; 1585 1586 default: 1587 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 1588 return true; 1589 } 1590 } 1591 1592 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1593 and endian swap. Maybe it would be better to do the alignment 1594 and endian swap in tcg_reg_alloc_call(). */ 1595 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1596 { 1597 int i, real_args, nb_rets, pi; 1598 unsigned sizemask, flags; 1599 TCGHelperInfo *info; 1600 TCGOp *op; 1601 1602 info = g_hash_table_lookup(helper_table, (gpointer)func); 1603 flags = info->flags; 1604 sizemask = info->sizemask; 1605 1606 #if defined(__sparc__) && !defined(__arch64__) \ 1607 && !defined(CONFIG_TCG_INTERPRETER) 1608 /* We have 64-bit values in one register, but need to pass as two 1609 separate parameters. Split them. */ 1610 int orig_sizemask = sizemask; 1611 int orig_nargs = nargs; 1612 TCGv_i64 retl, reth; 1613 TCGTemp *split_args[MAX_OPC_PARAM]; 1614 1615 retl = NULL; 1616 reth = NULL; 1617 if (sizemask != 0) { 1618 for (i = real_args = 0; i < nargs; ++i) { 1619 int is_64bit = sizemask & (1 << (i+1)*2); 1620 if (is_64bit) { 1621 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1622 TCGv_i32 h = tcg_temp_new_i32(); 1623 TCGv_i32 l = tcg_temp_new_i32(); 1624 tcg_gen_extr_i64_i32(l, h, orig); 1625 split_args[real_args++] = tcgv_i32_temp(h); 1626 split_args[real_args++] = tcgv_i32_temp(l); 1627 } else { 1628 split_args[real_args++] = args[i]; 1629 } 1630 } 1631 nargs = real_args; 1632 args = split_args; 1633 sizemask = 0; 1634 } 1635 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1636 for (i = 0; i < nargs; ++i) { 1637 int is_64bit = sizemask & (1 << (i+1)*2); 1638 int is_signed = sizemask & (2 << (i+1)*2); 1639 if (!is_64bit) { 1640 TCGv_i64 temp = tcg_temp_new_i64(); 1641 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1642 if (is_signed) { 1643 tcg_gen_ext32s_i64(temp, orig); 1644 } else { 1645 tcg_gen_ext32u_i64(temp, orig); 1646 } 1647 args[i] = tcgv_i64_temp(temp); 1648 } 1649 } 1650 #endif /* TCG_TARGET_EXTEND_ARGS */ 1651 1652 op = tcg_emit_op(INDEX_op_call); 1653 1654 pi = 0; 1655 if (ret != NULL) { 1656 #if defined(__sparc__) && !defined(__arch64__) \ 1657 && !defined(CONFIG_TCG_INTERPRETER) 1658 if (orig_sizemask & 1) { 1659 /* The 32-bit ABI is going to return the 64-bit value in 1660 the %o0/%o1 register pair. Prepare for this by using 1661 two return temporaries, and reassemble below. 
*/ 1662 retl = tcg_temp_new_i64(); 1663 reth = tcg_temp_new_i64(); 1664 op->args[pi++] = tcgv_i64_arg(reth); 1665 op->args[pi++] = tcgv_i64_arg(retl); 1666 nb_rets = 2; 1667 } else { 1668 op->args[pi++] = temp_arg(ret); 1669 nb_rets = 1; 1670 } 1671 #else 1672 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1673 #ifdef HOST_WORDS_BIGENDIAN 1674 op->args[pi++] = temp_arg(ret + 1); 1675 op->args[pi++] = temp_arg(ret); 1676 #else 1677 op->args[pi++] = temp_arg(ret); 1678 op->args[pi++] = temp_arg(ret + 1); 1679 #endif 1680 nb_rets = 2; 1681 } else { 1682 op->args[pi++] = temp_arg(ret); 1683 nb_rets = 1; 1684 } 1685 #endif 1686 } else { 1687 nb_rets = 0; 1688 } 1689 TCGOP_CALLO(op) = nb_rets; 1690 1691 real_args = 0; 1692 for (i = 0; i < nargs; i++) { 1693 int is_64bit = sizemask & (1 << (i+1)*2); 1694 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1695 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1696 /* some targets want aligned 64 bit args */ 1697 if (real_args & 1) { 1698 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1699 real_args++; 1700 } 1701 #endif 1702 /* If stack grows up, then we will be placing successive 1703 arguments at lower addresses, which means we need to 1704 reverse the order compared to how we would normally 1705 treat either big or little-endian. For those arguments 1706 that will wind up in registers, this still works for 1707 HPPA (the only current STACK_GROWSUP target) since the 1708 argument registers are *also* allocated in decreasing 1709 order. If another such target is added, this logic may 1710 have to get more complicated to differentiate between 1711 stack arguments and register arguments. */ 1712 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1713 op->args[pi++] = temp_arg(args[i] + 1); 1714 op->args[pi++] = temp_arg(args[i]); 1715 #else 1716 op->args[pi++] = temp_arg(args[i]); 1717 op->args[pi++] = temp_arg(args[i] + 1); 1718 #endif 1719 real_args += 2; 1720 continue; 1721 } 1722 1723 op->args[pi++] = temp_arg(args[i]); 1724 real_args++; 1725 } 1726 op->args[pi++] = (uintptr_t)func; 1727 op->args[pi++] = flags; 1728 TCGOP_CALLI(op) = real_args; 1729 1730 /* Make sure the fields didn't overflow. */ 1731 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 1732 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1733 1734 #if defined(__sparc__) && !defined(__arch64__) \ 1735 && !defined(CONFIG_TCG_INTERPRETER) 1736 /* Free all of the parts we allocated above. */ 1737 for (i = real_args = 0; i < orig_nargs; ++i) { 1738 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1739 if (is_64bit) { 1740 tcg_temp_free_internal(args[real_args++]); 1741 tcg_temp_free_internal(args[real_args++]); 1742 } else { 1743 real_args++; 1744 } 1745 } 1746 if (orig_sizemask & 1) { 1747 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1748 Note that describing these as TCGv_i64 eliminates an unnecessary 1749 zero-extension that tcg_gen_concat_i32_i64 would create. */ 1750 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1751 tcg_temp_free_i64(retl); 1752 tcg_temp_free_i64(reth); 1753 } 1754 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1755 for (i = 0; i < nargs; ++i) { 1756 int is_64bit = sizemask & (1 << (i+1)*2); 1757 if (!is_64bit) { 1758 tcg_temp_free_internal(args[i]); 1759 } 1760 } 1761 #endif /* TCG_TARGET_EXTEND_ARGS */ 1762 } 1763 1764 static void tcg_reg_alloc_start(TCGContext *s) 1765 { 1766 int i, n; 1767 TCGTemp *ts; 1768 1769 for (i = 0, n = s->nb_globals; i < n; i++) { 1770 ts = &s->temps[i]; 1771 ts->val_type = (ts->fixed_reg ? 
TEMP_VAL_REG : TEMP_VAL_MEM); 1772 } 1773 for (n = s->nb_temps; i < n; i++) { 1774 ts = &s->temps[i]; 1775 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1776 ts->mem_allocated = 0; 1777 ts->fixed_reg = 0; 1778 } 1779 1780 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 1781 } 1782 1783 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 1784 TCGTemp *ts) 1785 { 1786 int idx = temp_idx(ts); 1787 1788 if (ts->temp_global) { 1789 pstrcpy(buf, buf_size, ts->name); 1790 } else if (ts->temp_local) { 1791 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 1792 } else { 1793 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 1794 } 1795 return buf; 1796 } 1797 1798 static char *tcg_get_arg_str(TCGContext *s, char *buf, 1799 int buf_size, TCGArg arg) 1800 { 1801 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 1802 } 1803 1804 /* Find helper name. */ 1805 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 1806 { 1807 const char *ret = NULL; 1808 if (helper_table) { 1809 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 1810 if (info) { 1811 ret = info->name; 1812 } 1813 } 1814 return ret; 1815 } 1816 1817 static const char * const cond_name[] = 1818 { 1819 [TCG_COND_NEVER] = "never", 1820 [TCG_COND_ALWAYS] = "always", 1821 [TCG_COND_EQ] = "eq", 1822 [TCG_COND_NE] = "ne", 1823 [TCG_COND_LT] = "lt", 1824 [TCG_COND_GE] = "ge", 1825 [TCG_COND_LE] = "le", 1826 [TCG_COND_GT] = "gt", 1827 [TCG_COND_LTU] = "ltu", 1828 [TCG_COND_GEU] = "geu", 1829 [TCG_COND_LEU] = "leu", 1830 [TCG_COND_GTU] = "gtu" 1831 }; 1832 1833 static const char * const ldst_name[] = 1834 { 1835 [MO_UB] = "ub", 1836 [MO_SB] = "sb", 1837 [MO_LEUW] = "leuw", 1838 [MO_LESW] = "lesw", 1839 [MO_LEUL] = "leul", 1840 [MO_LESL] = "lesl", 1841 [MO_LEQ] = "leq", 1842 [MO_BEUW] = "beuw", 1843 [MO_BESW] = "besw", 1844 [MO_BEUL] = "beul", 1845 [MO_BESL] = "besl", 1846 [MO_BEQ] = "beq", 1847 }; 1848 1849 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1850 #ifdef ALIGNED_ONLY 1851 [MO_UNALN >> MO_ASHIFT] = "un+", 1852 [MO_ALIGN >> MO_ASHIFT] = "", 1853 #else 1854 [MO_UNALN >> MO_ASHIFT] = "", 1855 [MO_ALIGN >> MO_ASHIFT] = "al+", 1856 #endif 1857 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1858 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1859 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1860 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1861 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1862 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1863 }; 1864 1865 void tcg_dump_ops(TCGContext *s) 1866 { 1867 char buf[128]; 1868 TCGOp *op; 1869 1870 QTAILQ_FOREACH(op, &s->ops, link) { 1871 int i, k, nb_oargs, nb_iargs, nb_cargs; 1872 const TCGOpDef *def; 1873 TCGOpcode c; 1874 int col = 0; 1875 1876 c = op->opc; 1877 def = &tcg_op_defs[c]; 1878 1879 if (c == INDEX_op_insn_start) { 1880 col += qemu_log("\n ----"); 1881 1882 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1883 target_ulong a; 1884 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1885 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 1886 #else 1887 a = op->args[i]; 1888 #endif 1889 col += qemu_log(" " TARGET_FMT_lx, a); 1890 } 1891 } else if (c == INDEX_op_call) { 1892 /* variable number of arguments */ 1893 nb_oargs = TCGOP_CALLO(op); 1894 nb_iargs = TCGOP_CALLI(op); 1895 nb_cargs = def->nb_cargs; 1896 1897 /* function name, flags, out args */ 1898 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1899 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 1900 op->args[nb_oargs + nb_iargs + 1], 
nb_oargs); 1901 for (i = 0; i < nb_oargs; i++) { 1902 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 1903 op->args[i])); 1904 } 1905 for (i = 0; i < nb_iargs; i++) { 1906 TCGArg arg = op->args[nb_oargs + i]; 1907 const char *t = "<dummy>"; 1908 if (arg != TCG_CALL_DUMMY_ARG) { 1909 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 1910 } 1911 col += qemu_log(",%s", t); 1912 } 1913 } else { 1914 col += qemu_log(" %s ", def->name); 1915 1916 nb_oargs = def->nb_oargs; 1917 nb_iargs = def->nb_iargs; 1918 nb_cargs = def->nb_cargs; 1919 1920 if (def->flags & TCG_OPF_VECTOR) { 1921 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 1922 8 << TCGOP_VECE(op)); 1923 } 1924 1925 k = 0; 1926 for (i = 0; i < nb_oargs; i++) { 1927 if (k != 0) { 1928 col += qemu_log(","); 1929 } 1930 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1931 op->args[k++])); 1932 } 1933 for (i = 0; i < nb_iargs; i++) { 1934 if (k != 0) { 1935 col += qemu_log(","); 1936 } 1937 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1938 op->args[k++])); 1939 } 1940 switch (c) { 1941 case INDEX_op_brcond_i32: 1942 case INDEX_op_setcond_i32: 1943 case INDEX_op_movcond_i32: 1944 case INDEX_op_brcond2_i32: 1945 case INDEX_op_setcond2_i32: 1946 case INDEX_op_brcond_i64: 1947 case INDEX_op_setcond_i64: 1948 case INDEX_op_movcond_i64: 1949 case INDEX_op_cmp_vec: 1950 if (op->args[k] < ARRAY_SIZE(cond_name) 1951 && cond_name[op->args[k]]) { 1952 col += qemu_log(",%s", cond_name[op->args[k++]]); 1953 } else { 1954 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 1955 } 1956 i = 1; 1957 break; 1958 case INDEX_op_qemu_ld_i32: 1959 case INDEX_op_qemu_st_i32: 1960 case INDEX_op_qemu_ld_i64: 1961 case INDEX_op_qemu_st_i64: 1962 { 1963 TCGMemOpIdx oi = op->args[k++]; 1964 TCGMemOp op = get_memop(oi); 1965 unsigned ix = get_mmuidx(oi); 1966 1967 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1968 col += qemu_log(",$0x%x,%u", op, ix); 1969 } else { 1970 const char *s_al, *s_op; 1971 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1972 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1973 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1974 } 1975 i = 1; 1976 } 1977 break; 1978 default: 1979 i = 0; 1980 break; 1981 } 1982 switch (c) { 1983 case INDEX_op_set_label: 1984 case INDEX_op_br: 1985 case INDEX_op_brcond_i32: 1986 case INDEX_op_brcond_i64: 1987 case INDEX_op_brcond2_i32: 1988 col += qemu_log("%s$L%d", k ? "," : "", 1989 arg_label(op->args[k])->id); 1990 i++, k++; 1991 break; 1992 default: 1993 break; 1994 } 1995 for (; i < nb_cargs; i++, k++) { 1996 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 1997 } 1998 } 1999 if (op->life) { 2000 unsigned life = op->life; 2001 2002 for (; col < 48; ++col) { 2003 putc(' ', qemu_logfile); 2004 } 2005 2006 if (life & (SYNC_ARG * 3)) { 2007 qemu_log(" sync:"); 2008 for (i = 0; i < 2; ++i) { 2009 if (life & (SYNC_ARG << i)) { 2010 qemu_log(" %d", i); 2011 } 2012 } 2013 } 2014 life /= DEAD_ARG; 2015 if (life) { 2016 qemu_log(" dead:"); 2017 for (i = 0; life; ++i, life >>= 1) { 2018 if (life & 1) { 2019 qemu_log(" %d", i); 2020 } 2021 } 2022 } 2023 } 2024 qemu_log("\n"); 2025 } 2026 } 2027 2028 /* we give more priority to constraints with less registers */ 2029 static int get_constraint_priority(const TCGOpDef *def, int k) 2030 { 2031 const TCGArgConstraint *arg_ct; 2032 2033 int i, n; 2034 arg_ct = &def->args_ct[k]; 2035 if (arg_ct->ct & TCG_CT_ALIAS) { 2036 /* an alias is equivalent to a single register */ 2037 n = 1; 2038 } else { 2039 if (!(arg_ct->ct & TCG_CT_REG)) 2040 return 0; 2041 n = 0; 2042 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2043 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 2044 n++; 2045 } 2046 } 2047 return TCG_TARGET_NB_REGS - n + 1; 2048 } 2049 2050 /* sort from highest priority to lowest */ 2051 static void sort_constraints(TCGOpDef *def, int start, int n) 2052 { 2053 int i, j, p1, p2, tmp; 2054 2055 for(i = 0; i < n; i++) 2056 def->sorted_args[start + i] = start + i; 2057 if (n <= 1) 2058 return; 2059 for(i = 0; i < n - 1; i++) { 2060 for(j = i + 1; j < n; j++) { 2061 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 2062 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 2063 if (p1 < p2) { 2064 tmp = def->sorted_args[start + i]; 2065 def->sorted_args[start + i] = def->sorted_args[start + j]; 2066 def->sorted_args[start + j] = tmp; 2067 } 2068 } 2069 } 2070 } 2071 2072 static void process_op_defs(TCGContext *s) 2073 { 2074 TCGOpcode op; 2075 2076 for (op = 0; op < NB_OPS; op++) { 2077 TCGOpDef *def = &tcg_op_defs[op]; 2078 const TCGTargetOpDef *tdefs; 2079 TCGType type; 2080 int i, nb_args; 2081 2082 if (def->flags & TCG_OPF_NOT_PRESENT) { 2083 continue; 2084 } 2085 2086 nb_args = def->nb_iargs + def->nb_oargs; 2087 if (nb_args == 0) { 2088 continue; 2089 } 2090 2091 tdefs = tcg_target_op_def(op); 2092 /* Missing TCGTargetOpDef entry. */ 2093 tcg_debug_assert(tdefs != NULL); 2094 2095 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 2096 for (i = 0; i < nb_args; i++) { 2097 const char *ct_str = tdefs->args_ct_str[i]; 2098 /* Incomplete TCGTargetOpDef entry. */ 2099 tcg_debug_assert(ct_str != NULL); 2100 2101 def->args_ct[i].u.regs = 0; 2102 def->args_ct[i].ct = 0; 2103 while (*ct_str != '\0') { 2104 switch(*ct_str) { 2105 case '0' ... '9': 2106 { 2107 int oarg = *ct_str - '0'; 2108 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2109 tcg_debug_assert(oarg < def->nb_oargs); 2110 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 2111 /* TCG_CT_ALIAS is for the output arguments. 2112 The input is tagged with TCG_CT_IALIAS. 
*/ 2113 def->args_ct[i] = def->args_ct[oarg]; 2114 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 2115 def->args_ct[oarg].alias_index = i; 2116 def->args_ct[i].ct |= TCG_CT_IALIAS; 2117 def->args_ct[i].alias_index = oarg; 2118 } 2119 ct_str++; 2120 break; 2121 case '&': 2122 def->args_ct[i].ct |= TCG_CT_NEWREG; 2123 ct_str++; 2124 break; 2125 case 'i': 2126 def->args_ct[i].ct |= TCG_CT_CONST; 2127 ct_str++; 2128 break; 2129 default: 2130 ct_str = target_parse_constraint(&def->args_ct[i], 2131 ct_str, type); 2132 /* Typo in TCGTargetOpDef constraint. */ 2133 tcg_debug_assert(ct_str != NULL); 2134 } 2135 } 2136 } 2137 2138 /* TCGTargetOpDef entry with too much information? */ 2139 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2140 2141 /* sort the constraints (XXX: this is just an heuristic) */ 2142 sort_constraints(def, 0, def->nb_oargs); 2143 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2144 } 2145 } 2146 2147 void tcg_op_remove(TCGContext *s, TCGOp *op) 2148 { 2149 QTAILQ_REMOVE(&s->ops, op, link); 2150 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2151 s->nb_ops--; 2152 2153 #ifdef CONFIG_PROFILER 2154 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2155 #endif 2156 } 2157 2158 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2159 { 2160 TCGContext *s = tcg_ctx; 2161 TCGOp *op; 2162 2163 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2164 op = tcg_malloc(sizeof(TCGOp)); 2165 } else { 2166 op = QTAILQ_FIRST(&s->free_ops); 2167 QTAILQ_REMOVE(&s->free_ops, op, link); 2168 } 2169 memset(op, 0, offsetof(TCGOp, link)); 2170 op->opc = opc; 2171 s->nb_ops++; 2172 2173 return op; 2174 } 2175 2176 TCGOp *tcg_emit_op(TCGOpcode opc) 2177 { 2178 TCGOp *op = tcg_op_alloc(opc); 2179 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2180 return op; 2181 } 2182 2183 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2184 TCGOpcode opc, int nargs) 2185 { 2186 TCGOp *new_op = tcg_op_alloc(opc); 2187 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2188 return new_op; 2189 } 2190 2191 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2192 TCGOpcode opc, int nargs) 2193 { 2194 TCGOp *new_op = tcg_op_alloc(opc); 2195 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2196 return new_op; 2197 } 2198 2199 #define TS_DEAD 1 2200 #define TS_MEM 2 2201 2202 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2203 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2204 2205 /* liveness analysis: end of function: all temps are dead, and globals 2206 should be in memory. */ 2207 static void tcg_la_func_end(TCGContext *s) 2208 { 2209 int ng = s->nb_globals; 2210 int nt = s->nb_temps; 2211 int i; 2212 2213 for (i = 0; i < ng; ++i) { 2214 s->temps[i].state = TS_DEAD | TS_MEM; 2215 } 2216 for (i = ng; i < nt; ++i) { 2217 s->temps[i].state = TS_DEAD; 2218 } 2219 } 2220 2221 /* liveness analysis: end of basic block: all temps are dead, globals 2222 and local temps should be in memory. */ 2223 static void tcg_la_bb_end(TCGContext *s) 2224 { 2225 int ng = s->nb_globals; 2226 int nt = s->nb_temps; 2227 int i; 2228 2229 for (i = 0; i < ng; ++i) { 2230 s->temps[i].state = TS_DEAD | TS_MEM; 2231 } 2232 for (i = ng; i < nt; ++i) { 2233 s->temps[i].state = (s->temps[i].temp_local 2234 ? TS_DEAD | TS_MEM 2235 : TS_DEAD); 2236 } 2237 } 2238 2239 /* Liveness analysis : update the opc_arg_life array to tell if a 2240 given input arguments is dead. Instructions updating dead 2241 temporaries are removed. 
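   The pass first marks every temp as dead at the end of the TB (see
   tcg_la_func_end) and then walks the op list in reverse, so that a use
   later in the TB keeps the temp live across all earlier ops.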
*/ 2242 static void liveness_pass_1(TCGContext *s) 2243 { 2244 int nb_globals = s->nb_globals; 2245 TCGOp *op, *op_prev; 2246 2247 tcg_la_func_end(s); 2248 2249 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) { 2250 int i, nb_iargs, nb_oargs; 2251 TCGOpcode opc_new, opc_new2; 2252 bool have_opc_new2; 2253 TCGLifeData arg_life = 0; 2254 TCGTemp *arg_ts; 2255 TCGOpcode opc = op->opc; 2256 const TCGOpDef *def = &tcg_op_defs[opc]; 2257 2258 switch (opc) { 2259 case INDEX_op_call: 2260 { 2261 int call_flags; 2262 2263 nb_oargs = TCGOP_CALLO(op); 2264 nb_iargs = TCGOP_CALLI(op); 2265 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2266 2267 /* pure functions can be removed if their result is unused */ 2268 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2269 for (i = 0; i < nb_oargs; i++) { 2270 arg_ts = arg_temp(op->args[i]); 2271 if (arg_ts->state != TS_DEAD) { 2272 goto do_not_remove_call; 2273 } 2274 } 2275 goto do_remove; 2276 } else { 2277 do_not_remove_call: 2278 2279 /* output args are dead */ 2280 for (i = 0; i < nb_oargs; i++) { 2281 arg_ts = arg_temp(op->args[i]); 2282 if (arg_ts->state & TS_DEAD) { 2283 arg_life |= DEAD_ARG << i; 2284 } 2285 if (arg_ts->state & TS_MEM) { 2286 arg_life |= SYNC_ARG << i; 2287 } 2288 arg_ts->state = TS_DEAD; 2289 } 2290 2291 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2292 TCG_CALL_NO_READ_GLOBALS))) { 2293 /* globals should go back to memory */ 2294 for (i = 0; i < nb_globals; i++) { 2295 s->temps[i].state = TS_DEAD | TS_MEM; 2296 } 2297 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2298 /* globals should be synced to memory */ 2299 for (i = 0; i < nb_globals; i++) { 2300 s->temps[i].state |= TS_MEM; 2301 } 2302 } 2303 2304 /* record arguments that die in this helper */ 2305 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2306 arg_ts = arg_temp(op->args[i]); 2307 if (arg_ts && arg_ts->state & TS_DEAD) { 2308 arg_life |= DEAD_ARG << i; 2309 } 2310 } 2311 /* input arguments are live for preceding opcodes */ 2312 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2313 arg_ts = arg_temp(op->args[i]); 2314 if (arg_ts) { 2315 arg_ts->state &= ~TS_DEAD; 2316 } 2317 } 2318 } 2319 } 2320 break; 2321 case INDEX_op_insn_start: 2322 break; 2323 case INDEX_op_discard: 2324 /* mark the temporary as dead */ 2325 arg_temp(op->args[0])->state = TS_DEAD; 2326 break; 2327 2328 case INDEX_op_add2_i32: 2329 opc_new = INDEX_op_add_i32; 2330 goto do_addsub2; 2331 case INDEX_op_sub2_i32: 2332 opc_new = INDEX_op_sub_i32; 2333 goto do_addsub2; 2334 case INDEX_op_add2_i64: 2335 opc_new = INDEX_op_add_i64; 2336 goto do_addsub2; 2337 case INDEX_op_sub2_i64: 2338 opc_new = INDEX_op_sub_i64; 2339 do_addsub2: 2340 nb_iargs = 4; 2341 nb_oargs = 2; 2342 /* Test if the high part of the operation is dead, but not 2343 the low part. The result can be optimized to a simple 2344 add or sub. This happens often for x86_64 guest when the 2345 cpu mode is set to 32 bit. */ 2346 if (arg_temp(op->args[1])->state == TS_DEAD) { 2347 if (arg_temp(op->args[0])->state == TS_DEAD) { 2348 goto do_remove; 2349 } 2350 /* Replace the opcode and adjust the args in place, 2351 leaving 3 unused args at the end. */ 2352 op->opc = opc = opc_new; 2353 op->args[1] = op->args[2]; 2354 op->args[2] = op->args[4]; 2355 /* Fall through and mark the single-word operation live. 
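              For example, when the high output is dead but the low one
              is not,
                  add2_i32 rl,rh,al,ah,bl,bh
              is rewritten in place as
                  add_i32 rl,al,bl
              and the three trailing args are simply left unused.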
*/ 2356 nb_iargs = 2; 2357 nb_oargs = 1; 2358 } 2359 goto do_not_remove; 2360 2361 case INDEX_op_mulu2_i32: 2362 opc_new = INDEX_op_mul_i32; 2363 opc_new2 = INDEX_op_muluh_i32; 2364 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2365 goto do_mul2; 2366 case INDEX_op_muls2_i32: 2367 opc_new = INDEX_op_mul_i32; 2368 opc_new2 = INDEX_op_mulsh_i32; 2369 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2370 goto do_mul2; 2371 case INDEX_op_mulu2_i64: 2372 opc_new = INDEX_op_mul_i64; 2373 opc_new2 = INDEX_op_muluh_i64; 2374 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2375 goto do_mul2; 2376 case INDEX_op_muls2_i64: 2377 opc_new = INDEX_op_mul_i64; 2378 opc_new2 = INDEX_op_mulsh_i64; 2379 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2380 goto do_mul2; 2381 do_mul2: 2382 nb_iargs = 2; 2383 nb_oargs = 2; 2384 if (arg_temp(op->args[1])->state == TS_DEAD) { 2385 if (arg_temp(op->args[0])->state == TS_DEAD) { 2386 /* Both parts of the operation are dead. */ 2387 goto do_remove; 2388 } 2389 /* The high part of the operation is dead; generate the low. */ 2390 op->opc = opc = opc_new; 2391 op->args[1] = op->args[2]; 2392 op->args[2] = op->args[3]; 2393 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2394 /* The low part of the operation is dead; generate the high. */ 2395 op->opc = opc = opc_new2; 2396 op->args[0] = op->args[1]; 2397 op->args[1] = op->args[2]; 2398 op->args[2] = op->args[3]; 2399 } else { 2400 goto do_not_remove; 2401 } 2402 /* Mark the single-word operation live. */ 2403 nb_oargs = 1; 2404 goto do_not_remove; 2405 2406 default: 2407 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2408 nb_iargs = def->nb_iargs; 2409 nb_oargs = def->nb_oargs; 2410 2411 /* Test if the operation can be removed because all 2412 its outputs are dead. We assume that nb_oargs == 0 2413 implies side effects */ 2414 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2415 for (i = 0; i < nb_oargs; i++) { 2416 if (arg_temp(op->args[i])->state != TS_DEAD) { 2417 goto do_not_remove; 2418 } 2419 } 2420 do_remove: 2421 tcg_op_remove(s, op); 2422 } else { 2423 do_not_remove: 2424 /* output args are dead */ 2425 for (i = 0; i < nb_oargs; i++) { 2426 arg_ts = arg_temp(op->args[i]); 2427 if (arg_ts->state & TS_DEAD) { 2428 arg_life |= DEAD_ARG << i; 2429 } 2430 if (arg_ts->state & TS_MEM) { 2431 arg_life |= SYNC_ARG << i; 2432 } 2433 arg_ts->state = TS_DEAD; 2434 } 2435 2436 /* if end of basic block, update */ 2437 if (def->flags & TCG_OPF_BB_END) { 2438 tcg_la_bb_end(s); 2439 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2440 /* globals should be synced to memory */ 2441 for (i = 0; i < nb_globals; i++) { 2442 s->temps[i].state |= TS_MEM; 2443 } 2444 } 2445 2446 /* record arguments that die in this opcode */ 2447 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2448 arg_ts = arg_temp(op->args[i]); 2449 if (arg_ts->state & TS_DEAD) { 2450 arg_life |= DEAD_ARG << i; 2451 } 2452 } 2453 /* input arguments are live for preceding opcodes */ 2454 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2455 arg_temp(op->args[i])->state &= ~TS_DEAD; 2456 } 2457 } 2458 break; 2459 } 2460 op->life = arg_life; 2461 } 2462 } 2463 2464 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2465 static bool liveness_pass_2(TCGContext *s) 2466 { 2467 int nb_globals = s->nb_globals; 2468 int nb_temps, i; 2469 bool changes = false; 2470 TCGOp *op, *op_next; 2471 2472 /* Create a temporary for each indirect global. 
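       The new temp stands in for the indirect global between the explicit
       loads and stores inserted below; state_ptr links each global to its
       replacement.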
*/ 2473 for (i = 0; i < nb_globals; ++i) { 2474 TCGTemp *its = &s->temps[i]; 2475 if (its->indirect_reg) { 2476 TCGTemp *dts = tcg_temp_alloc(s); 2477 dts->type = its->type; 2478 dts->base_type = its->base_type; 2479 its->state_ptr = dts; 2480 } else { 2481 its->state_ptr = NULL; 2482 } 2483 /* All globals begin dead. */ 2484 its->state = TS_DEAD; 2485 } 2486 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2487 TCGTemp *its = &s->temps[i]; 2488 its->state_ptr = NULL; 2489 its->state = TS_DEAD; 2490 } 2491 2492 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2493 TCGOpcode opc = op->opc; 2494 const TCGOpDef *def = &tcg_op_defs[opc]; 2495 TCGLifeData arg_life = op->life; 2496 int nb_iargs, nb_oargs, call_flags; 2497 TCGTemp *arg_ts, *dir_ts; 2498 2499 if (opc == INDEX_op_call) { 2500 nb_oargs = TCGOP_CALLO(op); 2501 nb_iargs = TCGOP_CALLI(op); 2502 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2503 } else { 2504 nb_iargs = def->nb_iargs; 2505 nb_oargs = def->nb_oargs; 2506 2507 /* Set flags similar to how calls require. */ 2508 if (def->flags & TCG_OPF_BB_END) { 2509 /* Like writing globals: save_globals */ 2510 call_flags = 0; 2511 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2512 /* Like reading globals: sync_globals */ 2513 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2514 } else { 2515 /* No effect on globals. */ 2516 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2517 TCG_CALL_NO_WRITE_GLOBALS); 2518 } 2519 } 2520 2521 /* Make sure that input arguments are available. */ 2522 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2523 arg_ts = arg_temp(op->args[i]); 2524 if (arg_ts) { 2525 dir_ts = arg_ts->state_ptr; 2526 if (dir_ts && arg_ts->state == TS_DEAD) { 2527 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2528 ? INDEX_op_ld_i32 2529 : INDEX_op_ld_i64); 2530 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 2531 2532 lop->args[0] = temp_arg(dir_ts); 2533 lop->args[1] = temp_arg(arg_ts->mem_base); 2534 lop->args[2] = arg_ts->mem_offset; 2535 2536 /* Loaded, but synced with memory. */ 2537 arg_ts->state = TS_MEM; 2538 } 2539 } 2540 } 2541 2542 /* Perform input replacement, and mark inputs that became dead. 2543 No action is required except keeping temp_state up to date 2544 so that we reload when needed. */ 2545 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2546 arg_ts = arg_temp(op->args[i]); 2547 if (arg_ts) { 2548 dir_ts = arg_ts->state_ptr; 2549 if (dir_ts) { 2550 op->args[i] = temp_arg(dir_ts); 2551 changes = true; 2552 if (IS_DEAD_ARG(i)) { 2553 arg_ts->state = TS_DEAD; 2554 } 2555 } 2556 } 2557 } 2558 2559 /* Liveness analysis should ensure that the following are 2560 all correct, for call sites and basic block end points. */ 2561 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2562 /* Nothing to do */ 2563 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2564 for (i = 0; i < nb_globals; ++i) { 2565 /* Liveness should see that globals are synced back, 2566 that is, either TS_DEAD or TS_MEM. */ 2567 arg_ts = &s->temps[i]; 2568 tcg_debug_assert(arg_ts->state_ptr == 0 2569 || arg_ts->state != 0); 2570 } 2571 } else { 2572 for (i = 0; i < nb_globals; ++i) { 2573 /* Liveness should see that globals are saved back, 2574 that is, TS_DEAD, waiting to be reloaded. */ 2575 arg_ts = &s->temps[i]; 2576 tcg_debug_assert(arg_ts->state_ptr == 0 2577 || arg_ts->state == TS_DEAD); 2578 } 2579 } 2580 2581 /* Outputs become available. 
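       A write to an indirect global is redirected to its direct temp; the
       value is stored back to memory only at its last write (NEED_SYNC_ARG)
       and dropped entirely if the output is already dead.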
*/ 2582 for (i = 0; i < nb_oargs; i++) { 2583 arg_ts = arg_temp(op->args[i]); 2584 dir_ts = arg_ts->state_ptr; 2585 if (!dir_ts) { 2586 continue; 2587 } 2588 op->args[i] = temp_arg(dir_ts); 2589 changes = true; 2590 2591 /* The output is now live and modified. */ 2592 arg_ts->state = 0; 2593 2594 /* Sync outputs upon their last write. */ 2595 if (NEED_SYNC_ARG(i)) { 2596 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2597 ? INDEX_op_st_i32 2598 : INDEX_op_st_i64); 2599 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 2600 2601 sop->args[0] = temp_arg(dir_ts); 2602 sop->args[1] = temp_arg(arg_ts->mem_base); 2603 sop->args[2] = arg_ts->mem_offset; 2604 2605 arg_ts->state = TS_MEM; 2606 } 2607 /* Drop outputs that are dead. */ 2608 if (IS_DEAD_ARG(i)) { 2609 arg_ts->state = TS_DEAD; 2610 } 2611 } 2612 } 2613 2614 return changes; 2615 } 2616 2617 #ifdef CONFIG_DEBUG_TCG 2618 static void dump_regs(TCGContext *s) 2619 { 2620 TCGTemp *ts; 2621 int i; 2622 char buf[64]; 2623 2624 for(i = 0; i < s->nb_temps; i++) { 2625 ts = &s->temps[i]; 2626 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2627 switch(ts->val_type) { 2628 case TEMP_VAL_REG: 2629 printf("%s", tcg_target_reg_names[ts->reg]); 2630 break; 2631 case TEMP_VAL_MEM: 2632 printf("%d(%s)", (int)ts->mem_offset, 2633 tcg_target_reg_names[ts->mem_base->reg]); 2634 break; 2635 case TEMP_VAL_CONST: 2636 printf("$0x%" TCG_PRIlx, ts->val); 2637 break; 2638 case TEMP_VAL_DEAD: 2639 printf("D"); 2640 break; 2641 default: 2642 printf("???"); 2643 break; 2644 } 2645 printf("\n"); 2646 } 2647 2648 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2649 if (s->reg_to_temp[i] != NULL) { 2650 printf("%s: %s\n", 2651 tcg_target_reg_names[i], 2652 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2653 } 2654 } 2655 } 2656 2657 static void check_regs(TCGContext *s) 2658 { 2659 int reg; 2660 int k; 2661 TCGTemp *ts; 2662 char buf[64]; 2663 2664 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2665 ts = s->reg_to_temp[reg]; 2666 if (ts != NULL) { 2667 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2668 printf("Inconsistency for register %s:\n", 2669 tcg_target_reg_names[reg]); 2670 goto fail; 2671 } 2672 } 2673 } 2674 for (k = 0; k < s->nb_temps; k++) { 2675 ts = &s->temps[k]; 2676 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 2677 && s->reg_to_temp[ts->reg] != ts) { 2678 printf("Inconsistency for temp %s:\n", 2679 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2680 fail: 2681 printf("reg state:\n"); 2682 dump_regs(s); 2683 tcg_abort(); 2684 } 2685 } 2686 } 2687 #endif 2688 2689 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 2690 { 2691 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 2692 /* Sparc64 stack is accessed with offset of 2047 */ 2693 s->current_frame_offset = (s->current_frame_offset + 2694 (tcg_target_long)sizeof(tcg_target_long) - 1) & 2695 ~(sizeof(tcg_target_long) - 1); 2696 #endif 2697 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 2698 s->frame_end) { 2699 tcg_abort(); 2700 } 2701 ts->mem_offset = s->current_frame_offset; 2702 ts->mem_base = s->frame_temp; 2703 ts->mem_allocated = 1; 2704 s->current_frame_offset += sizeof(tcg_target_long); 2705 } 2706 2707 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 2708 2709 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 2710 mark it free; otherwise mark it dead. 
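   In both cases the register mapping, if any, is dropped.  A freed temp,
   and any dead local or global, falls back to TEMP_VAL_MEM since its
   canonical slot stays valid; a dead plain temp becomes TEMP_VAL_DEAD.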
*/ 2711 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 2712 { 2713 if (ts->fixed_reg) { 2714 return; 2715 } 2716 if (ts->val_type == TEMP_VAL_REG) { 2717 s->reg_to_temp[ts->reg] = NULL; 2718 } 2719 ts->val_type = (free_or_dead < 0 2720 || ts->temp_local 2721 || ts->temp_global 2722 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 2723 } 2724 2725 /* Mark a temporary as dead. */ 2726 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 2727 { 2728 temp_free_or_dead(s, ts, 1); 2729 } 2730 2731 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 2732 registers needs to be allocated to store a constant. If 'free_or_dead' 2733 is non-zero, subsequently release the temporary; if it is positive, the 2734 temp is dead; if it is negative, the temp is free. */ 2735 static void temp_sync(TCGContext *s, TCGTemp *ts, 2736 TCGRegSet allocated_regs, int free_or_dead) 2737 { 2738 if (ts->fixed_reg) { 2739 return; 2740 } 2741 if (!ts->mem_coherent) { 2742 if (!ts->mem_allocated) { 2743 temp_allocate_frame(s, ts); 2744 } 2745 switch (ts->val_type) { 2746 case TEMP_VAL_CONST: 2747 /* If we're going to free the temp immediately, then we won't 2748 require it later in a register, so attempt to store the 2749 constant to memory directly. */ 2750 if (free_or_dead 2751 && tcg_out_sti(s, ts->type, ts->val, 2752 ts->mem_base->reg, ts->mem_offset)) { 2753 break; 2754 } 2755 temp_load(s, ts, tcg_target_available_regs[ts->type], 2756 allocated_regs); 2757 /* fallthrough */ 2758 2759 case TEMP_VAL_REG: 2760 tcg_out_st(s, ts->type, ts->reg, 2761 ts->mem_base->reg, ts->mem_offset); 2762 break; 2763 2764 case TEMP_VAL_MEM: 2765 break; 2766 2767 case TEMP_VAL_DEAD: 2768 default: 2769 tcg_abort(); 2770 } 2771 ts->mem_coherent = 1; 2772 } 2773 if (free_or_dead) { 2774 temp_free_or_dead(s, ts, free_or_dead); 2775 } 2776 } 2777 2778 /* free register 'reg' by spilling the corresponding temporary if necessary */ 2779 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 2780 { 2781 TCGTemp *ts = s->reg_to_temp[reg]; 2782 if (ts != NULL) { 2783 temp_sync(s, ts, allocated_regs, -1); 2784 } 2785 } 2786 2787 /* Allocate a register belonging to reg1 & ~reg2 */ 2788 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 2789 TCGRegSet allocated_regs, bool rev) 2790 { 2791 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 2792 const int *order; 2793 TCGReg reg; 2794 TCGRegSet reg_ct; 2795 2796 reg_ct = desired_regs & ~allocated_regs; 2797 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 2798 2799 /* first try free registers */ 2800 for(i = 0; i < n; i++) { 2801 reg = order[i]; 2802 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2803 return reg; 2804 } 2805 2806 /* XXX: do better spill choice */ 2807 for(i = 0; i < n; i++) { 2808 reg = order[i]; 2809 if (tcg_regset_test_reg(reg_ct, reg)) { 2810 tcg_reg_free(s, reg, allocated_regs); 2811 return reg; 2812 } 2813 } 2814 2815 tcg_abort(); 2816 } 2817 2818 /* Make sure the temporary is in a register. If needed, allocate the register 2819 from DESIRED while avoiding ALLOCATED. 
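   Constants are materialized with tcg_out_movi() and spilled values are
   reloaded with tcg_out_ld(); the chosen register is then recorded in
   reg_to_temp[].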
*/ 2820 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 2821 TCGRegSet allocated_regs) 2822 { 2823 TCGReg reg; 2824 2825 switch (ts->val_type) { 2826 case TEMP_VAL_REG: 2827 return; 2828 case TEMP_VAL_CONST: 2829 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2830 tcg_out_movi(s, ts->type, reg, ts->val); 2831 ts->mem_coherent = 0; 2832 break; 2833 case TEMP_VAL_MEM: 2834 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2835 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2836 ts->mem_coherent = 1; 2837 break; 2838 case TEMP_VAL_DEAD: 2839 default: 2840 tcg_abort(); 2841 } 2842 ts->reg = reg; 2843 ts->val_type = TEMP_VAL_REG; 2844 s->reg_to_temp[reg] = ts; 2845 } 2846 2847 /* Save a temporary to memory. 'allocated_regs' is used in case a 2848 temporary register needs to be allocated to store a constant. */ 2849 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2850 { 2851 /* The liveness analysis already ensures that globals are back 2852 in memory. Keep a tcg_debug_assert for safety. */ 2853 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2854 } 2855 2856 /* save globals to their canonical location and assume they can be 2857 modified by the following code. 'allocated_regs' is used in case a 2858 temporary register needs to be allocated to store a constant. */ 2859 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2860 { 2861 int i, n; 2862 2863 for (i = 0, n = s->nb_globals; i < n; i++) { 2864 temp_save(s, &s->temps[i], allocated_regs); 2865 } 2866 } 2867 2868 /* sync globals to their canonical location and assume they can be 2869 read by the following code. 'allocated_regs' is used in case a 2870 temporary register needs to be allocated to store a constant. */ 2871 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2872 { 2873 int i, n; 2874 2875 for (i = 0, n = s->nb_globals; i < n; i++) { 2876 TCGTemp *ts = &s->temps[i]; 2877 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2878 || ts->fixed_reg 2879 || ts->mem_coherent); 2880 } 2881 } 2882 2883 /* at the end of a basic block, we assume all temporaries are dead and 2884 all globals are stored at their canonical location. */ 2885 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2886 { 2887 int i; 2888 2889 for (i = s->nb_globals; i < s->nb_temps; i++) { 2890 TCGTemp *ts = &s->temps[i]; 2891 if (ts->temp_local) { 2892 temp_save(s, ts, allocated_regs); 2893 } else { 2894 /* The liveness analysis already ensures that temps are dead. 2895 Keep a tcg_debug_assert for safety. */ 2896 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2897 } 2898 } 2899 2900 save_globals(s, allocated_regs); 2901 } 2902 2903 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2904 tcg_target_ulong val, TCGLifeData arg_life) 2905 { 2906 if (ots->fixed_reg) { 2907 /* For fixed registers, we do not do any constant propagation. */ 2908 tcg_out_movi(s, ots->type, ots->reg, val); 2909 return; 2910 } 2911 2912 /* The movi is not explicitly generated here.
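      The constant is only recorded in the temp (TEMP_VAL_CONST); it is
      materialized later by temp_load() or temp_sync() if a register or a
      memory copy turns out to be needed.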
*/ 2913 if (ots->val_type == TEMP_VAL_REG) { 2914 s->reg_to_temp[ots->reg] = NULL; 2915 } 2916 ots->val_type = TEMP_VAL_CONST; 2917 ots->val = val; 2918 ots->mem_coherent = 0; 2919 if (NEED_SYNC_ARG(0)) { 2920 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2921 } else if (IS_DEAD_ARG(0)) { 2922 temp_dead(s, ots); 2923 } 2924 } 2925 2926 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 2927 { 2928 TCGTemp *ots = arg_temp(op->args[0]); 2929 tcg_target_ulong val = op->args[1]; 2930 2931 tcg_reg_alloc_do_movi(s, ots, val, op->life); 2932 } 2933 2934 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 2935 { 2936 const TCGLifeData arg_life = op->life; 2937 TCGRegSet allocated_regs; 2938 TCGTemp *ts, *ots; 2939 TCGType otype, itype; 2940 2941 allocated_regs = s->reserved_regs; 2942 ots = arg_temp(op->args[0]); 2943 ts = arg_temp(op->args[1]); 2944 2945 /* Note that otype != itype for no-op truncation. */ 2946 otype = ots->type; 2947 itype = ts->type; 2948 2949 if (ts->val_type == TEMP_VAL_CONST) { 2950 /* propagate constant or generate sti */ 2951 tcg_target_ulong val = ts->val; 2952 if (IS_DEAD_ARG(1)) { 2953 temp_dead(s, ts); 2954 } 2955 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2956 return; 2957 } 2958 2959 /* If the source value is in memory we're going to be forced 2960 to have it in a register in order to perform the copy. Copy 2961 the SOURCE value into its own register first, that way we 2962 don't have to reload SOURCE the next time it is used. */ 2963 if (ts->val_type == TEMP_VAL_MEM) { 2964 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2965 } 2966 2967 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2968 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2969 /* mov to a non-saved dead register makes no sense (even with 2970 liveness analysis disabled). */ 2971 tcg_debug_assert(NEED_SYNC_ARG(0)); 2972 if (!ots->mem_allocated) { 2973 temp_allocate_frame(s, ots); 2974 } 2975 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2976 if (IS_DEAD_ARG(1)) { 2977 temp_dead(s, ts); 2978 } 2979 temp_dead(s, ots); 2980 } else { 2981 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2982 /* the mov can be suppressed */ 2983 if (ots->val_type == TEMP_VAL_REG) { 2984 s->reg_to_temp[ots->reg] = NULL; 2985 } 2986 ots->reg = ts->reg; 2987 temp_dead(s, ts); 2988 } else { 2989 if (ots->val_type != TEMP_VAL_REG) { 2990 /* When allocating a new register, make sure to not spill the 2991 input one. 
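               Reserving ts->reg in allocated_regs keeps tcg_reg_alloc()
               from choosing, and thereby spilling, the source register for
               the destination.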
*/ 2992 tcg_regset_set_reg(allocated_regs, ts->reg); 2993 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2994 allocated_regs, ots->indirect_base); 2995 } 2996 tcg_out_mov(s, otype, ots->reg, ts->reg); 2997 } 2998 ots->val_type = TEMP_VAL_REG; 2999 ots->mem_coherent = 0; 3000 s->reg_to_temp[ots->reg] = ots; 3001 if (NEED_SYNC_ARG(0)) { 3002 temp_sync(s, ots, allocated_regs, 0); 3003 } 3004 } 3005 } 3006 3007 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3008 { 3009 const TCGLifeData arg_life = op->life; 3010 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3011 TCGRegSet i_allocated_regs; 3012 TCGRegSet o_allocated_regs; 3013 int i, k, nb_iargs, nb_oargs; 3014 TCGReg reg; 3015 TCGArg arg; 3016 const TCGArgConstraint *arg_ct; 3017 TCGTemp *ts; 3018 TCGArg new_args[TCG_MAX_OP_ARGS]; 3019 int const_args[TCG_MAX_OP_ARGS]; 3020 3021 nb_oargs = def->nb_oargs; 3022 nb_iargs = def->nb_iargs; 3023 3024 /* copy constants */ 3025 memcpy(new_args + nb_oargs + nb_iargs, 3026 op->args + nb_oargs + nb_iargs, 3027 sizeof(TCGArg) * def->nb_cargs); 3028 3029 i_allocated_regs = s->reserved_regs; 3030 o_allocated_regs = s->reserved_regs; 3031 3032 /* satisfy input constraints */ 3033 for (k = 0; k < nb_iargs; k++) { 3034 i = def->sorted_args[nb_oargs + k]; 3035 arg = op->args[i]; 3036 arg_ct = &def->args_ct[i]; 3037 ts = arg_temp(arg); 3038 3039 if (ts->val_type == TEMP_VAL_CONST 3040 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 3041 /* constant is OK for instruction */ 3042 const_args[i] = 1; 3043 new_args[i] = ts->val; 3044 goto iarg_end; 3045 } 3046 3047 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 3048 3049 if (arg_ct->ct & TCG_CT_IALIAS) { 3050 if (ts->fixed_reg) { 3051 /* if fixed register, we must allocate a new register 3052 if the alias is not the same register */ 3053 if (arg != op->args[arg_ct->alias_index]) 3054 goto allocate_in_reg; 3055 } else { 3056 /* if the input is aliased to an output and if it is 3057 not dead after the instruction, we must allocate 3058 a new register and move it */ 3059 if (!IS_DEAD_ARG(i)) { 3060 goto allocate_in_reg; 3061 } 3062 /* check if the current register has already been allocated 3063 for another input aliased to an output */ 3064 int k2, i2; 3065 for (k2 = 0 ; k2 < k ; k2++) { 3066 i2 = def->sorted_args[nb_oargs + k2]; 3067 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 3068 (new_args[i2] == ts->reg)) { 3069 goto allocate_in_reg; 3070 } 3071 } 3072 } 3073 } 3074 reg = ts->reg; 3075 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3076 /* nothing to do : the constraint is satisfied */ 3077 } else { 3078 allocate_in_reg: 3079 /* allocate a new register matching the constraint 3080 and move the temporary register into it */ 3081 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 3082 ts->indirect_base); 3083 tcg_out_mov(s, ts->type, reg, ts->reg); 3084 } 3085 new_args[i] = reg; 3086 const_args[i] = 0; 3087 tcg_regset_set_reg(i_allocated_regs, reg); 3088 iarg_end: ; 3089 } 3090 3091 /* mark dead temporaries and free the associated registers */ 3092 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3093 if (IS_DEAD_ARG(i)) { 3094 temp_dead(s, arg_temp(op->args[i])); 3095 } 3096 } 3097 3098 if (def->flags & TCG_OPF_BB_END) { 3099 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3100 } else { 3101 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3102 /* XXX: permit generic clobber register list ? 
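           For now every register in tcg_target_call_clobber_regs is freed,
           spilling whatever temp currently lives in it, before the op is
           emitted.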
*/ 3103 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3104 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3105 tcg_reg_free(s, i, i_allocated_regs); 3106 } 3107 } 3108 } 3109 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3110 /* sync globals if the op has side effects and might trigger 3111 an exception. */ 3112 sync_globals(s, i_allocated_regs); 3113 } 3114 3115 /* satisfy the output constraints */ 3116 for(k = 0; k < nb_oargs; k++) { 3117 i = def->sorted_args[k]; 3118 arg = op->args[i]; 3119 arg_ct = &def->args_ct[i]; 3120 ts = arg_temp(arg); 3121 if ((arg_ct->ct & TCG_CT_ALIAS) 3122 && !const_args[arg_ct->alias_index]) { 3123 reg = new_args[arg_ct->alias_index]; 3124 } else if (arg_ct->ct & TCG_CT_NEWREG) { 3125 reg = tcg_reg_alloc(s, arg_ct->u.regs, 3126 i_allocated_regs | o_allocated_regs, 3127 ts->indirect_base); 3128 } else { 3129 /* if fixed register, we try to use it */ 3130 reg = ts->reg; 3131 if (ts->fixed_reg && 3132 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3133 goto oarg_end; 3134 } 3135 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 3136 ts->indirect_base); 3137 } 3138 tcg_regset_set_reg(o_allocated_regs, reg); 3139 /* if a fixed register is used, then a move will be done afterwards */ 3140 if (!ts->fixed_reg) { 3141 if (ts->val_type == TEMP_VAL_REG) { 3142 s->reg_to_temp[ts->reg] = NULL; 3143 } 3144 ts->val_type = TEMP_VAL_REG; 3145 ts->reg = reg; 3146 /* temp value is modified, so the value kept in memory is 3147 potentially not the same */ 3148 ts->mem_coherent = 0; 3149 s->reg_to_temp[reg] = ts; 3150 } 3151 oarg_end: 3152 new_args[i] = reg; 3153 } 3154 } 3155 3156 /* emit instruction */ 3157 if (def->flags & TCG_OPF_VECTOR) { 3158 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3159 new_args, const_args); 3160 } else { 3161 tcg_out_op(s, op->opc, new_args, const_args); 3162 } 3163 3164 /* move the outputs in the correct register if needed */ 3165 for(i = 0; i < nb_oargs; i++) { 3166 ts = arg_temp(op->args[i]); 3167 reg = new_args[i]; 3168 if (ts->fixed_reg && ts->reg != reg) { 3169 tcg_out_mov(s, ts->type, ts->reg, reg); 3170 } 3171 if (NEED_SYNC_ARG(i)) { 3172 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 3173 } else if (IS_DEAD_ARG(i)) { 3174 temp_dead(s, ts); 3175 } 3176 } 3177 } 3178 3179 #ifdef TCG_TARGET_STACK_GROWSUP 3180 #define STACK_DIR(x) (-(x)) 3181 #else 3182 #define STACK_DIR(x) (x) 3183 #endif 3184 3185 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3186 { 3187 const int nb_oargs = TCGOP_CALLO(op); 3188 const int nb_iargs = TCGOP_CALLI(op); 3189 const TCGLifeData arg_life = op->life; 3190 int flags, nb_regs, i; 3191 TCGReg reg; 3192 TCGArg arg; 3193 TCGTemp *ts; 3194 intptr_t stack_offset; 3195 size_t call_stack_size; 3196 tcg_insn_unit *func_addr; 3197 int allocate_args; 3198 TCGRegSet allocated_regs; 3199 3200 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 3201 flags = op->args[nb_oargs + nb_iargs + 1]; 3202 3203 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3204 if (nb_regs > nb_iargs) { 3205 nb_regs = nb_iargs; 3206 } 3207 3208 /* assign stack slots first */ 3209 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3210 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 3211 ~(TCG_TARGET_STACK_ALIGN - 1); 3212 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3213 if (allocate_args) { 3214 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3215 preallocate call stack */ 3216 tcg_abort(); 3217 } 3218 3219 stack_offset = 
TCG_TARGET_CALL_STACK_OFFSET; 3220 for (i = nb_regs; i < nb_iargs; i++) { 3221 arg = op->args[nb_oargs + i]; 3222 #ifdef TCG_TARGET_STACK_GROWSUP 3223 stack_offset -= sizeof(tcg_target_long); 3224 #endif 3225 if (arg != TCG_CALL_DUMMY_ARG) { 3226 ts = arg_temp(arg); 3227 temp_load(s, ts, tcg_target_available_regs[ts->type], 3228 s->reserved_regs); 3229 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3230 } 3231 #ifndef TCG_TARGET_STACK_GROWSUP 3232 stack_offset += sizeof(tcg_target_long); 3233 #endif 3234 } 3235 3236 /* assign input registers */ 3237 allocated_regs = s->reserved_regs; 3238 for (i = 0; i < nb_regs; i++) { 3239 arg = op->args[nb_oargs + i]; 3240 if (arg != TCG_CALL_DUMMY_ARG) { 3241 ts = arg_temp(arg); 3242 reg = tcg_target_call_iarg_regs[i]; 3243 tcg_reg_free(s, reg, allocated_regs); 3244 3245 if (ts->val_type == TEMP_VAL_REG) { 3246 if (ts->reg != reg) { 3247 tcg_out_mov(s, ts->type, reg, ts->reg); 3248 } 3249 } else { 3250 TCGRegSet arg_set = 0; 3251 3252 tcg_regset_set_reg(arg_set, reg); 3253 temp_load(s, ts, arg_set, allocated_regs); 3254 } 3255 3256 tcg_regset_set_reg(allocated_regs, reg); 3257 } 3258 } 3259 3260 /* mark dead temporaries and free the associated registers */ 3261 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3262 if (IS_DEAD_ARG(i)) { 3263 temp_dead(s, arg_temp(op->args[i])); 3264 } 3265 } 3266 3267 /* clobber call registers */ 3268 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3269 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3270 tcg_reg_free(s, i, allocated_regs); 3271 } 3272 } 3273 3274 /* Save globals if they might be written by the helper, sync them if 3275 they might be read. */ 3276 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3277 /* Nothing to do */ 3278 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3279 sync_globals(s, allocated_regs); 3280 } else { 3281 save_globals(s, allocated_regs); 3282 } 3283 3284 tcg_out_call(s, func_addr); 3285 3286 /* assign output registers and emit moves if needed */ 3287 for(i = 0; i < nb_oargs; i++) { 3288 arg = op->args[i]; 3289 ts = arg_temp(arg); 3290 reg = tcg_target_call_oarg_regs[i]; 3291 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3292 3293 if (ts->fixed_reg) { 3294 if (ts->reg != reg) { 3295 tcg_out_mov(s, ts->type, ts->reg, reg); 3296 } 3297 } else { 3298 if (ts->val_type == TEMP_VAL_REG) { 3299 s->reg_to_temp[ts->reg] = NULL; 3300 } 3301 ts->val_type = TEMP_VAL_REG; 3302 ts->reg = reg; 3303 ts->mem_coherent = 0; 3304 s->reg_to_temp[reg] = ts; 3305 if (NEED_SYNC_ARG(i)) { 3306 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 3307 } else if (IS_DEAD_ARG(i)) { 3308 temp_dead(s, ts); 3309 } 3310 } 3311 } 3312 } 3313 3314 #ifdef CONFIG_PROFILER 3315 3316 /* avoid copy/paste errors */ 3317 #define PROF_ADD(to, from, field) \ 3318 do { \ 3319 (to)->field += atomic_read(&((from)->field)); \ 3320 } while (0) 3321 3322 #define PROF_MAX(to, from, field) \ 3323 do { \ 3324 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3325 if (val__ > (to)->field) { \ 3326 (to)->field = val__; \ 3327 } \ 3328 } while (0) 3329 3330 /* Pass in a zero'ed @prof */ 3331 static inline 3332 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3333 { 3334 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3335 unsigned int i; 3336 3337 for (i = 0; i < n_ctxs; i++) { 3338 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3339 const TCGProfile *orig = &s->prof; 3340 3341 if (counters) { 3342 PROF_ADD(prof, orig, tb_count1); 3343 PROF_ADD(prof, orig, tb_count); 3344 PROF_ADD(prof, 
orig, op_count); 3345 PROF_MAX(prof, orig, op_count_max); 3346 PROF_ADD(prof, orig, temp_count); 3347 PROF_MAX(prof, orig, temp_count_max); 3348 PROF_ADD(prof, orig, del_op_count); 3349 PROF_ADD(prof, orig, code_in_len); 3350 PROF_ADD(prof, orig, code_out_len); 3351 PROF_ADD(prof, orig, search_out_len); 3352 PROF_ADD(prof, orig, interm_time); 3353 PROF_ADD(prof, orig, code_time); 3354 PROF_ADD(prof, orig, la_time); 3355 PROF_ADD(prof, orig, opt_time); 3356 PROF_ADD(prof, orig, restore_count); 3357 PROF_ADD(prof, orig, restore_time); 3358 } 3359 if (table) { 3360 int i; 3361 3362 for (i = 0; i < NB_OPS; i++) { 3363 PROF_ADD(prof, orig, table_op_count[i]); 3364 } 3365 } 3366 } 3367 } 3368 3369 #undef PROF_ADD 3370 #undef PROF_MAX 3371 3372 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3373 { 3374 tcg_profile_snapshot(prof, true, false); 3375 } 3376 3377 static void tcg_profile_snapshot_table(TCGProfile *prof) 3378 { 3379 tcg_profile_snapshot(prof, false, true); 3380 } 3381 3382 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3383 { 3384 TCGProfile prof = {}; 3385 int i; 3386 3387 tcg_profile_snapshot_table(&prof); 3388 for (i = 0; i < NB_OPS; i++) { 3389 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 3390 prof.table_op_count[i]); 3391 } 3392 } 3393 #else 3394 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3395 { 3396 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3397 } 3398 #endif 3399 3400 3401 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3402 { 3403 #ifdef CONFIG_PROFILER 3404 TCGProfile *prof = &s->prof; 3405 #endif 3406 int i, num_insns; 3407 TCGOp *op; 3408 3409 #ifdef CONFIG_PROFILER 3410 { 3411 int n = 0; 3412 3413 QTAILQ_FOREACH(op, &s->ops, link) { 3414 n++; 3415 } 3416 atomic_set(&prof->op_count, prof->op_count + n); 3417 if (n > prof->op_count_max) { 3418 atomic_set(&prof->op_count_max, n); 3419 } 3420 3421 n = s->nb_temps; 3422 atomic_set(&prof->temp_count, prof->temp_count + n); 3423 if (n > prof->temp_count_max) { 3424 atomic_set(&prof->temp_count_max, n); 3425 } 3426 } 3427 #endif 3428 3429 #ifdef DEBUG_DISAS 3430 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3431 && qemu_log_in_addr_range(tb->pc))) { 3432 qemu_log_lock(); 3433 qemu_log("OP:\n"); 3434 tcg_dump_ops(s); 3435 qemu_log("\n"); 3436 qemu_log_unlock(); 3437 } 3438 #endif 3439 3440 #ifdef CONFIG_PROFILER 3441 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3442 #endif 3443 3444 #ifdef USE_TCG_OPTIMIZATIONS 3445 tcg_optimize(s); 3446 #endif 3447 3448 #ifdef CONFIG_PROFILER 3449 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3450 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3451 #endif 3452 3453 liveness_pass_1(s); 3454 3455 if (s->nb_indirects > 0) { 3456 #ifdef DEBUG_DISAS 3457 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3458 && qemu_log_in_addr_range(tb->pc))) { 3459 qemu_log_lock(); 3460 qemu_log("OP before indirect lowering:\n"); 3461 tcg_dump_ops(s); 3462 qemu_log("\n"); 3463 qemu_log_unlock(); 3464 } 3465 #endif 3466 /* Replace indirect temps with direct temps. */ 3467 if (liveness_pass_2(s)) { 3468 /* If changes were made, re-run liveness.
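           The loads and stores inserted by pass 2 are new ops with no life
           data yet, so pass 1 must run again to compute it.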
*/ 3469 liveness_pass_1(s); 3470 } 3471 } 3472 3473 #ifdef CONFIG_PROFILER 3474 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3475 #endif 3476 3477 #ifdef DEBUG_DISAS 3478 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3479 && qemu_log_in_addr_range(tb->pc))) { 3480 qemu_log_lock(); 3481 qemu_log("OP after optimization and liveness analysis:\n"); 3482 tcg_dump_ops(s); 3483 qemu_log("\n"); 3484 qemu_log_unlock(); 3485 } 3486 #endif 3487 3488 tcg_reg_alloc_start(s); 3489 3490 s->code_buf = tb->tc.ptr; 3491 s->code_ptr = tb->tc.ptr; 3492 3493 #ifdef TCG_TARGET_NEED_LDST_LABELS 3494 QSIMPLEQ_INIT(&s->ldst_labels); 3495 #endif 3496 #ifdef TCG_TARGET_NEED_POOL_LABELS 3497 s->pool_labels = NULL; 3498 #endif 3499 3500 num_insns = -1; 3501 QTAILQ_FOREACH(op, &s->ops, link) { 3502 TCGOpcode opc = op->opc; 3503 3504 #ifdef CONFIG_PROFILER 3505 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3506 #endif 3507 3508 switch (opc) { 3509 case INDEX_op_mov_i32: 3510 case INDEX_op_mov_i64: 3511 case INDEX_op_mov_vec: 3512 tcg_reg_alloc_mov(s, op); 3513 break; 3514 case INDEX_op_movi_i32: 3515 case INDEX_op_movi_i64: 3516 case INDEX_op_dupi_vec: 3517 tcg_reg_alloc_movi(s, op); 3518 break; 3519 case INDEX_op_insn_start: 3520 if (num_insns >= 0) { 3521 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3522 } 3523 num_insns++; 3524 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3525 target_ulong a; 3526 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3527 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3528 #else 3529 a = op->args[i]; 3530 #endif 3531 s->gen_insn_data[num_insns][i] = a; 3532 } 3533 break; 3534 case INDEX_op_discard: 3535 temp_dead(s, arg_temp(op->args[0])); 3536 break; 3537 case INDEX_op_set_label: 3538 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3539 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3540 break; 3541 case INDEX_op_call: 3542 tcg_reg_alloc_call(s, op); 3543 break; 3544 default: 3545 /* Sanity check that we've not introduced any unhandled opcodes. */ 3546 tcg_debug_assert(tcg_op_supported(opc)); 3547 /* Note: in order to speed up the code, it would be much 3548 faster to have specialized register allocator functions for 3549 some common argument patterns */ 3550 tcg_reg_alloc_op(s, op); 3551 break; 3552 } 3553 #ifdef CONFIG_DEBUG_TCG 3554 check_regs(s); 3555 #endif 3556 /* Test for (pending) buffer overflow. The assumption is that any 3557 one operation beginning below the high water mark cannot overrun 3558 the buffer completely. Thus we can test for overflow after 3559 generating code without having to check during generation. 
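           A negative return value tells the caller that the code buffer is
           effectively full, so that it can flush the buffer and retry the
           translation.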
*/ 3560 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 3561 return -1; 3562 } 3563 } 3564 tcg_debug_assert(num_insns >= 0); 3565 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3566 3567 /* Generate TB finalization at the end of block */ 3568 #ifdef TCG_TARGET_NEED_LDST_LABELS 3569 if (!tcg_out_ldst_finalize(s)) { 3570 return -1; 3571 } 3572 #endif 3573 #ifdef TCG_TARGET_NEED_POOL_LABELS 3574 if (!tcg_out_pool_finalize(s)) { 3575 return -1; 3576 } 3577 #endif 3578 3579 /* flush instruction cache */ 3580 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 3581 3582 return tcg_current_code_size(s); 3583 } 3584 3585 #ifdef CONFIG_PROFILER 3586 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3587 { 3588 TCGProfile prof = {}; 3589 const TCGProfile *s; 3590 int64_t tb_count; 3591 int64_t tb_div_count; 3592 int64_t tot; 3593 3594 tcg_profile_snapshot_counters(&prof); 3595 s = &prof; 3596 tb_count = s->tb_count; 3597 tb_div_count = tb_count ? tb_count : 1; 3598 tot = s->interm_time + s->code_time; 3599 3600 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 3601 tot, tot / 2.4e9); 3602 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 3603 tb_count, s->tb_count1 - tb_count, 3604 (double)(s->tb_count1 - s->tb_count) 3605 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 3606 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 3607 (double)s->op_count / tb_div_count, s->op_count_max); 3608 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 3609 (double)s->del_op_count / tb_div_count); 3610 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 3611 (double)s->temp_count / tb_div_count, s->temp_count_max); 3612 cpu_fprintf(f, "avg host code/TB %0.1f\n", 3613 (double)s->code_out_len / tb_div_count); 3614 cpu_fprintf(f, "avg search data/TB %0.1f\n", 3615 (double)s->search_out_len / tb_div_count); 3616 3617 cpu_fprintf(f, "cycles/op %0.1f\n", 3618 s->op_count ? (double)tot / s->op_count : 0); 3619 cpu_fprintf(f, "cycles/in byte %0.1f\n", 3620 s->code_in_len ? (double)tot / s->code_in_len : 0); 3621 cpu_fprintf(f, "cycles/out byte %0.1f\n", 3622 s->code_out_len ? (double)tot / s->code_out_len : 0); 3623 cpu_fprintf(f, "cycles/search byte %0.1f\n", 3624 s->search_out_len ? (double)tot / s->search_out_len : 0); 3625 if (tot == 0) { 3626 tot = 1; 3627 } 3628 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 3629 (double)s->interm_time / tot * 100.0); 3630 cpu_fprintf(f, " gen_code time %0.1f%%\n", 3631 (double)s->code_time / tot * 100.0); 3632 cpu_fprintf(f, "optim./code time %0.1f%%\n", 3633 (double)s->opt_time / (s->code_time ? s->code_time : 1) 3634 * 100.0); 3635 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 3636 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 3637 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 3638 s->restore_count); 3639 cpu_fprintf(f, " avg cycles %0.1f\n", 3640 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 3641 } 3642 #else 3643 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3644 { 3645 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3646 } 3647 #endif 3648 3649 #ifdef ELF_HOST_MACHINE 3650 /* In order to use this feature, the backend needs to do three things: 3651 3652 (1) Define ELF_HOST_MACHINE to indicate both what value to 3653 put into the ELF image and to indicate support for the feature. 3654 3655 (2) Define tcg_register_jit. 
This should create a buffer containing 3656 the contents of a .debug_frame section that describes the post- 3657 prologue unwind info for the tcg machine. 3658 3659 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 3660 */ 3661 3662 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 3663 typedef enum { 3664 JIT_NOACTION = 0, 3665 JIT_REGISTER_FN, 3666 JIT_UNREGISTER_FN 3667 } jit_actions_t; 3668 3669 struct jit_code_entry { 3670 struct jit_code_entry *next_entry; 3671 struct jit_code_entry *prev_entry; 3672 const void *symfile_addr; 3673 uint64_t symfile_size; 3674 }; 3675 3676 struct jit_descriptor { 3677 uint32_t version; 3678 uint32_t action_flag; 3679 struct jit_code_entry *relevant_entry; 3680 struct jit_code_entry *first_entry; 3681 }; 3682 3683 void __jit_debug_register_code(void) __attribute__((noinline)); 3684 void __jit_debug_register_code(void) 3685 { 3686 asm(""); 3687 } 3688 3689 /* Must statically initialize the version, because GDB may check 3690 the version before we can set it. */ 3691 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 3692 3693 /* End GDB interface. */ 3694 3695 static int find_string(const char *strtab, const char *str) 3696 { 3697 const char *p = strtab + 1; 3698 3699 while (1) { 3700 if (strcmp(p, str) == 0) { 3701 return p - strtab; 3702 } 3703 p += strlen(p) + 1; 3704 } 3705 } 3706 3707 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 3708 const void *debug_frame, 3709 size_t debug_frame_size) 3710 { 3711 struct __attribute__((packed)) DebugInfo { 3712 uint32_t len; 3713 uint16_t version; 3714 uint32_t abbrev; 3715 uint8_t ptr_size; 3716 uint8_t cu_die; 3717 uint16_t cu_lang; 3718 uintptr_t cu_low_pc; 3719 uintptr_t cu_high_pc; 3720 uint8_t fn_die; 3721 char fn_name[16]; 3722 uintptr_t fn_low_pc; 3723 uintptr_t fn_high_pc; 3724 uint8_t cu_eoc; 3725 }; 3726 3727 struct ElfImage { 3728 ElfW(Ehdr) ehdr; 3729 ElfW(Phdr) phdr; 3730 ElfW(Shdr) shdr[7]; 3731 ElfW(Sym) sym[2]; 3732 struct DebugInfo di; 3733 uint8_t da[24]; 3734 char str[80]; 3735 }; 3736 3737 struct ElfImage *img; 3738 3739 static const struct ElfImage img_template = { 3740 .ehdr = { 3741 .e_ident[EI_MAG0] = ELFMAG0, 3742 .e_ident[EI_MAG1] = ELFMAG1, 3743 .e_ident[EI_MAG2] = ELFMAG2, 3744 .e_ident[EI_MAG3] = ELFMAG3, 3745 .e_ident[EI_CLASS] = ELF_CLASS, 3746 .e_ident[EI_DATA] = ELF_DATA, 3747 .e_ident[EI_VERSION] = EV_CURRENT, 3748 .e_type = ET_EXEC, 3749 .e_machine = ELF_HOST_MACHINE, 3750 .e_version = EV_CURRENT, 3751 .e_phoff = offsetof(struct ElfImage, phdr), 3752 .e_shoff = offsetof(struct ElfImage, shdr), 3753 .e_ehsize = sizeof(ElfW(Shdr)), 3754 .e_phentsize = sizeof(ElfW(Phdr)), 3755 .e_phnum = 1, 3756 .e_shentsize = sizeof(ElfW(Shdr)), 3757 .e_shnum = ARRAY_SIZE(img->shdr), 3758 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 3759 #ifdef ELF_HOST_FLAGS 3760 .e_flags = ELF_HOST_FLAGS, 3761 #endif 3762 #ifdef ELF_OSABI 3763 .e_ident[EI_OSABI] = ELF_OSABI, 3764 #endif 3765 }, 3766 .phdr = { 3767 .p_type = PT_LOAD, 3768 .p_flags = PF_X, 3769 }, 3770 .shdr = { 3771 [0] = { .sh_type = SHT_NULL }, 3772 /* Trick: The contents of code_gen_buffer are not present in 3773 this fake ELF file; that got allocated elsewhere. Therefore 3774 we mark .text as SHT_NOBITS (similar to .bss) so that readers 3775 will not look for contents. We can record any address. 
*/ 3776 [1] = { /* .text */ 3777 .sh_type = SHT_NOBITS, 3778 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 3779 }, 3780 [2] = { /* .debug_info */ 3781 .sh_type = SHT_PROGBITS, 3782 .sh_offset = offsetof(struct ElfImage, di), 3783 .sh_size = sizeof(struct DebugInfo), 3784 }, 3785 [3] = { /* .debug_abbrev */ 3786 .sh_type = SHT_PROGBITS, 3787 .sh_offset = offsetof(struct ElfImage, da), 3788 .sh_size = sizeof(img->da), 3789 }, 3790 [4] = { /* .debug_frame */ 3791 .sh_type = SHT_PROGBITS, 3792 .sh_offset = sizeof(struct ElfImage), 3793 }, 3794 [5] = { /* .symtab */ 3795 .sh_type = SHT_SYMTAB, 3796 .sh_offset = offsetof(struct ElfImage, sym), 3797 .sh_size = sizeof(img->sym), 3798 .sh_info = 1, 3799 .sh_link = ARRAY_SIZE(img->shdr) - 1, 3800 .sh_entsize = sizeof(ElfW(Sym)), 3801 }, 3802 [6] = { /* .strtab */ 3803 .sh_type = SHT_STRTAB, 3804 .sh_offset = offsetof(struct ElfImage, str), 3805 .sh_size = sizeof(img->str), 3806 } 3807 }, 3808 .sym = { 3809 [1] = { /* code_gen_buffer */ 3810 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 3811 .st_shndx = 1, 3812 } 3813 }, 3814 .di = { 3815 .len = sizeof(struct DebugInfo) - 4, 3816 .version = 2, 3817 .ptr_size = sizeof(void *), 3818 .cu_die = 1, 3819 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 3820 .fn_die = 2, 3821 .fn_name = "code_gen_buffer" 3822 }, 3823 .da = { 3824 1, /* abbrev number (the cu) */ 3825 0x11, 1, /* DW_TAG_compile_unit, has children */ 3826 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 3827 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3828 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3829 0, 0, /* end of abbrev */ 3830 2, /* abbrev number (the fn) */ 3831 0x2e, 0, /* DW_TAG_subprogram, no children */ 3832 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 3833 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3834 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3835 0, 0, /* end of abbrev */ 3836 0 /* no more abbrev */ 3837 }, 3838 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 3839 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 3840 }; 3841 3842 /* We only need a single jit entry; statically allocate it. */ 3843 static struct jit_code_entry one_entry; 3844 3845 uintptr_t buf = (uintptr_t)buf_ptr; 3846 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 3847 DebugFrameHeader *dfh; 3848 3849 img = g_malloc(img_size); 3850 *img = img_template; 3851 3852 img->phdr.p_vaddr = buf; 3853 img->phdr.p_paddr = buf; 3854 img->phdr.p_memsz = buf_size; 3855 3856 img->shdr[1].sh_name = find_string(img->str, ".text"); 3857 img->shdr[1].sh_addr = buf; 3858 img->shdr[1].sh_size = buf_size; 3859 3860 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 3861 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 3862 3863 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 3864 img->shdr[4].sh_size = debug_frame_size; 3865 3866 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 3867 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 3868 3869 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 3870 img->sym[1].st_value = buf; 3871 img->sym[1].st_size = buf_size; 3872 3873 img->di.cu_low_pc = buf; 3874 img->di.cu_high_pc = buf + buf_size; 3875 img->di.fn_low_pc = buf; 3876 img->di.fn_high_pc = buf + buf_size; 3877 3878 dfh = (DebugFrameHeader *)(img + 1); 3879 memcpy(dfh, debug_frame, debug_frame_size); 3880 dfh->fde.func_start = buf; 3881 dfh->fde.func_len = buf_size; 3882 3883 #ifdef DEBUG_JIT 3884 /* Enable this block to be able to debug the ELF image file creation. 
3885 One can use readelf, objdump, or other inspection utilities. */ 3886 { 3887 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 3888 if (f) { 3889 if (fwrite(img, img_size, 1, f) != img_size) { 3890 /* Avoid stupid unused return value warning for fwrite. */ 3891 } 3892 fclose(f); 3893 } 3894 } 3895 #endif 3896 3897 one_entry.symfile_addr = img; 3898 one_entry.symfile_size = img_size; 3899 3900 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 3901 __jit_debug_descriptor.relevant_entry = &one_entry; 3902 __jit_debug_descriptor.first_entry = &one_entry; 3903 __jit_debug_register_code(); 3904 } 3905 #else 3906 /* No support for the feature. Provide the entry point expected by exec.c, 3907 and implement the internal function we declared earlier. */ 3908 3909 static void tcg_register_jit_int(void *buf, size_t size, 3910 const void *debug_frame, 3911 size_t debug_frame_size) 3912 { 3913 } 3914 3915 void tcg_register_jit(void *buf, size_t buf_size) 3916 { 3917 } 3918 #endif /* ELF_HOST_MACHINE */ 3919 3920 #if !TCG_TARGET_MAYBE_vec 3921 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 3922 { 3923 g_assert_not_reached(); 3924 } 3925 #endif 3926