/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
 */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
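
/*
 * Illustrative note (an addition, not from the original source): tree i
 * lives at region_trees + i * tree_size.  E.g. assuming a 64-byte data
 * cache line and sizeof(struct tcg_region_tree) <= 64, tree_size rounds
 * up to 64, so consecutive trees start on distinct cache lines and
 * threads locking different trees do not falsely share a line.
 */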
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
        tcg_debug_assert(ok);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
        tcg_debug_assert(ok);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
#endif

    return l;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}
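
/*
 * Usage sketch (illustrative, not original code): lookups into the trees
 * are keyed by a struct tb_tc whose .size is left at zero, e.g.
 *
 *   struct tb_tc key = { .ptr = tc_ptr };   // .size == 0 marks a lookup
 *   tb = g_tree_lookup(rt->tree, &key);
 *
 * tb_tc_cmp() below relies on that zero size to tell lookup keys apart
 * from inserted nodes; tcg_tb_lookup() uses exactly this pattern.
 */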

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}
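
/*
 * Worked example (illustrative numbers, not from the original source):
 * with region.stride == 16 MB and region.n == 8, a code pointer 35 MB
 * past region.start_aligned maps to region_idx 35 / 16 == 2, i.e. the
 * third tree.  Pointers below start_aligned (possible only in the first
 * region, which absorbs the alignment slack) map to index 0, and
 * pointers beyond the last stride clamp to region.n - 1.
 */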

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}
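
/*
 * Illustrative accounting note (an addition, not original text): each
 * region's stride is its usable size plus one guard page, and each
 * context stops translating TCG_HIGHWATER bytes before its region ends,
 * which is why tcg_code_capacity() below subtracts
 * region.n * (guard_size + TCG_HIGHWATER) from the raw buffer span.
 */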

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
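
    /*
     * Illustrative note (an addition, not original text): the prologue
     * emitted above is the host-side trampoline through which QEMU enters
     * translated code; it sets up the host registers (including env in
     * TCG_AREG0) and jumps to a TB, while the matching epilogue returns
     * control to the caller.
     */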

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INIT(&s->labels);
#endif
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
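
/*
 * Illustrative note (an addition, not original text): on a 32-bit host,
 * the function below registers a 64-bit guest global such as "foo" as two
 * consecutive 32-bit halves named "foo_0" and "foo_1" at mem_offset and
 * mem_offset + 4; on a big-endian host the low half ("foo_0") takes the
 * higher offset, matching the value's layout in memory.
 */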

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}
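
/*
 * Usage sketch (illustrative, not original code):
 *
 *   TCGv_vec t = tcg_temp_new_vec(TCG_TYPE_V128);
 *
 * asserts TCG_TARGET_HAS_v128 in debug builds; the _matching variant
 * below instead inherits the base type of an existing vector temp.
 */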

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
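
/*
 * Usage sketch (illustrative, not original code): the constructors above
 * pair a fresh temporary with a movi, e.g.
 *
 *   TCGv_i32 t = tcg_const_i32(0x1234);
 *   ...use t as an operand...
 *   tcg_temp_free_i32(t);
 *
 * freeing only sets the temp's bit in free_temps, so a later
 * tcg_temp_new_internal() of the same type and locality can recycle it.
 */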

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
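
/*
 * Illustrative note on the sizemask encoding used below (an addition,
 * not original text): each value gets a two-bit field, entry 0 for the
 * return value and entry i + 1 for argument i; within a field, bit 0
 * means 64-bit and bit 1 means signed.  Hence the tests (sizemask & 1)
 * for a 64-bit return, (1 << (i+1)*2) for a 64-bit argument, and
 * (2 << (i+1)*2) for a signed one.
 */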

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;
    TCGTemp *ts;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
    }
    for (n = s->nb_temps; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    if (ts->temp_global) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
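
/*
 * Illustrative note (an addition, not original text): with the helpers
 * above, tcg_dump_ops() below renders ops roughly as
 *
 *    add_i32 tmp3,tmp1,tmp2
 *    brcond_i32 tmp0,tmp1,lt,$L0
 *
 * i.e. output args, then input args, then condition/constant args,
 * using global names and the "tmpN"/"locN" forms from tcg_get_arg_str().
 */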

/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
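                /* e.g. a 128-bit op on 32-bit elements prints "v128,e32,"
                   (an illustrative note added here, not original text) */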
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 1978 8 << TCGOP_VECE(op)); 1979 } 1980 1981 k = 0; 1982 for (i = 0; i < nb_oargs; i++) { 1983 if (k != 0) { 1984 col += qemu_log(","); 1985 } 1986 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1987 op->args[k++])); 1988 } 1989 for (i = 0; i < nb_iargs; i++) { 1990 if (k != 0) { 1991 col += qemu_log(","); 1992 } 1993 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1994 op->args[k++])); 1995 } 1996 switch (c) { 1997 case INDEX_op_brcond_i32: 1998 case INDEX_op_setcond_i32: 1999 case INDEX_op_movcond_i32: 2000 case INDEX_op_brcond2_i32: 2001 case INDEX_op_setcond2_i32: 2002 case INDEX_op_brcond_i64: 2003 case INDEX_op_setcond_i64: 2004 case INDEX_op_movcond_i64: 2005 case INDEX_op_cmp_vec: 2006 if (op->args[k] < ARRAY_SIZE(cond_name) 2007 && cond_name[op->args[k]]) { 2008 col += qemu_log(",%s", cond_name[op->args[k++]]); 2009 } else { 2010 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2011 } 2012 i = 1; 2013 break; 2014 case INDEX_op_qemu_ld_i32: 2015 case INDEX_op_qemu_st_i32: 2016 case INDEX_op_qemu_ld_i64: 2017 case INDEX_op_qemu_st_i64: 2018 { 2019 TCGMemOpIdx oi = op->args[k++]; 2020 TCGMemOp op = get_memop(oi); 2021 unsigned ix = get_mmuidx(oi); 2022 2023 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2024 col += qemu_log(",$0x%x,%u", op, ix); 2025 } else { 2026 const char *s_al, *s_op; 2027 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2028 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2029 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2030 } 2031 i = 1; 2032 } 2033 break; 2034 default: 2035 i = 0; 2036 break; 2037 } 2038 switch (c) { 2039 case INDEX_op_set_label: 2040 case INDEX_op_br: 2041 case INDEX_op_brcond_i32: 2042 case INDEX_op_brcond_i64: 2043 case INDEX_op_brcond2_i32: 2044 col += qemu_log("%s$L%d", k ? "," : "", 2045 arg_label(op->args[k])->id); 2046 i++, k++; 2047 break; 2048 default: 2049 break; 2050 } 2051 for (; i < nb_cargs; i++, k++) { 2052 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2053 } 2054 } 2055 2056 if (have_prefs || op->life) { 2057 for (; col < 40; ++col) { 2058 putc(' ', qemu_logfile); 2059 } 2060 } 2061 2062 if (op->life) { 2063 unsigned life = op->life; 2064 2065 if (life & (SYNC_ARG * 3)) { 2066 qemu_log(" sync:"); 2067 for (i = 0; i < 2; ++i) { 2068 if (life & (SYNC_ARG << i)) { 2069 qemu_log(" %d", i); 2070 } 2071 } 2072 } 2073 life /= DEAD_ARG; 2074 if (life) { 2075 qemu_log(" dead:"); 2076 for (i = 0; life; ++i, life >>= 1) { 2077 if (life & 1) { 2078 qemu_log(" %d", i); 2079 } 2080 } 2081 } 2082 } 2083 2084 if (have_prefs) { 2085 for (i = 0; i < nb_oargs; ++i) { 2086 TCGRegSet set = op->output_pref[i]; 2087 2088 if (i == 0) { 2089 qemu_log(" pref="); 2090 } else { 2091 qemu_log(","); 2092 } 2093 if (set == 0) { 2094 qemu_log("none"); 2095 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2096 qemu_log("all"); 2097 #ifdef CONFIG_DEBUG_TCG 2098 } else if (tcg_regset_single(set)) { 2099 TCGReg reg = tcg_regset_first(set); 2100 qemu_log("%s", tcg_target_reg_names[reg]); 2101 #endif 2102 } else if (TCG_TARGET_NB_REGS <= 32) { 2103 qemu_log("%#x", (uint32_t)set); 2104 } else { 2105 qemu_log("%#" PRIx64, (uint64_t)set); 2106 } 2107 } 2108 } 2109 2110 qemu_log("\n"); 2111 } 2112 } 2113 2114 /* we give more priority to constraints with less registers */ 2115 static int get_constraint_priority(const TCGOpDef *def, int k) 2116 { 2117 const TCGArgConstraint *arg_ct; 2118 2119 int i, n; 2120 arg_ct = &def->args_ct[k]; 2121 if (arg_ct->ct & TCG_CT_ALIAS) { 2122 /* an alias is equivalent to a single register */ 2123 n = 1; 2124 } else { 2125 if (!(arg_ct->ct & TCG_CT_REG)) 2126 return 0; 2127 n = 0; 2128 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2129 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 2130 n++; 2131 } 2132 } 2133 return TCG_TARGET_NB_REGS - n + 1; 2134 } 2135 2136 /* sort from highest priority to lowest */ 2137 static void sort_constraints(TCGOpDef *def, int start, int n) 2138 { 2139 int i, j, p1, p2, tmp; 2140 2141 for(i = 0; i < n; i++) 2142 def->sorted_args[start + i] = start + i; 2143 if (n <= 1) 2144 return; 2145 for(i = 0; i < n - 1; i++) { 2146 for(j = i + 1; j < n; j++) { 2147 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 2148 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 2149 if (p1 < p2) { 2150 tmp = def->sorted_args[start + i]; 2151 def->sorted_args[start + i] = def->sorted_args[start + j]; 2152 def->sorted_args[start + j] = tmp; 2153 } 2154 } 2155 } 2156 } 2157 2158 static void process_op_defs(TCGContext *s) 2159 { 2160 TCGOpcode op; 2161 2162 for (op = 0; op < NB_OPS; op++) { 2163 TCGOpDef *def = &tcg_op_defs[op]; 2164 const TCGTargetOpDef *tdefs; 2165 TCGType type; 2166 int i, nb_args; 2167 2168 if (def->flags & TCG_OPF_NOT_PRESENT) { 2169 continue; 2170 } 2171 2172 nb_args = def->nb_iargs + def->nb_oargs; 2173 if (nb_args == 0) { 2174 continue; 2175 } 2176 2177 tdefs = tcg_target_op_def(op); 2178 /* Missing TCGTargetOpDef entry. */ 2179 tcg_debug_assert(tdefs != NULL); 2180 2181 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 2182 for (i = 0; i < nb_args; i++) { 2183 const char *ct_str = tdefs->args_ct_str[i]; 2184 /* Incomplete TCGTargetOpDef entry. */ 2185 tcg_debug_assert(ct_str != NULL); 2186 2187 def->args_ct[i].u.regs = 0; 2188 def->args_ct[i].ct = 0; 2189 while (*ct_str != '\0') { 2190 switch(*ct_str) { 2191 case '0' ... 
'9': 2192 { 2193 int oarg = *ct_str - '0'; 2194 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2195 tcg_debug_assert(oarg < def->nb_oargs); 2196 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 2197 /* TCG_CT_ALIAS is for the output arguments. 2198 The input is tagged with TCG_CT_IALIAS. */ 2199 def->args_ct[i] = def->args_ct[oarg]; 2200 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 2201 def->args_ct[oarg].alias_index = i; 2202 def->args_ct[i].ct |= TCG_CT_IALIAS; 2203 def->args_ct[i].alias_index = oarg; 2204 } 2205 ct_str++; 2206 break; 2207 case '&': 2208 def->args_ct[i].ct |= TCG_CT_NEWREG; 2209 ct_str++; 2210 break; 2211 case 'i': 2212 def->args_ct[i].ct |= TCG_CT_CONST; 2213 ct_str++; 2214 break; 2215 default: 2216 ct_str = target_parse_constraint(&def->args_ct[i], 2217 ct_str, type); 2218 /* Typo in TCGTargetOpDef constraint. */ 2219 tcg_debug_assert(ct_str != NULL); 2220 } 2221 } 2222 } 2223 2224 /* TCGTargetOpDef entry with too much information? */ 2225 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2226 2227 /* sort the constraints (XXX: this is just an heuristic) */ 2228 sort_constraints(def, 0, def->nb_oargs); 2229 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2230 } 2231 } 2232 2233 void tcg_op_remove(TCGContext *s, TCGOp *op) 2234 { 2235 TCGLabel *label; 2236 2237 switch (op->opc) { 2238 case INDEX_op_br: 2239 label = arg_label(op->args[0]); 2240 label->refs--; 2241 break; 2242 case INDEX_op_brcond_i32: 2243 case INDEX_op_brcond_i64: 2244 label = arg_label(op->args[3]); 2245 label->refs--; 2246 break; 2247 case INDEX_op_brcond2_i32: 2248 label = arg_label(op->args[5]); 2249 label->refs--; 2250 break; 2251 default: 2252 break; 2253 } 2254 2255 QTAILQ_REMOVE(&s->ops, op, link); 2256 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2257 s->nb_ops--; 2258 2259 #ifdef CONFIG_PROFILER 2260 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2261 #endif 2262 } 2263 2264 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2265 { 2266 TCGContext *s = tcg_ctx; 2267 TCGOp *op; 2268 2269 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2270 op = tcg_malloc(sizeof(TCGOp)); 2271 } else { 2272 op = QTAILQ_FIRST(&s->free_ops); 2273 QTAILQ_REMOVE(&s->free_ops, op, link); 2274 } 2275 memset(op, 0, offsetof(TCGOp, link)); 2276 op->opc = opc; 2277 s->nb_ops++; 2278 2279 return op; 2280 } 2281 2282 TCGOp *tcg_emit_op(TCGOpcode opc) 2283 { 2284 TCGOp *op = tcg_op_alloc(opc); 2285 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2286 return op; 2287 } 2288 2289 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2290 { 2291 TCGOp *new_op = tcg_op_alloc(opc); 2292 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2293 return new_op; 2294 } 2295 2296 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2297 { 2298 TCGOp *new_op = tcg_op_alloc(opc); 2299 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2300 return new_op; 2301 } 2302 2303 /* Reachable analysis : remove unreachable code. */ 2304 static void reachable_code_pass(TCGContext *s) 2305 { 2306 TCGOp *op, *op_next; 2307 bool dead = false; 2308 2309 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2310 bool remove = dead; 2311 TCGLabel *label; 2312 int call_flags; 2313 2314 switch (op->opc) { 2315 case INDEX_op_set_label: 2316 label = arg_label(op->args[0]); 2317 if (label->refs == 0) { 2318 /* 2319 * While there is an occasional backward branch, virtually 2320 * all branches generated by the translators are forward. 
2321 * Which means that generally we will have already removed 2322 * all references to the label that will be, and there is 2323 * little to be gained by iterating. 2324 */ 2325 remove = true; 2326 } else { 2327 /* Once we see a label, insns become live again. */ 2328 dead = false; 2329 remove = false; 2330 2331 /* 2332 * Optimization can fold conditional branches to unconditional. 2333 * If we find a label with one reference which is preceded by 2334 * an unconditional branch to it, remove both. This needed to 2335 * wait until the dead code in between them was removed. 2336 */ 2337 if (label->refs == 1) { 2338 TCGOp *op_prev = QTAILQ_PREV(op, link); 2339 if (op_prev->opc == INDEX_op_br && 2340 label == arg_label(op_prev->args[0])) { 2341 tcg_op_remove(s, op_prev); 2342 remove = true; 2343 } 2344 } 2345 } 2346 break; 2347 2348 case INDEX_op_br: 2349 case INDEX_op_exit_tb: 2350 case INDEX_op_goto_ptr: 2351 /* Unconditional branches; everything following is dead. */ 2352 dead = true; 2353 break; 2354 2355 case INDEX_op_call: 2356 /* Notice noreturn helper calls, raising exceptions. */ 2357 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; 2358 if (call_flags & TCG_CALL_NO_RETURN) { 2359 dead = true; 2360 } 2361 break; 2362 2363 case INDEX_op_insn_start: 2364 /* Never remove -- we need to keep these for unwind. */ 2365 remove = false; 2366 break; 2367 2368 default: 2369 break; 2370 } 2371 2372 if (remove) { 2373 tcg_op_remove(s, op); 2374 } 2375 } 2376 } 2377 2378 #define TS_DEAD 1 2379 #define TS_MEM 2 2380 2381 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2382 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2383 2384 /* For liveness_pass_1, the register preferences for a given temp. */ 2385 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2386 { 2387 return ts->state_ptr; 2388 } 2389 2390 /* For liveness_pass_1, reset the preferences for a given temp to the 2391 * maximal regset for its type. 2392 */ 2393 static inline void la_reset_pref(TCGTemp *ts) 2394 { 2395 *la_temp_pref(ts) 2396 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2397 } 2398 2399 /* liveness analysis: end of function: all temps are dead, and globals 2400 should be in memory. */ 2401 static void la_func_end(TCGContext *s, int ng, int nt) 2402 { 2403 int i; 2404 2405 for (i = 0; i < ng; ++i) { 2406 s->temps[i].state = TS_DEAD | TS_MEM; 2407 la_reset_pref(&s->temps[i]); 2408 } 2409 for (i = ng; i < nt; ++i) { 2410 s->temps[i].state = TS_DEAD; 2411 la_reset_pref(&s->temps[i]); 2412 } 2413 } 2414 2415 /* liveness analysis: end of basic block: all temps are dead, globals 2416 and local temps should be in memory. */ 2417 static void la_bb_end(TCGContext *s, int ng, int nt) 2418 { 2419 int i; 2420 2421 for (i = 0; i < ng; ++i) { 2422 s->temps[i].state = TS_DEAD | TS_MEM; 2423 la_reset_pref(&s->temps[i]); 2424 } 2425 for (i = ng; i < nt; ++i) { 2426 s->temps[i].state = (s->temps[i].temp_local 2427 ? TS_DEAD | TS_MEM 2428 : TS_DEAD); 2429 la_reset_pref(&s->temps[i]); 2430 } 2431 } 2432 2433 /* liveness analysis: sync globals back to memory. */ 2434 static void la_global_sync(TCGContext *s, int ng) 2435 { 2436 int i; 2437 2438 for (i = 0; i < ng; ++i) { 2439 int state = s->temps[i].state; 2440 s->temps[i].state = state | TS_MEM; 2441 if (state == TS_DEAD) { 2442 /* If the global was previously dead, reset prefs. */ 2443 la_reset_pref(&s->temps[i]); 2444 } 2445 } 2446 } 2447 2448 /* liveness analysis: sync globals back to memory and kill. 
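   Unlike la_global_sync() above, every global is also marked TS_DEAD here,
   so that, as liveness_pass_1() continues its backward walk, the next
   definition of a global that it meets -- the last write in execution
   order -- is flagged both dead and in need of a sync back to the
   canonical slot.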
*/ 2449 static void la_global_kill(TCGContext *s, int ng) 2450 { 2451 int i; 2452 2453 for (i = 0; i < ng; i++) { 2454 s->temps[i].state = TS_DEAD | TS_MEM; 2455 la_reset_pref(&s->temps[i]); 2456 } 2457 } 2458 2459 /* liveness analysis: note live globals crossing calls. */ 2460 static void la_cross_call(TCGContext *s, int nt) 2461 { 2462 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2463 int i; 2464 2465 for (i = 0; i < nt; i++) { 2466 TCGTemp *ts = &s->temps[i]; 2467 if (!(ts->state & TS_DEAD)) { 2468 TCGRegSet *pset = la_temp_pref(ts); 2469 TCGRegSet set = *pset; 2470 2471 set &= mask; 2472 /* If the combination is not possible, restart. */ 2473 if (set == 0) { 2474 set = tcg_target_available_regs[ts->type] & mask; 2475 } 2476 *pset = set; 2477 } 2478 } 2479 } 2480 2481 /* Liveness analysis: update the opc_arg_life array to tell whether a 2482 given input argument is dead. Instructions updating dead 2483 temporaries are removed. */ 2484 static void liveness_pass_1(TCGContext *s) 2485 { 2486 int nb_globals = s->nb_globals; 2487 int nb_temps = s->nb_temps; 2488 TCGOp *op, *op_prev; 2489 TCGRegSet *prefs; 2490 int i; 2491 2492 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2493 for (i = 0; i < nb_temps; ++i) { 2494 s->temps[i].state_ptr = prefs + i; 2495 } 2496 2497 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2498 la_func_end(s, nb_globals, nb_temps); 2499 2500 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2501 int nb_iargs, nb_oargs; 2502 TCGOpcode opc_new, opc_new2; 2503 bool have_opc_new2; 2504 TCGLifeData arg_life = 0; 2505 TCGTemp *ts; 2506 TCGOpcode opc = op->opc; 2507 const TCGOpDef *def = &tcg_op_defs[opc]; 2508 2509 switch (opc) { 2510 case INDEX_op_call: 2511 { 2512 int call_flags; 2513 int nb_call_regs; 2514 2515 nb_oargs = TCGOP_CALLO(op); 2516 nb_iargs = TCGOP_CALLI(op); 2517 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2518 2519 /* pure functions can be removed if their result is unused */ 2520 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2521 for (i = 0; i < nb_oargs; i++) { 2522 ts = arg_temp(op->args[i]); 2523 if (ts->state != TS_DEAD) { 2524 goto do_not_remove_call; 2525 } 2526 } 2527 goto do_remove; 2528 } 2529 do_not_remove_call: 2530 2531 /* Output args are dead. */ 2532 for (i = 0; i < nb_oargs; i++) { 2533 ts = arg_temp(op->args[i]); 2534 if (ts->state & TS_DEAD) { 2535 arg_life |= DEAD_ARG << i; 2536 } 2537 if (ts->state & TS_MEM) { 2538 arg_life |= SYNC_ARG << i; 2539 } 2540 ts->state = TS_DEAD; 2541 la_reset_pref(ts); 2542 2543 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2544 op->output_pref[i] = 0; 2545 } 2546 2547 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2548 TCG_CALL_NO_READ_GLOBALS))) { 2549 la_global_kill(s, nb_globals); 2550 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2551 la_global_sync(s, nb_globals); 2552 } 2553 2554 /* Record arguments that die in this helper. */ 2555 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2556 ts = arg_temp(op->args[i]); 2557 if (ts && ts->state & TS_DEAD) { 2558 arg_life |= DEAD_ARG << i; 2559 } 2560 } 2561 2562 /* For all live registers, remove call-clobbered prefs. */ 2563 la_cross_call(s, nb_temps); 2564 2565 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2566 2567 /* Input arguments are live for preceding opcodes.
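   (An illustrative note, not tied to a particular host: a temp whose only
   use is argument 0 of a helper ends up below with the singleton
   preference set { tcg_target_call_iarg_regs[0] }, so the allocator can
   materialize the value directly in the correct argument register and
   avoid a move just before the call.)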
*/ 2568 for (i = 0; i < nb_iargs; i++) { 2569 ts = arg_temp(op->args[i + nb_oargs]); 2570 if (ts && ts->state & TS_DEAD) { 2571 /* For those arguments that die, and will be allocated 2572 * in registers, clear the register set for that arg, 2573 * to be filled in below. For args that will be on 2574 * the stack, reset to any available reg. 2575 */ 2576 *la_temp_pref(ts) 2577 = (i < nb_call_regs ? 0 : 2578 tcg_target_available_regs[ts->type]); 2579 ts->state &= ~TS_DEAD; 2580 } 2581 } 2582 2583 /* For each input argument, add its input register to prefs. 2584 If a temp is used once, this produces a single set bit. */ 2585 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2586 ts = arg_temp(op->args[i + nb_oargs]); 2587 if (ts) { 2588 tcg_regset_set_reg(*la_temp_pref(ts), 2589 tcg_target_call_iarg_regs[i]); 2590 } 2591 } 2592 } 2593 break; 2594 case INDEX_op_insn_start: 2595 break; 2596 case INDEX_op_discard: 2597 /* mark the temporary as dead */ 2598 ts = arg_temp(op->args[0]); 2599 ts->state = TS_DEAD; 2600 la_reset_pref(ts); 2601 break; 2602 2603 case INDEX_op_add2_i32: 2604 opc_new = INDEX_op_add_i32; 2605 goto do_addsub2; 2606 case INDEX_op_sub2_i32: 2607 opc_new = INDEX_op_sub_i32; 2608 goto do_addsub2; 2609 case INDEX_op_add2_i64: 2610 opc_new = INDEX_op_add_i64; 2611 goto do_addsub2; 2612 case INDEX_op_sub2_i64: 2613 opc_new = INDEX_op_sub_i64; 2614 do_addsub2: 2615 nb_iargs = 4; 2616 nb_oargs = 2; 2617 /* Test if the high part of the operation is dead, but not 2618 the low part. The result can be optimized to a simple 2619 add or sub. This happens often for x86_64 guest when the 2620 cpu mode is set to 32 bit. */ 2621 if (arg_temp(op->args[1])->state == TS_DEAD) { 2622 if (arg_temp(op->args[0])->state == TS_DEAD) { 2623 goto do_remove; 2624 } 2625 /* Replace the opcode and adjust the args in place, 2626 leaving 3 unused args at the end. */ 2627 op->opc = opc = opc_new; 2628 op->args[1] = op->args[2]; 2629 op->args[2] = op->args[4]; 2630 /* Fall through and mark the single-word operation live. */ 2631 nb_iargs = 2; 2632 nb_oargs = 1; 2633 } 2634 goto do_not_remove; 2635 2636 case INDEX_op_mulu2_i32: 2637 opc_new = INDEX_op_mul_i32; 2638 opc_new2 = INDEX_op_muluh_i32; 2639 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2640 goto do_mul2; 2641 case INDEX_op_muls2_i32: 2642 opc_new = INDEX_op_mul_i32; 2643 opc_new2 = INDEX_op_mulsh_i32; 2644 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2645 goto do_mul2; 2646 case INDEX_op_mulu2_i64: 2647 opc_new = INDEX_op_mul_i64; 2648 opc_new2 = INDEX_op_muluh_i64; 2649 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2650 goto do_mul2; 2651 case INDEX_op_muls2_i64: 2652 opc_new = INDEX_op_mul_i64; 2653 opc_new2 = INDEX_op_mulsh_i64; 2654 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2655 goto do_mul2; 2656 do_mul2: 2657 nb_iargs = 2; 2658 nb_oargs = 2; 2659 if (arg_temp(op->args[1])->state == TS_DEAD) { 2660 if (arg_temp(op->args[0])->state == TS_DEAD) { 2661 /* Both parts of the operation are dead. */ 2662 goto do_remove; 2663 } 2664 /* The high part of the operation is dead; generate the low. */ 2665 op->opc = opc = opc_new; 2666 op->args[1] = op->args[2]; 2667 op->args[2] = op->args[3]; 2668 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2669 /* The low part of the operation is dead; generate the high. */ 2670 op->opc = opc = opc_new2; 2671 op->args[0] = op->args[1]; 2672 op->args[1] = op->args[2]; 2673 op->args[2] = op->args[3]; 2674 } else { 2675 goto do_not_remove; 2676 } 2677 /* Mark the single-word operation live. 
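   Schematically, when only the high part is dead:
       mulu2_i32 lo, hi, x, y   ->   mul_i32   lo, x, y
   and when only the low part is dead, on hosts that provide muluh:
       mulu2_i32 lo, hi, x, y   ->   muluh_i32 hi, x, y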
*/ 2678 nb_oargs = 1; 2679 goto do_not_remove; 2680 2681 default: 2682 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2683 nb_iargs = def->nb_iargs; 2684 nb_oargs = def->nb_oargs; 2685 2686 /* Test if the operation can be removed because all 2687 its outputs are dead. We assume that nb_oargs == 0 2688 implies side effects */ 2689 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2690 for (i = 0; i < nb_oargs; i++) { 2691 if (arg_temp(op->args[i])->state != TS_DEAD) { 2692 goto do_not_remove; 2693 } 2694 } 2695 goto do_remove; 2696 } 2697 goto do_not_remove; 2698 2699 do_remove: 2700 tcg_op_remove(s, op); 2701 break; 2702 2703 do_not_remove: 2704 for (i = 0; i < nb_oargs; i++) { 2705 ts = arg_temp(op->args[i]); 2706 2707 /* Remember the preference of the uses that followed. */ 2708 op->output_pref[i] = *la_temp_pref(ts); 2709 2710 /* Output args are dead. */ 2711 if (ts->state & TS_DEAD) { 2712 arg_life |= DEAD_ARG << i; 2713 } 2714 if (ts->state & TS_MEM) { 2715 arg_life |= SYNC_ARG << i; 2716 } 2717 ts->state = TS_DEAD; 2718 la_reset_pref(ts); 2719 } 2720 2721 /* If end of basic block, update. */ 2722 if (def->flags & TCG_OPF_BB_EXIT) { 2723 la_func_end(s, nb_globals, nb_temps); 2724 } else if (def->flags & TCG_OPF_BB_END) { 2725 la_bb_end(s, nb_globals, nb_temps); 2726 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2727 la_global_sync(s, nb_globals); 2728 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2729 la_cross_call(s, nb_temps); 2730 } 2731 } 2732 2733 /* Record arguments that die in this opcode. */ 2734 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2735 ts = arg_temp(op->args[i]); 2736 if (ts->state & TS_DEAD) { 2737 arg_life |= DEAD_ARG << i; 2738 } 2739 } 2740 2741 /* Input arguments are live for preceding opcodes. */ 2742 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2743 ts = arg_temp(op->args[i]); 2744 if (ts->state & TS_DEAD) { 2745 /* For operands that were dead, initially allow 2746 all regs for the type. */ 2747 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2748 ts->state &= ~TS_DEAD; 2749 } 2750 } 2751 2752 /* Incorporate constraints for this operand. */ 2753 switch (opc) { 2754 case INDEX_op_mov_i32: 2755 case INDEX_op_mov_i64: 2756 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2757 have proper constraints. That said, special case 2758 moves to propagate preferences backward. */ 2759 if (IS_DEAD_ARG(1)) { 2760 *la_temp_pref(arg_temp(op->args[0])) 2761 = *la_temp_pref(arg_temp(op->args[1])); 2762 } 2763 break; 2764 2765 default: 2766 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2767 const TCGArgConstraint *ct = &def->args_ct[i]; 2768 TCGRegSet set, *pset; 2769 2770 ts = arg_temp(op->args[i]); 2771 pset = la_temp_pref(ts); 2772 set = *pset; 2773 2774 set &= ct->u.regs; 2775 if (ct->ct & TCG_CT_IALIAS) { 2776 set &= op->output_pref[ct->alias_index]; 2777 } 2778 /* If the combination is not possible, restart. */ 2779 if (set == 0) { 2780 set = ct->u.regs; 2781 } 2782 *pset = set; 2783 } 2784 break; 2785 } 2786 break; 2787 } 2788 op->life = arg_life; 2789 } 2790 } 2791 2792 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2793 static bool liveness_pass_2(TCGContext *s) 2794 { 2795 int nb_globals = s->nb_globals; 2796 int nb_temps, i; 2797 bool changes = false; 2798 TCGOp *op, *op_next; 2799 2800 /* Create a temporary for each indirect global. 
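   An indirect global lives in a memory slot that is addressed through
   another pointer temp rather than directly; the direct temp allocated
   here stands in for its register copy, and explicit ld/st ops are
   inserted further down wherever the value must be reloaded or written
   back.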
*/ 2801 for (i = 0; i < nb_globals; ++i) { 2802 TCGTemp *its = &s->temps[i]; 2803 if (its->indirect_reg) { 2804 TCGTemp *dts = tcg_temp_alloc(s); 2805 dts->type = its->type; 2806 dts->base_type = its->base_type; 2807 its->state_ptr = dts; 2808 } else { 2809 its->state_ptr = NULL; 2810 } 2811 /* All globals begin dead. */ 2812 its->state = TS_DEAD; 2813 } 2814 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2815 TCGTemp *its = &s->temps[i]; 2816 its->state_ptr = NULL; 2817 its->state = TS_DEAD; 2818 } 2819 2820 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2821 TCGOpcode opc = op->opc; 2822 const TCGOpDef *def = &tcg_op_defs[opc]; 2823 TCGLifeData arg_life = op->life; 2824 int nb_iargs, nb_oargs, call_flags; 2825 TCGTemp *arg_ts, *dir_ts; 2826 2827 if (opc == INDEX_op_call) { 2828 nb_oargs = TCGOP_CALLO(op); 2829 nb_iargs = TCGOP_CALLI(op); 2830 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2831 } else { 2832 nb_iargs = def->nb_iargs; 2833 nb_oargs = def->nb_oargs; 2834 2835 /* Set flags similar to how calls require. */ 2836 if (def->flags & TCG_OPF_BB_END) { 2837 /* Like writing globals: save_globals */ 2838 call_flags = 0; 2839 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2840 /* Like reading globals: sync_globals */ 2841 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2842 } else { 2843 /* No effect on globals. */ 2844 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2845 TCG_CALL_NO_WRITE_GLOBALS); 2846 } 2847 } 2848 2849 /* Make sure that input arguments are available. */ 2850 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2851 arg_ts = arg_temp(op->args[i]); 2852 if (arg_ts) { 2853 dir_ts = arg_ts->state_ptr; 2854 if (dir_ts && arg_ts->state == TS_DEAD) { 2855 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2856 ? INDEX_op_ld_i32 2857 : INDEX_op_ld_i64); 2858 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2859 2860 lop->args[0] = temp_arg(dir_ts); 2861 lop->args[1] = temp_arg(arg_ts->mem_base); 2862 lop->args[2] = arg_ts->mem_offset; 2863 2864 /* Loaded, but synced with memory. */ 2865 arg_ts->state = TS_MEM; 2866 } 2867 } 2868 } 2869 2870 /* Perform input replacement, and mark inputs that became dead. 2871 No action is required except keeping temp_state up to date 2872 so that we reload when needed. */ 2873 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2874 arg_ts = arg_temp(op->args[i]); 2875 if (arg_ts) { 2876 dir_ts = arg_ts->state_ptr; 2877 if (dir_ts) { 2878 op->args[i] = temp_arg(dir_ts); 2879 changes = true; 2880 if (IS_DEAD_ARG(i)) { 2881 arg_ts->state = TS_DEAD; 2882 } 2883 } 2884 } 2885 } 2886 2887 /* Liveness analysis should ensure that the following are 2888 all correct, for call sites and basic block end points. */ 2889 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2890 /* Nothing to do */ 2891 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2892 for (i = 0; i < nb_globals; ++i) { 2893 /* Liveness should see that globals are synced back, 2894 that is, either TS_DEAD or TS_MEM. */ 2895 arg_ts = &s->temps[i]; 2896 tcg_debug_assert(arg_ts->state_ptr == 0 2897 || arg_ts->state != 0); 2898 } 2899 } else { 2900 for (i = 0; i < nb_globals; ++i) { 2901 /* Liveness should see that globals are saved back, 2902 that is, TS_DEAD, waiting to be reloaded. */ 2903 arg_ts = &s->temps[i]; 2904 tcg_debug_assert(arg_ts->state_ptr == 0 2905 || arg_ts->state == TS_DEAD); 2906 } 2907 } 2908 2909 /* Outputs become available. 
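   Combined with the input handling above, the rewrite turns, e.g.,
       add_i32 g, g, t            (g an indirect global)
   into, schematically,
       ld_i32  d, base, off
       add_i32 d, d, t
       st_i32  d, base, off       (the store only on the last write)
   where d is the direct temp paired with g during setup.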
*/ 2910 for (i = 0; i < nb_oargs; i++) { 2911 arg_ts = arg_temp(op->args[i]); 2912 dir_ts = arg_ts->state_ptr; 2913 if (!dir_ts) { 2914 continue; 2915 } 2916 op->args[i] = temp_arg(dir_ts); 2917 changes = true; 2918 2919 /* The output is now live and modified. */ 2920 arg_ts->state = 0; 2921 2922 /* Sync outputs upon their last write. */ 2923 if (NEED_SYNC_ARG(i)) { 2924 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2925 ? INDEX_op_st_i32 2926 : INDEX_op_st_i64); 2927 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2928 2929 sop->args[0] = temp_arg(dir_ts); 2930 sop->args[1] = temp_arg(arg_ts->mem_base); 2931 sop->args[2] = arg_ts->mem_offset; 2932 2933 arg_ts->state = TS_MEM; 2934 } 2935 /* Drop outputs that are dead. */ 2936 if (IS_DEAD_ARG(i)) { 2937 arg_ts->state = TS_DEAD; 2938 } 2939 } 2940 } 2941 2942 return changes; 2943 } 2944 2945 #ifdef CONFIG_DEBUG_TCG 2946 static void dump_regs(TCGContext *s) 2947 { 2948 TCGTemp *ts; 2949 int i; 2950 char buf[64]; 2951 2952 for(i = 0; i < s->nb_temps; i++) { 2953 ts = &s->temps[i]; 2954 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2955 switch(ts->val_type) { 2956 case TEMP_VAL_REG: 2957 printf("%s", tcg_target_reg_names[ts->reg]); 2958 break; 2959 case TEMP_VAL_MEM: 2960 printf("%d(%s)", (int)ts->mem_offset, 2961 tcg_target_reg_names[ts->mem_base->reg]); 2962 break; 2963 case TEMP_VAL_CONST: 2964 printf("$0x%" TCG_PRIlx, ts->val); 2965 break; 2966 case TEMP_VAL_DEAD: 2967 printf("D"); 2968 break; 2969 default: 2970 printf("???"); 2971 break; 2972 } 2973 printf("\n"); 2974 } 2975 2976 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2977 if (s->reg_to_temp[i] != NULL) { 2978 printf("%s: %s\n", 2979 tcg_target_reg_names[i], 2980 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2981 } 2982 } 2983 } 2984 2985 static void check_regs(TCGContext *s) 2986 { 2987 int reg; 2988 int k; 2989 TCGTemp *ts; 2990 char buf[64]; 2991 2992 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2993 ts = s->reg_to_temp[reg]; 2994 if (ts != NULL) { 2995 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2996 printf("Inconsistency for register %s:\n", 2997 tcg_target_reg_names[reg]); 2998 goto fail; 2999 } 3000 } 3001 } 3002 for (k = 0; k < s->nb_temps; k++) { 3003 ts = &s->temps[k]; 3004 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 3005 && s->reg_to_temp[ts->reg] != ts) { 3006 printf("Inconsistency for temp %s:\n", 3007 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3008 fail: 3009 printf("reg state:\n"); 3010 dump_regs(s); 3011 tcg_abort(); 3012 } 3013 } 3014 } 3015 #endif 3016 3017 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3018 { 3019 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3020 /* Sparc64 stack is accessed with offset of 2047 */ 3021 s->current_frame_offset = (s->current_frame_offset + 3022 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3023 ~(sizeof(tcg_target_long) - 1); 3024 #endif 3025 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3026 s->frame_end) { 3027 tcg_abort(); 3028 } 3029 ts->mem_offset = s->current_frame_offset; 3030 ts->mem_base = s->frame_temp; 3031 ts->mem_allocated = 1; 3032 s->current_frame_offset += sizeof(tcg_target_long); 3033 } 3034 3035 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3036 3037 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3038 mark it free; otherwise mark it dead. 
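   The two differ only in the resulting value state: a freed temp, like
   any local or global temp, falls back to TEMP_VAL_MEM -- by that point
   its canonical memory slot holds the value -- while an ordinary temp
   that dies becomes TEMP_VAL_DEAD.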
*/ 3039 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3040 { 3041 if (ts->fixed_reg) { 3042 return; 3043 } 3044 if (ts->val_type == TEMP_VAL_REG) { 3045 s->reg_to_temp[ts->reg] = NULL; 3046 } 3047 ts->val_type = (free_or_dead < 0 3048 || ts->temp_local 3049 || ts->temp_global 3050 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 3051 } 3052 3053 /* Mark a temporary as dead. */ 3054 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3055 { 3056 temp_free_or_dead(s, ts, 1); 3057 } 3058 3059 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3060 register needs to be allocated to store a constant. If 'free_or_dead' 3061 is non-zero, subsequently release the temporary; if it is positive, the 3062 temp is dead; if it is negative, the temp is free. */ 3063 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3064 TCGRegSet preferred_regs, int free_or_dead) 3065 { 3066 if (ts->fixed_reg) { 3067 return; 3068 } 3069 if (!ts->mem_coherent) { 3070 if (!ts->mem_allocated) { 3071 temp_allocate_frame(s, ts); 3072 } 3073 switch (ts->val_type) { 3074 case TEMP_VAL_CONST: 3075 /* If we're going to free the temp immediately, then we won't 3076 require it later in a register, so attempt to store the 3077 constant to memory directly. */ 3078 if (free_or_dead 3079 && tcg_out_sti(s, ts->type, ts->val, 3080 ts->mem_base->reg, ts->mem_offset)) { 3081 break; 3082 } 3083 temp_load(s, ts, tcg_target_available_regs[ts->type], 3084 allocated_regs, preferred_regs); 3085 /* fallthrough */ 3086 3087 case TEMP_VAL_REG: 3088 tcg_out_st(s, ts->type, ts->reg, 3089 ts->mem_base->reg, ts->mem_offset); 3090 break; 3091 3092 case TEMP_VAL_MEM: 3093 break; 3094 3095 case TEMP_VAL_DEAD: 3096 default: 3097 tcg_abort(); 3098 } 3099 ts->mem_coherent = 1; 3100 } 3101 if (free_or_dead) { 3102 temp_free_or_dead(s, ts, free_or_dead); 3103 } 3104 } 3105 3106 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3107 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3108 { 3109 TCGTemp *ts = s->reg_to_temp[reg]; 3110 if (ts != NULL) { 3111 temp_sync(s, ts, allocated_regs, 0, -1); 3112 } 3113 } 3114 3115 /** 3116 * tcg_reg_alloc: 3117 * @required_regs: Set of registers in which we must allocate. 3118 * @allocated_regs: Set of registers which must be avoided. 3119 * @preferred_regs: Set of registers we should prefer. 3120 * @rev: True if we search the registers in "indirect" order. 3121 * 3122 * The allocated register must be in @required_regs & ~@allocated_regs, 3123 * but if we can put it in @preferred_regs we may save a move later. 3124 */ 3125 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3126 TCGRegSet allocated_regs, 3127 TCGRegSet preferred_regs, bool rev) 3128 { 3129 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3130 TCGRegSet reg_ct[2]; 3131 const int *order; 3132 3133 reg_ct[1] = required_regs & ~allocated_regs; 3134 tcg_debug_assert(reg_ct[1] != 0); 3135 reg_ct[0] = reg_ct[1] & preferred_regs; 3136 3137 /* Skip the preferred_regs option if it cannot be satisfied, 3138 or if the preference made no difference. */ 3139 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3140 3141 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3142 3143 /* Try free registers, preferences first. */ 3144 for (j = f; j < 2; j++) { 3145 TCGRegSet set = reg_ct[j]; 3146 3147 if (tcg_regset_single(set)) { 3148 /* One register in the set.
*/ 3149 TCGReg reg = tcg_regset_first(set); 3150 if (s->reg_to_temp[reg] == NULL) { 3151 return reg; 3152 } 3153 } else { 3154 for (i = 0; i < n; i++) { 3155 TCGReg reg = order[i]; 3156 if (s->reg_to_temp[reg] == NULL && 3157 tcg_regset_test_reg(set, reg)) { 3158 return reg; 3159 } 3160 } 3161 } 3162 } 3163 3164 /* We must spill something. */ 3165 for (j = f; j < 2; j++) { 3166 TCGRegSet set = reg_ct[j]; 3167 3168 if (tcg_regset_single(set)) { 3169 /* One register in the set. */ 3170 TCGReg reg = tcg_regset_first(set); 3171 tcg_reg_free(s, reg, allocated_regs); 3172 return reg; 3173 } else { 3174 for (i = 0; i < n; i++) { 3175 TCGReg reg = order[i]; 3176 if (tcg_regset_test_reg(set, reg)) { 3177 tcg_reg_free(s, reg, allocated_regs); 3178 return reg; 3179 } 3180 } 3181 } 3182 } 3183 3184 tcg_abort(); 3185 } 3186 3187 /* Make sure the temporary is in a register. If needed, allocate the register 3188 from DESIRED while avoiding ALLOCATED. */ 3189 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3190 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3191 { 3192 TCGReg reg; 3193 3194 switch (ts->val_type) { 3195 case TEMP_VAL_REG: 3196 return; 3197 case TEMP_VAL_CONST: 3198 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3199 preferred_regs, ts->indirect_base); 3200 tcg_out_movi(s, ts->type, reg, ts->val); 3201 ts->mem_coherent = 0; 3202 break; 3203 case TEMP_VAL_MEM: 3204 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3205 preferred_regs, ts->indirect_base); 3206 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3207 ts->mem_coherent = 1; 3208 break; 3209 case TEMP_VAL_DEAD: 3210 default: 3211 tcg_abort(); 3212 } 3213 ts->reg = reg; 3214 ts->val_type = TEMP_VAL_REG; 3215 s->reg_to_temp[reg] = ts; 3216 } 3217 3218 /* Save a temporary to memory. 'allocated_regs' is used in case a 3219 temporary register needs to be allocated to store a constant. */ 3220 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3221 { 3222 /* The liveness analysis already ensures that globals are back 3223 in memory. Keep a tcg_debug_assert for safety. */ 3224 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 3225 } 3226 3227 /* save globals to their canonical location and assume they can be 3228 modified by the following code. 'allocated_regs' is used in case a 3229 temporary register needs to be allocated to store a constant. */ 3230 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3231 { 3232 int i, n; 3233 3234 for (i = 0, n = s->nb_globals; i < n; i++) { 3235 temp_save(s, &s->temps[i], allocated_regs); 3236 } 3237 } 3238 3239 /* sync globals to their canonical location and assume they can be 3240 read by the following code. 'allocated_regs' is used in case a 3241 temporary register needs to be allocated to store a constant. */ 3242 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3243 { 3244 int i, n; 3245 3246 for (i = 0, n = s->nb_globals; i < n; i++) { 3247 TCGTemp *ts = &s->temps[i]; 3248 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3249 || ts->fixed_reg 3250 || ts->mem_coherent); 3251 } 3252 } 3253 3254 /* at the end of a basic block, we assume all temporaries are dead and 3255 all globals are stored at their canonical location.
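   A block boundary may be reached from more than one predecessor, so no
   assumption about register contents can survive across it; memory is
   the only safe home for a value that outlives the block.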
*/ 3256 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3257 { 3258 int i; 3259 3260 for (i = s->nb_globals; i < s->nb_temps; i++) { 3261 TCGTemp *ts = &s->temps[i]; 3262 if (ts->temp_local) { 3263 temp_save(s, ts, allocated_regs); 3264 } else { 3265 /* The liveness analysis already ensures that temps are dead. 3266 Keep a tcg_debug_assert for safety. */ 3267 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3268 } 3269 } 3270 3271 save_globals(s, allocated_regs); 3272 } 3273 3274 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3275 tcg_target_ulong val, TCGLifeData arg_life, 3276 TCGRegSet preferred_regs) 3277 { 3278 if (ots->fixed_reg) { 3279 /* For fixed registers, we do not do any constant propagation. */ 3280 tcg_out_movi(s, ots->type, ots->reg, val); 3281 return; 3282 } 3283 3284 /* The movi is not explicitly generated here. */ 3285 if (ots->val_type == TEMP_VAL_REG) { 3286 s->reg_to_temp[ots->reg] = NULL; 3287 } 3288 ots->val_type = TEMP_VAL_CONST; 3289 ots->val = val; 3290 ots->mem_coherent = 0; 3291 if (NEED_SYNC_ARG(0)) { 3292 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3293 } else if (IS_DEAD_ARG(0)) { 3294 temp_dead(s, ots); 3295 } 3296 } 3297 3298 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 3299 { 3300 TCGTemp *ots = arg_temp(op->args[0]); 3301 tcg_target_ulong val = op->args[1]; 3302 3303 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); 3304 } 3305 3306 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3307 { 3308 const TCGLifeData arg_life = op->life; 3309 TCGRegSet allocated_regs, preferred_regs; 3310 TCGTemp *ts, *ots; 3311 TCGType otype, itype; 3312 3313 allocated_regs = s->reserved_regs; 3314 preferred_regs = op->output_pref[0]; 3315 ots = arg_temp(op->args[0]); 3316 ts = arg_temp(op->args[1]); 3317 3318 /* Note that otype != itype for no-op truncation. */ 3319 otype = ots->type; 3320 itype = ts->type; 3321 3322 if (ts->val_type == TEMP_VAL_CONST) { 3323 /* propagate constant or generate sti */ 3324 tcg_target_ulong val = ts->val; 3325 if (IS_DEAD_ARG(1)) { 3326 temp_dead(s, ts); 3327 } 3328 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3329 return; 3330 } 3331 3332 /* If the source value is in memory we're going to be forced 3333 to have it in a register in order to perform the copy. Copy 3334 the SOURCE value into its own register first; that way we 3335 don't have to reload SOURCE the next time it is used. */ 3336 if (ts->val_type == TEMP_VAL_MEM) { 3337 temp_load(s, ts, tcg_target_available_regs[itype], 3338 allocated_regs, preferred_regs); 3339 } 3340 3341 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3342 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 3343 /* mov to a non-saved dead register makes no sense (even with 3344 liveness analysis disabled). */ 3345 tcg_debug_assert(NEED_SYNC_ARG(0)); 3346 if (!ots->mem_allocated) { 3347 temp_allocate_frame(s, ots); 3348 } 3349 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3350 if (IS_DEAD_ARG(1)) { 3351 temp_dead(s, ts); 3352 } 3353 temp_dead(s, ots); 3354 } else { 3355 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 3356 /* the mov can be suppressed */ 3357 if (ots->val_type == TEMP_VAL_REG) { 3358 s->reg_to_temp[ots->reg] = NULL; 3359 } 3360 ots->reg = ts->reg; 3361 temp_dead(s, ts); 3362 } else { 3363 if (ots->val_type != TEMP_VAL_REG) { 3364 /* When allocating a new register, make sure to not spill the 3365 input one.
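   Adding ts->reg to allocated_regs below keeps tcg_reg_alloc() from
   choosing the source register as its spill victim, which would evict
   the very value we are about to copy.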
*/ 3366 tcg_regset_set_reg(allocated_regs, ts->reg); 3367 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3368 allocated_regs, preferred_regs, 3369 ots->indirect_base); 3370 } 3371 tcg_out_mov(s, otype, ots->reg, ts->reg); 3372 } 3373 ots->val_type = TEMP_VAL_REG; 3374 ots->mem_coherent = 0; 3375 s->reg_to_temp[ots->reg] = ots; 3376 if (NEED_SYNC_ARG(0)) { 3377 temp_sync(s, ots, allocated_regs, 0, 0); 3378 } 3379 } 3380 } 3381 3382 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3383 { 3384 const TCGLifeData arg_life = op->life; 3385 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3386 TCGRegSet i_allocated_regs; 3387 TCGRegSet o_allocated_regs; 3388 int i, k, nb_iargs, nb_oargs; 3389 TCGReg reg; 3390 TCGArg arg; 3391 const TCGArgConstraint *arg_ct; 3392 TCGTemp *ts; 3393 TCGArg new_args[TCG_MAX_OP_ARGS]; 3394 int const_args[TCG_MAX_OP_ARGS]; 3395 3396 nb_oargs = def->nb_oargs; 3397 nb_iargs = def->nb_iargs; 3398 3399 /* copy constants */ 3400 memcpy(new_args + nb_oargs + nb_iargs, 3401 op->args + nb_oargs + nb_iargs, 3402 sizeof(TCGArg) * def->nb_cargs); 3403 3404 i_allocated_regs = s->reserved_regs; 3405 o_allocated_regs = s->reserved_regs; 3406 3407 /* satisfy input constraints */ 3408 for (k = 0; k < nb_iargs; k++) { 3409 TCGRegSet i_preferred_regs, o_preferred_regs; 3410 3411 i = def->sorted_args[nb_oargs + k]; 3412 arg = op->args[i]; 3413 arg_ct = &def->args_ct[i]; 3414 ts = arg_temp(arg); 3415 3416 if (ts->val_type == TEMP_VAL_CONST 3417 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 3418 /* constant is OK for instruction */ 3419 const_args[i] = 1; 3420 new_args[i] = ts->val; 3421 continue; 3422 } 3423 3424 i_preferred_regs = o_preferred_regs = 0; 3425 if (arg_ct->ct & TCG_CT_IALIAS) { 3426 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3427 if (ts->fixed_reg) { 3428 /* if fixed register, we must allocate a new register 3429 if the alias is not the same register */ 3430 if (arg != op->args[arg_ct->alias_index]) { 3431 goto allocate_in_reg; 3432 } 3433 } else { 3434 /* if the input is aliased to an output and if it is 3435 not dead after the instruction, we must allocate 3436 a new register and move it */ 3437 if (!IS_DEAD_ARG(i)) { 3438 goto allocate_in_reg; 3439 } 3440 3441 /* check if the current register has already been allocated 3442 for another input aliased to an output */ 3443 if (ts->val_type == TEMP_VAL_REG) { 3444 int k2, i2; 3445 reg = ts->reg; 3446 for (k2 = 0 ; k2 < k ; k2++) { 3447 i2 = def->sorted_args[nb_oargs + k2]; 3448 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 3449 reg == new_args[i2]) { 3450 goto allocate_in_reg; 3451 } 3452 } 3453 } 3454 i_preferred_regs = o_preferred_regs; 3455 } 3456 } 3457 3458 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); 3459 reg = ts->reg; 3460 3461 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3462 /* nothing to do : the constraint is satisfied */ 3463 } else { 3464 allocate_in_reg: 3465 /* allocate a new register matching the constraint 3466 and move the temporary register into it */ 3467 temp_load(s, ts, tcg_target_available_regs[ts->type], 3468 i_allocated_regs, 0); 3469 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 3470 o_preferred_regs, ts->indirect_base); 3471 tcg_out_mov(s, ts->type, reg, ts->reg); 3472 } 3473 new_args[i] = reg; 3474 const_args[i] = 0; 3475 tcg_regset_set_reg(i_allocated_regs, reg); 3476 } 3477 3478 /* mark dead temporaries and free the associated registers */ 3479 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3480 if 
(IS_DEAD_ARG(i)) { 3481 temp_dead(s, arg_temp(op->args[i])); 3482 } 3483 } 3484 3485 if (def->flags & TCG_OPF_BB_END) { 3486 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3487 } else { 3488 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3489 /* XXX: permit generic clobber register list ? */ 3490 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3491 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3492 tcg_reg_free(s, i, i_allocated_regs); 3493 } 3494 } 3495 } 3496 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3497 /* sync globals if the op has side effects and might trigger 3498 an exception. */ 3499 sync_globals(s, i_allocated_regs); 3500 } 3501 3502 /* satisfy the output constraints */ 3503 for(k = 0; k < nb_oargs; k++) { 3504 i = def->sorted_args[k]; 3505 arg = op->args[i]; 3506 arg_ct = &def->args_ct[i]; 3507 ts = arg_temp(arg); 3508 if ((arg_ct->ct & TCG_CT_ALIAS) 3509 && !const_args[arg_ct->alias_index]) { 3510 reg = new_args[arg_ct->alias_index]; 3511 } else if (arg_ct->ct & TCG_CT_NEWREG) { 3512 reg = tcg_reg_alloc(s, arg_ct->u.regs, 3513 i_allocated_regs | o_allocated_regs, 3514 op->output_pref[k], ts->indirect_base); 3515 } else { 3516 /* if fixed register, we try to use it */ 3517 reg = ts->reg; 3518 if (ts->fixed_reg && 3519 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3520 goto oarg_end; 3521 } 3522 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 3523 op->output_pref[k], ts->indirect_base); 3524 } 3525 tcg_regset_set_reg(o_allocated_regs, reg); 3526 /* if a fixed register is used, then a move will be done afterwards */ 3527 if (!ts->fixed_reg) { 3528 if (ts->val_type == TEMP_VAL_REG) { 3529 s->reg_to_temp[ts->reg] = NULL; 3530 } 3531 ts->val_type = TEMP_VAL_REG; 3532 ts->reg = reg; 3533 /* temp value is modified, so the value kept in memory is 3534 potentially not the same */ 3535 ts->mem_coherent = 0; 3536 s->reg_to_temp[reg] = ts; 3537 } 3538 oarg_end: 3539 new_args[i] = reg; 3540 } 3541 } 3542 3543 /* emit instruction */ 3544 if (def->flags & TCG_OPF_VECTOR) { 3545 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3546 new_args, const_args); 3547 } else { 3548 tcg_out_op(s, op->opc, new_args, const_args); 3549 } 3550 3551 /* move the outputs in the correct register if needed */ 3552 for(i = 0; i < nb_oargs; i++) { 3553 ts = arg_temp(op->args[i]); 3554 reg = new_args[i]; 3555 if (ts->fixed_reg && ts->reg != reg) { 3556 tcg_out_mov(s, ts->type, ts->reg, reg); 3557 } 3558 if (NEED_SYNC_ARG(i)) { 3559 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3560 } else if (IS_DEAD_ARG(i)) { 3561 temp_dead(s, ts); 3562 } 3563 } 3564 } 3565 3566 #ifdef TCG_TARGET_STACK_GROWSUP 3567 #define STACK_DIR(x) (-(x)) 3568 #else 3569 #define STACK_DIR(x) (x) 3570 #endif 3571 3572 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3573 { 3574 const int nb_oargs = TCGOP_CALLO(op); 3575 const int nb_iargs = TCGOP_CALLI(op); 3576 const TCGLifeData arg_life = op->life; 3577 int flags, nb_regs, i; 3578 TCGReg reg; 3579 TCGArg arg; 3580 TCGTemp *ts; 3581 intptr_t stack_offset; 3582 size_t call_stack_size; 3583 tcg_insn_unit *func_addr; 3584 int allocate_args; 3585 TCGRegSet allocated_regs; 3586 3587 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 3588 flags = op->args[nb_oargs + nb_iargs + 1]; 3589 3590 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3591 if (nb_regs > nb_iargs) { 3592 nb_regs = nb_iargs; 3593 } 3594 3595 /* assign stack slots first */ 3596 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3597 call_stack_size = (call_stack_size 
+ TCG_TARGET_STACK_ALIGN - 1) & 3598 ~(TCG_TARGET_STACK_ALIGN - 1); 3599 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3600 if (allocate_args) { 3601 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3602 preallocate call stack */ 3603 tcg_abort(); 3604 } 3605 3606 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3607 for (i = nb_regs; i < nb_iargs; i++) { 3608 arg = op->args[nb_oargs + i]; 3609 #ifdef TCG_TARGET_STACK_GROWSUP 3610 stack_offset -= sizeof(tcg_target_long); 3611 #endif 3612 if (arg != TCG_CALL_DUMMY_ARG) { 3613 ts = arg_temp(arg); 3614 temp_load(s, ts, tcg_target_available_regs[ts->type], 3615 s->reserved_regs, 0); 3616 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3617 } 3618 #ifndef TCG_TARGET_STACK_GROWSUP 3619 stack_offset += sizeof(tcg_target_long); 3620 #endif 3621 } 3622 3623 /* assign input registers */ 3624 allocated_regs = s->reserved_regs; 3625 for (i = 0; i < nb_regs; i++) { 3626 arg = op->args[nb_oargs + i]; 3627 if (arg != TCG_CALL_DUMMY_ARG) { 3628 ts = arg_temp(arg); 3629 reg = tcg_target_call_iarg_regs[i]; 3630 3631 if (ts->val_type == TEMP_VAL_REG) { 3632 if (ts->reg != reg) { 3633 tcg_reg_free(s, reg, allocated_regs); 3634 tcg_out_mov(s, ts->type, reg, ts->reg); 3635 } 3636 } else { 3637 TCGRegSet arg_set = 0; 3638 3639 tcg_reg_free(s, reg, allocated_regs); 3640 tcg_regset_set_reg(arg_set, reg); 3641 temp_load(s, ts, arg_set, allocated_regs, 0); 3642 } 3643 3644 tcg_regset_set_reg(allocated_regs, reg); 3645 } 3646 } 3647 3648 /* mark dead temporaries and free the associated registers */ 3649 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3650 if (IS_DEAD_ARG(i)) { 3651 temp_dead(s, arg_temp(op->args[i])); 3652 } 3653 } 3654 3655 /* clobber call registers */ 3656 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3657 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3658 tcg_reg_free(s, i, allocated_regs); 3659 } 3660 } 3661 3662 /* Save globals if they might be written by the helper, sync them if 3663 they might be read. 
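   That is: TCG_CALL_NO_READ_GLOBALS means the helper touches no globals
   at all, so nothing need be done; TCG_CALL_NO_WRITE_GLOBALS means it
   may only read them, so register copies stay valid and a sync suffices;
   otherwise the in-memory copy is the only one guaranteed to survive the
   call.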
*/ 3664 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3665 /* Nothing to do */ 3666 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3667 sync_globals(s, allocated_regs); 3668 } else { 3669 save_globals(s, allocated_regs); 3670 } 3671 3672 tcg_out_call(s, func_addr); 3673 3674 /* assign output registers and emit moves if needed */ 3675 for(i = 0; i < nb_oargs; i++) { 3676 arg = op->args[i]; 3677 ts = arg_temp(arg); 3678 reg = tcg_target_call_oarg_regs[i]; 3679 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3680 3681 if (ts->fixed_reg) { 3682 if (ts->reg != reg) { 3683 tcg_out_mov(s, ts->type, ts->reg, reg); 3684 } 3685 } else { 3686 if (ts->val_type == TEMP_VAL_REG) { 3687 s->reg_to_temp[ts->reg] = NULL; 3688 } 3689 ts->val_type = TEMP_VAL_REG; 3690 ts->reg = reg; 3691 ts->mem_coherent = 0; 3692 s->reg_to_temp[reg] = ts; 3693 if (NEED_SYNC_ARG(i)) { 3694 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 3695 } else if (IS_DEAD_ARG(i)) { 3696 temp_dead(s, ts); 3697 } 3698 } 3699 } 3700 } 3701 3702 #ifdef CONFIG_PROFILER 3703 3704 /* avoid copy/paste errors */ 3705 #define PROF_ADD(to, from, field) \ 3706 do { \ 3707 (to)->field += atomic_read(&((from)->field)); \ 3708 } while (0) 3709 3710 #define PROF_MAX(to, from, field) \ 3711 do { \ 3712 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3713 if (val__ > (to)->field) { \ 3714 (to)->field = val__; \ 3715 } \ 3716 } while (0) 3717 3718 /* Pass in a zero'ed @prof */ 3719 static inline 3720 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3721 { 3722 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3723 unsigned int i; 3724 3725 for (i = 0; i < n_ctxs; i++) { 3726 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3727 const TCGProfile *orig = &s->prof; 3728 3729 if (counters) { 3730 PROF_ADD(prof, orig, cpu_exec_time); 3731 PROF_ADD(prof, orig, tb_count1); 3732 PROF_ADD(prof, orig, tb_count); 3733 PROF_ADD(prof, orig, op_count); 3734 PROF_MAX(prof, orig, op_count_max); 3735 PROF_ADD(prof, orig, temp_count); 3736 PROF_MAX(prof, orig, temp_count_max); 3737 PROF_ADD(prof, orig, del_op_count); 3738 PROF_ADD(prof, orig, code_in_len); 3739 PROF_ADD(prof, orig, code_out_len); 3740 PROF_ADD(prof, orig, search_out_len); 3741 PROF_ADD(prof, orig, interm_time); 3742 PROF_ADD(prof, orig, code_time); 3743 PROF_ADD(prof, orig, la_time); 3744 PROF_ADD(prof, orig, opt_time); 3745 PROF_ADD(prof, orig, restore_count); 3746 PROF_ADD(prof, orig, restore_time); 3747 } 3748 if (table) { 3749 int i; 3750 3751 for (i = 0; i < NB_OPS; i++) { 3752 PROF_ADD(prof, orig, table_op_count[i]); 3753 } 3754 } 3755 } 3756 } 3757 3758 #undef PROF_ADD 3759 #undef PROF_MAX 3760 3761 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3762 { 3763 tcg_profile_snapshot(prof, true, false); 3764 } 3765 3766 static void tcg_profile_snapshot_table(TCGProfile *prof) 3767 { 3768 tcg_profile_snapshot(prof, false, true); 3769 } 3770 3771 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3772 { 3773 TCGProfile prof = {}; 3774 int i; 3775 3776 tcg_profile_snapshot_table(&prof); 3777 for (i = 0; i < NB_OPS; i++) { 3778 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 3779 prof.table_op_count[i]); 3780 } 3781 } 3782 3783 int64_t tcg_cpu_exec_time(void) 3784 { 3785 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3786 unsigned int i; 3787 int64_t ret = 0; 3788 3789 for (i = 0; i < n_ctxs; i++) { 3790 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 3791 const TCGProfile *prof = &s->prof; 3792 3793 ret += atomic_read(&prof->cpu_exec_time); 3794 } 
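    /* Each counter is read with atomic_read(), but the sum across
       contexts is only a best-effort snapshot, not a synchronized
       total. */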
3795 return ret; 3796 } 3797 #else 3798 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3799 { 3800 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3801 } 3802 3803 int64_t tcg_cpu_exec_time(void) 3804 { 3805 error_report("%s: TCG profiler not compiled", __func__); 3806 exit(EXIT_FAILURE); 3807 } 3808 #endif 3809 3810 3811 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3812 { 3813 #ifdef CONFIG_PROFILER 3814 TCGProfile *prof = &s->prof; 3815 #endif 3816 int i, num_insns; 3817 TCGOp *op; 3818 3819 #ifdef CONFIG_PROFILER 3820 { 3821 int n = 0; 3822 3823 QTAILQ_FOREACH(op, &s->ops, link) { 3824 n++; 3825 } 3826 atomic_set(&prof->op_count, prof->op_count + n); 3827 if (n > prof->op_count_max) { 3828 atomic_set(&prof->op_count_max, n); 3829 } 3830 3831 n = s->nb_temps; 3832 atomic_set(&prof->temp_count, prof->temp_count + n); 3833 if (n > prof->temp_count_max) { 3834 atomic_set(&prof->temp_count_max, n); 3835 } 3836 } 3837 #endif 3838 3839 #ifdef DEBUG_DISAS 3840 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3841 && qemu_log_in_addr_range(tb->pc))) { 3842 qemu_log_lock(); 3843 qemu_log("OP:\n"); 3844 tcg_dump_ops(s, false); 3845 qemu_log("\n"); 3846 qemu_log_unlock(); 3847 } 3848 #endif 3849 3850 #ifdef CONFIG_DEBUG_TCG 3851 /* Ensure all labels referenced have been emitted. */ 3852 { 3853 TCGLabel *l; 3854 bool error = false; 3855 3856 QSIMPLEQ_FOREACH(l, &s->labels, next) { 3857 if (unlikely(!l->present) && l->refs) { 3858 qemu_log_mask(CPU_LOG_TB_OP, 3859 "$L%d referenced but not present.\n", l->id); 3860 error = true; 3861 } 3862 } 3863 assert(!error); 3864 } 3865 #endif 3866 3867 #ifdef CONFIG_PROFILER 3868 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3869 #endif 3870 3871 #ifdef USE_TCG_OPTIMIZATIONS 3872 tcg_optimize(s); 3873 #endif 3874 3875 #ifdef CONFIG_PROFILER 3876 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3877 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3878 #endif 3879 3880 reachable_code_pass(s); 3881 liveness_pass_1(s); 3882 3883 if (s->nb_indirects > 0) { 3884 #ifdef DEBUG_DISAS 3885 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3886 && qemu_log_in_addr_range(tb->pc))) { 3887 qemu_log_lock(); 3888 qemu_log("OP before indirect lowering:\n"); 3889 tcg_dump_ops(s, false); 3890 qemu_log("\n"); 3891 qemu_log_unlock(); 3892 } 3893 #endif 3894 /* Replace indirect temps with direct temps. */ 3895 if (liveness_pass_2(s)) { 3896 /* If changes were made, re-run liveness. 
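           Pass 2 rewrote arguments and inserted explicit ld/st ops, so
           the life data attached to each op by the first pass no longer
           matches the op stream; running the analysis again recomputes
           the dead/sync flags for the new sequence.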
*/ 3897 liveness_pass_1(s); 3898 } 3899 } 3900 3901 #ifdef CONFIG_PROFILER 3902 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3903 #endif 3904 3905 #ifdef DEBUG_DISAS 3906 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3907 && qemu_log_in_addr_range(tb->pc))) { 3908 qemu_log_lock(); 3909 qemu_log("OP after optimization and liveness analysis:\n"); 3910 tcg_dump_ops(s, true); 3911 qemu_log("\n"); 3912 qemu_log_unlock(); 3913 } 3914 #endif 3915 3916 tcg_reg_alloc_start(s); 3917 3918 s->code_buf = tb->tc.ptr; 3919 s->code_ptr = tb->tc.ptr; 3920 3921 #ifdef TCG_TARGET_NEED_LDST_LABELS 3922 QSIMPLEQ_INIT(&s->ldst_labels); 3923 #endif 3924 #ifdef TCG_TARGET_NEED_POOL_LABELS 3925 s->pool_labels = NULL; 3926 #endif 3927 3928 num_insns = -1; 3929 QTAILQ_FOREACH(op, &s->ops, link) { 3930 TCGOpcode opc = op->opc; 3931 3932 #ifdef CONFIG_PROFILER 3933 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3934 #endif 3935 3936 switch (opc) { 3937 case INDEX_op_mov_i32: 3938 case INDEX_op_mov_i64: 3939 case INDEX_op_mov_vec: 3940 tcg_reg_alloc_mov(s, op); 3941 break; 3942 case INDEX_op_movi_i32: 3943 case INDEX_op_movi_i64: 3944 case INDEX_op_dupi_vec: 3945 tcg_reg_alloc_movi(s, op); 3946 break; 3947 case INDEX_op_insn_start: 3948 if (num_insns >= 0) { 3949 size_t off = tcg_current_code_size(s); 3950 s->gen_insn_end_off[num_insns] = off; 3951 /* Assert that we do not overflow our stored offset. */ 3952 assert(s->gen_insn_end_off[num_insns] == off); 3953 } 3954 num_insns++; 3955 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3956 target_ulong a; 3957 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3958 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3959 #else 3960 a = op->args[i]; 3961 #endif 3962 s->gen_insn_data[num_insns][i] = a; 3963 } 3964 break; 3965 case INDEX_op_discard: 3966 temp_dead(s, arg_temp(op->args[0])); 3967 break; 3968 case INDEX_op_set_label: 3969 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3970 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3971 break; 3972 case INDEX_op_call: 3973 tcg_reg_alloc_call(s, op); 3974 break; 3975 default: 3976 /* Sanity check that we've not introduced any unhandled opcodes. */ 3977 tcg_debug_assert(tcg_op_supported(opc)); 3978 /* Note: in order to speed up the code, it would be much 3979 faster to have specialized register allocator functions for 3980 some common argument patterns */ 3981 tcg_reg_alloc_op(s, op); 3982 break; 3983 } 3984 #ifdef CONFIG_DEBUG_TCG 3985 check_regs(s); 3986 #endif 3987 /* Test for (pending) buffer overflow. The assumption is that any 3988 one operation beginning below the high water mark cannot overrun 3989 the buffer completely. Thus we can test for overflow after 3990 generating code without having to check during generation. 
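       (This relies on the slack kept between the high water mark and the
       true end of the buffer being at least as large as the largest
       amount of code a single operation can emit.)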
#ifdef CONFIG_PROFILER
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count
                ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate the value to put into the
       ELF image; defining it at all also signals support for the
       feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.  */
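/*
 * A sketch of what a backend's steps (2) and (3) might look like, using
 * the DebugFrameHeader layout defined earlier in this file.  The CFA
 * program bytes are left zero-filled (DW_CFA_nop padding); a real
 * backend must encode its actual prologue, and the alignment factors
 * and return column below are host-specific placeholders.  The names
 * are hypothetical -- the real entry point each backend defines is
 * tcg_register_jit.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_ops[8];    /* placeholder DW_CFA_* program */
} ExampleDebugFrame;

static const ExampleDebugFrame example_debug_frame
    __attribute__((unused)) = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4,  /* length after .len member */
    .h.cie.id = -1,                          /* marks this as a CIE */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,                /* sleb128 -8; host-specific */
    .h.cie.return_column = 16,               /* host-specific */
    /* Total FDE size does not include the .len member itself.  */
    .h.fde.len = sizeof(ExampleDebugFrame)
               - offsetof(ExampleDebugFrame, h.fde.cie_offset),
};

static void example_register_jit(void *buf, size_t buf_size)
    __attribute__((unused));
static void example_register_jit(void *buf, size_t buf_size)
{
    /* Step (3): hand buffer and unwind info to the common code;
       tcg_register_jit_int patches func_start/func_len itself.  */
    tcg_register_jit_int(buf, buf_size,
                         &example_debug_frame, sizeof(example_debug_frame));
}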
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
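/*
 * TCG registers a single entry for the life of the process (see
 * tcg_register_jit_int below) and never unregisters it.  For
 * illustration, this is the other half of the GDB JIT protocol as
 * documented by GDB: unlink the entry, point relevant_entry at it, and
 * fire the hook with JIT_UNREGISTER_FN.  Hypothetical helper, unused
 * by TCG.
 */
static void example_unregister_jit_entry(struct jit_code_entry *e)
    __attribute__((unused));
static void example_unregister_jit_entry(struct jit_code_entry *e)
{
    /* Unlink the entry from the descriptor's doubly-linked list.  */
    if (e->prev_entry) {
        e->prev_entry->next_entry = e->next_entry;
    } else {
        __jit_debug_descriptor.first_entry = e->next_entry;
    }
    if (e->next_entry) {
        e->next_entry->prev_entry = e->prev_entry;
    }

    /* Tell the debugger which entry went away.  */
    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
    __jit_debug_descriptor.relevant_entry = e;
    __jit_debug_register_code();
}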
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
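/*
 * A worked example of find_string() against a string table shaped like
 * the one in tcg_register_jit_int() below: names are concatenated
 * NUL-terminated strings, and the returned offset is what goes into
 * the sh_name/st_name fields.  Hypothetical check, unused by TCG.
 */
static void example_find_string_check(void) __attribute__((unused));
static void example_find_string_check(void)
{
    static const char tab[] = "\0" ".text\0" ".debug_info";

    /* ".text" begins at offset 1; after its NUL at offset 6,
       ".debug_info" begins at offset 7.  */
    assert(find_string(tab, ".text") == 1);
    assert(find_string(tab, ".debug_info") == 7);
}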
static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* The dump is best effort only; ignore write errors.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
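
#ifdef ELF_HOST_MACHINE
/*
 * For illustration only: how a consumer on the other side of the GDB
 * JIT interface locates the in-memory symbol files.  It simply walks
 * the descriptor's linked list; each entry's symfile_addr and
 * symfile_size delimit one ELF image like the one built above.
 * Hypothetical helper, unused by TCG.
 */
static size_t example_count_jit_symfiles(void) __attribute__((unused));
static size_t example_count_jit_symfiles(void)
{
    const struct jit_code_entry *e;
    size_t n = 0;

    for (e = __jit_debug_descriptor.first_entry; e; e = e->next_entry) {
        n++;
    }
    return n;
}
#endif /* ELF_HOST_MACHINE */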