/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

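/*
 * Note: the DebugFrame* structures above describe a minimal DWARF
 * .debug_frame (one CIE plus one FDE) for the generated code.
 * tcg_register_jit_int() wraps them in a small in-memory ELF image that
 * is handed to GDB through its JIT registration interface; defining
 * DEBUG_JIT (see above) additionally dumps that ELF image to a file for
 * offline inspection.
 */
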
/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size;   /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
        tcg_debug_assert(ok);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
        tcg_debug_assert(ok);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
#endif

    return l;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the key's .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INIT(&s->labels);
#endif
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

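/*
 * Typical use (illustration only): a front end allocates a scratch vector
 * temporary, operates on it, and releases it again, e.g.
 *
 *     TCGv_vec t = tcg_temp_new_vec(TCG_TYPE_V128);
 *     ...
 *     tcg_temp_free_vec(t);
 */
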
/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

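/*
 * Note on the helpers above: tcg_const_*() return a fresh temporary that
 * already holds the given constant; like any other temporary obtained from
 * tcg_temp_new_*(), it is released with the matching tcg_temp_free_*()
 * wrapper, which funnels into tcg_temp_free_internal() above.
 */
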
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;
    TCGTemp *ts;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
    }
    for (n = s->nb_temps; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    if (ts->temp_global) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

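/*
 * The strings built above are what tcg_dump_ops() (i.e. the "-d op" log
 * output) shows for each operand: globals print under their registered
 * name (e.g. "env"), while other temporaries print as tmpN or locN.
 */
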
/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 1979 8 << TCGOP_VECE(op)); 1980 } 1981 1982 k = 0; 1983 for (i = 0; i < nb_oargs; i++) { 1984 if (k != 0) { 1985 col += qemu_log(","); 1986 } 1987 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1988 op->args[k++])); 1989 } 1990 for (i = 0; i < nb_iargs; i++) { 1991 if (k != 0) { 1992 col += qemu_log(","); 1993 } 1994 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1995 op->args[k++])); 1996 } 1997 switch (c) { 1998 case INDEX_op_brcond_i32: 1999 case INDEX_op_setcond_i32: 2000 case INDEX_op_movcond_i32: 2001 case INDEX_op_brcond2_i32: 2002 case INDEX_op_setcond2_i32: 2003 case INDEX_op_brcond_i64: 2004 case INDEX_op_setcond_i64: 2005 case INDEX_op_movcond_i64: 2006 case INDEX_op_cmp_vec: 2007 if (op->args[k] < ARRAY_SIZE(cond_name) 2008 && cond_name[op->args[k]]) { 2009 col += qemu_log(",%s", cond_name[op->args[k++]]); 2010 } else { 2011 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2012 } 2013 i = 1; 2014 break; 2015 case INDEX_op_qemu_ld_i32: 2016 case INDEX_op_qemu_st_i32: 2017 case INDEX_op_qemu_ld_i64: 2018 case INDEX_op_qemu_st_i64: 2019 { 2020 TCGMemOpIdx oi = op->args[k++]; 2021 TCGMemOp op = get_memop(oi); 2022 unsigned ix = get_mmuidx(oi); 2023 2024 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2025 col += qemu_log(",$0x%x,%u", op, ix); 2026 } else { 2027 const char *s_al, *s_op; 2028 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2029 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2030 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2031 } 2032 i = 1; 2033 } 2034 break; 2035 default: 2036 i = 0; 2037 break; 2038 } 2039 switch (c) { 2040 case INDEX_op_set_label: 2041 case INDEX_op_br: 2042 case INDEX_op_brcond_i32: 2043 case INDEX_op_brcond_i64: 2044 case INDEX_op_brcond2_i32: 2045 col += qemu_log("%s$L%d", k ? "," : "", 2046 arg_label(op->args[k])->id); 2047 i++, k++; 2048 break; 2049 default: 2050 break; 2051 } 2052 for (; i < nb_cargs; i++, k++) { 2053 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2054 } 2055 } 2056 2057 if (have_prefs || op->life) { 2058 for (; col < 40; ++col) { 2059 putc(' ', qemu_logfile); 2060 } 2061 } 2062 2063 if (op->life) { 2064 unsigned life = op->life; 2065 2066 if (life & (SYNC_ARG * 3)) { 2067 qemu_log(" sync:"); 2068 for (i = 0; i < 2; ++i) { 2069 if (life & (SYNC_ARG << i)) { 2070 qemu_log(" %d", i); 2071 } 2072 } 2073 } 2074 life /= DEAD_ARG; 2075 if (life) { 2076 qemu_log(" dead:"); 2077 for (i = 0; life; ++i, life >>= 1) { 2078 if (life & 1) { 2079 qemu_log(" %d", i); 2080 } 2081 } 2082 } 2083 } 2084 2085 if (have_prefs) { 2086 for (i = 0; i < nb_oargs; ++i) { 2087 TCGRegSet set = op->output_pref[i]; 2088 2089 if (i == 0) { 2090 qemu_log(" pref="); 2091 } else { 2092 qemu_log(","); 2093 } 2094 if (set == 0) { 2095 qemu_log("none"); 2096 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2097 qemu_log("all"); 2098 #ifdef CONFIG_DEBUG_TCG 2099 } else if (tcg_regset_single(set)) { 2100 TCGReg reg = tcg_regset_first(set); 2101 qemu_log("%s", tcg_target_reg_names[reg]); 2102 #endif 2103 } else if (TCG_TARGET_NB_REGS <= 32) { 2104 qemu_log("%#x", (uint32_t)set); 2105 } else { 2106 qemu_log("%#" PRIx64, (uint64_t)set); 2107 } 2108 } 2109 } 2110 2111 qemu_log("\n"); 2112 } 2113 } 2114 2115 /* we give more priority to constraints with less registers */ 2116 static int get_constraint_priority(const TCGOpDef *def, int k) 2117 { 2118 const TCGArgConstraint *arg_ct; 2119 2120 int i, n; 2121 arg_ct = &def->args_ct[k]; 2122 if (arg_ct->ct & TCG_CT_ALIAS) { 2123 /* an alias is equivalent to a single register */ 2124 n = 1; 2125 } else { 2126 if (!(arg_ct->ct & TCG_CT_REG)) 2127 return 0; 2128 n = 0; 2129 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2130 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 2131 n++; 2132 } 2133 } 2134 return TCG_TARGET_NB_REGS - n + 1; 2135 } 2136 2137 /* sort from highest priority to lowest */ 2138 static void sort_constraints(TCGOpDef *def, int start, int n) 2139 { 2140 int i, j, p1, p2, tmp; 2141 2142 for(i = 0; i < n; i++) 2143 def->sorted_args[start + i] = start + i; 2144 if (n <= 1) 2145 return; 2146 for(i = 0; i < n - 1; i++) { 2147 for(j = i + 1; j < n; j++) { 2148 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 2149 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 2150 if (p1 < p2) { 2151 tmp = def->sorted_args[start + i]; 2152 def->sorted_args[start + i] = def->sorted_args[start + j]; 2153 def->sorted_args[start + j] = tmp; 2154 } 2155 } 2156 } 2157 } 2158 2159 static void process_op_defs(TCGContext *s) 2160 { 2161 TCGOpcode op; 2162 2163 for (op = 0; op < NB_OPS; op++) { 2164 TCGOpDef *def = &tcg_op_defs[op]; 2165 const TCGTargetOpDef *tdefs; 2166 TCGType type; 2167 int i, nb_args; 2168 2169 if (def->flags & TCG_OPF_NOT_PRESENT) { 2170 continue; 2171 } 2172 2173 nb_args = def->nb_iargs + def->nb_oargs; 2174 if (nb_args == 0) { 2175 continue; 2176 } 2177 2178 tdefs = tcg_target_op_def(op); 2179 /* Missing TCGTargetOpDef entry. */ 2180 tcg_debug_assert(tdefs != NULL); 2181 2182 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 2183 for (i = 0; i < nb_args; i++) { 2184 const char *ct_str = tdefs->args_ct_str[i]; 2185 /* Incomplete TCGTargetOpDef entry. */ 2186 tcg_debug_assert(ct_str != NULL); 2187 2188 def->args_ct[i].u.regs = 0; 2189 def->args_ct[i].ct = 0; 2190 while (*ct_str != '\0') { 2191 switch(*ct_str) { 2192 case '0' ... 
'9': 2193 { 2194 int oarg = *ct_str - '0'; 2195 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2196 tcg_debug_assert(oarg < def->nb_oargs); 2197 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 2198 /* TCG_CT_ALIAS is for the output arguments. 2199 The input is tagged with TCG_CT_IALIAS. */ 2200 def->args_ct[i] = def->args_ct[oarg]; 2201 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 2202 def->args_ct[oarg].alias_index = i; 2203 def->args_ct[i].ct |= TCG_CT_IALIAS; 2204 def->args_ct[i].alias_index = oarg; 2205 } 2206 ct_str++; 2207 break; 2208 case '&': 2209 def->args_ct[i].ct |= TCG_CT_NEWREG; 2210 ct_str++; 2211 break; 2212 case 'i': 2213 def->args_ct[i].ct |= TCG_CT_CONST; 2214 ct_str++; 2215 break; 2216 default: 2217 ct_str = target_parse_constraint(&def->args_ct[i], 2218 ct_str, type); 2219 /* Typo in TCGTargetOpDef constraint. */ 2220 tcg_debug_assert(ct_str != NULL); 2221 } 2222 } 2223 } 2224 2225 /* TCGTargetOpDef entry with too much information? */ 2226 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2227 2228 /* sort the constraints (XXX: this is just an heuristic) */ 2229 sort_constraints(def, 0, def->nb_oargs); 2230 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2231 } 2232 } 2233 2234 void tcg_op_remove(TCGContext *s, TCGOp *op) 2235 { 2236 TCGLabel *label; 2237 2238 switch (op->opc) { 2239 case INDEX_op_br: 2240 label = arg_label(op->args[0]); 2241 label->refs--; 2242 break; 2243 case INDEX_op_brcond_i32: 2244 case INDEX_op_brcond_i64: 2245 label = arg_label(op->args[3]); 2246 label->refs--; 2247 break; 2248 case INDEX_op_brcond2_i32: 2249 label = arg_label(op->args[5]); 2250 label->refs--; 2251 break; 2252 default: 2253 break; 2254 } 2255 2256 QTAILQ_REMOVE(&s->ops, op, link); 2257 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2258 s->nb_ops--; 2259 2260 #ifdef CONFIG_PROFILER 2261 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2262 #endif 2263 } 2264 2265 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2266 { 2267 TCGContext *s = tcg_ctx; 2268 TCGOp *op; 2269 2270 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2271 op = tcg_malloc(sizeof(TCGOp)); 2272 } else { 2273 op = QTAILQ_FIRST(&s->free_ops); 2274 QTAILQ_REMOVE(&s->free_ops, op, link); 2275 } 2276 memset(op, 0, offsetof(TCGOp, link)); 2277 op->opc = opc; 2278 s->nb_ops++; 2279 2280 return op; 2281 } 2282 2283 TCGOp *tcg_emit_op(TCGOpcode opc) 2284 { 2285 TCGOp *op = tcg_op_alloc(opc); 2286 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2287 return op; 2288 } 2289 2290 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2291 { 2292 TCGOp *new_op = tcg_op_alloc(opc); 2293 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2294 return new_op; 2295 } 2296 2297 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2298 { 2299 TCGOp *new_op = tcg_op_alloc(opc); 2300 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2301 return new_op; 2302 } 2303 2304 /* Reachable analysis : remove unreachable code. */ 2305 static void reachable_code_pass(TCGContext *s) 2306 { 2307 TCGOp *op, *op_next; 2308 bool dead = false; 2309 2310 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2311 bool remove = dead; 2312 TCGLabel *label; 2313 int call_flags; 2314 2315 switch (op->opc) { 2316 case INDEX_op_set_label: 2317 label = arg_label(op->args[0]); 2318 if (label->refs == 0) { 2319 /* 2320 * While there is an occasional backward branch, virtually 2321 * all branches generated by the translators are forward. 
2322 * Which means that generally we will have already removed 2323 * all references to the label that will be, and there is 2324 * little to be gained by iterating. 2325 */ 2326 remove = true; 2327 } else { 2328 /* Once we see a label, insns become live again. */ 2329 dead = false; 2330 remove = false; 2331 2332 /* 2333 * Optimization can fold conditional branches to unconditional. 2334 * If we find a label with one reference which is preceded by 2335 * an unconditional branch to it, remove both. This needed to 2336 * wait until the dead code in between them was removed. 2337 */ 2338 if (label->refs == 1) { 2339 TCGOp *op_prev = QTAILQ_PREV(op, link); 2340 if (op_prev->opc == INDEX_op_br && 2341 label == arg_label(op_prev->args[0])) { 2342 tcg_op_remove(s, op_prev); 2343 remove = true; 2344 } 2345 } 2346 } 2347 break; 2348 2349 case INDEX_op_br: 2350 case INDEX_op_exit_tb: 2351 case INDEX_op_goto_ptr: 2352 /* Unconditional branches; everything following is dead. */ 2353 dead = true; 2354 break; 2355 2356 case INDEX_op_call: 2357 /* Notice noreturn helper calls, raising exceptions. */ 2358 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; 2359 if (call_flags & TCG_CALL_NO_RETURN) { 2360 dead = true; 2361 } 2362 break; 2363 2364 case INDEX_op_insn_start: 2365 /* Never remove -- we need to keep these for unwind. */ 2366 remove = false; 2367 break; 2368 2369 default: 2370 break; 2371 } 2372 2373 if (remove) { 2374 tcg_op_remove(s, op); 2375 } 2376 } 2377 } 2378 2379 #define TS_DEAD 1 2380 #define TS_MEM 2 2381 2382 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2383 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2384 2385 /* For liveness_pass_1, the register preferences for a given temp. */ 2386 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2387 { 2388 return ts->state_ptr; 2389 } 2390 2391 /* For liveness_pass_1, reset the preferences for a given temp to the 2392 * maximal regset for its type. 2393 */ 2394 static inline void la_reset_pref(TCGTemp *ts) 2395 { 2396 *la_temp_pref(ts) 2397 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2398 } 2399 2400 /* liveness analysis: end of function: all temps are dead, and globals 2401 should be in memory. */ 2402 static void la_func_end(TCGContext *s, int ng, int nt) 2403 { 2404 int i; 2405 2406 for (i = 0; i < ng; ++i) { 2407 s->temps[i].state = TS_DEAD | TS_MEM; 2408 la_reset_pref(&s->temps[i]); 2409 } 2410 for (i = ng; i < nt; ++i) { 2411 s->temps[i].state = TS_DEAD; 2412 la_reset_pref(&s->temps[i]); 2413 } 2414 } 2415 2416 /* liveness analysis: end of basic block: all temps are dead, globals 2417 and local temps should be in memory. */ 2418 static void la_bb_end(TCGContext *s, int ng, int nt) 2419 { 2420 int i; 2421 2422 for (i = 0; i < ng; ++i) { 2423 s->temps[i].state = TS_DEAD | TS_MEM; 2424 la_reset_pref(&s->temps[i]); 2425 } 2426 for (i = ng; i < nt; ++i) { 2427 s->temps[i].state = (s->temps[i].temp_local 2428 ? TS_DEAD | TS_MEM 2429 : TS_DEAD); 2430 la_reset_pref(&s->temps[i]); 2431 } 2432 } 2433 2434 /* liveness analysis: sync globals back to memory. */ 2435 static void la_global_sync(TCGContext *s, int ng) 2436 { 2437 int i; 2438 2439 for (i = 0; i < ng; ++i) { 2440 int state = s->temps[i].state; 2441 s->temps[i].state = state | TS_MEM; 2442 if (state == TS_DEAD) { 2443 /* If the global was previously dead, reset prefs. */ 2444 la_reset_pref(&s->temps[i]); 2445 } 2446 } 2447 } 2448 2449 /* liveness analysis: sync globals back to memory and kill. 
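
   As a worked illustration of the two-bit state these helpers manage
   (a sketch, not code from this file):

       int state = TS_DEAD | TS_MEM;              dead, but memory is current
       bool in_memory  = (state & TS_MEM)  != 0;  true: no store needed
       bool value_live = (state & TS_DEAD) == 0;  false: nothing to spill

   la_global_sync() above only ORs in TS_MEM, whereas la_global_kill()
   below forces both bits and always resets the preferences.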
*/ 2450 static void la_global_kill(TCGContext *s, int ng) 2451 { 2452 int i; 2453 2454 for (i = 0; i < ng; i++) { 2455 s->temps[i].state = TS_DEAD | TS_MEM; 2456 la_reset_pref(&s->temps[i]); 2457 } 2458 } 2459 2460 /* liveness analysis: note live globals crossing calls. */ 2461 static void la_cross_call(TCGContext *s, int nt) 2462 { 2463 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2464 int i; 2465 2466 for (i = 0; i < nt; i++) { 2467 TCGTemp *ts = &s->temps[i]; 2468 if (!(ts->state & TS_DEAD)) { 2469 TCGRegSet *pset = la_temp_pref(ts); 2470 TCGRegSet set = *pset; 2471 2472 set &= mask; 2473 /* If the combination is not possible, restart. */ 2474 if (set == 0) { 2475 set = tcg_target_available_regs[ts->type] & mask; 2476 } 2477 *pset = set; 2478 } 2479 } 2480 } 2481 2482 /* Liveness analysis : update the opc_arg_life array to tell if a 2483 given input arguments is dead. Instructions updating dead 2484 temporaries are removed. */ 2485 static void liveness_pass_1(TCGContext *s) 2486 { 2487 int nb_globals = s->nb_globals; 2488 int nb_temps = s->nb_temps; 2489 TCGOp *op, *op_prev; 2490 TCGRegSet *prefs; 2491 int i; 2492 2493 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2494 for (i = 0; i < nb_temps; ++i) { 2495 s->temps[i].state_ptr = prefs + i; 2496 } 2497 2498 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2499 la_func_end(s, nb_globals, nb_temps); 2500 2501 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2502 int nb_iargs, nb_oargs; 2503 TCGOpcode opc_new, opc_new2; 2504 bool have_opc_new2; 2505 TCGLifeData arg_life = 0; 2506 TCGTemp *ts; 2507 TCGOpcode opc = op->opc; 2508 const TCGOpDef *def = &tcg_op_defs[opc]; 2509 2510 switch (opc) { 2511 case INDEX_op_call: 2512 { 2513 int call_flags; 2514 int nb_call_regs; 2515 2516 nb_oargs = TCGOP_CALLO(op); 2517 nb_iargs = TCGOP_CALLI(op); 2518 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2519 2520 /* pure functions can be removed if their result is unused */ 2521 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2522 for (i = 0; i < nb_oargs; i++) { 2523 ts = arg_temp(op->args[i]); 2524 if (ts->state != TS_DEAD) { 2525 goto do_not_remove_call; 2526 } 2527 } 2528 goto do_remove; 2529 } 2530 do_not_remove_call: 2531 2532 /* Output args are dead. */ 2533 for (i = 0; i < nb_oargs; i++) { 2534 ts = arg_temp(op->args[i]); 2535 if (ts->state & TS_DEAD) { 2536 arg_life |= DEAD_ARG << i; 2537 } 2538 if (ts->state & TS_MEM) { 2539 arg_life |= SYNC_ARG << i; 2540 } 2541 ts->state = TS_DEAD; 2542 la_reset_pref(ts); 2543 2544 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2545 op->output_pref[i] = 0; 2546 } 2547 2548 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2549 TCG_CALL_NO_READ_GLOBALS))) { 2550 la_global_kill(s, nb_globals); 2551 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2552 la_global_sync(s, nb_globals); 2553 } 2554 2555 /* Record arguments that die in this helper. */ 2556 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2557 ts = arg_temp(op->args[i]); 2558 if (ts && ts->state & TS_DEAD) { 2559 arg_life |= DEAD_ARG << i; 2560 } 2561 } 2562 2563 /* For all live registers, remove call-clobbered prefs. */ 2564 la_cross_call(s, nb_temps); 2565 2566 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2567 2568 /* Input arguments are live for preceding opcodes. 
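
   They are revived below and their register preferences are seeded from
   the calling convention.  As an illustration (hypothetical argument
   slot; not code from this file), a dying input that is used only as
   argument 2 and travels in a register ends up preferring exactly that
   register:

       TCGRegSet pref = 0;
       tcg_regset_set_reg(pref, tcg_target_call_iarg_regs[2]);
       assert(tcg_regset_single(pref));

   whereas inputs passed on the stack fall back to every register
   available for their type.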
*/ 2569 for (i = 0; i < nb_iargs; i++) { 2570 ts = arg_temp(op->args[i + nb_oargs]); 2571 if (ts && ts->state & TS_DEAD) { 2572 /* For those arguments that die, and will be allocated 2573 * in registers, clear the register set for that arg, 2574 * to be filled in below. For args that will be on 2575 * the stack, reset to any available reg. 2576 */ 2577 *la_temp_pref(ts) 2578 = (i < nb_call_regs ? 0 : 2579 tcg_target_available_regs[ts->type]); 2580 ts->state &= ~TS_DEAD; 2581 } 2582 } 2583 2584 /* For each input argument, add its input register to prefs. 2585 If a temp is used once, this produces a single set bit. */ 2586 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2587 ts = arg_temp(op->args[i + nb_oargs]); 2588 if (ts) { 2589 tcg_regset_set_reg(*la_temp_pref(ts), 2590 tcg_target_call_iarg_regs[i]); 2591 } 2592 } 2593 } 2594 break; 2595 case INDEX_op_insn_start: 2596 break; 2597 case INDEX_op_discard: 2598 /* mark the temporary as dead */ 2599 ts = arg_temp(op->args[0]); 2600 ts->state = TS_DEAD; 2601 la_reset_pref(ts); 2602 break; 2603 2604 case INDEX_op_add2_i32: 2605 opc_new = INDEX_op_add_i32; 2606 goto do_addsub2; 2607 case INDEX_op_sub2_i32: 2608 opc_new = INDEX_op_sub_i32; 2609 goto do_addsub2; 2610 case INDEX_op_add2_i64: 2611 opc_new = INDEX_op_add_i64; 2612 goto do_addsub2; 2613 case INDEX_op_sub2_i64: 2614 opc_new = INDEX_op_sub_i64; 2615 do_addsub2: 2616 nb_iargs = 4; 2617 nb_oargs = 2; 2618 /* Test if the high part of the operation is dead, but not 2619 the low part. The result can be optimized to a simple 2620 add or sub. This happens often for x86_64 guest when the 2621 cpu mode is set to 32 bit. */ 2622 if (arg_temp(op->args[1])->state == TS_DEAD) { 2623 if (arg_temp(op->args[0])->state == TS_DEAD) { 2624 goto do_remove; 2625 } 2626 /* Replace the opcode and adjust the args in place, 2627 leaving 3 unused args at the end. */ 2628 op->opc = opc = opc_new; 2629 op->args[1] = op->args[2]; 2630 op->args[2] = op->args[4]; 2631 /* Fall through and mark the single-word operation live. */ 2632 nb_iargs = 2; 2633 nb_oargs = 1; 2634 } 2635 goto do_not_remove; 2636 2637 case INDEX_op_mulu2_i32: 2638 opc_new = INDEX_op_mul_i32; 2639 opc_new2 = INDEX_op_muluh_i32; 2640 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2641 goto do_mul2; 2642 case INDEX_op_muls2_i32: 2643 opc_new = INDEX_op_mul_i32; 2644 opc_new2 = INDEX_op_mulsh_i32; 2645 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2646 goto do_mul2; 2647 case INDEX_op_mulu2_i64: 2648 opc_new = INDEX_op_mul_i64; 2649 opc_new2 = INDEX_op_muluh_i64; 2650 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2651 goto do_mul2; 2652 case INDEX_op_muls2_i64: 2653 opc_new = INDEX_op_mul_i64; 2654 opc_new2 = INDEX_op_mulsh_i64; 2655 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2656 goto do_mul2; 2657 do_mul2: 2658 nb_iargs = 2; 2659 nb_oargs = 2; 2660 if (arg_temp(op->args[1])->state == TS_DEAD) { 2661 if (arg_temp(op->args[0])->state == TS_DEAD) { 2662 /* Both parts of the operation are dead. */ 2663 goto do_remove; 2664 } 2665 /* The high part of the operation is dead; generate the low. */ 2666 op->opc = opc = opc_new; 2667 op->args[1] = op->args[2]; 2668 op->args[2] = op->args[3]; 2669 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2670 /* The low part of the operation is dead; generate the high. */ 2671 op->opc = opc = opc_new2; 2672 op->args[0] = op->args[1]; 2673 op->args[1] = op->args[2]; 2674 op->args[2] = op->args[3]; 2675 } else { 2676 goto do_not_remove; 2677 } 2678 /* Mark the single-word operation live. 
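
   For example (an illustrative sketch in tcg_dump_ops notation, not
   code from this file), when only the low half of a widening multiply
   is used,

       mulu2_i32  lo,hi,a,b           with "hi" dead

   is rewritten in place to

       mul_i32    lo,a,b

   and when only the high half is used and the host has muluh, the op
   becomes muluh_i32 hi,a,b instead.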
*/ 2679 nb_oargs = 1; 2680 goto do_not_remove; 2681 2682 default: 2683 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2684 nb_iargs = def->nb_iargs; 2685 nb_oargs = def->nb_oargs; 2686 2687 /* Test if the operation can be removed because all 2688 its outputs are dead. We assume that nb_oargs == 0 2689 implies side effects */ 2690 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2691 for (i = 0; i < nb_oargs; i++) { 2692 if (arg_temp(op->args[i])->state != TS_DEAD) { 2693 goto do_not_remove; 2694 } 2695 } 2696 goto do_remove; 2697 } 2698 goto do_not_remove; 2699 2700 do_remove: 2701 tcg_op_remove(s, op); 2702 break; 2703 2704 do_not_remove: 2705 for (i = 0; i < nb_oargs; i++) { 2706 ts = arg_temp(op->args[i]); 2707 2708 /* Remember the preference of the uses that followed. */ 2709 op->output_pref[i] = *la_temp_pref(ts); 2710 2711 /* Output args are dead. */ 2712 if (ts->state & TS_DEAD) { 2713 arg_life |= DEAD_ARG << i; 2714 } 2715 if (ts->state & TS_MEM) { 2716 arg_life |= SYNC_ARG << i; 2717 } 2718 ts->state = TS_DEAD; 2719 la_reset_pref(ts); 2720 } 2721 2722 /* If end of basic block, update. */ 2723 if (def->flags & TCG_OPF_BB_EXIT) { 2724 la_func_end(s, nb_globals, nb_temps); 2725 } else if (def->flags & TCG_OPF_BB_END) { 2726 la_bb_end(s, nb_globals, nb_temps); 2727 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2728 la_global_sync(s, nb_globals); 2729 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2730 la_cross_call(s, nb_temps); 2731 } 2732 } 2733 2734 /* Record arguments that die in this opcode. */ 2735 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2736 ts = arg_temp(op->args[i]); 2737 if (ts->state & TS_DEAD) { 2738 arg_life |= DEAD_ARG << i; 2739 } 2740 } 2741 2742 /* Input arguments are live for preceding opcodes. */ 2743 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2744 ts = arg_temp(op->args[i]); 2745 if (ts->state & TS_DEAD) { 2746 /* For operands that were dead, initially allow 2747 all regs for the type. */ 2748 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2749 ts->state &= ~TS_DEAD; 2750 } 2751 } 2752 2753 /* Incorporate constraints for this operand. */ 2754 switch (opc) { 2755 case INDEX_op_mov_i32: 2756 case INDEX_op_mov_i64: 2757 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2758 have proper constraints. That said, special case 2759 moves to propagate preferences backward. */ 2760 if (IS_DEAD_ARG(1)) { 2761 *la_temp_pref(arg_temp(op->args[0])) 2762 = *la_temp_pref(arg_temp(op->args[1])); 2763 } 2764 break; 2765 2766 default: 2767 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2768 const TCGArgConstraint *ct = &def->args_ct[i]; 2769 TCGRegSet set, *pset; 2770 2771 ts = arg_temp(op->args[i]); 2772 pset = la_temp_pref(ts); 2773 set = *pset; 2774 2775 set &= ct->u.regs; 2776 if (ct->ct & TCG_CT_IALIAS) { 2777 set &= op->output_pref[ct->alias_index]; 2778 } 2779 /* If the combination is not possible, restart. */ 2780 if (set == 0) { 2781 set = ct->u.regs; 2782 } 2783 *pset = set; 2784 } 2785 break; 2786 } 2787 break; 2788 } 2789 op->life = arg_life; 2790 } 2791 } 2792 2793 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2794 static bool liveness_pass_2(TCGContext *s) 2795 { 2796 int nb_globals = s->nb_globals; 2797 int nb_temps, i; 2798 bool changes = false; 2799 TCGOp *op, *op_next; 2800 2801 /* Create a temporary for each indirect global. 
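
   The shadow temp is reachable through state_ptr, and the rewrite below
   turns every access to the indirect global into an access to its
   direct copy, with explicit loads and stores around it.  Roughly (an
   illustrative fragment with made-up temp names and env offset, not
   code from this file), a use-then-update of such a global becomes:

       ld_i32   tmp0,env,$0x10        reload before the first use
       add_i32  tmp0,tmp0,t1
       st_i32   tmp0,env,$0x10        sync after the last write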
*/ 2802 for (i = 0; i < nb_globals; ++i) { 2803 TCGTemp *its = &s->temps[i]; 2804 if (its->indirect_reg) { 2805 TCGTemp *dts = tcg_temp_alloc(s); 2806 dts->type = its->type; 2807 dts->base_type = its->base_type; 2808 its->state_ptr = dts; 2809 } else { 2810 its->state_ptr = NULL; 2811 } 2812 /* All globals begin dead. */ 2813 its->state = TS_DEAD; 2814 } 2815 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2816 TCGTemp *its = &s->temps[i]; 2817 its->state_ptr = NULL; 2818 its->state = TS_DEAD; 2819 } 2820 2821 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2822 TCGOpcode opc = op->opc; 2823 const TCGOpDef *def = &tcg_op_defs[opc]; 2824 TCGLifeData arg_life = op->life; 2825 int nb_iargs, nb_oargs, call_flags; 2826 TCGTemp *arg_ts, *dir_ts; 2827 2828 if (opc == INDEX_op_call) { 2829 nb_oargs = TCGOP_CALLO(op); 2830 nb_iargs = TCGOP_CALLI(op); 2831 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2832 } else { 2833 nb_iargs = def->nb_iargs; 2834 nb_oargs = def->nb_oargs; 2835 2836 /* Set flags similar to how calls require. */ 2837 if (def->flags & TCG_OPF_BB_END) { 2838 /* Like writing globals: save_globals */ 2839 call_flags = 0; 2840 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2841 /* Like reading globals: sync_globals */ 2842 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2843 } else { 2844 /* No effect on globals. */ 2845 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2846 TCG_CALL_NO_WRITE_GLOBALS); 2847 } 2848 } 2849 2850 /* Make sure that input arguments are available. */ 2851 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2852 arg_ts = arg_temp(op->args[i]); 2853 if (arg_ts) { 2854 dir_ts = arg_ts->state_ptr; 2855 if (dir_ts && arg_ts->state == TS_DEAD) { 2856 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2857 ? INDEX_op_ld_i32 2858 : INDEX_op_ld_i64); 2859 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2860 2861 lop->args[0] = temp_arg(dir_ts); 2862 lop->args[1] = temp_arg(arg_ts->mem_base); 2863 lop->args[2] = arg_ts->mem_offset; 2864 2865 /* Loaded, but synced with memory. */ 2866 arg_ts->state = TS_MEM; 2867 } 2868 } 2869 } 2870 2871 /* Perform input replacement, and mark inputs that became dead. 2872 No action is required except keeping temp_state up to date 2873 so that we reload when needed. */ 2874 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2875 arg_ts = arg_temp(op->args[i]); 2876 if (arg_ts) { 2877 dir_ts = arg_ts->state_ptr; 2878 if (dir_ts) { 2879 op->args[i] = temp_arg(dir_ts); 2880 changes = true; 2881 if (IS_DEAD_ARG(i)) { 2882 arg_ts->state = TS_DEAD; 2883 } 2884 } 2885 } 2886 } 2887 2888 /* Liveness analysis should ensure that the following are 2889 all correct, for call sites and basic block end points. */ 2890 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2891 /* Nothing to do */ 2892 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2893 for (i = 0; i < nb_globals; ++i) { 2894 /* Liveness should see that globals are synced back, 2895 that is, either TS_DEAD or TS_MEM. */ 2896 arg_ts = &s->temps[i]; 2897 tcg_debug_assert(arg_ts->state_ptr == 0 2898 || arg_ts->state != 0); 2899 } 2900 } else { 2901 for (i = 0; i < nb_globals; ++i) { 2902 /* Liveness should see that globals are saved back, 2903 that is, TS_DEAD, waiting to be reloaded. */ 2904 arg_ts = &s->temps[i]; 2905 tcg_debug_assert(arg_ts->state_ptr == 0 2906 || arg_ts->state == TS_DEAD); 2907 } 2908 } 2909 2910 /* Outputs become available. 
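
   Concretely (a summary of the bookkeeping below, not code from this
   file), a shadowed global steps through a small state machine:

       state == 0           live only in the direct temp, memory stale
       state == TS_MEM      a store was just emitted, memory is current
       state == TS_DEAD     output never used again, nothing to keep

   which is what the assertions above expect at call sites and at
   basic-block boundaries.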
*/ 2911 for (i = 0; i < nb_oargs; i++) { 2912 arg_ts = arg_temp(op->args[i]); 2913 dir_ts = arg_ts->state_ptr; 2914 if (!dir_ts) { 2915 continue; 2916 } 2917 op->args[i] = temp_arg(dir_ts); 2918 changes = true; 2919 2920 /* The output is now live and modified. */ 2921 arg_ts->state = 0; 2922 2923 /* Sync outputs upon their last write. */ 2924 if (NEED_SYNC_ARG(i)) { 2925 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2926 ? INDEX_op_st_i32 2927 : INDEX_op_st_i64); 2928 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2929 2930 sop->args[0] = temp_arg(dir_ts); 2931 sop->args[1] = temp_arg(arg_ts->mem_base); 2932 sop->args[2] = arg_ts->mem_offset; 2933 2934 arg_ts->state = TS_MEM; 2935 } 2936 /* Drop outputs that are dead. */ 2937 if (IS_DEAD_ARG(i)) { 2938 arg_ts->state = TS_DEAD; 2939 } 2940 } 2941 } 2942 2943 return changes; 2944 } 2945 2946 #ifdef CONFIG_DEBUG_TCG 2947 static void dump_regs(TCGContext *s) 2948 { 2949 TCGTemp *ts; 2950 int i; 2951 char buf[64]; 2952 2953 for(i = 0; i < s->nb_temps; i++) { 2954 ts = &s->temps[i]; 2955 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2956 switch(ts->val_type) { 2957 case TEMP_VAL_REG: 2958 printf("%s", tcg_target_reg_names[ts->reg]); 2959 break; 2960 case TEMP_VAL_MEM: 2961 printf("%d(%s)", (int)ts->mem_offset, 2962 tcg_target_reg_names[ts->mem_base->reg]); 2963 break; 2964 case TEMP_VAL_CONST: 2965 printf("$0x%" TCG_PRIlx, ts->val); 2966 break; 2967 case TEMP_VAL_DEAD: 2968 printf("D"); 2969 break; 2970 default: 2971 printf("???"); 2972 break; 2973 } 2974 printf("\n"); 2975 } 2976 2977 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2978 if (s->reg_to_temp[i] != NULL) { 2979 printf("%s: %s\n", 2980 tcg_target_reg_names[i], 2981 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2982 } 2983 } 2984 } 2985 2986 static void check_regs(TCGContext *s) 2987 { 2988 int reg; 2989 int k; 2990 TCGTemp *ts; 2991 char buf[64]; 2992 2993 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2994 ts = s->reg_to_temp[reg]; 2995 if (ts != NULL) { 2996 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2997 printf("Inconsistency for register %s:\n", 2998 tcg_target_reg_names[reg]); 2999 goto fail; 3000 } 3001 } 3002 } 3003 for (k = 0; k < s->nb_temps; k++) { 3004 ts = &s->temps[k]; 3005 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 3006 && s->reg_to_temp[ts->reg] != ts) { 3007 printf("Inconsistency for temp %s:\n", 3008 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3009 fail: 3010 printf("reg state:\n"); 3011 dump_regs(s); 3012 tcg_abort(); 3013 } 3014 } 3015 } 3016 #endif 3017 3018 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3019 { 3020 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3021 /* Sparc64 stack is accessed with offset of 2047 */ 3022 s->current_frame_offset = (s->current_frame_offset + 3023 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3024 ~(sizeof(tcg_target_long) - 1); 3025 #endif 3026 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3027 s->frame_end) { 3028 tcg_abort(); 3029 } 3030 ts->mem_offset = s->current_frame_offset; 3031 ts->mem_base = s->frame_temp; 3032 ts->mem_allocated = 1; 3033 s->current_frame_offset += sizeof(tcg_target_long); 3034 } 3035 3036 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3037 3038 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3039 mark it free; otherwise mark it dead. 
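
   Illustration of the two outcomes (not code from this file):

       temp_free_or_dead(s, ts, -1);    freed: val_type becomes
                                        TEMP_VAL_MEM, so the value can
                                        still be reloaded later
       temp_free_or_dead(s, ts, +1);    dead: plain temps drop to
                                        TEMP_VAL_DEAD; locals and
                                        globals keep TEMP_VAL_MEM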
*/ 3040 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3041 { 3042 if (ts->fixed_reg) { 3043 return; 3044 } 3045 if (ts->val_type == TEMP_VAL_REG) { 3046 s->reg_to_temp[ts->reg] = NULL; 3047 } 3048 ts->val_type = (free_or_dead < 0 3049 || ts->temp_local 3050 || ts->temp_global 3051 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 3052 } 3053 3054 /* Mark a temporary as dead. */ 3055 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3056 { 3057 temp_free_or_dead(s, ts, 1); 3058 } 3059 3060 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3061 registers needs to be allocated to store a constant. If 'free_or_dead' 3062 is non-zero, subsequently release the temporary; if it is positive, the 3063 temp is dead; if it is negative, the temp is free. */ 3064 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3065 TCGRegSet preferred_regs, int free_or_dead) 3066 { 3067 if (ts->fixed_reg) { 3068 return; 3069 } 3070 if (!ts->mem_coherent) { 3071 if (!ts->mem_allocated) { 3072 temp_allocate_frame(s, ts); 3073 } 3074 switch (ts->val_type) { 3075 case TEMP_VAL_CONST: 3076 /* If we're going to free the temp immediately, then we won't 3077 require it later in a register, so attempt to store the 3078 constant to memory directly. */ 3079 if (free_or_dead 3080 && tcg_out_sti(s, ts->type, ts->val, 3081 ts->mem_base->reg, ts->mem_offset)) { 3082 break; 3083 } 3084 temp_load(s, ts, tcg_target_available_regs[ts->type], 3085 allocated_regs, preferred_regs); 3086 /* fallthrough */ 3087 3088 case TEMP_VAL_REG: 3089 tcg_out_st(s, ts->type, ts->reg, 3090 ts->mem_base->reg, ts->mem_offset); 3091 break; 3092 3093 case TEMP_VAL_MEM: 3094 break; 3095 3096 case TEMP_VAL_DEAD: 3097 default: 3098 tcg_abort(); 3099 } 3100 ts->mem_coherent = 1; 3101 } 3102 if (free_or_dead) { 3103 temp_free_or_dead(s, ts, free_or_dead); 3104 } 3105 } 3106 3107 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3108 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3109 { 3110 TCGTemp *ts = s->reg_to_temp[reg]; 3111 if (ts != NULL) { 3112 temp_sync(s, ts, allocated_regs, 0, -1); 3113 } 3114 } 3115 3116 /** 3117 * tcg_reg_alloc: 3118 * @required_regs: Set of registers in which we must allocate. 3119 * @allocated_regs: Set of registers which must be avoided. 3120 * @preferred_regs: Set of registers we should prefer. 3121 * @rev: True if we search the registers in "indirect" order. 3122 * 3123 * The allocated register must be in @required_regs & ~@allocated_regs, 3124 * but if we can put it in @preferred_regs we may save a move later. 3125 */ 3126 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3127 TCGRegSet allocated_regs, 3128 TCGRegSet preferred_regs, bool rev) 3129 { 3130 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3131 TCGRegSet reg_ct[2]; 3132 const int *order; 3133 3134 reg_ct[1] = required_regs & ~allocated_regs; 3135 tcg_debug_assert(reg_ct[1] != 0); 3136 reg_ct[0] = reg_ct[1] & preferred_regs; 3137 3138 /* Skip the preferred_regs option if it cannot be satisfied, 3139 or if the preference made no difference. */ 3140 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3141 3142 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3143 3144 /* Try free registers, preferences first. */ 3145 for (j = f; j < 2; j++) { 3146 TCGRegSet set = reg_ct[j]; 3147 3148 if (tcg_regset_single(set)) { 3149 /* One register in the set. 
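
   This relies on the bit tricks in tcg_regset_single() and
   tcg_regset_first() above; a worked example (not code from this file):

       TCGRegSet set = (TCGRegSet)1 << 5;        only register 5 present
       assert(tcg_regset_single(set));           (set & (set - 1)) == 0
       assert(tcg_regset_first(set) == 5);       ctz of a single bit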
*/ 3150 TCGReg reg = tcg_regset_first(set); 3151 if (s->reg_to_temp[reg] == NULL) { 3152 return reg; 3153 } 3154 } else { 3155 for (i = 0; i < n; i++) { 3156 TCGReg reg = order[i]; 3157 if (s->reg_to_temp[reg] == NULL && 3158 tcg_regset_test_reg(set, reg)) { 3159 return reg; 3160 } 3161 } 3162 } 3163 } 3164 3165 /* We must spill something. */ 3166 for (j = f; j < 2; j++) { 3167 TCGRegSet set = reg_ct[j]; 3168 3169 if (tcg_regset_single(set)) { 3170 /* One register in the set. */ 3171 TCGReg reg = tcg_regset_first(set); 3172 tcg_reg_free(s, reg, allocated_regs); 3173 return reg; 3174 } else { 3175 for (i = 0; i < n; i++) { 3176 TCGReg reg = order[i]; 3177 if (tcg_regset_test_reg(set, reg)) { 3178 tcg_reg_free(s, reg, allocated_regs); 3179 return reg; 3180 } 3181 } 3182 } 3183 } 3184 3185 tcg_abort(); 3186 } 3187 3188 /* Make sure the temporary is in a register. If needed, allocate the register 3189 from DESIRED while avoiding ALLOCATED. */ 3190 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3191 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3192 { 3193 TCGReg reg; 3194 3195 switch (ts->val_type) { 3196 case TEMP_VAL_REG: 3197 return; 3198 case TEMP_VAL_CONST: 3199 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3200 preferred_regs, ts->indirect_base); 3201 tcg_out_movi(s, ts->type, reg, ts->val); 3202 ts->mem_coherent = 0; 3203 break; 3204 case TEMP_VAL_MEM: 3205 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3206 preferred_regs, ts->indirect_base); 3207 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3208 ts->mem_coherent = 1; 3209 break; 3210 case TEMP_VAL_DEAD: 3211 default: 3212 tcg_abort(); 3213 } 3214 ts->reg = reg; 3215 ts->val_type = TEMP_VAL_REG; 3216 s->reg_to_temp[reg] = ts; 3217 } 3218 3219 /* Save a temporary to memory. 'allocated_regs' is used in case a 3220 temporary registers needs to be allocated to store a constant. */ 3221 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3222 { 3223 /* The liveness analysis already ensures that globals are back 3224 in memory. Keep an tcg_debug_assert for safety. */ 3225 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 3226 } 3227 3228 /* save globals to their canonical location and assume they can be 3229 modified be the following code. 'allocated_regs' is used in case a 3230 temporary registers needs to be allocated to store a constant. */ 3231 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3232 { 3233 int i, n; 3234 3235 for (i = 0, n = s->nb_globals; i < n; i++) { 3236 temp_save(s, &s->temps[i], allocated_regs); 3237 } 3238 } 3239 3240 /* sync globals to their canonical location and assume they can be 3241 read by the following code. 'allocated_regs' is used in case a 3242 temporary registers needs to be allocated to store a constant. */ 3243 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3244 { 3245 int i, n; 3246 3247 for (i = 0, n = s->nb_globals; i < n; i++) { 3248 TCGTemp *ts = &s->temps[i]; 3249 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3250 || ts->fixed_reg 3251 || ts->mem_coherent); 3252 } 3253 } 3254 3255 /* at the end of a basic block, we assume all temporaries are dead and 3256 all globals are stored at their canonical location. 
*/ 3257 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3258 { 3259 int i; 3260 3261 for (i = s->nb_globals; i < s->nb_temps; i++) { 3262 TCGTemp *ts = &s->temps[i]; 3263 if (ts->temp_local) { 3264 temp_save(s, ts, allocated_regs); 3265 } else { 3266 /* The liveness analysis already ensures that temps are dead. 3267 Keep an tcg_debug_assert for safety. */ 3268 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3269 } 3270 } 3271 3272 save_globals(s, allocated_regs); 3273 } 3274 3275 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3276 tcg_target_ulong val, TCGLifeData arg_life, 3277 TCGRegSet preferred_regs) 3278 { 3279 if (ots->fixed_reg) { 3280 /* For fixed registers, we do not do any constant propagation. */ 3281 tcg_out_movi(s, ots->type, ots->reg, val); 3282 return; 3283 } 3284 3285 /* The movi is not explicitly generated here. */ 3286 if (ots->val_type == TEMP_VAL_REG) { 3287 s->reg_to_temp[ots->reg] = NULL; 3288 } 3289 ots->val_type = TEMP_VAL_CONST; 3290 ots->val = val; 3291 ots->mem_coherent = 0; 3292 if (NEED_SYNC_ARG(0)) { 3293 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3294 } else if (IS_DEAD_ARG(0)) { 3295 temp_dead(s, ots); 3296 } 3297 } 3298 3299 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 3300 { 3301 TCGTemp *ots = arg_temp(op->args[0]); 3302 tcg_target_ulong val = op->args[1]; 3303 3304 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); 3305 } 3306 3307 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3308 { 3309 const TCGLifeData arg_life = op->life; 3310 TCGRegSet allocated_regs, preferred_regs; 3311 TCGTemp *ts, *ots; 3312 TCGType otype, itype; 3313 3314 allocated_regs = s->reserved_regs; 3315 preferred_regs = op->output_pref[0]; 3316 ots = arg_temp(op->args[0]); 3317 ts = arg_temp(op->args[1]); 3318 3319 /* Note that otype != itype for no-op truncation. */ 3320 otype = ots->type; 3321 itype = ts->type; 3322 3323 if (ts->val_type == TEMP_VAL_CONST) { 3324 /* propagate constant or generate sti */ 3325 tcg_target_ulong val = ts->val; 3326 if (IS_DEAD_ARG(1)) { 3327 temp_dead(s, ts); 3328 } 3329 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3330 return; 3331 } 3332 3333 /* If the source value is in memory we're going to be forced 3334 to have it in a register in order to perform the copy. Copy 3335 the SOURCE value into its own register first, that way we 3336 don't have to reload SOURCE the next time it is used. */ 3337 if (ts->val_type == TEMP_VAL_MEM) { 3338 temp_load(s, ts, tcg_target_available_regs[itype], 3339 allocated_regs, preferred_regs); 3340 } 3341 3342 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3343 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 3344 /* mov to a non-saved dead register makes no sense (even with 3345 liveness analysis disabled). */ 3346 tcg_debug_assert(NEED_SYNC_ARG(0)); 3347 if (!ots->mem_allocated) { 3348 temp_allocate_frame(s, ots); 3349 } 3350 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3351 if (IS_DEAD_ARG(1)) { 3352 temp_dead(s, ts); 3353 } 3354 temp_dead(s, ots); 3355 } else { 3356 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 3357 /* the mov can be suppressed */ 3358 if (ots->val_type == TEMP_VAL_REG) { 3359 s->reg_to_temp[ots->reg] = NULL; 3360 } 3361 ots->reg = ts->reg; 3362 temp_dead(s, ts); 3363 } else { 3364 if (ots->val_type != TEMP_VAL_REG) { 3365 /* When allocating a new register, make sure to not spill the 3366 input one. 
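      Otherwise the allocator could hand back ts->reg itself, spilling
      the very value we are about to copy for no benefit; with the input
      reserved, the tcg_out_mov() below stays a plain register-to-register
      copy.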
*/ 3367 tcg_regset_set_reg(allocated_regs, ts->reg); 3368 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3369 allocated_regs, preferred_regs, 3370 ots->indirect_base); 3371 } 3372 tcg_out_mov(s, otype, ots->reg, ts->reg); 3373 } 3374 ots->val_type = TEMP_VAL_REG; 3375 ots->mem_coherent = 0; 3376 s->reg_to_temp[ots->reg] = ots; 3377 if (NEED_SYNC_ARG(0)) { 3378 temp_sync(s, ots, allocated_regs, 0, 0); 3379 } 3380 } 3381 } 3382 3383 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3384 { 3385 const TCGLifeData arg_life = op->life; 3386 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3387 TCGRegSet i_allocated_regs; 3388 TCGRegSet o_allocated_regs; 3389 int i, k, nb_iargs, nb_oargs; 3390 TCGReg reg; 3391 TCGArg arg; 3392 const TCGArgConstraint *arg_ct; 3393 TCGTemp *ts; 3394 TCGArg new_args[TCG_MAX_OP_ARGS]; 3395 int const_args[TCG_MAX_OP_ARGS]; 3396 3397 nb_oargs = def->nb_oargs; 3398 nb_iargs = def->nb_iargs; 3399 3400 /* copy constants */ 3401 memcpy(new_args + nb_oargs + nb_iargs, 3402 op->args + nb_oargs + nb_iargs, 3403 sizeof(TCGArg) * def->nb_cargs); 3404 3405 i_allocated_regs = s->reserved_regs; 3406 o_allocated_regs = s->reserved_regs; 3407 3408 /* satisfy input constraints */ 3409 for (k = 0; k < nb_iargs; k++) { 3410 TCGRegSet i_preferred_regs, o_preferred_regs; 3411 3412 i = def->sorted_args[nb_oargs + k]; 3413 arg = op->args[i]; 3414 arg_ct = &def->args_ct[i]; 3415 ts = arg_temp(arg); 3416 3417 if (ts->val_type == TEMP_VAL_CONST 3418 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 3419 /* constant is OK for instruction */ 3420 const_args[i] = 1; 3421 new_args[i] = ts->val; 3422 continue; 3423 } 3424 3425 i_preferred_regs = o_preferred_regs = 0; 3426 if (arg_ct->ct & TCG_CT_IALIAS) { 3427 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3428 if (ts->fixed_reg) { 3429 /* if fixed register, we must allocate a new register 3430 if the alias is not the same register */ 3431 if (arg != op->args[arg_ct->alias_index]) { 3432 goto allocate_in_reg; 3433 } 3434 } else { 3435 /* if the input is aliased to an output and if it is 3436 not dead after the instruction, we must allocate 3437 a new register and move it */ 3438 if (!IS_DEAD_ARG(i)) { 3439 goto allocate_in_reg; 3440 } 3441 3442 /* check if the current register has already been allocated 3443 for another input aliased to an output */ 3444 if (ts->val_type == TEMP_VAL_REG) { 3445 int k2, i2; 3446 reg = ts->reg; 3447 for (k2 = 0 ; k2 < k ; k2++) { 3448 i2 = def->sorted_args[nb_oargs + k2]; 3449 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 3450 reg == new_args[i2]) { 3451 goto allocate_in_reg; 3452 } 3453 } 3454 } 3455 i_preferred_regs = o_preferred_regs; 3456 } 3457 } 3458 3459 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); 3460 reg = ts->reg; 3461 3462 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3463 /* nothing to do : the constraint is satisfied */ 3464 } else { 3465 allocate_in_reg: 3466 /* allocate a new register matching the constraint 3467 and move the temporary register into it */ 3468 temp_load(s, ts, tcg_target_available_regs[ts->type], 3469 i_allocated_regs, 0); 3470 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 3471 o_preferred_regs, ts->indirect_base); 3472 tcg_out_mov(s, ts->type, reg, ts->reg); 3473 } 3474 new_args[i] = reg; 3475 const_args[i] = 0; 3476 tcg_regset_set_reg(i_allocated_regs, reg); 3477 } 3478 3479 /* mark dead temporaries and free the associated registers */ 3480 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3481 if 
(IS_DEAD_ARG(i)) { 3482 temp_dead(s, arg_temp(op->args[i])); 3483 } 3484 } 3485 3486 if (def->flags & TCG_OPF_BB_END) { 3487 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3488 } else { 3489 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3490 /* XXX: permit generic clobber register list ? */ 3491 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3492 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3493 tcg_reg_free(s, i, i_allocated_regs); 3494 } 3495 } 3496 } 3497 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3498 /* sync globals if the op has side effects and might trigger 3499 an exception. */ 3500 sync_globals(s, i_allocated_regs); 3501 } 3502 3503 /* satisfy the output constraints */ 3504 for(k = 0; k < nb_oargs; k++) { 3505 i = def->sorted_args[k]; 3506 arg = op->args[i]; 3507 arg_ct = &def->args_ct[i]; 3508 ts = arg_temp(arg); 3509 if ((arg_ct->ct & TCG_CT_ALIAS) 3510 && !const_args[arg_ct->alias_index]) { 3511 reg = new_args[arg_ct->alias_index]; 3512 } else if (arg_ct->ct & TCG_CT_NEWREG) { 3513 reg = tcg_reg_alloc(s, arg_ct->u.regs, 3514 i_allocated_regs | o_allocated_regs, 3515 op->output_pref[k], ts->indirect_base); 3516 } else { 3517 /* if fixed register, we try to use it */ 3518 reg = ts->reg; 3519 if (ts->fixed_reg && 3520 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3521 goto oarg_end; 3522 } 3523 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 3524 op->output_pref[k], ts->indirect_base); 3525 } 3526 tcg_regset_set_reg(o_allocated_regs, reg); 3527 /* if a fixed register is used, then a move will be done afterwards */ 3528 if (!ts->fixed_reg) { 3529 if (ts->val_type == TEMP_VAL_REG) { 3530 s->reg_to_temp[ts->reg] = NULL; 3531 } 3532 ts->val_type = TEMP_VAL_REG; 3533 ts->reg = reg; 3534 /* temp value is modified, so the value kept in memory is 3535 potentially not the same */ 3536 ts->mem_coherent = 0; 3537 s->reg_to_temp[reg] = ts; 3538 } 3539 oarg_end: 3540 new_args[i] = reg; 3541 } 3542 } 3543 3544 /* emit instruction */ 3545 if (def->flags & TCG_OPF_VECTOR) { 3546 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3547 new_args, const_args); 3548 } else { 3549 tcg_out_op(s, op->opc, new_args, const_args); 3550 } 3551 3552 /* move the outputs in the correct register if needed */ 3553 for(i = 0; i < nb_oargs; i++) { 3554 ts = arg_temp(op->args[i]); 3555 reg = new_args[i]; 3556 if (ts->fixed_reg && ts->reg != reg) { 3557 tcg_out_mov(s, ts->type, ts->reg, reg); 3558 } 3559 if (NEED_SYNC_ARG(i)) { 3560 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3561 } else if (IS_DEAD_ARG(i)) { 3562 temp_dead(s, ts); 3563 } 3564 } 3565 } 3566 3567 #ifdef TCG_TARGET_STACK_GROWSUP 3568 #define STACK_DIR(x) (-(x)) 3569 #else 3570 #define STACK_DIR(x) (x) 3571 #endif 3572 3573 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3574 { 3575 const int nb_oargs = TCGOP_CALLO(op); 3576 const int nb_iargs = TCGOP_CALLI(op); 3577 const TCGLifeData arg_life = op->life; 3578 int flags, nb_regs, i; 3579 TCGReg reg; 3580 TCGArg arg; 3581 TCGTemp *ts; 3582 intptr_t stack_offset; 3583 size_t call_stack_size; 3584 tcg_insn_unit *func_addr; 3585 int allocate_args; 3586 TCGRegSet allocated_regs; 3587 3588 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 3589 flags = op->args[nb_oargs + nb_iargs + 1]; 3590 3591 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3592 if (nb_regs > nb_iargs) { 3593 nb_regs = nb_iargs; 3594 } 3595 3596 /* assign stack slots first */ 3597 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3598 call_stack_size = (call_stack_size 
+ TCG_TARGET_STACK_ALIGN - 1) & 3599 ~(TCG_TARGET_STACK_ALIGN - 1); 3600 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3601 if (allocate_args) { 3602 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3603 preallocate call stack */ 3604 tcg_abort(); 3605 } 3606 3607 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3608 for (i = nb_regs; i < nb_iargs; i++) { 3609 arg = op->args[nb_oargs + i]; 3610 #ifdef TCG_TARGET_STACK_GROWSUP 3611 stack_offset -= sizeof(tcg_target_long); 3612 #endif 3613 if (arg != TCG_CALL_DUMMY_ARG) { 3614 ts = arg_temp(arg); 3615 temp_load(s, ts, tcg_target_available_regs[ts->type], 3616 s->reserved_regs, 0); 3617 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3618 } 3619 #ifndef TCG_TARGET_STACK_GROWSUP 3620 stack_offset += sizeof(tcg_target_long); 3621 #endif 3622 } 3623 3624 /* assign input registers */ 3625 allocated_regs = s->reserved_regs; 3626 for (i = 0; i < nb_regs; i++) { 3627 arg = op->args[nb_oargs + i]; 3628 if (arg != TCG_CALL_DUMMY_ARG) { 3629 ts = arg_temp(arg); 3630 reg = tcg_target_call_iarg_regs[i]; 3631 3632 if (ts->val_type == TEMP_VAL_REG) { 3633 if (ts->reg != reg) { 3634 tcg_reg_free(s, reg, allocated_regs); 3635 tcg_out_mov(s, ts->type, reg, ts->reg); 3636 } 3637 } else { 3638 TCGRegSet arg_set = 0; 3639 3640 tcg_reg_free(s, reg, allocated_regs); 3641 tcg_regset_set_reg(arg_set, reg); 3642 temp_load(s, ts, arg_set, allocated_regs, 0); 3643 } 3644 3645 tcg_regset_set_reg(allocated_regs, reg); 3646 } 3647 } 3648 3649 /* mark dead temporaries and free the associated registers */ 3650 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3651 if (IS_DEAD_ARG(i)) { 3652 temp_dead(s, arg_temp(op->args[i])); 3653 } 3654 } 3655 3656 /* clobber call registers */ 3657 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3658 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3659 tcg_reg_free(s, i, allocated_regs); 3660 } 3661 } 3662 3663 /* Save globals if they might be written by the helper, sync them if 3664 they might be read. 
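
   Summary of the three cases handled below (illustrative, not code from
   this file):

       TCG_CALL_NO_READ_GLOBALS     helper touches no globals - nothing
       TCG_CALL_NO_WRITE_GLOBALS    helper may read  - sync_globals()
       neither flag                 helper may write - save_globals()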
*/ 3665 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3666 /* Nothing to do */ 3667 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3668 sync_globals(s, allocated_regs); 3669 } else { 3670 save_globals(s, allocated_regs); 3671 } 3672 3673 tcg_out_call(s, func_addr); 3674 3675 /* assign output registers and emit moves if needed */ 3676 for(i = 0; i < nb_oargs; i++) { 3677 arg = op->args[i]; 3678 ts = arg_temp(arg); 3679 reg = tcg_target_call_oarg_regs[i]; 3680 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3681 3682 if (ts->fixed_reg) { 3683 if (ts->reg != reg) { 3684 tcg_out_mov(s, ts->type, ts->reg, reg); 3685 } 3686 } else { 3687 if (ts->val_type == TEMP_VAL_REG) { 3688 s->reg_to_temp[ts->reg] = NULL; 3689 } 3690 ts->val_type = TEMP_VAL_REG; 3691 ts->reg = reg; 3692 ts->mem_coherent = 0; 3693 s->reg_to_temp[reg] = ts; 3694 if (NEED_SYNC_ARG(i)) { 3695 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 3696 } else if (IS_DEAD_ARG(i)) { 3697 temp_dead(s, ts); 3698 } 3699 } 3700 } 3701 } 3702 3703 #ifdef CONFIG_PROFILER 3704 3705 /* avoid copy/paste errors */ 3706 #define PROF_ADD(to, from, field) \ 3707 do { \ 3708 (to)->field += atomic_read(&((from)->field)); \ 3709 } while (0) 3710 3711 #define PROF_MAX(to, from, field) \ 3712 do { \ 3713 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3714 if (val__ > (to)->field) { \ 3715 (to)->field = val__; \ 3716 } \ 3717 } while (0) 3718 3719 /* Pass in a zero'ed @prof */ 3720 static inline 3721 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3722 { 3723 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3724 unsigned int i; 3725 3726 for (i = 0; i < n_ctxs; i++) { 3727 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3728 const TCGProfile *orig = &s->prof; 3729 3730 if (counters) { 3731 PROF_ADD(prof, orig, cpu_exec_time); 3732 PROF_ADD(prof, orig, tb_count1); 3733 PROF_ADD(prof, orig, tb_count); 3734 PROF_ADD(prof, orig, op_count); 3735 PROF_MAX(prof, orig, op_count_max); 3736 PROF_ADD(prof, orig, temp_count); 3737 PROF_MAX(prof, orig, temp_count_max); 3738 PROF_ADD(prof, orig, del_op_count); 3739 PROF_ADD(prof, orig, code_in_len); 3740 PROF_ADD(prof, orig, code_out_len); 3741 PROF_ADD(prof, orig, search_out_len); 3742 PROF_ADD(prof, orig, interm_time); 3743 PROF_ADD(prof, orig, code_time); 3744 PROF_ADD(prof, orig, la_time); 3745 PROF_ADD(prof, orig, opt_time); 3746 PROF_ADD(prof, orig, restore_count); 3747 PROF_ADD(prof, orig, restore_time); 3748 } 3749 if (table) { 3750 int i; 3751 3752 for (i = 0; i < NB_OPS; i++) { 3753 PROF_ADD(prof, orig, table_op_count[i]); 3754 } 3755 } 3756 } 3757 } 3758 3759 #undef PROF_ADD 3760 #undef PROF_MAX 3761 3762 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3763 { 3764 tcg_profile_snapshot(prof, true, false); 3765 } 3766 3767 static void tcg_profile_snapshot_table(TCGProfile *prof) 3768 { 3769 tcg_profile_snapshot(prof, false, true); 3770 } 3771 3772 void tcg_dump_op_count(void) 3773 { 3774 TCGProfile prof = {}; 3775 int i; 3776 3777 tcg_profile_snapshot_table(&prof); 3778 for (i = 0; i < NB_OPS; i++) { 3779 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, 3780 prof.table_op_count[i]); 3781 } 3782 } 3783 3784 int64_t tcg_cpu_exec_time(void) 3785 { 3786 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3787 unsigned int i; 3788 int64_t ret = 0; 3789 3790 for (i = 0; i < n_ctxs; i++) { 3791 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 3792 const TCGProfile *prof = &s->prof; 3793 3794 ret += atomic_read(&prof->cpu_exec_time); 3795 } 3796 return ret; 3797 } 3798 #else 
3799 void tcg_dump_op_count(void) 3800 { 3801 qemu_printf("[TCG profiler not compiled]\n"); 3802 } 3803 3804 int64_t tcg_cpu_exec_time(void) 3805 { 3806 error_report("%s: TCG profiler not compiled", __func__); 3807 exit(EXIT_FAILURE); 3808 } 3809 #endif 3810 3811 3812 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3813 { 3814 #ifdef CONFIG_PROFILER 3815 TCGProfile *prof = &s->prof; 3816 #endif 3817 int i, num_insns; 3818 TCGOp *op; 3819 3820 #ifdef CONFIG_PROFILER 3821 { 3822 int n = 0; 3823 3824 QTAILQ_FOREACH(op, &s->ops, link) { 3825 n++; 3826 } 3827 atomic_set(&prof->op_count, prof->op_count + n); 3828 if (n > prof->op_count_max) { 3829 atomic_set(&prof->op_count_max, n); 3830 } 3831 3832 n = s->nb_temps; 3833 atomic_set(&prof->temp_count, prof->temp_count + n); 3834 if (n > prof->temp_count_max) { 3835 atomic_set(&prof->temp_count_max, n); 3836 } 3837 } 3838 #endif 3839 3840 #ifdef DEBUG_DISAS 3841 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3842 && qemu_log_in_addr_range(tb->pc))) { 3843 qemu_log_lock(); 3844 qemu_log("OP:\n"); 3845 tcg_dump_ops(s, false); 3846 qemu_log("\n"); 3847 qemu_log_unlock(); 3848 } 3849 #endif 3850 3851 #ifdef CONFIG_DEBUG_TCG 3852 /* Ensure all labels referenced have been emitted. */ 3853 { 3854 TCGLabel *l; 3855 bool error = false; 3856 3857 QSIMPLEQ_FOREACH(l, &s->labels, next) { 3858 if (unlikely(!l->present) && l->refs) { 3859 qemu_log_mask(CPU_LOG_TB_OP, 3860 "$L%d referenced but not present.\n", l->id); 3861 error = true; 3862 } 3863 } 3864 assert(!error); 3865 } 3866 #endif 3867 3868 #ifdef CONFIG_PROFILER 3869 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3870 #endif 3871 3872 #ifdef USE_TCG_OPTIMIZATIONS 3873 tcg_optimize(s); 3874 #endif 3875 3876 #ifdef CONFIG_PROFILER 3877 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3878 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3879 #endif 3880 3881 reachable_code_pass(s); 3882 liveness_pass_1(s); 3883 3884 if (s->nb_indirects > 0) { 3885 #ifdef DEBUG_DISAS 3886 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3887 && qemu_log_in_addr_range(tb->pc))) { 3888 qemu_log_lock(); 3889 qemu_log("OP before indirect lowering:\n"); 3890 tcg_dump_ops(s, false); 3891 qemu_log("\n"); 3892 qemu_log_unlock(); 3893 } 3894 #endif 3895 /* Replace indirect temps with direct temps. */ 3896 if (liveness_pass_2(s)) { 3897 /* If changes were made, re-run liveness. 
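      Pass 2 inserts fresh ld/st ops and rewrites arguments, so the
      life/preference data attached to each op must be recomputed
      before register allocation runs.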
*/ 3898 liveness_pass_1(s); 3899 } 3900 } 3901 3902 #ifdef CONFIG_PROFILER 3903 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3904 #endif 3905 3906 #ifdef DEBUG_DISAS 3907 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3908 && qemu_log_in_addr_range(tb->pc))) { 3909 qemu_log_lock(); 3910 qemu_log("OP after optimization and liveness analysis:\n"); 3911 tcg_dump_ops(s, true); 3912 qemu_log("\n"); 3913 qemu_log_unlock(); 3914 } 3915 #endif 3916 3917 tcg_reg_alloc_start(s); 3918 3919 s->code_buf = tb->tc.ptr; 3920 s->code_ptr = tb->tc.ptr; 3921 3922 #ifdef TCG_TARGET_NEED_LDST_LABELS 3923 QSIMPLEQ_INIT(&s->ldst_labels); 3924 #endif 3925 #ifdef TCG_TARGET_NEED_POOL_LABELS 3926 s->pool_labels = NULL; 3927 #endif 3928 3929 num_insns = -1; 3930 QTAILQ_FOREACH(op, &s->ops, link) { 3931 TCGOpcode opc = op->opc; 3932 3933 #ifdef CONFIG_PROFILER 3934 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3935 #endif 3936 3937 switch (opc) { 3938 case INDEX_op_mov_i32: 3939 case INDEX_op_mov_i64: 3940 case INDEX_op_mov_vec: 3941 tcg_reg_alloc_mov(s, op); 3942 break; 3943 case INDEX_op_movi_i32: 3944 case INDEX_op_movi_i64: 3945 case INDEX_op_dupi_vec: 3946 tcg_reg_alloc_movi(s, op); 3947 break; 3948 case INDEX_op_insn_start: 3949 if (num_insns >= 0) { 3950 size_t off = tcg_current_code_size(s); 3951 s->gen_insn_end_off[num_insns] = off; 3952 /* Assert that we do not overflow our stored offset. */ 3953 assert(s->gen_insn_end_off[num_insns] == off); 3954 } 3955 num_insns++; 3956 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3957 target_ulong a; 3958 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3959 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3960 #else 3961 a = op->args[i]; 3962 #endif 3963 s->gen_insn_data[num_insns][i] = a; 3964 } 3965 break; 3966 case INDEX_op_discard: 3967 temp_dead(s, arg_temp(op->args[0])); 3968 break; 3969 case INDEX_op_set_label: 3970 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3971 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3972 break; 3973 case INDEX_op_call: 3974 tcg_reg_alloc_call(s, op); 3975 break; 3976 default: 3977 /* Sanity check that we've not introduced any unhandled opcodes. */ 3978 tcg_debug_assert(tcg_op_supported(opc)); 3979 /* Note: in order to speed up the code, it would be much 3980 faster to have specialized register allocator functions for 3981 some common argument patterns */ 3982 tcg_reg_alloc_op(s, op); 3983 break; 3984 } 3985 #ifdef CONFIG_DEBUG_TCG 3986 check_regs(s); 3987 #endif 3988 /* Test for (pending) buffer overflow. The assumption is that any 3989 one operation beginning below the high water mark cannot overrun 3990 the buffer completely. Thus we can test for overflow after 3991 generating code without having to check during generation. 
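
   Roughly (a sketch of the assumption, not a statement from this file):

       code_gen_highwater  ~=  end of region - TCG_HIGHWATER
       worst-case host code emitted for one op  <  TCG_HIGHWATER

   so one cheap comparison per op, made after emission, is enough.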

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both the value to put into
       the ELF image and support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.  */
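
/*
 * Illustrative sketch only (kept out of the build): the rough shape of a
 * backend's side of the contract described above.  The unwind values below
 * are placeholders -- a real tcg-target.inc.c appends DW_CFA_* opcodes that
 * match its actual prologue and uses its host's return-address column --
 * but steps (1)-(3) reduce to approximately this.
 */
#if 0
/* (1) In the backend headers: #define ELF_HOST_MACHINE EM_X86_64 (for an
       x86-64 host; pick the EM_* value matching the host).  */

/* (2) A statically constructed .debug_frame, starting with the common
       CIE/FDE header defined near the top of this file.  */
static const DebugFrameHeader example_debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4,   /* length after the .len field */
    .cie.id = -1,                           /* CIE marker in .debug_frame */
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,                 /* sleb128 -8; placeholder */
    .cie.return_column = 16,                /* placeholder register number */
    .fde.len = sizeof(DebugFrameFDEHeader) - 4,
    .fde.cie_offset = 0,                    /* CIE sits at section offset 0 */
};

/* (3) Hand the unwind info to the common code; func_start/func_len in the
       FDE are patched by tcg_register_jit_int() before GDB sees them.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size,
                         &example_debug_frame, sizeof(example_debug_frame));
}
#endif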

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid the unused-return-value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif
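
    /* With DEBUG_JIT enabled above, the dumped image can then be inspected
       with e.g. "readelf -a /tmp/qemu.jit" or "objdump --dwarf=frames
       /tmp/qemu.jit" (illustrative invocations; any ELF/DWARF reader works). */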

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif