/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define DEBUG_JIT to dump the ELF image used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

/* Pick the ELF class/endianness constants matching the host for the
   in-memory debug image handed to GDB (see tcg_register_jit_int). */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* force pointer-sized alignment so the frame data that follows is
       naturally aligned regardless of host */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc.
 */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stubs for hosts without vector support; the vector opcodes are never
   generated there, so reaching one of these is a front-end bug. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

/* Slack kept at the end of each region so an opcode's worth of output
   cannot overrun the buffer; checked against code_gen_highwater. */
#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

/*
 * Raw emission into the output buffer, and in-place patching of already
 * emitted code.  Each width is compiled only when representable as a whole
 * number of tcg_insn_unit's for the host; smaller-than-unit writes fall
 * back to memcpy so unaligned stores remain well-defined.
 */
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/* Record that the instruction at @code_ptr needs relocating against label
   @l once the label's value is known; resolved in tcg_resolve_relocs. */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

/* Bind label @l to the current output position (as an RX-view pointer). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

/* Apply all pending relocations; returns false if any patch fails
   (e.g. a displacement out of range for the host encoding). */
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

/* GCompareFunc over tb_tc keys: total order on inserted TBs by code
   pointer; a zero-size operand is treated as a point lookup. */
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * On a lookup, one of the two operands has its .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

/* Allocate one cache-line-padded tb_tc tree per region. */
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

/* Map a translated-code pointer to the tree of the region containing it;
   out-of-range pointers are clamped to the first/last region. */
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
451 */ 452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) 453 { 454 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr); 455 TranslationBlock *tb; 456 struct tb_tc s = { .ptr = (void *)tc_ptr }; 457 458 qemu_mutex_lock(&rt->lock); 459 tb = g_tree_lookup(rt->tree, &s); 460 qemu_mutex_unlock(&rt->lock); 461 return tb; 462 } 463 464 static void tcg_region_tree_lock_all(void) 465 { 466 size_t i; 467 468 for (i = 0; i < region.n; i++) { 469 struct tcg_region_tree *rt = region_trees + i * tree_size; 470 471 qemu_mutex_lock(&rt->lock); 472 } 473 } 474 475 static void tcg_region_tree_unlock_all(void) 476 { 477 size_t i; 478 479 for (i = 0; i < region.n; i++) { 480 struct tcg_region_tree *rt = region_trees + i * tree_size; 481 482 qemu_mutex_unlock(&rt->lock); 483 } 484 } 485 486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) 487 { 488 size_t i; 489 490 tcg_region_tree_lock_all(); 491 for (i = 0; i < region.n; i++) { 492 struct tcg_region_tree *rt = region_trees + i * tree_size; 493 494 g_tree_foreach(rt->tree, func, user_data); 495 } 496 tcg_region_tree_unlock_all(); 497 } 498 499 size_t tcg_nb_tbs(void) 500 { 501 size_t nb_tbs = 0; 502 size_t i; 503 504 tcg_region_tree_lock_all(); 505 for (i = 0; i < region.n; i++) { 506 struct tcg_region_tree *rt = region_trees + i * tree_size; 507 508 nb_tbs += g_tree_nnodes(rt->tree); 509 } 510 tcg_region_tree_unlock_all(); 511 return nb_tbs; 512 } 513 514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data) 515 { 516 TranslationBlock *tb = v; 517 518 tb_destroy(tb); 519 return FALSE; 520 } 521 522 static void tcg_region_tree_reset_all(void) 523 { 524 size_t i; 525 526 tcg_region_tree_lock_all(); 527 for (i = 0; i < region.n; i++) { 528 struct tcg_region_tree *rt = region_trees + i * tree_size; 529 530 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL); 531 /* Increment the refcount first so that destroy acts as a reset */ 532 g_tree_ref(rt->tree); 533 
g_tree_destroy(rt->tree); 534 } 535 tcg_region_tree_unlock_all(); 536 } 537 538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 539 { 540 void *start, *end; 541 542 start = region.start_aligned + curr_region * region.stride; 543 end = start + region.size; 544 545 if (curr_region == 0) { 546 start = region.start; 547 } 548 if (curr_region == region.n - 1) { 549 end = region.end; 550 } 551 552 *pstart = start; 553 *pend = end; 554 } 555 556 static void tcg_region_assign(TCGContext *s, size_t curr_region) 557 { 558 void *start, *end; 559 560 tcg_region_bounds(curr_region, &start, &end); 561 562 s->code_gen_buffer = start; 563 s->code_gen_ptr = start; 564 s->code_gen_buffer_size = end - start; 565 s->code_gen_highwater = end - TCG_HIGHWATER; 566 } 567 568 static bool tcg_region_alloc__locked(TCGContext *s) 569 { 570 if (region.current == region.n) { 571 return true; 572 } 573 tcg_region_assign(s, region.current); 574 region.current++; 575 return false; 576 } 577 578 /* 579 * Request a new region once the one in use has filled up. 580 * Returns true on error. 581 */ 582 static bool tcg_region_alloc(TCGContext *s) 583 { 584 bool err; 585 /* read the region size now; alloc__locked will overwrite it on success */ 586 size_t size_full = s->code_gen_buffer_size; 587 588 qemu_mutex_lock(®ion.lock); 589 err = tcg_region_alloc__locked(s); 590 if (!err) { 591 region.agg_size_full += size_full - TCG_HIGHWATER; 592 } 593 qemu_mutex_unlock(®ion.lock); 594 return err; 595 } 596 597 /* 598 * Perform a context's first region allocation. 599 * This function does _not_ increment region.agg_size_full. 
600 */ 601 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 602 { 603 return tcg_region_alloc__locked(s); 604 } 605 606 /* Call from a safe-work context */ 607 void tcg_region_reset_all(void) 608 { 609 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 610 unsigned int i; 611 612 qemu_mutex_lock(®ion.lock); 613 region.current = 0; 614 region.agg_size_full = 0; 615 616 for (i = 0; i < n_ctxs; i++) { 617 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 618 bool err = tcg_region_initial_alloc__locked(s); 619 620 g_assert(!err); 621 } 622 qemu_mutex_unlock(®ion.lock); 623 624 tcg_region_tree_reset_all(); 625 } 626 627 #ifdef CONFIG_USER_ONLY 628 static size_t tcg_n_regions(void) 629 { 630 return 1; 631 } 632 #else 633 /* 634 * It is likely that some vCPUs will translate more code than others, so we 635 * first try to set more regions than max_cpus, with those regions being of 636 * reasonable size. If that's not possible we make do by evenly dividing 637 * the code_gen_buffer among the vCPUs. 638 */ 639 static size_t tcg_n_regions(void) 640 { 641 size_t i; 642 643 /* Use a single region if all we have is one vCPU thread */ 644 #if !defined(CONFIG_USER_ONLY) 645 MachineState *ms = MACHINE(qdev_get_machine()); 646 unsigned int max_cpus = ms->smp.max_cpus; 647 #endif 648 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 649 return 1; 650 } 651 652 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 653 for (i = 8; i > 0; i--) { 654 size_t regions_per_thread = i; 655 size_t region_size; 656 657 region_size = tcg_init_ctx.code_gen_buffer_size; 658 region_size /= max_cpus * regions_per_thread; 659 660 if (region_size >= 2 * 1024u * 1024) { 661 return max_cpus * regions_per_thread; 662 } 663 } 664 /* If we can't, then just allocate one region per vCPU thread */ 665 return max_cpus; 666 } 667 #endif 668 669 /* 670 * Initializes region partitioning. 671 * 672 * Called at init time from the parent thread (i.e. 
the one calling 673 * tcg_context_init), after the target's TCG globals have been set. 674 * 675 * Region partitioning works by splitting code_gen_buffer into separate regions, 676 * and then assigning regions to TCG threads so that the threads can translate 677 * code in parallel without synchronization. 678 * 679 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 680 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 681 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 682 * must have been parsed before calling this function, since it calls 683 * qemu_tcg_mttcg_enabled(). 684 * 685 * In user-mode we use a single region. Having multiple regions in user-mode 686 * is not supported, because the number of vCPU threads (recall that each thread 687 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 688 * OS, and usually this number is huge (tens of thousands is not uncommon). 689 * Thus, given this large bound on the number of vCPU threads and the fact 690 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 691 * that the availability of at least one region per vCPU thread. 692 * 693 * However, this user-mode limitation is unlikely to be a significant problem 694 * in practice. Multi-threaded guests share most if not all of their translated 695 * code, which makes parallel code generation less appealing than in softmmu. 
696 */ 697 void tcg_region_init(void) 698 { 699 void *buf = tcg_init_ctx.code_gen_buffer; 700 void *aligned; 701 size_t size = tcg_init_ctx.code_gen_buffer_size; 702 size_t page_size = qemu_real_host_page_size; 703 size_t region_size; 704 size_t n_regions; 705 size_t i; 706 uintptr_t splitwx_diff; 707 708 n_regions = tcg_n_regions(); 709 710 /* The first region will be 'aligned - buf' bytes larger than the others */ 711 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 712 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 713 /* 714 * Make region_size a multiple of page_size, using aligned as the start. 715 * As a result of this we might end up with a few extra pages at the end of 716 * the buffer; we will assign those to the last region. 717 */ 718 region_size = (size - (aligned - buf)) / n_regions; 719 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 720 721 /* A region must have at least 2 pages; one code, one guard */ 722 g_assert(region_size >= 2 * page_size); 723 724 /* init the region struct */ 725 qemu_mutex_init(®ion.lock); 726 region.n = n_regions; 727 region.size = region_size - page_size; 728 region.stride = region_size; 729 region.start = buf; 730 region.start_aligned = aligned; 731 /* page-align the end, since its last page will be a guard page */ 732 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 733 /* account for that last guard page */ 734 region.end -= page_size; 735 736 /* set guard pages */ 737 splitwx_diff = tcg_splitwx_diff; 738 for (i = 0; i < region.n; i++) { 739 void *start, *end; 740 int rc; 741 742 tcg_region_bounds(i, &start, &end); 743 rc = qemu_mprotect_none(end, page_size); 744 g_assert(!rc); 745 if (splitwx_diff) { 746 rc = qemu_mprotect_none(end + splitwx_diff, page_size); 747 g_assert(!rc); 748 } 749 } 750 751 tcg_region_trees_init(); 752 753 /* In user-mode we support only one ctx, so do the initial allocation now */ 754 #ifdef CONFIG_USER_ONLY 755 { 756 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 757 
758 g_assert(!err); 759 } 760 #endif 761 } 762 763 #ifdef CONFIG_DEBUG_TCG 764 const void *tcg_splitwx_to_rx(void *rw) 765 { 766 /* Pass NULL pointers unchanged. */ 767 if (rw) { 768 g_assert(in_code_gen_buffer(rw)); 769 rw += tcg_splitwx_diff; 770 } 771 return rw; 772 } 773 774 void *tcg_splitwx_to_rw(const void *rx) 775 { 776 /* Pass NULL pointers unchanged. */ 777 if (rx) { 778 rx -= tcg_splitwx_diff; 779 /* Assert that we end with a pointer in the rw region. */ 780 g_assert(in_code_gen_buffer(rx)); 781 } 782 return (void *)rx; 783 } 784 #endif /* CONFIG_DEBUG_TCG */ 785 786 static void alloc_tcg_plugin_context(TCGContext *s) 787 { 788 #ifdef CONFIG_PLUGIN 789 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); 790 s->plugin_tb->insns = 791 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); 792 #endif 793 } 794 795 /* 796 * All TCG threads except the parent (i.e. the one that called tcg_context_init 797 * and registered the target's TCG globals) must register with this function 798 * before initiating translation. 799 * 800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 801 * of tcg_region_init() for the reasoning behind this. 802 * 803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 804 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 805 * is not used anymore for translation once this function is called. 806 * 807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 808 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 809 */ 810 #ifdef CONFIG_USER_ONLY 811 void tcg_register_thread(void) 812 { 813 tcg_ctx = &tcg_init_ctx; 814 } 815 #else 816 void tcg_register_thread(void) 817 { 818 MachineState *ms = MACHINE(qdev_get_machine()); 819 TCGContext *s = g_malloc(sizeof(*s)); 820 unsigned int i, n; 821 bool err; 822 823 *s = tcg_init_ctx; 824 825 /* Relink mem_base. 
*/ 826 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 827 if (tcg_init_ctx.temps[i].mem_base) { 828 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 829 tcg_debug_assert(b >= 0 && b < n); 830 s->temps[i].mem_base = &s->temps[b]; 831 } 832 } 833 834 /* Claim an entry in tcg_ctxs */ 835 n = qatomic_fetch_inc(&n_tcg_ctxs); 836 g_assert(n < ms->smp.max_cpus); 837 qatomic_set(&tcg_ctxs[n], s); 838 839 if (n > 0) { 840 alloc_tcg_plugin_context(s); 841 } 842 843 tcg_ctx = s; 844 qemu_mutex_lock(®ion.lock); 845 err = tcg_region_initial_alloc__locked(tcg_ctx); 846 g_assert(!err); 847 qemu_mutex_unlock(®ion.lock); 848 } 849 #endif /* !CONFIG_USER_ONLY */ 850 851 /* 852 * Returns the size (in bytes) of all translated code (i.e. from all regions) 853 * currently in the cache. 854 * See also: tcg_code_capacity() 855 * Do not confuse with tcg_current_code_size(); that one applies to a single 856 * TCG context. 857 */ 858 size_t tcg_code_size(void) 859 { 860 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 861 unsigned int i; 862 size_t total; 863 864 qemu_mutex_lock(®ion.lock); 865 total = region.agg_size_full; 866 for (i = 0; i < n_ctxs; i++) { 867 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 868 size_t size; 869 870 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 871 g_assert(size <= s->code_gen_buffer_size); 872 total += size; 873 } 874 qemu_mutex_unlock(®ion.lock); 875 return total; 876 } 877 878 /* 879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 880 * regions. 
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    /* Exclude each region's guard page and high-water reserve from the total. */
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

/* Sum of tb_phys_invalidate_count across all TCG contexts. */
size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        /* Large allocations live on their own list, freed by tcg_pool_reset. */
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No reusable chunk; append a fresh one to the chain. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

/*
 * Reset the pool allocator: release all large allocations and rewind to
 * the start of the chunk chain (small chunks are kept for reuse).
 */
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;                 /* host address of the helper */
    const char *name;
    unsigned flags;             /* TCG_CALL_* flags passed to the call op */
    /* Per-argument size/sign bits; decoded in tcg_gen_callN.
       NOTE(review): bit 0 appears to flag a 64-bit return — confirm against
       the dh_sizemask definitions in exec/helper-head.h.  */
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

/*
 * One-time initialization of a TCGContext: op argument-constraint storage,
 * the helper lookup table, target backend init, and the "env" global.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-op slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region exhausted; grab a new one and retry, or fail. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

/*
 * Emit the host prologue/epilogue at the start of code_gen_buffer and
 * deduct it from the buffer handed to translation.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    /* Flush the generated prologue out of the d-cache / into the i-cache
       (split-w^x: write address differs from execute address).  */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Disassemble the code part, then dump the out-of-line
               constant pool as raw words. */
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

/* Reset per-TB translation state; called before translating each TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

/* Allocate the next TCGTemp slot, zero-initialized. */
static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Allocate a global temp; only valid before any local temps exist. */
static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

/* Create a global temp fixed to a host register, and reserve that register. */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

/* Record the spill-frame bounds and create the "_frame" base register. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

/*
 * Create a global temp backed by memory at base+offset.  On 32-bit hosts
 * a 64-bit global becomes an adjacent pair of 32-bit halves ("name_0/_1"),
 * ordered by host endianness.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        /* Low half sits at the low address on LE hosts, high address on BE. */
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

/*
 * Allocate a (non-global) temporary, reusing a freed one of the same
 * type/locality if available.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are indexed by base type, locals in a second bank. */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit value on a 32-bit host: allocate an adjacent pair. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

/* Allocate a new vector temporary of the given vector type. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.
*/ 1373 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1374 { 1375 TCGTemp *t = tcgv_vec_temp(match); 1376 1377 tcg_debug_assert(t->temp_allocated != 0); 1378 1379 t = tcg_temp_new_internal(t->base_type, 0); 1380 return temp_tcgv_vec(t); 1381 } 1382 1383 void tcg_temp_free_internal(TCGTemp *ts) 1384 { 1385 TCGContext *s = tcg_ctx; 1386 int k, idx; 1387 1388 #if defined(CONFIG_DEBUG_TCG) 1389 s->temps_in_use--; 1390 if (s->temps_in_use < 0) { 1391 fprintf(stderr, "More temporaries freed than allocated!\n"); 1392 } 1393 #endif 1394 1395 tcg_debug_assert(ts->temp_global == 0); 1396 tcg_debug_assert(ts->temp_allocated != 0); 1397 ts->temp_allocated = 0; 1398 1399 idx = temp_idx(ts); 1400 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 1401 set_bit(idx, s->free_temps[k].l); 1402 } 1403 1404 TCGv_i32 tcg_const_i32(int32_t val) 1405 { 1406 TCGv_i32 t0; 1407 t0 = tcg_temp_new_i32(); 1408 tcg_gen_movi_i32(t0, val); 1409 return t0; 1410 } 1411 1412 TCGv_i64 tcg_const_i64(int64_t val) 1413 { 1414 TCGv_i64 t0; 1415 t0 = tcg_temp_new_i64(); 1416 tcg_gen_movi_i64(t0, val); 1417 return t0; 1418 } 1419 1420 TCGv_i32 tcg_const_local_i32(int32_t val) 1421 { 1422 TCGv_i32 t0; 1423 t0 = tcg_temp_local_new_i32(); 1424 tcg_gen_movi_i32(t0, val); 1425 return t0; 1426 } 1427 1428 TCGv_i64 tcg_const_local_i64(int64_t val) 1429 { 1430 TCGv_i64 t0; 1431 t0 = tcg_temp_local_new_i64(); 1432 tcg_gen_movi_i64(t0, val); 1433 return t0; 1434 } 1435 1436 #if defined(CONFIG_DEBUG_TCG) 1437 void tcg_clear_temp_count(void) 1438 { 1439 TCGContext *s = tcg_ctx; 1440 s->temps_in_use = 0; 1441 } 1442 1443 int tcg_check_temp_count(void) 1444 { 1445 TCGContext *s = tcg_ctx; 1446 if (s->temps_in_use) { 1447 /* Clear the count so that we don't give another 1448 * warning immediately next time around. 1449 */ 1450 s->temps_in_use = 0; 1451 return 1; 1452 } 1453 return 0; 1454 } 1455 #endif 1456 1457 /* Return true if OP may appear in the opcode stream. 
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control-flow and memory ops every backend must provide. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-target capability macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare/branch only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit ops require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops additionally require some vector size to be enabled. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything else must be a target-specific op, always supported. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call().
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    /* func must have been registered in helper_table at context init. */
    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    /* sizemask: bit 0 set for a 64-bit return; for argument i, bit
       (i+1)*2 marks a 64-bit arg and bit (i+1)*2+1 a signed arg. */
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Some 64-bit hosts want 32-bit args explicitly extended to 64 bits. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: two halves, host-endian order. */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

/* Initialize per-temp register-allocator state at the start of a TB. */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;
    TCGTemp *ts;

    /* Globals: fixed-reg temps start in their register, others in memory. */
    for (i = 0, n = s->nb_globals; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
    }
    /* Non-globals: locals start in memory, plain temps start dead. */
    for (n = s->nb_temps; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a human-readable name for temp ts into buf (for dumps). */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    if (ts->temp_global) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Find helper name. */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

/* Printable names for TCG comparison conditions, indexed by TCGCond. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for MemOp size/sign/endian combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

/* Printable prefixes for MemOp alignment bits; the unmarked case depends
   on whether the target requires alignment by default. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* True iff the register set has zero or one member. */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

/* Index of the lowest register in a non-empty set. */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Dump the current op stream to the log, optionally with liveness info
   and output register preferences. */
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Guest addresses are stored as two host-word halves. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            /* Decode the first constant argument symbolically where known. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = op->args[k++];
                    /* NOTE: this 'op' (a MemOp) shadows the outer TCGOp *op. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets print as $Ln labels. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Remaining constant args print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }

        if (have_prefs || op->life) {

            QemuLogFile *logfile;

            /* Pad to column 40 so liveness/pref annotations line up. */
            rcu_read_lock();
            logfile = qatomic_rcu_read(&qemu_logfile);
            if (logfile) {
                for (; col < 40; ++col) {
                    putc(' ', logfile->fd);
                }
            }
            rcu_read_unlock();
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                qemu_log("  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                qemu_log("  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    qemu_log("  pref=");
                } else {
                    qemu_log(",");
                }
                if (set == 0) {
                    qemu_log("none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    qemu_log("all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    qemu_log("%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    qemu_log("%#x", (uint32_t)set);
                } else {
                    qemu_log("%#" PRIx64, (uint64_t)set);
                }
            }
        }

        qemu_log("\n");
    }
}

/* we
give more priority to constraints with fewer allowed registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n;

    if (arg_ct->oalias) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        /* Count the registers permitted by this constraint.  */
        n = ctpop64(arg_ct->regs);
    }
    /* Fewer permitted registers => larger (higher) priority value.  */
    return TCG_TARGET_NB_REGS - n + 1;
}

/* Sort the N constraints of DEF starting at index START from highest
   priority to lowest, recording the order in sort_index.  Simple
   O(n^2) selection; n is tiny (bounded by the op's arg count).  */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j;
    TCGArgConstraint *a = def->args_ct;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

/* Parse the target-provided constraint strings for every opcode and
   fill in tcg_op_defs[].args_ct, then sort the constraints.  Run once
   at startup; asserts on malformed TCGTargetOpDef entries.  */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                /* Digit constraint: input aliased to output register.  */
                case '0' ...
'9':
                    {
                        int oarg = *ct_str - '0';
                        /* Aliases must be the first character of the
                           constraint string, and must name an output.  */
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias.  */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Early-clobber: output written before inputs consumed. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant.  */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* Remaining letters are target-specific.  */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

/* Unlink OP from the instruction stream, recycle it onto the free list,
   and drop the reference counts of any branch-target labels it held.  */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    /* Branch opcodes hold a label reference in a fixed arg slot.  */
    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

/* Allocate a zero-initialized TCGOp for OPC, reusing a previously
   removed op from the free list when one is available.  */
static TCGOp *tcg_op_alloc(TCGOpcode opc)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op;
    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
        op = tcg_malloc(sizeof(TCGOp));
    } else {
        op = QTAILQ_FIRST(&s->free_ops);
        QTAILQ_REMOVE(&s->free_ops, op, link);
    }
    /* Clear everything up to, but not including, the link field.  */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    s->nb_ops++;

    return op;
}

/* Allocate an op for OPC and append it to the current op stream.  */
TCGOp *tcg_emit_op(TCGOpcode opc)
{
    TCGOp *op = tcg_op_alloc(opc);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

/* Allocate an op for OPC and insert it immediately before OLD_OP.  */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

/* Allocate an op for OPC and insert it immediately after OLD_OP.  */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

/* Reachable analysis : remove unreachable code.  Single forward walk:
   an unconditional control transfer marks everything dead until the
   next referenced label.  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.
 This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Liveness state bits held in TCGTemp.state during the liveness passes:
   TS_DEAD -- the value is not live; TS_MEM -- the value is (or must be)
   present in its canonical memory slot.  */
#define TS_DEAD  1
#define TS_MEM   2

/* Test/compose per-argument liveness flags in an op's arg_life word.  */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* state_ptr points into the prefs array allocated by liveness_pass_1. */
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory.
 */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals: dead, but their canonical memory copy is valid.  */
    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    /* Non-globals: simply dead.  */
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    for (i = ng; i < nt; ++i) {
        /* Only "local" temps survive a bb boundary, via memory.  */
        s->temps[i].state = (s->temps[i].temp_local
                             ? TS_DEAD | TS_MEM
                             : TS_DEAD);
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs.  */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead,
 * globals and local temps should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        if (s->temps[i].temp_local) {
            int state = s->temps[i].state;
            s->temps[i].state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: prefs unchanged, only the sync bit added.  */
                continue;
            }
        } else {
            s->temps[i].state = TS_DEAD;
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill.
 */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls.  Restrict each
   live temp's register preferences to call-saved registers.  */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.  Walks the op list backward; also computes
   per-temp register preferences (see la_temp_pref).  */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, reachable via state_ptr.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.
*/ 2649 la_func_end(s, nb_globals, nb_temps); 2650 2651 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2652 int nb_iargs, nb_oargs; 2653 TCGOpcode opc_new, opc_new2; 2654 bool have_opc_new2; 2655 TCGLifeData arg_life = 0; 2656 TCGTemp *ts; 2657 TCGOpcode opc = op->opc; 2658 const TCGOpDef *def = &tcg_op_defs[opc]; 2659 2660 switch (opc) { 2661 case INDEX_op_call: 2662 { 2663 int call_flags; 2664 int nb_call_regs; 2665 2666 nb_oargs = TCGOP_CALLO(op); 2667 nb_iargs = TCGOP_CALLI(op); 2668 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2669 2670 /* pure functions can be removed if their result is unused */ 2671 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2672 for (i = 0; i < nb_oargs; i++) { 2673 ts = arg_temp(op->args[i]); 2674 if (ts->state != TS_DEAD) { 2675 goto do_not_remove_call; 2676 } 2677 } 2678 goto do_remove; 2679 } 2680 do_not_remove_call: 2681 2682 /* Output args are dead. */ 2683 for (i = 0; i < nb_oargs; i++) { 2684 ts = arg_temp(op->args[i]); 2685 if (ts->state & TS_DEAD) { 2686 arg_life |= DEAD_ARG << i; 2687 } 2688 if (ts->state & TS_MEM) { 2689 arg_life |= SYNC_ARG << i; 2690 } 2691 ts->state = TS_DEAD; 2692 la_reset_pref(ts); 2693 2694 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2695 op->output_pref[i] = 0; 2696 } 2697 2698 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2699 TCG_CALL_NO_READ_GLOBALS))) { 2700 la_global_kill(s, nb_globals); 2701 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2702 la_global_sync(s, nb_globals); 2703 } 2704 2705 /* Record arguments that die in this helper. */ 2706 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2707 ts = arg_temp(op->args[i]); 2708 if (ts && ts->state & TS_DEAD) { 2709 arg_life |= DEAD_ARG << i; 2710 } 2711 } 2712 2713 /* For all live registers, remove call-clobbered prefs. */ 2714 la_cross_call(s, nb_temps); 2715 2716 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2717 2718 /* Input arguments are live for preceding opcodes. 
*/ 2719 for (i = 0; i < nb_iargs; i++) { 2720 ts = arg_temp(op->args[i + nb_oargs]); 2721 if (ts && ts->state & TS_DEAD) { 2722 /* For those arguments that die, and will be allocated 2723 * in registers, clear the register set for that arg, 2724 * to be filled in below. For args that will be on 2725 * the stack, reset to any available reg. 2726 */ 2727 *la_temp_pref(ts) 2728 = (i < nb_call_regs ? 0 : 2729 tcg_target_available_regs[ts->type]); 2730 ts->state &= ~TS_DEAD; 2731 } 2732 } 2733 2734 /* For each input argument, add its input register to prefs. 2735 If a temp is used once, this produces a single set bit. */ 2736 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2737 ts = arg_temp(op->args[i + nb_oargs]); 2738 if (ts) { 2739 tcg_regset_set_reg(*la_temp_pref(ts), 2740 tcg_target_call_iarg_regs[i]); 2741 } 2742 } 2743 } 2744 break; 2745 case INDEX_op_insn_start: 2746 break; 2747 case INDEX_op_discard: 2748 /* mark the temporary as dead */ 2749 ts = arg_temp(op->args[0]); 2750 ts->state = TS_DEAD; 2751 la_reset_pref(ts); 2752 break; 2753 2754 case INDEX_op_add2_i32: 2755 opc_new = INDEX_op_add_i32; 2756 goto do_addsub2; 2757 case INDEX_op_sub2_i32: 2758 opc_new = INDEX_op_sub_i32; 2759 goto do_addsub2; 2760 case INDEX_op_add2_i64: 2761 opc_new = INDEX_op_add_i64; 2762 goto do_addsub2; 2763 case INDEX_op_sub2_i64: 2764 opc_new = INDEX_op_sub_i64; 2765 do_addsub2: 2766 nb_iargs = 4; 2767 nb_oargs = 2; 2768 /* Test if the high part of the operation is dead, but not 2769 the low part. The result can be optimized to a simple 2770 add or sub. This happens often for x86_64 guest when the 2771 cpu mode is set to 32 bit. */ 2772 if (arg_temp(op->args[1])->state == TS_DEAD) { 2773 if (arg_temp(op->args[0])->state == TS_DEAD) { 2774 goto do_remove; 2775 } 2776 /* Replace the opcode and adjust the args in place, 2777 leaving 3 unused args at the end. 
*/ 2778 op->opc = opc = opc_new; 2779 op->args[1] = op->args[2]; 2780 op->args[2] = op->args[4]; 2781 /* Fall through and mark the single-word operation live. */ 2782 nb_iargs = 2; 2783 nb_oargs = 1; 2784 } 2785 goto do_not_remove; 2786 2787 case INDEX_op_mulu2_i32: 2788 opc_new = INDEX_op_mul_i32; 2789 opc_new2 = INDEX_op_muluh_i32; 2790 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2791 goto do_mul2; 2792 case INDEX_op_muls2_i32: 2793 opc_new = INDEX_op_mul_i32; 2794 opc_new2 = INDEX_op_mulsh_i32; 2795 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2796 goto do_mul2; 2797 case INDEX_op_mulu2_i64: 2798 opc_new = INDEX_op_mul_i64; 2799 opc_new2 = INDEX_op_muluh_i64; 2800 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2801 goto do_mul2; 2802 case INDEX_op_muls2_i64: 2803 opc_new = INDEX_op_mul_i64; 2804 opc_new2 = INDEX_op_mulsh_i64; 2805 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2806 goto do_mul2; 2807 do_mul2: 2808 nb_iargs = 2; 2809 nb_oargs = 2; 2810 if (arg_temp(op->args[1])->state == TS_DEAD) { 2811 if (arg_temp(op->args[0])->state == TS_DEAD) { 2812 /* Both parts of the operation are dead. */ 2813 goto do_remove; 2814 } 2815 /* The high part of the operation is dead; generate the low. */ 2816 op->opc = opc = opc_new; 2817 op->args[1] = op->args[2]; 2818 op->args[2] = op->args[3]; 2819 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2820 /* The low part of the operation is dead; generate the high. */ 2821 op->opc = opc = opc_new2; 2822 op->args[0] = op->args[1]; 2823 op->args[1] = op->args[2]; 2824 op->args[2] = op->args[3]; 2825 } else { 2826 goto do_not_remove; 2827 } 2828 /* Mark the single-word operation live. */ 2829 nb_oargs = 1; 2830 goto do_not_remove; 2831 2832 default: 2833 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2834 nb_iargs = def->nb_iargs; 2835 nb_oargs = def->nb_oargs; 2836 2837 /* Test if the operation can be removed because all 2838 its outputs are dead. 
We assume that nb_oargs == 0 2839 implies side effects */ 2840 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2841 for (i = 0; i < nb_oargs; i++) { 2842 if (arg_temp(op->args[i])->state != TS_DEAD) { 2843 goto do_not_remove; 2844 } 2845 } 2846 goto do_remove; 2847 } 2848 goto do_not_remove; 2849 2850 do_remove: 2851 tcg_op_remove(s, op); 2852 break; 2853 2854 do_not_remove: 2855 for (i = 0; i < nb_oargs; i++) { 2856 ts = arg_temp(op->args[i]); 2857 2858 /* Remember the preference of the uses that followed. */ 2859 op->output_pref[i] = *la_temp_pref(ts); 2860 2861 /* Output args are dead. */ 2862 if (ts->state & TS_DEAD) { 2863 arg_life |= DEAD_ARG << i; 2864 } 2865 if (ts->state & TS_MEM) { 2866 arg_life |= SYNC_ARG << i; 2867 } 2868 ts->state = TS_DEAD; 2869 la_reset_pref(ts); 2870 } 2871 2872 /* If end of basic block, update. */ 2873 if (def->flags & TCG_OPF_BB_EXIT) { 2874 la_func_end(s, nb_globals, nb_temps); 2875 } else if (def->flags & TCG_OPF_COND_BRANCH) { 2876 la_bb_sync(s, nb_globals, nb_temps); 2877 } else if (def->flags & TCG_OPF_BB_END) { 2878 la_bb_end(s, nb_globals, nb_temps); 2879 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2880 la_global_sync(s, nb_globals); 2881 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2882 la_cross_call(s, nb_temps); 2883 } 2884 } 2885 2886 /* Record arguments that die in this opcode. */ 2887 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2888 ts = arg_temp(op->args[i]); 2889 if (ts->state & TS_DEAD) { 2890 arg_life |= DEAD_ARG << i; 2891 } 2892 } 2893 2894 /* Input arguments are live for preceding opcodes. */ 2895 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2896 ts = arg_temp(op->args[i]); 2897 if (ts->state & TS_DEAD) { 2898 /* For operands that were dead, initially allow 2899 all regs for the type. */ 2900 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2901 ts->state &= ~TS_DEAD; 2902 } 2903 } 2904 2905 /* Incorporate constraints for this operand. 
*/ 2906 switch (opc) { 2907 case INDEX_op_mov_i32: 2908 case INDEX_op_mov_i64: 2909 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2910 have proper constraints. That said, special case 2911 moves to propagate preferences backward. */ 2912 if (IS_DEAD_ARG(1)) { 2913 *la_temp_pref(arg_temp(op->args[0])) 2914 = *la_temp_pref(arg_temp(op->args[1])); 2915 } 2916 break; 2917 2918 default: 2919 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2920 const TCGArgConstraint *ct = &def->args_ct[i]; 2921 TCGRegSet set, *pset; 2922 2923 ts = arg_temp(op->args[i]); 2924 pset = la_temp_pref(ts); 2925 set = *pset; 2926 2927 set &= ct->regs; 2928 if (ct->ialias) { 2929 set &= op->output_pref[ct->alias_index]; 2930 } 2931 /* If the combination is not possible, restart. */ 2932 if (set == 0) { 2933 set = ct->regs; 2934 } 2935 *pset = set; 2936 } 2937 break; 2938 } 2939 break; 2940 } 2941 op->life = arg_life; 2942 } 2943 } 2944 2945 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2946 static bool liveness_pass_2(TCGContext *s) 2947 { 2948 int nb_globals = s->nb_globals; 2949 int nb_temps, i; 2950 bool changes = false; 2951 TCGOp *op, *op_next; 2952 2953 /* Create a temporary for each indirect global. */ 2954 for (i = 0; i < nb_globals; ++i) { 2955 TCGTemp *its = &s->temps[i]; 2956 if (its->indirect_reg) { 2957 TCGTemp *dts = tcg_temp_alloc(s); 2958 dts->type = its->type; 2959 dts->base_type = its->base_type; 2960 its->state_ptr = dts; 2961 } else { 2962 its->state_ptr = NULL; 2963 } 2964 /* All globals begin dead. 
*/ 2965 its->state = TS_DEAD; 2966 } 2967 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2968 TCGTemp *its = &s->temps[i]; 2969 its->state_ptr = NULL; 2970 its->state = TS_DEAD; 2971 } 2972 2973 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2974 TCGOpcode opc = op->opc; 2975 const TCGOpDef *def = &tcg_op_defs[opc]; 2976 TCGLifeData arg_life = op->life; 2977 int nb_iargs, nb_oargs, call_flags; 2978 TCGTemp *arg_ts, *dir_ts; 2979 2980 if (opc == INDEX_op_call) { 2981 nb_oargs = TCGOP_CALLO(op); 2982 nb_iargs = TCGOP_CALLI(op); 2983 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2984 } else { 2985 nb_iargs = def->nb_iargs; 2986 nb_oargs = def->nb_oargs; 2987 2988 /* Set flags similar to how calls require. */ 2989 if (def->flags & TCG_OPF_COND_BRANCH) { 2990 /* Like reading globals: sync_globals */ 2991 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2992 } else if (def->flags & TCG_OPF_BB_END) { 2993 /* Like writing globals: save_globals */ 2994 call_flags = 0; 2995 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2996 /* Like reading globals: sync_globals */ 2997 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2998 } else { 2999 /* No effect on globals. */ 3000 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3001 TCG_CALL_NO_WRITE_GLOBALS); 3002 } 3003 } 3004 3005 /* Make sure that input arguments are available. */ 3006 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3007 arg_ts = arg_temp(op->args[i]); 3008 if (arg_ts) { 3009 dir_ts = arg_ts->state_ptr; 3010 if (dir_ts && arg_ts->state == TS_DEAD) { 3011 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3012 ? INDEX_op_ld_i32 3013 : INDEX_op_ld_i64); 3014 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 3015 3016 lop->args[0] = temp_arg(dir_ts); 3017 lop->args[1] = temp_arg(arg_ts->mem_base); 3018 lop->args[2] = arg_ts->mem_offset; 3019 3020 /* Loaded, but synced with memory. */ 3021 arg_ts->state = TS_MEM; 3022 } 3023 } 3024 } 3025 3026 /* Perform input replacement, and mark inputs that became dead. 
3027 No action is required except keeping temp_state up to date 3028 so that we reload when needed. */ 3029 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3030 arg_ts = arg_temp(op->args[i]); 3031 if (arg_ts) { 3032 dir_ts = arg_ts->state_ptr; 3033 if (dir_ts) { 3034 op->args[i] = temp_arg(dir_ts); 3035 changes = true; 3036 if (IS_DEAD_ARG(i)) { 3037 arg_ts->state = TS_DEAD; 3038 } 3039 } 3040 } 3041 } 3042 3043 /* Liveness analysis should ensure that the following are 3044 all correct, for call sites and basic block end points. */ 3045 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3046 /* Nothing to do */ 3047 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3048 for (i = 0; i < nb_globals; ++i) { 3049 /* Liveness should see that globals are synced back, 3050 that is, either TS_DEAD or TS_MEM. */ 3051 arg_ts = &s->temps[i]; 3052 tcg_debug_assert(arg_ts->state_ptr == 0 3053 || arg_ts->state != 0); 3054 } 3055 } else { 3056 for (i = 0; i < nb_globals; ++i) { 3057 /* Liveness should see that globals are saved back, 3058 that is, TS_DEAD, waiting to be reloaded. */ 3059 arg_ts = &s->temps[i]; 3060 tcg_debug_assert(arg_ts->state_ptr == 0 3061 || arg_ts->state == TS_DEAD); 3062 } 3063 } 3064 3065 /* Outputs become available. */ 3066 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3067 arg_ts = arg_temp(op->args[0]); 3068 dir_ts = arg_ts->state_ptr; 3069 if (dir_ts) { 3070 op->args[0] = temp_arg(dir_ts); 3071 changes = true; 3072 3073 /* The output is now live and modified. */ 3074 arg_ts->state = 0; 3075 3076 if (NEED_SYNC_ARG(0)) { 3077 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3078 ? 
INDEX_op_st_i32 3079 : INDEX_op_st_i64); 3080 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3081 TCGTemp *out_ts = dir_ts; 3082 3083 if (IS_DEAD_ARG(0)) { 3084 out_ts = arg_temp(op->args[1]); 3085 arg_ts->state = TS_DEAD; 3086 tcg_op_remove(s, op); 3087 } else { 3088 arg_ts->state = TS_MEM; 3089 } 3090 3091 sop->args[0] = temp_arg(out_ts); 3092 sop->args[1] = temp_arg(arg_ts->mem_base); 3093 sop->args[2] = arg_ts->mem_offset; 3094 } else { 3095 tcg_debug_assert(!IS_DEAD_ARG(0)); 3096 } 3097 } 3098 } else { 3099 for (i = 0; i < nb_oargs; i++) { 3100 arg_ts = arg_temp(op->args[i]); 3101 dir_ts = arg_ts->state_ptr; 3102 if (!dir_ts) { 3103 continue; 3104 } 3105 op->args[i] = temp_arg(dir_ts); 3106 changes = true; 3107 3108 /* The output is now live and modified. */ 3109 arg_ts->state = 0; 3110 3111 /* Sync outputs upon their last write. */ 3112 if (NEED_SYNC_ARG(i)) { 3113 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3114 ? INDEX_op_st_i32 3115 : INDEX_op_st_i64); 3116 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3117 3118 sop->args[0] = temp_arg(dir_ts); 3119 sop->args[1] = temp_arg(arg_ts->mem_base); 3120 sop->args[2] = arg_ts->mem_offset; 3121 3122 arg_ts->state = TS_MEM; 3123 } 3124 /* Drop outputs that are dead. 
*/ 3125 if (IS_DEAD_ARG(i)) { 3126 arg_ts->state = TS_DEAD; 3127 } 3128 } 3129 } 3130 } 3131 3132 return changes; 3133 } 3134 3135 #ifdef CONFIG_DEBUG_TCG 3136 static void dump_regs(TCGContext *s) 3137 { 3138 TCGTemp *ts; 3139 int i; 3140 char buf[64]; 3141 3142 for(i = 0; i < s->nb_temps; i++) { 3143 ts = &s->temps[i]; 3144 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3145 switch(ts->val_type) { 3146 case TEMP_VAL_REG: 3147 printf("%s", tcg_target_reg_names[ts->reg]); 3148 break; 3149 case TEMP_VAL_MEM: 3150 printf("%d(%s)", (int)ts->mem_offset, 3151 tcg_target_reg_names[ts->mem_base->reg]); 3152 break; 3153 case TEMP_VAL_CONST: 3154 printf("$0x%" TCG_PRIlx, ts->val); 3155 break; 3156 case TEMP_VAL_DEAD: 3157 printf("D"); 3158 break; 3159 default: 3160 printf("???"); 3161 break; 3162 } 3163 printf("\n"); 3164 } 3165 3166 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3167 if (s->reg_to_temp[i] != NULL) { 3168 printf("%s: %s\n", 3169 tcg_target_reg_names[i], 3170 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3171 } 3172 } 3173 } 3174 3175 static void check_regs(TCGContext *s) 3176 { 3177 int reg; 3178 int k; 3179 TCGTemp *ts; 3180 char buf[64]; 3181 3182 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3183 ts = s->reg_to_temp[reg]; 3184 if (ts != NULL) { 3185 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3186 printf("Inconsistency for register %s:\n", 3187 tcg_target_reg_names[reg]); 3188 goto fail; 3189 } 3190 } 3191 } 3192 for (k = 0; k < s->nb_temps; k++) { 3193 ts = &s->temps[k]; 3194 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 3195 && s->reg_to_temp[ts->reg] != ts) { 3196 printf("Inconsistency for temp %s:\n", 3197 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3198 fail: 3199 printf("reg state:\n"); 3200 dump_regs(s); 3201 tcg_abort(); 3202 } 3203 } 3204 } 3205 #endif 3206 3207 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3208 { 3209 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3210 /* 
 Sparc64 stack is accessed with offset of 2047 */
    /* Round the frame offset up to the natural alignment of the slot.  */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    /* Out of frame space is a fatal translator error.  */
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  Releases any register the
   temp currently occupies; fixed-register temps are never released.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    /* Globals and local temps keep a canonical memory location;
       plain temps simply become dead.  */
    ts->val_type = (free_or_dead < 0
                    || ts->temp_local
                    || ts->temp_global
                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
}

/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly. */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register first. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] is the mandatory set; reg_ct[0] narrows it by preference. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Pass 1: try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* Pass 2: no free register matched; we must spill something. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    tcg_abort();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        /* The constant has never been stored, so memory is stale. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}

/* save globals to their canonical location and assume they can be
   modified be the following code. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Liveness already synced globals; only assert the invariant. */
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->fixed_reg
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, ts, allocated_regs);
        } else {
            /* The liveness analysis already ensures that temps are dead.
               Keep an tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead and
 * all globals and local temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        if (ts->temp_local) {
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
        } else {
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }
}

/*
 * Specialized code generation for INDEX_op_movi_*.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!ots->fixed_reg);

    /* The movi is not explicitly generated here.
*/ 3497 if (ots->val_type == TEMP_VAL_REG) { 3498 s->reg_to_temp[ots->reg] = NULL; 3499 } 3500 ots->val_type = TEMP_VAL_CONST; 3501 ots->val = val; 3502 ots->mem_coherent = 0; 3503 if (NEED_SYNC_ARG(0)) { 3504 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3505 } else if (IS_DEAD_ARG(0)) { 3506 temp_dead(s, ots); 3507 } 3508 } 3509 3510 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 3511 { 3512 TCGTemp *ots = arg_temp(op->args[0]); 3513 tcg_target_ulong val = op->args[1]; 3514 3515 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); 3516 } 3517 3518 /* 3519 * Specialized code generation for INDEX_op_mov_*. 3520 */ 3521 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3522 { 3523 const TCGLifeData arg_life = op->life; 3524 TCGRegSet allocated_regs, preferred_regs; 3525 TCGTemp *ts, *ots; 3526 TCGType otype, itype; 3527 3528 allocated_regs = s->reserved_regs; 3529 preferred_regs = op->output_pref[0]; 3530 ots = arg_temp(op->args[0]); 3531 ts = arg_temp(op->args[1]); 3532 3533 /* ENV should not be modified. */ 3534 tcg_debug_assert(!ots->fixed_reg); 3535 3536 /* Note that otype != itype for no-op truncation. */ 3537 otype = ots->type; 3538 itype = ts->type; 3539 3540 if (ts->val_type == TEMP_VAL_CONST) { 3541 /* propagate constant or generate sti */ 3542 tcg_target_ulong val = ts->val; 3543 if (IS_DEAD_ARG(1)) { 3544 temp_dead(s, ts); 3545 } 3546 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3547 return; 3548 } 3549 3550 /* If the source value is in memory we're going to be forced 3551 to have it in a register in order to perform the copy. Copy 3552 the SOURCE value into its own register first, that way we 3553 don't have to reload SOURCE the next time it is used. 
*/ 3554 if (ts->val_type == TEMP_VAL_MEM) { 3555 temp_load(s, ts, tcg_target_available_regs[itype], 3556 allocated_regs, preferred_regs); 3557 } 3558 3559 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3560 if (IS_DEAD_ARG(0)) { 3561 /* mov to a non-saved dead register makes no sense (even with 3562 liveness analysis disabled). */ 3563 tcg_debug_assert(NEED_SYNC_ARG(0)); 3564 if (!ots->mem_allocated) { 3565 temp_allocate_frame(s, ots); 3566 } 3567 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3568 if (IS_DEAD_ARG(1)) { 3569 temp_dead(s, ts); 3570 } 3571 temp_dead(s, ots); 3572 } else { 3573 if (IS_DEAD_ARG(1) && !ts->fixed_reg) { 3574 /* the mov can be suppressed */ 3575 if (ots->val_type == TEMP_VAL_REG) { 3576 s->reg_to_temp[ots->reg] = NULL; 3577 } 3578 ots->reg = ts->reg; 3579 temp_dead(s, ts); 3580 } else { 3581 if (ots->val_type != TEMP_VAL_REG) { 3582 /* When allocating a new register, make sure to not spill the 3583 input one. */ 3584 tcg_regset_set_reg(allocated_regs, ts->reg); 3585 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3586 allocated_regs, preferred_regs, 3587 ots->indirect_base); 3588 } 3589 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3590 /* 3591 * Cross register class move not supported. 3592 * Store the source register into the destination slot 3593 * and leave the destination temp as TEMP_VAL_MEM. 3594 */ 3595 assert(!ots->fixed_reg); 3596 if (!ts->mem_allocated) { 3597 temp_allocate_frame(s, ots); 3598 } 3599 tcg_out_st(s, ts->type, ts->reg, 3600 ots->mem_base->reg, ots->mem_offset); 3601 ots->mem_coherent = 1; 3602 temp_free_or_dead(s, ots, -1); 3603 return; 3604 } 3605 } 3606 ots->val_type = TEMP_VAL_REG; 3607 ots->mem_coherent = 0; 3608 s->reg_to_temp[ots->reg] = ots; 3609 if (NEED_SYNC_ARG(0)) { 3610 temp_sync(s, ots, allocated_regs, 0, 0); 3611 } 3612 } 3613 } 3614 3615 /* 3616 * Specialized code generation for INDEX_op_dup_vec. 
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!ots->fixed_reg);

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi. */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
#ifdef HOST_WORDS_BIGENDIAN
        /* On big-endian hosts the low element sits at the high address;
           bias the load address accordingly. */
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/* Generic register allocation and code emission for one TCG op:
   satisfy input constraints, handle clobbers/side effects, satisfy
   output constraints, then emit the target instruction. */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead
                   after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->args_ct[nb_oargs + k2].sort_index;
                        if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.
 */
            tcg_debug_assert(!ts->fixed_reg);

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output aliases an input: reuse the input's register. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* 'newreg' constraint: must not overlap any input. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!ts->fixed_reg);

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

/* Register allocation and code emission for a helper call: place
   arguments in stack slots / argument registers per the host ABI,
   clobber call-clobbered registers, then bind the results. */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for (i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.
 */
        tcg_debug_assert(!ts->fixed_reg);

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    /* Accumulate the per-context profiles into @prof. */
    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


/* Translate the ops list of @s into host code for @tb.  Returns the
   generated code size, or a negative value on buffer overflow so the
   caller can restart with a fresh buffer. */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if
(unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4195 && qemu_log_in_addr_range(tb->pc))) { 4196 FILE *logfile = qemu_log_lock(); 4197 qemu_log("OP:\n"); 4198 tcg_dump_ops(s, false); 4199 qemu_log("\n"); 4200 qemu_log_unlock(logfile); 4201 } 4202 #endif 4203 4204 #ifdef CONFIG_DEBUG_TCG 4205 /* Ensure all labels referenced have been emitted. */ 4206 { 4207 TCGLabel *l; 4208 bool error = false; 4209 4210 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4211 if (unlikely(!l->present) && l->refs) { 4212 qemu_log_mask(CPU_LOG_TB_OP, 4213 "$L%d referenced but not present.\n", l->id); 4214 error = true; 4215 } 4216 } 4217 assert(!error); 4218 } 4219 #endif 4220 4221 #ifdef CONFIG_PROFILER 4222 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4223 #endif 4224 4225 #ifdef USE_TCG_OPTIMIZATIONS 4226 tcg_optimize(s); 4227 #endif 4228 4229 #ifdef CONFIG_PROFILER 4230 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4231 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4232 #endif 4233 4234 reachable_code_pass(s); 4235 liveness_pass_1(s); 4236 4237 if (s->nb_indirects > 0) { 4238 #ifdef DEBUG_DISAS 4239 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4240 && qemu_log_in_addr_range(tb->pc))) { 4241 FILE *logfile = qemu_log_lock(); 4242 qemu_log("OP before indirect lowering:\n"); 4243 tcg_dump_ops(s, false); 4244 qemu_log("\n"); 4245 qemu_log_unlock(logfile); 4246 } 4247 #endif 4248 /* Replace indirect temps with direct temps. */ 4249 if (liveness_pass_2(s)) { 4250 /* If changes were made, re-run liveness. 
*/ 4251 liveness_pass_1(s); 4252 } 4253 } 4254 4255 #ifdef CONFIG_PROFILER 4256 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4257 #endif 4258 4259 #ifdef DEBUG_DISAS 4260 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4261 && qemu_log_in_addr_range(tb->pc))) { 4262 FILE *logfile = qemu_log_lock(); 4263 qemu_log("OP after optimization and liveness analysis:\n"); 4264 tcg_dump_ops(s, true); 4265 qemu_log("\n"); 4266 qemu_log_unlock(logfile); 4267 } 4268 #endif 4269 4270 tcg_reg_alloc_start(s); 4271 4272 /* 4273 * Reset the buffer pointers when restarting after overflow. 4274 * TODO: Move this into translate-all.c with the rest of the 4275 * buffer management. Having only this done here is confusing. 4276 */ 4277 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4278 s->code_ptr = s->code_buf; 4279 4280 #ifdef TCG_TARGET_NEED_LDST_LABELS 4281 QSIMPLEQ_INIT(&s->ldst_labels); 4282 #endif 4283 #ifdef TCG_TARGET_NEED_POOL_LABELS 4284 s->pool_labels = NULL; 4285 #endif 4286 4287 num_insns = -1; 4288 QTAILQ_FOREACH(op, &s->ops, link) { 4289 TCGOpcode opc = op->opc; 4290 4291 #ifdef CONFIG_PROFILER 4292 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 4293 #endif 4294 4295 switch (opc) { 4296 case INDEX_op_mov_i32: 4297 case INDEX_op_mov_i64: 4298 case INDEX_op_mov_vec: 4299 tcg_reg_alloc_mov(s, op); 4300 break; 4301 case INDEX_op_movi_i32: 4302 case INDEX_op_movi_i64: 4303 case INDEX_op_dupi_vec: 4304 tcg_reg_alloc_movi(s, op); 4305 break; 4306 case INDEX_op_dup_vec: 4307 tcg_reg_alloc_dup(s, op); 4308 break; 4309 case INDEX_op_insn_start: 4310 if (num_insns >= 0) { 4311 size_t off = tcg_current_code_size(s); 4312 s->gen_insn_end_off[num_insns] = off; 4313 /* Assert that we do not overflow our stored offset. 
*/ 4314 assert(s->gen_insn_end_off[num_insns] == off); 4315 } 4316 num_insns++; 4317 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 4318 target_ulong a; 4319 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 4320 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 4321 #else 4322 a = op->args[i]; 4323 #endif 4324 s->gen_insn_data[num_insns][i] = a; 4325 } 4326 break; 4327 case INDEX_op_discard: 4328 temp_dead(s, arg_temp(op->args[0])); 4329 break; 4330 case INDEX_op_set_label: 4331 tcg_reg_alloc_bb_end(s, s->reserved_regs); 4332 tcg_out_label(s, arg_label(op->args[0])); 4333 break; 4334 case INDEX_op_call: 4335 tcg_reg_alloc_call(s, op); 4336 break; 4337 default: 4338 /* Sanity check that we've not introduced any unhandled opcodes. */ 4339 tcg_debug_assert(tcg_op_supported(opc)); 4340 /* Note: in order to speed up the code, it would be much 4341 faster to have specialized register allocator functions for 4342 some common argument patterns */ 4343 tcg_reg_alloc_op(s, op); 4344 break; 4345 } 4346 #ifdef CONFIG_DEBUG_TCG 4347 check_regs(s); 4348 #endif 4349 /* Test for (pending) buffer overflow. The assumption is that any 4350 one operation beginning below the high water mark cannot overrun 4351 the buffer completely. Thus we can test for overflow after 4352 generating code without having to check during generation. */ 4353 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 4354 return -1; 4355 } 4356 /* Test for TB overflow, as seen by gen_insn_end_off. 
         */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    /* Record the end-of-code offset for the final guest instruction. */
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
/* Print a human-readable summary of the accumulated TCG profiling
   counters via qemu_printf.  Reads a snapshot, so concurrent updates
   by translating threads do not skew the derived ratios. */
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;  /* guard the divisions below */
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte     %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;  /* avoid dividing by zero in the percentages below */
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
/* Profiler not compiled in: keep the entry point but report that. */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
   put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.
This should create a buffer containing
   the contents of a .debug_frame section that describes the post-
   prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
 */

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* NOTE: these types and symbol names implement the GDB JIT compilation
   interface; their layout and names are fixed by GDB and must not be
   changed on this side. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file (here: our fake ELF image),
   kept on a doubly-linked list owned by the descriptor below. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Global descriptor that the debugger locates by symbol name to find
   the list of registered code entries and the pending action. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* Hook function for the debugger: deliberately empty, but it must keep
   its own out-of-line symbol (hence noinline + the empty asm barrier),
   as the debugger is expected to intercept calls to it. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.
*/ 4501 4502 static int find_string(const char *strtab, const char *str) 4503 { 4504 const char *p = strtab + 1; 4505 4506 while (1) { 4507 if (strcmp(p, str) == 0) { 4508 return p - strtab; 4509 } 4510 p += strlen(p) + 1; 4511 } 4512 } 4513 4514 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 4515 const void *debug_frame, 4516 size_t debug_frame_size) 4517 { 4518 struct __attribute__((packed)) DebugInfo { 4519 uint32_t len; 4520 uint16_t version; 4521 uint32_t abbrev; 4522 uint8_t ptr_size; 4523 uint8_t cu_die; 4524 uint16_t cu_lang; 4525 uintptr_t cu_low_pc; 4526 uintptr_t cu_high_pc; 4527 uint8_t fn_die; 4528 char fn_name[16]; 4529 uintptr_t fn_low_pc; 4530 uintptr_t fn_high_pc; 4531 uint8_t cu_eoc; 4532 }; 4533 4534 struct ElfImage { 4535 ElfW(Ehdr) ehdr; 4536 ElfW(Phdr) phdr; 4537 ElfW(Shdr) shdr[7]; 4538 ElfW(Sym) sym[2]; 4539 struct DebugInfo di; 4540 uint8_t da[24]; 4541 char str[80]; 4542 }; 4543 4544 struct ElfImage *img; 4545 4546 static const struct ElfImage img_template = { 4547 .ehdr = { 4548 .e_ident[EI_MAG0] = ELFMAG0, 4549 .e_ident[EI_MAG1] = ELFMAG1, 4550 .e_ident[EI_MAG2] = ELFMAG2, 4551 .e_ident[EI_MAG3] = ELFMAG3, 4552 .e_ident[EI_CLASS] = ELF_CLASS, 4553 .e_ident[EI_DATA] = ELF_DATA, 4554 .e_ident[EI_VERSION] = EV_CURRENT, 4555 .e_type = ET_EXEC, 4556 .e_machine = ELF_HOST_MACHINE, 4557 .e_version = EV_CURRENT, 4558 .e_phoff = offsetof(struct ElfImage, phdr), 4559 .e_shoff = offsetof(struct ElfImage, shdr), 4560 .e_ehsize = sizeof(ElfW(Shdr)), 4561 .e_phentsize = sizeof(ElfW(Phdr)), 4562 .e_phnum = 1, 4563 .e_shentsize = sizeof(ElfW(Shdr)), 4564 .e_shnum = ARRAY_SIZE(img->shdr), 4565 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 4566 #ifdef ELF_HOST_FLAGS 4567 .e_flags = ELF_HOST_FLAGS, 4568 #endif 4569 #ifdef ELF_OSABI 4570 .e_ident[EI_OSABI] = ELF_OSABI, 4571 #endif 4572 }, 4573 .phdr = { 4574 .p_type = PT_LOAD, 4575 .p_flags = PF_X, 4576 }, 4577 .shdr = { 4578 [0] = { .sh_type = SHT_NULL }, 4579 /* Trick: The contents of 
code_gen_buffer are not present in 4580 this fake ELF file; that got allocated elsewhere. Therefore 4581 we mark .text as SHT_NOBITS (similar to .bss) so that readers 4582 will not look for contents. We can record any address. */ 4583 [1] = { /* .text */ 4584 .sh_type = SHT_NOBITS, 4585 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 4586 }, 4587 [2] = { /* .debug_info */ 4588 .sh_type = SHT_PROGBITS, 4589 .sh_offset = offsetof(struct ElfImage, di), 4590 .sh_size = sizeof(struct DebugInfo), 4591 }, 4592 [3] = { /* .debug_abbrev */ 4593 .sh_type = SHT_PROGBITS, 4594 .sh_offset = offsetof(struct ElfImage, da), 4595 .sh_size = sizeof(img->da), 4596 }, 4597 [4] = { /* .debug_frame */ 4598 .sh_type = SHT_PROGBITS, 4599 .sh_offset = sizeof(struct ElfImage), 4600 }, 4601 [5] = { /* .symtab */ 4602 .sh_type = SHT_SYMTAB, 4603 .sh_offset = offsetof(struct ElfImage, sym), 4604 .sh_size = sizeof(img->sym), 4605 .sh_info = 1, 4606 .sh_link = ARRAY_SIZE(img->shdr) - 1, 4607 .sh_entsize = sizeof(ElfW(Sym)), 4608 }, 4609 [6] = { /* .strtab */ 4610 .sh_type = SHT_STRTAB, 4611 .sh_offset = offsetof(struct ElfImage, str), 4612 .sh_size = sizeof(img->str), 4613 } 4614 }, 4615 .sym = { 4616 [1] = { /* code_gen_buffer */ 4617 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 4618 .st_shndx = 1, 4619 } 4620 }, 4621 .di = { 4622 .len = sizeof(struct DebugInfo) - 4, 4623 .version = 2, 4624 .ptr_size = sizeof(void *), 4625 .cu_die = 1, 4626 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 4627 .fn_die = 2, 4628 .fn_name = "code_gen_buffer" 4629 }, 4630 .da = { 4631 1, /* abbrev number (the cu) */ 4632 0x11, 1, /* DW_TAG_compile_unit, has children */ 4633 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 4634 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 4635 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4636 0, 0, /* end of abbrev */ 4637 2, /* abbrev number (the fn) */ 4638 0x2e, 0, /* DW_TAG_subprogram, no children */ 4639 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 4640 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 4641 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4642 0, 0, /* end of abbrev */ 4643 0 /* no more abbrev */ 4644 }, 4645 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 4646 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 4647 }; 4648 4649 /* We only need a single jit entry; statically allocate it. */ 4650 static struct jit_code_entry one_entry; 4651 4652 uintptr_t buf = (uintptr_t)buf_ptr; 4653 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 4654 DebugFrameHeader *dfh; 4655 4656 img = g_malloc(img_size); 4657 *img = img_template; 4658 4659 img->phdr.p_vaddr = buf; 4660 img->phdr.p_paddr = buf; 4661 img->phdr.p_memsz = buf_size; 4662 4663 img->shdr[1].sh_name = find_string(img->str, ".text"); 4664 img->shdr[1].sh_addr = buf; 4665 img->shdr[1].sh_size = buf_size; 4666 4667 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 4668 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 4669 4670 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 4671 img->shdr[4].sh_size = debug_frame_size; 4672 4673 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 4674 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 4675 4676 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 4677 img->sym[1].st_value = buf; 4678 img->sym[1].st_size = buf_size; 4679 4680 img->di.cu_low_pc = buf; 4681 img->di.cu_high_pc = buf + buf_size; 4682 img->di.fn_low_pc = buf; 4683 img->di.fn_high_pc = buf + buf_size; 4684 4685 dfh = (DebugFrameHeader *)(img + 1); 4686 memcpy(dfh, debug_frame, debug_frame_size); 4687 dfh->fde.func_start = buf; 4688 dfh->fde.func_len = buf_size; 4689 4690 #ifdef DEBUG_JIT 4691 /* Enable this block to be able to debug the ELF image file creation. 4692 One can use readelf, objdump, or other inspection utilities. 
*/ 4693 { 4694 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 4695 if (f) { 4696 if (fwrite(img, img_size, 1, f) != img_size) { 4697 /* Avoid stupid unused return value warning for fwrite. */ 4698 } 4699 fclose(f); 4700 } 4701 } 4702 #endif 4703 4704 one_entry.symfile_addr = img; 4705 one_entry.symfile_size = img_size; 4706 4707 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 4708 __jit_debug_descriptor.relevant_entry = &one_entry; 4709 __jit_debug_descriptor.first_entry = &one_entry; 4710 __jit_debug_register_code(); 4711 } 4712 #else 4713 /* No support for the feature. Provide the entry point expected by exec.c, 4714 and implement the internal function we declared earlier. */ 4715 4716 static void tcg_register_jit_int(const void *buf, size_t size, 4717 const void *debug_frame, 4718 size_t debug_frame_size) 4719 { 4720 } 4721 4722 void tcg_register_jit(const void *buf, size_t buf_size) 4723 { 4724 } 4725 #endif /* ELF_HOST_MACHINE */ 4726 4727 #if !TCG_TARGET_MAYBE_vec 4728 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 4729 { 4730 g_assert_not_reached(); 4731 } 4732 #endif 4733