1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/error-report.h" 34 #include "qemu/cutils.h" 35 #include "qemu/host-utils.h" 36 #include "qemu/qemu-print.h" 37 #include "qemu/timer.h" 38 #include "qemu/cacheflush.h" 39 40 /* Note: the long term plan is to reduce the dependencies on the QEMU 41 CPU definitions. 
Currently they are used for qemu_ld/st 42 instructions */ 43 #define NO_CPU_IO_DEFS 44 #include "cpu.h" 45 46 #include "exec/exec-all.h" 47 48 #if !defined(CONFIG_USER_ONLY) 49 #include "hw/boards.h" 50 #endif 51 52 #include "tcg/tcg-op.h" 53 54 #if UINTPTR_MAX == UINT32_MAX 55 # define ELF_CLASS ELFCLASS32 56 #else 57 # define ELF_CLASS ELFCLASS64 58 #endif 59 #ifdef HOST_WORDS_BIGENDIAN 60 # define ELF_DATA ELFDATA2MSB 61 #else 62 # define ELF_DATA ELFDATA2LSB 63 #endif 64 65 #include "elf.h" 66 #include "exec/log.h" 67 #include "sysemu/sysemu.h" 68 69 /* Forward declarations for functions declared in tcg-target.c.inc and 70 used here. */ 71 static void tcg_target_init(TCGContext *s); 72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); 73 static void tcg_target_qemu_prologue(TCGContext *s); 74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 75 intptr_t value, intptr_t addend); 76 77 /* The CIE and FDE header definitions will be common to all hosts. */ 78 typedef struct { 79 uint32_t len __attribute__((aligned((sizeof(void *))))); 80 uint32_t id; 81 uint8_t version; 82 char augmentation[1]; 83 uint8_t code_align; 84 uint8_t data_align; 85 uint8_t return_column; 86 } DebugFrameCIE; 87 88 typedef struct QEMU_PACKED { 89 uint32_t len __attribute__((aligned((sizeof(void *))))); 90 uint32_t cie_offset; 91 uintptr_t func_start; 92 uintptr_t func_len; 93 } DebugFrameFDEHeader; 94 95 typedef struct QEMU_PACKED { 96 DebugFrameCIE cie; 97 DebugFrameFDEHeader fde; 98 } DebugFrameHeader; 99 100 static void tcg_register_jit_int(const void *buf, size_t size, 101 const void *debug_frame, 102 size_t debug_frame_size) 103 __attribute__((unused)); 104 105 /* Forward declarations for functions declared and used in tcg-target.c.inc. 
*/ 106 static const char *target_parse_constraint(TCGArgConstraint *ct, 107 const char *ct_str, TCGType type); 108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 109 intptr_t arg2); 110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 111 static void tcg_out_movi(TCGContext *s, TCGType type, 112 TCGReg ret, tcg_target_long arg); 113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 114 const int *const_args); 115 #if TCG_TARGET_MAYBE_vec 116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 117 TCGReg dst, TCGReg src); 118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 119 TCGReg dst, TCGReg base, intptr_t offset); 120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 121 TCGReg dst, int64_t arg); 122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 123 unsigned vece, const TCGArg *args, 124 const int *const_args); 125 #else 126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 127 TCGReg dst, TCGReg src) 128 { 129 g_assert_not_reached(); 130 } 131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 132 TCGReg dst, TCGReg base, intptr_t offset) 133 { 134 g_assert_not_reached(); 135 } 136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 137 TCGReg dst, int64_t arg) 138 { 139 g_assert_not_reached(); 140 } 141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 142 unsigned vece, const TCGArg *args, 143 const int *const_args) 144 { 145 g_assert_not_reached(); 146 } 147 #endif 148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 149 intptr_t arg2); 150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 151 TCGReg base, intptr_t ofs); 152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); 153 static int 
tcg_target_const_match(tcg_target_long val, TCGType type, 154 const TCGArgConstraint *arg_ct); 155 #ifdef TCG_TARGET_NEED_LDST_LABELS 156 static int tcg_out_ldst_finalize(TCGContext *s); 157 #endif 158 159 #define TCG_HIGHWATER 1024 160 161 static TCGContext **tcg_ctxs; 162 static unsigned int n_tcg_ctxs; 163 TCGv_env cpu_env = 0; 164 const void *tcg_code_gen_epilogue; 165 uintptr_t tcg_splitwx_diff; 166 167 #ifndef CONFIG_TCG_INTERPRETER 168 tcg_prologue_fn *tcg_qemu_tb_exec; 169 #endif 170 171 struct tcg_region_tree { 172 QemuMutex lock; 173 GTree *tree; 174 /* padding to avoid false sharing is computed at run-time */ 175 }; 176 177 /* 178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 179 * dynamically allocate from as demand dictates. Given appropriate region 180 * sizing, this minimizes flushes even when some TCG threads generate a lot 181 * more code than others. 182 */ 183 struct tcg_region_state { 184 QemuMutex lock; 185 186 /* fields set at init time */ 187 void *start; 188 void *start_aligned; 189 void *end; 190 size_t n; 191 size_t size; /* size of one region */ 192 size_t stride; /* .size + guard size */ 193 194 /* fields protected by the lock */ 195 size_t current; /* current region index */ 196 size_t agg_size_full; /* aggregate size of full regions */ 197 }; 198 199 static struct tcg_region_state region; 200 /* 201 * This is an array of struct tcg_region_tree's, with padding. 202 * We use void * to simplify the computation of region_trees[i]; each 203 * struct is found every tree_size bytes. 
204 */ 205 static void *region_trees; 206 static size_t tree_size; 207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 208 static TCGRegSet tcg_target_call_clobber_regs; 209 210 #if TCG_TARGET_INSN_UNIT_SIZE == 1 211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 212 { 213 *s->code_ptr++ = v; 214 } 215 216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 217 uint8_t v) 218 { 219 *p = v; 220 } 221 #endif 222 223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 225 { 226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 227 *s->code_ptr++ = v; 228 } else { 229 tcg_insn_unit *p = s->code_ptr; 230 memcpy(p, &v, sizeof(v)); 231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 232 } 233 } 234 235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 236 uint16_t v) 237 { 238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 239 *p = v; 240 } else { 241 memcpy(p, &v, sizeof(v)); 242 } 243 } 244 #endif 245 246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 248 { 249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 250 *s->code_ptr++ = v; 251 } else { 252 tcg_insn_unit *p = s->code_ptr; 253 memcpy(p, &v, sizeof(v)); 254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 255 } 256 } 257 258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 259 uint32_t v) 260 { 261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 262 *p = v; 263 } else { 264 memcpy(p, &v, sizeof(v)); 265 } 266 } 267 #endif 268 269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 271 { 272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 273 *s->code_ptr++ = v; 274 } else { 275 tcg_insn_unit *p = s->code_ptr; 276 memcpy(p, &v, sizeof(v)); 277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 278 } 279 } 280 281 static __attribute__((unused)) inline 
void tcg_patch64(tcg_insn_unit *p, 282 uint64_t v) 283 { 284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 285 *p = v; 286 } else { 287 memcpy(p, &v, sizeof(v)); 288 } 289 } 290 #endif 291 292 /* label relocation processing */ 293 294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 295 TCGLabel *l, intptr_t addend) 296 { 297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 298 299 r->type = type; 300 r->ptr = code_ptr; 301 r->addend = addend; 302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 303 } 304 305 static void tcg_out_label(TCGContext *s, TCGLabel *l) 306 { 307 tcg_debug_assert(!l->has_value); 308 l->has_value = 1; 309 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 310 } 311 312 TCGLabel *gen_new_label(void) 313 { 314 TCGContext *s = tcg_ctx; 315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 316 317 memset(l, 0, sizeof(TCGLabel)); 318 l->id = s->nb_labels++; 319 QSIMPLEQ_INIT(&l->relocs); 320 321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 322 323 return l; 324 } 325 326 static bool tcg_resolve_relocs(TCGContext *s) 327 { 328 TCGLabel *l; 329 330 QSIMPLEQ_FOREACH(l, &s->labels, next) { 331 TCGRelocation *r; 332 uintptr_t value = l->u.value; 333 334 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 336 return false; 337 } 338 } 339 } 340 return true; 341 } 342 343 static void set_jmp_reset_offset(TCGContext *s, int which) 344 { 345 /* 346 * We will check for overflow at the end of the opcode loop in 347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 
348 */ 349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); 350 } 351 352 #include "tcg-target.c.inc" 353 354 /* compare a pointer @ptr and a tb_tc @s */ 355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) 356 { 357 if (ptr >= s->ptr + s->size) { 358 return 1; 359 } else if (ptr < s->ptr) { 360 return -1; 361 } 362 return 0; 363 } 364 365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp) 366 { 367 const struct tb_tc *a = ap; 368 const struct tb_tc *b = bp; 369 370 /* 371 * When both sizes are set, we know this isn't a lookup. 372 * This is the most likely case: every TB must be inserted; lookups 373 * are a lot less frequent. 374 */ 375 if (likely(a->size && b->size)) { 376 if (a->ptr > b->ptr) { 377 return 1; 378 } else if (a->ptr < b->ptr) { 379 return -1; 380 } 381 /* a->ptr == b->ptr should happen only on deletions */ 382 g_assert(a->size == b->size); 383 return 0; 384 } 385 /* 386 * All lookups have either .size field set to 0. 387 * From the glib sources we see that @ap is always the lookup key. However 388 * the docs provide no guarantee, so we just mark this case as likely. 
389 */ 390 if (likely(a->size == 0)) { 391 return ptr_cmp_tb_tc(a->ptr, b); 392 } 393 return ptr_cmp_tb_tc(b->ptr, a); 394 } 395 396 static void tcg_region_trees_init(void) 397 { 398 size_t i; 399 400 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize); 401 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size); 402 for (i = 0; i < region.n; i++) { 403 struct tcg_region_tree *rt = region_trees + i * tree_size; 404 405 qemu_mutex_init(&rt->lock); 406 rt->tree = g_tree_new(tb_tc_cmp); 407 } 408 } 409 410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp) 411 { 412 void *p = tcg_splitwx_to_rw(cp); 413 size_t region_idx; 414 415 if (p < region.start_aligned) { 416 region_idx = 0; 417 } else { 418 ptrdiff_t offset = p - region.start_aligned; 419 420 if (offset > region.stride * (region.n - 1)) { 421 region_idx = region.n - 1; 422 } else { 423 region_idx = offset / region.stride; 424 } 425 } 426 return region_trees + region_idx * tree_size; 427 } 428 429 void tcg_tb_insert(TranslationBlock *tb) 430 { 431 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 432 433 qemu_mutex_lock(&rt->lock); 434 g_tree_insert(rt->tree, &tb->tc, tb); 435 qemu_mutex_unlock(&rt->lock); 436 } 437 438 void tcg_tb_remove(TranslationBlock *tb) 439 { 440 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 441 442 qemu_mutex_lock(&rt->lock); 443 g_tree_remove(rt->tree, &tb->tc); 444 qemu_mutex_unlock(&rt->lock); 445 } 446 447 /* 448 * Find the TB 'tb' such that 449 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size 450 * Return NULL if not found. 
451 */ 452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) 453 { 454 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr); 455 TranslationBlock *tb; 456 struct tb_tc s = { .ptr = (void *)tc_ptr }; 457 458 qemu_mutex_lock(&rt->lock); 459 tb = g_tree_lookup(rt->tree, &s); 460 qemu_mutex_unlock(&rt->lock); 461 return tb; 462 } 463 464 static void tcg_region_tree_lock_all(void) 465 { 466 size_t i; 467 468 for (i = 0; i < region.n; i++) { 469 struct tcg_region_tree *rt = region_trees + i * tree_size; 470 471 qemu_mutex_lock(&rt->lock); 472 } 473 } 474 475 static void tcg_region_tree_unlock_all(void) 476 { 477 size_t i; 478 479 for (i = 0; i < region.n; i++) { 480 struct tcg_region_tree *rt = region_trees + i * tree_size; 481 482 qemu_mutex_unlock(&rt->lock); 483 } 484 } 485 486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) 487 { 488 size_t i; 489 490 tcg_region_tree_lock_all(); 491 for (i = 0; i < region.n; i++) { 492 struct tcg_region_tree *rt = region_trees + i * tree_size; 493 494 g_tree_foreach(rt->tree, func, user_data); 495 } 496 tcg_region_tree_unlock_all(); 497 } 498 499 size_t tcg_nb_tbs(void) 500 { 501 size_t nb_tbs = 0; 502 size_t i; 503 504 tcg_region_tree_lock_all(); 505 for (i = 0; i < region.n; i++) { 506 struct tcg_region_tree *rt = region_trees + i * tree_size; 507 508 nb_tbs += g_tree_nnodes(rt->tree); 509 } 510 tcg_region_tree_unlock_all(); 511 return nb_tbs; 512 } 513 514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data) 515 { 516 TranslationBlock *tb = v; 517 518 tb_destroy(tb); 519 return FALSE; 520 } 521 522 static void tcg_region_tree_reset_all(void) 523 { 524 size_t i; 525 526 tcg_region_tree_lock_all(); 527 for (i = 0; i < region.n; i++) { 528 struct tcg_region_tree *rt = region_trees + i * tree_size; 529 530 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL); 531 /* Increment the refcount first so that destroy acts as a reset */ 532 g_tree_ref(rt->tree); 533 
g_tree_destroy(rt->tree); 534 } 535 tcg_region_tree_unlock_all(); 536 } 537 538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 539 { 540 void *start, *end; 541 542 start = region.start_aligned + curr_region * region.stride; 543 end = start + region.size; 544 545 if (curr_region == 0) { 546 start = region.start; 547 } 548 if (curr_region == region.n - 1) { 549 end = region.end; 550 } 551 552 *pstart = start; 553 *pend = end; 554 } 555 556 static void tcg_region_assign(TCGContext *s, size_t curr_region) 557 { 558 void *start, *end; 559 560 tcg_region_bounds(curr_region, &start, &end); 561 562 s->code_gen_buffer = start; 563 s->code_gen_ptr = start; 564 s->code_gen_buffer_size = end - start; 565 s->code_gen_highwater = end - TCG_HIGHWATER; 566 } 567 568 static bool tcg_region_alloc__locked(TCGContext *s) 569 { 570 if (region.current == region.n) { 571 return true; 572 } 573 tcg_region_assign(s, region.current); 574 region.current++; 575 return false; 576 } 577 578 /* 579 * Request a new region once the one in use has filled up. 580 * Returns true on error. 581 */ 582 static bool tcg_region_alloc(TCGContext *s) 583 { 584 bool err; 585 /* read the region size now; alloc__locked will overwrite it on success */ 586 size_t size_full = s->code_gen_buffer_size; 587 588 qemu_mutex_lock(®ion.lock); 589 err = tcg_region_alloc__locked(s); 590 if (!err) { 591 region.agg_size_full += size_full - TCG_HIGHWATER; 592 } 593 qemu_mutex_unlock(®ion.lock); 594 return err; 595 } 596 597 /* 598 * Perform a context's first region allocation. 599 * This function does _not_ increment region.agg_size_full. 
600 */ 601 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 602 { 603 return tcg_region_alloc__locked(s); 604 } 605 606 /* Call from a safe-work context */ 607 void tcg_region_reset_all(void) 608 { 609 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 610 unsigned int i; 611 612 qemu_mutex_lock(®ion.lock); 613 region.current = 0; 614 region.agg_size_full = 0; 615 616 for (i = 0; i < n_ctxs; i++) { 617 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 618 bool err = tcg_region_initial_alloc__locked(s); 619 620 g_assert(!err); 621 } 622 qemu_mutex_unlock(®ion.lock); 623 624 tcg_region_tree_reset_all(); 625 } 626 627 #ifdef CONFIG_USER_ONLY 628 static size_t tcg_n_regions(void) 629 { 630 return 1; 631 } 632 #else 633 /* 634 * It is likely that some vCPUs will translate more code than others, so we 635 * first try to set more regions than max_cpus, with those regions being of 636 * reasonable size. If that's not possible we make do by evenly dividing 637 * the code_gen_buffer among the vCPUs. 638 */ 639 static size_t tcg_n_regions(void) 640 { 641 size_t i; 642 643 /* Use a single region if all we have is one vCPU thread */ 644 #if !defined(CONFIG_USER_ONLY) 645 MachineState *ms = MACHINE(qdev_get_machine()); 646 unsigned int max_cpus = ms->smp.max_cpus; 647 #endif 648 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 649 return 1; 650 } 651 652 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 653 for (i = 8; i > 0; i--) { 654 size_t regions_per_thread = i; 655 size_t region_size; 656 657 region_size = tcg_init_ctx.code_gen_buffer_size; 658 region_size /= max_cpus * regions_per_thread; 659 660 if (region_size >= 2 * 1024u * 1024) { 661 return max_cpus * regions_per_thread; 662 } 663 } 664 /* If we can't, then just allocate one region per vCPU thread */ 665 return max_cpus; 666 } 667 #endif 668 669 /* 670 * Initializes region partitioning. 671 * 672 * Called at init time from the parent thread (i.e. 
the one calling 673 * tcg_context_init), after the target's TCG globals have been set. 674 * 675 * Region partitioning works by splitting code_gen_buffer into separate regions, 676 * and then assigning regions to TCG threads so that the threads can translate 677 * code in parallel without synchronization. 678 * 679 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 680 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 681 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 682 * must have been parsed before calling this function, since it calls 683 * qemu_tcg_mttcg_enabled(). 684 * 685 * In user-mode we use a single region. Having multiple regions in user-mode 686 * is not supported, because the number of vCPU threads (recall that each thread 687 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 688 * OS, and usually this number is huge (tens of thousands is not uncommon). 689 * Thus, given this large bound on the number of vCPU threads and the fact 690 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 691 * that the availability of at least one region per vCPU thread. 692 * 693 * However, this user-mode limitation is unlikely to be a significant problem 694 * in practice. Multi-threaded guests share most if not all of their translated 695 * code, which makes parallel code generation less appealing than in softmmu. 
696 */ 697 void tcg_region_init(void) 698 { 699 void *buf = tcg_init_ctx.code_gen_buffer; 700 void *aligned; 701 size_t size = tcg_init_ctx.code_gen_buffer_size; 702 size_t page_size = qemu_real_host_page_size; 703 size_t region_size; 704 size_t n_regions; 705 size_t i; 706 uintptr_t splitwx_diff; 707 708 n_regions = tcg_n_regions(); 709 710 /* The first region will be 'aligned - buf' bytes larger than the others */ 711 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 712 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 713 /* 714 * Make region_size a multiple of page_size, using aligned as the start. 715 * As a result of this we might end up with a few extra pages at the end of 716 * the buffer; we will assign those to the last region. 717 */ 718 region_size = (size - (aligned - buf)) / n_regions; 719 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 720 721 /* A region must have at least 2 pages; one code, one guard */ 722 g_assert(region_size >= 2 * page_size); 723 724 /* init the region struct */ 725 qemu_mutex_init(®ion.lock); 726 region.n = n_regions; 727 region.size = region_size - page_size; 728 region.stride = region_size; 729 region.start = buf; 730 region.start_aligned = aligned; 731 /* page-align the end, since its last page will be a guard page */ 732 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 733 /* account for that last guard page */ 734 region.end -= page_size; 735 736 /* set guard pages */ 737 splitwx_diff = tcg_splitwx_diff; 738 for (i = 0; i < region.n; i++) { 739 void *start, *end; 740 int rc; 741 742 tcg_region_bounds(i, &start, &end); 743 rc = qemu_mprotect_none(end, page_size); 744 g_assert(!rc); 745 if (splitwx_diff) { 746 rc = qemu_mprotect_none(end + splitwx_diff, page_size); 747 g_assert(!rc); 748 } 749 } 750 751 tcg_region_trees_init(); 752 753 /* In user-mode we support only one ctx, so do the initial allocation now */ 754 #ifdef CONFIG_USER_ONLY 755 { 756 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 757 
758 g_assert(!err); 759 } 760 #endif 761 } 762 763 #ifdef CONFIG_DEBUG_TCG 764 const void *tcg_splitwx_to_rx(void *rw) 765 { 766 /* Pass NULL pointers unchanged. */ 767 if (rw) { 768 g_assert(in_code_gen_buffer(rw)); 769 rw += tcg_splitwx_diff; 770 } 771 return rw; 772 } 773 774 void *tcg_splitwx_to_rw(const void *rx) 775 { 776 /* Pass NULL pointers unchanged. */ 777 if (rx) { 778 rx -= tcg_splitwx_diff; 779 /* Assert that we end with a pointer in the rw region. */ 780 g_assert(in_code_gen_buffer(rx)); 781 } 782 return (void *)rx; 783 } 784 #endif /* CONFIG_DEBUG_TCG */ 785 786 static void alloc_tcg_plugin_context(TCGContext *s) 787 { 788 #ifdef CONFIG_PLUGIN 789 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); 790 s->plugin_tb->insns = 791 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); 792 #endif 793 } 794 795 /* 796 * All TCG threads except the parent (i.e. the one that called tcg_context_init 797 * and registered the target's TCG globals) must register with this function 798 * before initiating translation. 799 * 800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 801 * of tcg_region_init() for the reasoning behind this. 802 * 803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 804 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 805 * is not used anymore for translation once this function is called. 806 * 807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 808 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 809 */ 810 #ifdef CONFIG_USER_ONLY 811 void tcg_register_thread(void) 812 { 813 tcg_ctx = &tcg_init_ctx; 814 } 815 #else 816 void tcg_register_thread(void) 817 { 818 MachineState *ms = MACHINE(qdev_get_machine()); 819 TCGContext *s = g_malloc(sizeof(*s)); 820 unsigned int i, n; 821 bool err; 822 823 *s = tcg_init_ctx; 824 825 /* Relink mem_base. 
*/ 826 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 827 if (tcg_init_ctx.temps[i].mem_base) { 828 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 829 tcg_debug_assert(b >= 0 && b < n); 830 s->temps[i].mem_base = &s->temps[b]; 831 } 832 } 833 834 /* Claim an entry in tcg_ctxs */ 835 n = qatomic_fetch_inc(&n_tcg_ctxs); 836 g_assert(n < ms->smp.max_cpus); 837 qatomic_set(&tcg_ctxs[n], s); 838 839 if (n > 0) { 840 alloc_tcg_plugin_context(s); 841 } 842 843 tcg_ctx = s; 844 qemu_mutex_lock(®ion.lock); 845 err = tcg_region_initial_alloc__locked(tcg_ctx); 846 g_assert(!err); 847 qemu_mutex_unlock(®ion.lock); 848 } 849 #endif /* !CONFIG_USER_ONLY */ 850 851 /* 852 * Returns the size (in bytes) of all translated code (i.e. from all regions) 853 * currently in the cache. 854 * See also: tcg_code_capacity() 855 * Do not confuse with tcg_current_code_size(); that one applies to a single 856 * TCG context. 857 */ 858 size_t tcg_code_size(void) 859 { 860 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 861 unsigned int i; 862 size_t total; 863 864 qemu_mutex_lock(®ion.lock); 865 total = region.agg_size_full; 866 for (i = 0; i < n_ctxs; i++) { 867 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 868 size_t size; 869 870 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 871 g_assert(size <= s->code_gen_buffer_size); 872 total += size; 873 } 874 qemu_mutex_unlock(®ion.lock); 875 return total; 876 } 877 878 /* 879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 880 * regions. 
881 * See also: tcg_code_size() 882 */ 883 size_t tcg_code_capacity(void) 884 { 885 size_t guard_size, capacity; 886 887 /* no need for synchronization; these variables are set at init time */ 888 guard_size = region.stride - region.size; 889 capacity = region.end + guard_size - region.start; 890 capacity -= region.n * (guard_size + TCG_HIGHWATER); 891 return capacity; 892 } 893 894 size_t tcg_tb_phys_invalidate_count(void) 895 { 896 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 897 unsigned int i; 898 size_t total = 0; 899 900 for (i = 0; i < n_ctxs; i++) { 901 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 902 903 total += qatomic_read(&s->tb_phys_invalidate_count); 904 } 905 return total; 906 } 907 908 /* pool based memory allocation */ 909 void *tcg_malloc_internal(TCGContext *s, int size) 910 { 911 TCGPool *p; 912 int pool_size; 913 914 if (size > TCG_POOL_CHUNK_SIZE) { 915 /* big malloc: insert a new pool (XXX: could optimize) */ 916 p = g_malloc(sizeof(TCGPool) + size); 917 p->size = size; 918 p->next = s->pool_first_large; 919 s->pool_first_large = p; 920 return p->data; 921 } else { 922 p = s->pool_current; 923 if (!p) { 924 p = s->pool_first; 925 if (!p) 926 goto new_pool; 927 } else { 928 if (!p->next) { 929 new_pool: 930 pool_size = TCG_POOL_CHUNK_SIZE; 931 p = g_malloc(sizeof(TCGPool) + pool_size); 932 p->size = pool_size; 933 p->next = NULL; 934 if (s->pool_current) 935 s->pool_current->next = p; 936 else 937 s->pool_first = p; 938 } else { 939 p = p->next; 940 } 941 } 942 } 943 s->pool_current = p; 944 s->pool_cur = p->data + size; 945 s->pool_end = p->data + p->size; 946 return p->data; 947 } 948 949 void tcg_pool_reset(TCGContext *s) 950 { 951 TCGPool *p, *t; 952 for (p = s->pool_first_large; p; p = t) { 953 t = p->next; 954 g_free(p); 955 } 956 s->pool_first_large = NULL; 957 s->pool_cur = s->pool_end = NULL; 958 s->pool_current = NULL; 959 } 960 961 typedef struct TCGHelperInfo { 962 void *func; 963 const char *name; 964 unsigned flags; 965 
unsigned sizemask; 966 } TCGHelperInfo; 967 968 #include "exec/helper-proto.h" 969 970 static const TCGHelperInfo all_helpers[] = { 971 #include "exec/helper-tcg.h" 972 }; 973 static GHashTable *helper_table; 974 975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 976 static void process_op_defs(TCGContext *s); 977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 978 TCGReg reg, const char *name); 979 980 void tcg_context_init(TCGContext *s) 981 { 982 int op, total_args, n, i; 983 TCGOpDef *def; 984 TCGArgConstraint *args_ct; 985 TCGTemp *ts; 986 987 memset(s, 0, sizeof(*s)); 988 s->nb_globals = 0; 989 990 /* Count total number of arguments and allocate the corresponding 991 space */ 992 total_args = 0; 993 for(op = 0; op < NB_OPS; op++) { 994 def = &tcg_op_defs[op]; 995 n = def->nb_iargs + def->nb_oargs; 996 total_args += n; 997 } 998 999 args_ct = g_new0(TCGArgConstraint, total_args); 1000 1001 for(op = 0; op < NB_OPS; op++) { 1002 def = &tcg_op_defs[op]; 1003 def->args_ct = args_ct; 1004 n = def->nb_iargs + def->nb_oargs; 1005 args_ct += n; 1006 } 1007 1008 /* Register helpers. */ 1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 1010 helper_table = g_hash_table_new(NULL, NULL); 1011 1012 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 1013 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 1014 (gpointer)&all_helpers[i]); 1015 } 1016 1017 tcg_target_init(s); 1018 process_op_defs(s); 1019 1020 /* Reverse the order of the saved registers, assuming they're all at 1021 the start of tcg_target_reg_alloc_order. 
*/ 1022 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1023 int r = tcg_target_reg_alloc_order[n]; 1024 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1025 break; 1026 } 1027 } 1028 for (i = 0; i < n; ++i) { 1029 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1030 } 1031 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1032 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1033 } 1034 1035 alloc_tcg_plugin_context(s); 1036 1037 tcg_ctx = s; 1038 /* 1039 * In user-mode we simply share the init context among threads, since we 1040 * use a single region. See the documentation tcg_region_init() for the 1041 * reasoning behind this. 1042 * In softmmu we will have at most max_cpus TCG threads. 1043 */ 1044 #ifdef CONFIG_USER_ONLY 1045 tcg_ctxs = &tcg_ctx; 1046 n_tcg_ctxs = 1; 1047 #else 1048 MachineState *ms = MACHINE(qdev_get_machine()); 1049 unsigned int max_cpus = ms->smp.max_cpus; 1050 tcg_ctxs = g_new(TCGContext *, max_cpus); 1051 #endif 1052 1053 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1054 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1055 cpu_env = temp_tcgv_ptr(ts); 1056 } 1057 1058 /* 1059 * Allocate TBs right before their corresponding translated code, making 1060 * sure that TBs and code are on different cache lines. 
1061 */ 1062 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1063 { 1064 uintptr_t align = qemu_icache_linesize; 1065 TranslationBlock *tb; 1066 void *next; 1067 1068 retry: 1069 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1070 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1071 1072 if (unlikely(next > s->code_gen_highwater)) { 1073 if (tcg_region_alloc(s)) { 1074 return NULL; 1075 } 1076 goto retry; 1077 } 1078 qatomic_set(&s->code_gen_ptr, next); 1079 s->data_gen_ptr = NULL; 1080 return tb; 1081 } 1082 1083 void tcg_prologue_init(TCGContext *s) 1084 { 1085 size_t prologue_size, total_size; 1086 void *buf0, *buf1; 1087 1088 /* Put the prologue at the beginning of code_gen_buffer. */ 1089 buf0 = s->code_gen_buffer; 1090 total_size = s->code_gen_buffer_size; 1091 s->code_ptr = buf0; 1092 s->code_buf = buf0; 1093 s->data_gen_ptr = NULL; 1094 1095 /* 1096 * The region trees are not yet configured, but tcg_splitwx_to_rx 1097 * needs the bounds for an assert. 1098 */ 1099 region.start = buf0; 1100 region.end = buf0 + total_size; 1101 1102 #ifndef CONFIG_TCG_INTERPRETER 1103 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0); 1104 #endif 1105 1106 /* Compute a high-water mark, at which we voluntarily flush the buffer 1107 and start over. The size here is arbitrary, significantly larger 1108 than we expect the code generation for any one opcode to require. */ 1109 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 1110 1111 #ifdef TCG_TARGET_NEED_POOL_LABELS 1112 s->pool_labels = NULL; 1113 #endif 1114 1115 /* Generate the prologue. */ 1116 tcg_target_qemu_prologue(s); 1117 1118 #ifdef TCG_TARGET_NEED_POOL_LABELS 1119 /* Allow the prologue to put e.g. guest_base into a pool entry. 
*/ 1120 { 1121 int result = tcg_out_pool_finalize(s); 1122 tcg_debug_assert(result == 0); 1123 } 1124 #endif 1125 1126 buf1 = s->code_ptr; 1127 #ifndef CONFIG_TCG_INTERPRETER 1128 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0, 1129 tcg_ptr_byte_diff(buf1, buf0)); 1130 #endif 1131 1132 /* Deduct the prologue from the buffer. */ 1133 prologue_size = tcg_current_code_size(s); 1134 s->code_gen_ptr = buf1; 1135 s->code_gen_buffer = buf1; 1136 s->code_buf = buf1; 1137 total_size -= prologue_size; 1138 s->code_gen_buffer_size = total_size; 1139 1140 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size); 1141 1142 #ifdef DEBUG_DISAS 1143 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1144 FILE *logfile = qemu_log_lock(); 1145 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 1146 if (s->data_gen_ptr) { 1147 size_t code_size = s->data_gen_ptr - buf0; 1148 size_t data_size = prologue_size - code_size; 1149 size_t i; 1150 1151 log_disas(buf0, code_size); 1152 1153 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1154 if (sizeof(tcg_target_ulong) == 8) { 1155 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1156 (uintptr_t)s->data_gen_ptr + i, 1157 *(uint64_t *)(s->data_gen_ptr + i)); 1158 } else { 1159 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 1160 (uintptr_t)s->data_gen_ptr + i, 1161 *(uint32_t *)(s->data_gen_ptr + i)); 1162 } 1163 } 1164 } else { 1165 log_disas(buf0, prologue_size); 1166 } 1167 qemu_log("\n"); 1168 qemu_log_flush(); 1169 qemu_log_unlock(logfile); 1170 } 1171 #endif 1172 1173 /* Assert that goto_ptr is implemented completely. */ 1174 if (TCG_TARGET_HAS_goto_ptr) { 1175 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1176 } 1177 } 1178 1179 void tcg_func_start(TCGContext *s) 1180 { 1181 tcg_pool_reset(s); 1182 s->nb_temps = s->nb_globals; 1183 1184 /* No temps have been previously allocated for size or locality. 
*/ 1185 memset(s->free_temps, 0, sizeof(s->free_temps)); 1186 1187 /* No constant temps have been previously allocated. */ 1188 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1189 if (s->const_table[i]) { 1190 g_hash_table_remove_all(s->const_table[i]); 1191 } 1192 } 1193 1194 s->nb_ops = 0; 1195 s->nb_labels = 0; 1196 s->current_frame_offset = s->frame_start; 1197 1198 #ifdef CONFIG_DEBUG_TCG 1199 s->goto_tb_issue_mask = 0; 1200 #endif 1201 1202 QTAILQ_INIT(&s->ops); 1203 QTAILQ_INIT(&s->free_ops); 1204 QSIMPLEQ_INIT(&s->labels); 1205 } 1206 1207 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 1208 { 1209 int n = s->nb_temps++; 1210 tcg_debug_assert(n < TCG_MAX_TEMPS); 1211 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1212 } 1213 1214 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 1215 { 1216 TCGTemp *ts; 1217 1218 tcg_debug_assert(s->nb_globals == s->nb_temps); 1219 s->nb_globals++; 1220 ts = tcg_temp_alloc(s); 1221 ts->kind = TEMP_GLOBAL; 1222 1223 return ts; 1224 } 1225 1226 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1227 TCGReg reg, const char *name) 1228 { 1229 TCGTemp *ts; 1230 1231 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1232 tcg_abort(); 1233 } 1234 1235 ts = tcg_global_alloc(s); 1236 ts->base_type = type; 1237 ts->type = type; 1238 ts->kind = TEMP_FIXED; 1239 ts->reg = reg; 1240 ts->name = name; 1241 tcg_regset_set_reg(s->reserved_regs, reg); 1242 1243 return ts; 1244 } 1245 1246 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1247 { 1248 s->frame_start = start; 1249 s->frame_end = start + size; 1250 s->frame_temp 1251 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1252 } 1253 1254 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1255 intptr_t offset, const char *name) 1256 { 1257 TCGContext *s = tcg_ctx; 1258 TCGTemp *base_ts = tcgv_ptr_temp(base); 1259 TCGTemp *ts = tcg_global_alloc(s); 1260 int indirect_reg = 0, bigendian = 0; 1261 
#ifdef HOST_WORDS_BIGENDIAN 1262 bigendian = 1; 1263 #endif 1264 1265 switch (base_ts->kind) { 1266 case TEMP_FIXED: 1267 break; 1268 case TEMP_GLOBAL: 1269 /* We do not support double-indirect registers. */ 1270 tcg_debug_assert(!base_ts->indirect_reg); 1271 base_ts->indirect_base = 1; 1272 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1273 ? 2 : 1); 1274 indirect_reg = 1; 1275 break; 1276 default: 1277 g_assert_not_reached(); 1278 } 1279 1280 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1281 TCGTemp *ts2 = tcg_global_alloc(s); 1282 char buf[64]; 1283 1284 ts->base_type = TCG_TYPE_I64; 1285 ts->type = TCG_TYPE_I32; 1286 ts->indirect_reg = indirect_reg; 1287 ts->mem_allocated = 1; 1288 ts->mem_base = base_ts; 1289 ts->mem_offset = offset + bigendian * 4; 1290 pstrcpy(buf, sizeof(buf), name); 1291 pstrcat(buf, sizeof(buf), "_0"); 1292 ts->name = strdup(buf); 1293 1294 tcg_debug_assert(ts2 == ts + 1); 1295 ts2->base_type = TCG_TYPE_I64; 1296 ts2->type = TCG_TYPE_I32; 1297 ts2->indirect_reg = indirect_reg; 1298 ts2->mem_allocated = 1; 1299 ts2->mem_base = base_ts; 1300 ts2->mem_offset = offset + (1 - bigendian) * 4; 1301 pstrcpy(buf, sizeof(buf), name); 1302 pstrcat(buf, sizeof(buf), "_1"); 1303 ts2->name = strdup(buf); 1304 } else { 1305 ts->base_type = type; 1306 ts->type = type; 1307 ts->indirect_reg = indirect_reg; 1308 ts->mem_allocated = 1; 1309 ts->mem_base = base_ts; 1310 ts->mem_offset = offset; 1311 ts->name = name; 1312 } 1313 return ts; 1314 } 1315 1316 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) 1317 { 1318 TCGContext *s = tcg_ctx; 1319 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL; 1320 TCGTemp *ts; 1321 int idx, k; 1322 1323 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 1324 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 1325 if (idx < TCG_MAX_TEMPS) { 1326 /* There is already an available temp with the right type. 
*/ 1327 clear_bit(idx, s->free_temps[k].l); 1328 1329 ts = &s->temps[idx]; 1330 ts->temp_allocated = 1; 1331 tcg_debug_assert(ts->base_type == type); 1332 tcg_debug_assert(ts->kind == kind); 1333 } else { 1334 ts = tcg_temp_alloc(s); 1335 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1336 TCGTemp *ts2 = tcg_temp_alloc(s); 1337 1338 ts->base_type = type; 1339 ts->type = TCG_TYPE_I32; 1340 ts->temp_allocated = 1; 1341 ts->kind = kind; 1342 1343 tcg_debug_assert(ts2 == ts + 1); 1344 ts2->base_type = TCG_TYPE_I64; 1345 ts2->type = TCG_TYPE_I32; 1346 ts2->temp_allocated = 1; 1347 ts2->kind = kind; 1348 } else { 1349 ts->base_type = type; 1350 ts->type = type; 1351 ts->temp_allocated = 1; 1352 ts->kind = kind; 1353 } 1354 } 1355 1356 #if defined(CONFIG_DEBUG_TCG) 1357 s->temps_in_use++; 1358 #endif 1359 return ts; 1360 } 1361 1362 TCGv_vec tcg_temp_new_vec(TCGType type) 1363 { 1364 TCGTemp *t; 1365 1366 #ifdef CONFIG_DEBUG_TCG 1367 switch (type) { 1368 case TCG_TYPE_V64: 1369 assert(TCG_TARGET_HAS_v64); 1370 break; 1371 case TCG_TYPE_V128: 1372 assert(TCG_TARGET_HAS_v128); 1373 break; 1374 case TCG_TYPE_V256: 1375 assert(TCG_TARGET_HAS_v256); 1376 break; 1377 default: 1378 g_assert_not_reached(); 1379 } 1380 #endif 1381 1382 t = tcg_temp_new_internal(type, 0); 1383 return temp_tcgv_vec(t); 1384 } 1385 1386 /* Create a new temp of the same type as an existing temp. */ 1387 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1388 { 1389 TCGTemp *t = tcgv_vec_temp(match); 1390 1391 tcg_debug_assert(t->temp_allocated != 0); 1392 1393 t = tcg_temp_new_internal(t->base_type, 0); 1394 return temp_tcgv_vec(t); 1395 } 1396 1397 void tcg_temp_free_internal(TCGTemp *ts) 1398 { 1399 TCGContext *s = tcg_ctx; 1400 int k, idx; 1401 1402 /* In order to simplify users of tcg_constant_*, silently ignore free. 
*/ 1403 if (ts->kind == TEMP_CONST) { 1404 return; 1405 } 1406 1407 #if defined(CONFIG_DEBUG_TCG) 1408 s->temps_in_use--; 1409 if (s->temps_in_use < 0) { 1410 fprintf(stderr, "More temporaries freed than allocated!\n"); 1411 } 1412 #endif 1413 1414 tcg_debug_assert(ts->kind < TEMP_GLOBAL); 1415 tcg_debug_assert(ts->temp_allocated != 0); 1416 ts->temp_allocated = 0; 1417 1418 idx = temp_idx(ts); 1419 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); 1420 set_bit(idx, s->free_temps[k].l); 1421 } 1422 1423 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 1424 { 1425 TCGContext *s = tcg_ctx; 1426 GHashTable *h = s->const_table[type]; 1427 TCGTemp *ts; 1428 1429 if (h == NULL) { 1430 h = g_hash_table_new(g_int64_hash, g_int64_equal); 1431 s->const_table[type] = h; 1432 } 1433 1434 ts = g_hash_table_lookup(h, &val); 1435 if (ts == NULL) { 1436 ts = tcg_temp_alloc(s); 1437 1438 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1439 TCGTemp *ts2 = tcg_temp_alloc(s); 1440 1441 ts->base_type = TCG_TYPE_I64; 1442 ts->type = TCG_TYPE_I32; 1443 ts->kind = TEMP_CONST; 1444 ts->temp_allocated = 1; 1445 /* 1446 * Retain the full value of the 64-bit constant in the low 1447 * part, so that the hash table works. Actual uses will 1448 * truncate the value to the low part. 
1449 */ 1450 ts->val = val; 1451 1452 tcg_debug_assert(ts2 == ts + 1); 1453 ts2->base_type = TCG_TYPE_I64; 1454 ts2->type = TCG_TYPE_I32; 1455 ts2->kind = TEMP_CONST; 1456 ts2->temp_allocated = 1; 1457 ts2->val = val >> 32; 1458 } else { 1459 ts->base_type = type; 1460 ts->type = type; 1461 ts->kind = TEMP_CONST; 1462 ts->temp_allocated = 1; 1463 ts->val = val; 1464 } 1465 g_hash_table_insert(h, &ts->val, ts); 1466 } 1467 1468 return ts; 1469 } 1470 1471 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 1472 { 1473 val = dup_const(vece, val); 1474 return temp_tcgv_vec(tcg_constant_internal(type, val)); 1475 } 1476 1477 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 1478 { 1479 TCGTemp *t = tcgv_vec_temp(match); 1480 1481 tcg_debug_assert(t->temp_allocated != 0); 1482 return tcg_constant_vec(t->base_type, vece, val); 1483 } 1484 1485 TCGv_i32 tcg_const_i32(int32_t val) 1486 { 1487 TCGv_i32 t0; 1488 t0 = tcg_temp_new_i32(); 1489 tcg_gen_movi_i32(t0, val); 1490 return t0; 1491 } 1492 1493 TCGv_i64 tcg_const_i64(int64_t val) 1494 { 1495 TCGv_i64 t0; 1496 t0 = tcg_temp_new_i64(); 1497 tcg_gen_movi_i64(t0, val); 1498 return t0; 1499 } 1500 1501 TCGv_i32 tcg_const_local_i32(int32_t val) 1502 { 1503 TCGv_i32 t0; 1504 t0 = tcg_temp_local_new_i32(); 1505 tcg_gen_movi_i32(t0, val); 1506 return t0; 1507 } 1508 1509 TCGv_i64 tcg_const_local_i64(int64_t val) 1510 { 1511 TCGv_i64 t0; 1512 t0 = tcg_temp_local_new_i64(); 1513 tcg_gen_movi_i64(t0, val); 1514 return t0; 1515 } 1516 1517 #if defined(CONFIG_DEBUG_TCG) 1518 void tcg_clear_temp_count(void) 1519 { 1520 TCGContext *s = tcg_ctx; 1521 s->temps_in_use = 0; 1522 } 1523 1524 int tcg_check_temp_count(void) 1525 { 1526 TCGContext *s = tcg_ctx; 1527 if (s->temps_in_use) { 1528 /* Clear the count so that we don't give another 1529 * warning immediately next time around. 
1530 */ 1531 s->temps_in_use = 0; 1532 return 1; 1533 } 1534 return 0; 1535 } 1536 #endif 1537 1538 /* Return true if OP may appear in the opcode stream. 1539 Test the runtime variable that controls each opcode. */ 1540 bool tcg_op_supported(TCGOpcode op) 1541 { 1542 const bool have_vec 1543 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1544 1545 switch (op) { 1546 case INDEX_op_discard: 1547 case INDEX_op_set_label: 1548 case INDEX_op_call: 1549 case INDEX_op_br: 1550 case INDEX_op_mb: 1551 case INDEX_op_insn_start: 1552 case INDEX_op_exit_tb: 1553 case INDEX_op_goto_tb: 1554 case INDEX_op_qemu_ld_i32: 1555 case INDEX_op_qemu_st_i32: 1556 case INDEX_op_qemu_ld_i64: 1557 case INDEX_op_qemu_st_i64: 1558 return true; 1559 1560 case INDEX_op_qemu_st8_i32: 1561 return TCG_TARGET_HAS_qemu_st8_i32; 1562 1563 case INDEX_op_goto_ptr: 1564 return TCG_TARGET_HAS_goto_ptr; 1565 1566 case INDEX_op_mov_i32: 1567 case INDEX_op_setcond_i32: 1568 case INDEX_op_brcond_i32: 1569 case INDEX_op_ld8u_i32: 1570 case INDEX_op_ld8s_i32: 1571 case INDEX_op_ld16u_i32: 1572 case INDEX_op_ld16s_i32: 1573 case INDEX_op_ld_i32: 1574 case INDEX_op_st8_i32: 1575 case INDEX_op_st16_i32: 1576 case INDEX_op_st_i32: 1577 case INDEX_op_add_i32: 1578 case INDEX_op_sub_i32: 1579 case INDEX_op_mul_i32: 1580 case INDEX_op_and_i32: 1581 case INDEX_op_or_i32: 1582 case INDEX_op_xor_i32: 1583 case INDEX_op_shl_i32: 1584 case INDEX_op_shr_i32: 1585 case INDEX_op_sar_i32: 1586 return true; 1587 1588 case INDEX_op_movcond_i32: 1589 return TCG_TARGET_HAS_movcond_i32; 1590 case INDEX_op_div_i32: 1591 case INDEX_op_divu_i32: 1592 return TCG_TARGET_HAS_div_i32; 1593 case INDEX_op_rem_i32: 1594 case INDEX_op_remu_i32: 1595 return TCG_TARGET_HAS_rem_i32; 1596 case INDEX_op_div2_i32: 1597 case INDEX_op_divu2_i32: 1598 return TCG_TARGET_HAS_div2_i32; 1599 case INDEX_op_rotl_i32: 1600 case INDEX_op_rotr_i32: 1601 return TCG_TARGET_HAS_rot_i32; 1602 case INDEX_op_deposit_i32: 1603 return 
TCG_TARGET_HAS_deposit_i32; 1604 case INDEX_op_extract_i32: 1605 return TCG_TARGET_HAS_extract_i32; 1606 case INDEX_op_sextract_i32: 1607 return TCG_TARGET_HAS_sextract_i32; 1608 case INDEX_op_extract2_i32: 1609 return TCG_TARGET_HAS_extract2_i32; 1610 case INDEX_op_add2_i32: 1611 return TCG_TARGET_HAS_add2_i32; 1612 case INDEX_op_sub2_i32: 1613 return TCG_TARGET_HAS_sub2_i32; 1614 case INDEX_op_mulu2_i32: 1615 return TCG_TARGET_HAS_mulu2_i32; 1616 case INDEX_op_muls2_i32: 1617 return TCG_TARGET_HAS_muls2_i32; 1618 case INDEX_op_muluh_i32: 1619 return TCG_TARGET_HAS_muluh_i32; 1620 case INDEX_op_mulsh_i32: 1621 return TCG_TARGET_HAS_mulsh_i32; 1622 case INDEX_op_ext8s_i32: 1623 return TCG_TARGET_HAS_ext8s_i32; 1624 case INDEX_op_ext16s_i32: 1625 return TCG_TARGET_HAS_ext16s_i32; 1626 case INDEX_op_ext8u_i32: 1627 return TCG_TARGET_HAS_ext8u_i32; 1628 case INDEX_op_ext16u_i32: 1629 return TCG_TARGET_HAS_ext16u_i32; 1630 case INDEX_op_bswap16_i32: 1631 return TCG_TARGET_HAS_bswap16_i32; 1632 case INDEX_op_bswap32_i32: 1633 return TCG_TARGET_HAS_bswap32_i32; 1634 case INDEX_op_not_i32: 1635 return TCG_TARGET_HAS_not_i32; 1636 case INDEX_op_neg_i32: 1637 return TCG_TARGET_HAS_neg_i32; 1638 case INDEX_op_andc_i32: 1639 return TCG_TARGET_HAS_andc_i32; 1640 case INDEX_op_orc_i32: 1641 return TCG_TARGET_HAS_orc_i32; 1642 case INDEX_op_eqv_i32: 1643 return TCG_TARGET_HAS_eqv_i32; 1644 case INDEX_op_nand_i32: 1645 return TCG_TARGET_HAS_nand_i32; 1646 case INDEX_op_nor_i32: 1647 return TCG_TARGET_HAS_nor_i32; 1648 case INDEX_op_clz_i32: 1649 return TCG_TARGET_HAS_clz_i32; 1650 case INDEX_op_ctz_i32: 1651 return TCG_TARGET_HAS_ctz_i32; 1652 case INDEX_op_ctpop_i32: 1653 return TCG_TARGET_HAS_ctpop_i32; 1654 1655 case INDEX_op_brcond2_i32: 1656 case INDEX_op_setcond2_i32: 1657 return TCG_TARGET_REG_BITS == 32; 1658 1659 case INDEX_op_mov_i64: 1660 case INDEX_op_setcond_i64: 1661 case INDEX_op_brcond_i64: 1662 case INDEX_op_ld8u_i64: 1663 case INDEX_op_ld8s_i64: 1664 case 
INDEX_op_ld16u_i64: 1665 case INDEX_op_ld16s_i64: 1666 case INDEX_op_ld32u_i64: 1667 case INDEX_op_ld32s_i64: 1668 case INDEX_op_ld_i64: 1669 case INDEX_op_st8_i64: 1670 case INDEX_op_st16_i64: 1671 case INDEX_op_st32_i64: 1672 case INDEX_op_st_i64: 1673 case INDEX_op_add_i64: 1674 case INDEX_op_sub_i64: 1675 case INDEX_op_mul_i64: 1676 case INDEX_op_and_i64: 1677 case INDEX_op_or_i64: 1678 case INDEX_op_xor_i64: 1679 case INDEX_op_shl_i64: 1680 case INDEX_op_shr_i64: 1681 case INDEX_op_sar_i64: 1682 case INDEX_op_ext_i32_i64: 1683 case INDEX_op_extu_i32_i64: 1684 return TCG_TARGET_REG_BITS == 64; 1685 1686 case INDEX_op_movcond_i64: 1687 return TCG_TARGET_HAS_movcond_i64; 1688 case INDEX_op_div_i64: 1689 case INDEX_op_divu_i64: 1690 return TCG_TARGET_HAS_div_i64; 1691 case INDEX_op_rem_i64: 1692 case INDEX_op_remu_i64: 1693 return TCG_TARGET_HAS_rem_i64; 1694 case INDEX_op_div2_i64: 1695 case INDEX_op_divu2_i64: 1696 return TCG_TARGET_HAS_div2_i64; 1697 case INDEX_op_rotl_i64: 1698 case INDEX_op_rotr_i64: 1699 return TCG_TARGET_HAS_rot_i64; 1700 case INDEX_op_deposit_i64: 1701 return TCG_TARGET_HAS_deposit_i64; 1702 case INDEX_op_extract_i64: 1703 return TCG_TARGET_HAS_extract_i64; 1704 case INDEX_op_sextract_i64: 1705 return TCG_TARGET_HAS_sextract_i64; 1706 case INDEX_op_extract2_i64: 1707 return TCG_TARGET_HAS_extract2_i64; 1708 case INDEX_op_extrl_i64_i32: 1709 return TCG_TARGET_HAS_extrl_i64_i32; 1710 case INDEX_op_extrh_i64_i32: 1711 return TCG_TARGET_HAS_extrh_i64_i32; 1712 case INDEX_op_ext8s_i64: 1713 return TCG_TARGET_HAS_ext8s_i64; 1714 case INDEX_op_ext16s_i64: 1715 return TCG_TARGET_HAS_ext16s_i64; 1716 case INDEX_op_ext32s_i64: 1717 return TCG_TARGET_HAS_ext32s_i64; 1718 case INDEX_op_ext8u_i64: 1719 return TCG_TARGET_HAS_ext8u_i64; 1720 case INDEX_op_ext16u_i64: 1721 return TCG_TARGET_HAS_ext16u_i64; 1722 case INDEX_op_ext32u_i64: 1723 return TCG_TARGET_HAS_ext32u_i64; 1724 case INDEX_op_bswap16_i64: 1725 return TCG_TARGET_HAS_bswap16_i64; 1726 case 
INDEX_op_bswap32_i64: 1727 return TCG_TARGET_HAS_bswap32_i64; 1728 case INDEX_op_bswap64_i64: 1729 return TCG_TARGET_HAS_bswap64_i64; 1730 case INDEX_op_not_i64: 1731 return TCG_TARGET_HAS_not_i64; 1732 case INDEX_op_neg_i64: 1733 return TCG_TARGET_HAS_neg_i64; 1734 case INDEX_op_andc_i64: 1735 return TCG_TARGET_HAS_andc_i64; 1736 case INDEX_op_orc_i64: 1737 return TCG_TARGET_HAS_orc_i64; 1738 case INDEX_op_eqv_i64: 1739 return TCG_TARGET_HAS_eqv_i64; 1740 case INDEX_op_nand_i64: 1741 return TCG_TARGET_HAS_nand_i64; 1742 case INDEX_op_nor_i64: 1743 return TCG_TARGET_HAS_nor_i64; 1744 case INDEX_op_clz_i64: 1745 return TCG_TARGET_HAS_clz_i64; 1746 case INDEX_op_ctz_i64: 1747 return TCG_TARGET_HAS_ctz_i64; 1748 case INDEX_op_ctpop_i64: 1749 return TCG_TARGET_HAS_ctpop_i64; 1750 case INDEX_op_add2_i64: 1751 return TCG_TARGET_HAS_add2_i64; 1752 case INDEX_op_sub2_i64: 1753 return TCG_TARGET_HAS_sub2_i64; 1754 case INDEX_op_mulu2_i64: 1755 return TCG_TARGET_HAS_mulu2_i64; 1756 case INDEX_op_muls2_i64: 1757 return TCG_TARGET_HAS_muls2_i64; 1758 case INDEX_op_muluh_i64: 1759 return TCG_TARGET_HAS_muluh_i64; 1760 case INDEX_op_mulsh_i64: 1761 return TCG_TARGET_HAS_mulsh_i64; 1762 1763 case INDEX_op_mov_vec: 1764 case INDEX_op_dup_vec: 1765 case INDEX_op_dupm_vec: 1766 case INDEX_op_ld_vec: 1767 case INDEX_op_st_vec: 1768 case INDEX_op_add_vec: 1769 case INDEX_op_sub_vec: 1770 case INDEX_op_and_vec: 1771 case INDEX_op_or_vec: 1772 case INDEX_op_xor_vec: 1773 case INDEX_op_cmp_vec: 1774 return have_vec; 1775 case INDEX_op_dup2_vec: 1776 return have_vec && TCG_TARGET_REG_BITS == 32; 1777 case INDEX_op_not_vec: 1778 return have_vec && TCG_TARGET_HAS_not_vec; 1779 case INDEX_op_neg_vec: 1780 return have_vec && TCG_TARGET_HAS_neg_vec; 1781 case INDEX_op_abs_vec: 1782 return have_vec && TCG_TARGET_HAS_abs_vec; 1783 case INDEX_op_andc_vec: 1784 return have_vec && TCG_TARGET_HAS_andc_vec; 1785 case INDEX_op_orc_vec: 1786 return have_vec && TCG_TARGET_HAS_orc_vec; 1787 case 
INDEX_op_mul_vec: 1788 return have_vec && TCG_TARGET_HAS_mul_vec; 1789 case INDEX_op_shli_vec: 1790 case INDEX_op_shri_vec: 1791 case INDEX_op_sari_vec: 1792 return have_vec && TCG_TARGET_HAS_shi_vec; 1793 case INDEX_op_shls_vec: 1794 case INDEX_op_shrs_vec: 1795 case INDEX_op_sars_vec: 1796 return have_vec && TCG_TARGET_HAS_shs_vec; 1797 case INDEX_op_shlv_vec: 1798 case INDEX_op_shrv_vec: 1799 case INDEX_op_sarv_vec: 1800 return have_vec && TCG_TARGET_HAS_shv_vec; 1801 case INDEX_op_rotli_vec: 1802 return have_vec && TCG_TARGET_HAS_roti_vec; 1803 case INDEX_op_rotls_vec: 1804 return have_vec && TCG_TARGET_HAS_rots_vec; 1805 case INDEX_op_rotlv_vec: 1806 case INDEX_op_rotrv_vec: 1807 return have_vec && TCG_TARGET_HAS_rotv_vec; 1808 case INDEX_op_ssadd_vec: 1809 case INDEX_op_usadd_vec: 1810 case INDEX_op_sssub_vec: 1811 case INDEX_op_ussub_vec: 1812 return have_vec && TCG_TARGET_HAS_sat_vec; 1813 case INDEX_op_smin_vec: 1814 case INDEX_op_umin_vec: 1815 case INDEX_op_smax_vec: 1816 case INDEX_op_umax_vec: 1817 return have_vec && TCG_TARGET_HAS_minmax_vec; 1818 case INDEX_op_bitsel_vec: 1819 return have_vec && TCG_TARGET_HAS_bitsel_vec; 1820 case INDEX_op_cmpsel_vec: 1821 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 1822 1823 default: 1824 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 1825 return true; 1826 } 1827 } 1828 1829 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1830 and endian swap. Maybe it would be better to do the alignment 1831 and endian swap in tcg_reg_alloc_call(). 
*/ 1832 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1833 { 1834 int i, real_args, nb_rets, pi; 1835 unsigned sizemask, flags; 1836 TCGHelperInfo *info; 1837 TCGOp *op; 1838 1839 info = g_hash_table_lookup(helper_table, (gpointer)func); 1840 flags = info->flags; 1841 sizemask = info->sizemask; 1842 1843 #ifdef CONFIG_PLUGIN 1844 /* detect non-plugin helpers */ 1845 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) { 1846 tcg_ctx->plugin_insn->calls_helpers = true; 1847 } 1848 #endif 1849 1850 #if defined(__sparc__) && !defined(__arch64__) \ 1851 && !defined(CONFIG_TCG_INTERPRETER) 1852 /* We have 64-bit values in one register, but need to pass as two 1853 separate parameters. Split them. */ 1854 int orig_sizemask = sizemask; 1855 int orig_nargs = nargs; 1856 TCGv_i64 retl, reth; 1857 TCGTemp *split_args[MAX_OPC_PARAM]; 1858 1859 retl = NULL; 1860 reth = NULL; 1861 if (sizemask != 0) { 1862 for (i = real_args = 0; i < nargs; ++i) { 1863 int is_64bit = sizemask & (1 << (i+1)*2); 1864 if (is_64bit) { 1865 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1866 TCGv_i32 h = tcg_temp_new_i32(); 1867 TCGv_i32 l = tcg_temp_new_i32(); 1868 tcg_gen_extr_i64_i32(l, h, orig); 1869 split_args[real_args++] = tcgv_i32_temp(h); 1870 split_args[real_args++] = tcgv_i32_temp(l); 1871 } else { 1872 split_args[real_args++] = args[i]; 1873 } 1874 } 1875 nargs = real_args; 1876 args = split_args; 1877 sizemask = 0; 1878 } 1879 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1880 for (i = 0; i < nargs; ++i) { 1881 int is_64bit = sizemask & (1 << (i+1)*2); 1882 int is_signed = sizemask & (2 << (i+1)*2); 1883 if (!is_64bit) { 1884 TCGv_i64 temp = tcg_temp_new_i64(); 1885 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1886 if (is_signed) { 1887 tcg_gen_ext32s_i64(temp, orig); 1888 } else { 1889 tcg_gen_ext32u_i64(temp, orig); 1890 } 1891 args[i] = tcgv_i64_temp(temp); 1892 } 1893 } 1894 #endif /* TCG_TARGET_EXTEND_ARGS */ 1895 1896 op = 
tcg_emit_op(INDEX_op_call); 1897 1898 pi = 0; 1899 if (ret != NULL) { 1900 #if defined(__sparc__) && !defined(__arch64__) \ 1901 && !defined(CONFIG_TCG_INTERPRETER) 1902 if (orig_sizemask & 1) { 1903 /* The 32-bit ABI is going to return the 64-bit value in 1904 the %o0/%o1 register pair. Prepare for this by using 1905 two return temporaries, and reassemble below. */ 1906 retl = tcg_temp_new_i64(); 1907 reth = tcg_temp_new_i64(); 1908 op->args[pi++] = tcgv_i64_arg(reth); 1909 op->args[pi++] = tcgv_i64_arg(retl); 1910 nb_rets = 2; 1911 } else { 1912 op->args[pi++] = temp_arg(ret); 1913 nb_rets = 1; 1914 } 1915 #else 1916 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1917 #ifdef HOST_WORDS_BIGENDIAN 1918 op->args[pi++] = temp_arg(ret + 1); 1919 op->args[pi++] = temp_arg(ret); 1920 #else 1921 op->args[pi++] = temp_arg(ret); 1922 op->args[pi++] = temp_arg(ret + 1); 1923 #endif 1924 nb_rets = 2; 1925 } else { 1926 op->args[pi++] = temp_arg(ret); 1927 nb_rets = 1; 1928 } 1929 #endif 1930 } else { 1931 nb_rets = 0; 1932 } 1933 TCGOP_CALLO(op) = nb_rets; 1934 1935 real_args = 0; 1936 for (i = 0; i < nargs; i++) { 1937 int is_64bit = sizemask & (1 << (i+1)*2); 1938 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1939 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1940 /* some targets want aligned 64 bit args */ 1941 if (real_args & 1) { 1942 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1943 real_args++; 1944 } 1945 #endif 1946 /* If stack grows up, then we will be placing successive 1947 arguments at lower addresses, which means we need to 1948 reverse the order compared to how we would normally 1949 treat either big or little-endian. For those arguments 1950 that will wind up in registers, this still works for 1951 HPPA (the only current STACK_GROWSUP target) since the 1952 argument registers are *also* allocated in decreasing 1953 order. If another such target is added, this logic may 1954 have to get more complicated to differentiate between 1955 stack arguments and register arguments. 
*/ 1956 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1957 op->args[pi++] = temp_arg(args[i] + 1); 1958 op->args[pi++] = temp_arg(args[i]); 1959 #else 1960 op->args[pi++] = temp_arg(args[i]); 1961 op->args[pi++] = temp_arg(args[i] + 1); 1962 #endif 1963 real_args += 2; 1964 continue; 1965 } 1966 1967 op->args[pi++] = temp_arg(args[i]); 1968 real_args++; 1969 } 1970 op->args[pi++] = (uintptr_t)func; 1971 op->args[pi++] = flags; 1972 TCGOP_CALLI(op) = real_args; 1973 1974 /* Make sure the fields didn't overflow. */ 1975 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 1976 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1977 1978 #if defined(__sparc__) && !defined(__arch64__) \ 1979 && !defined(CONFIG_TCG_INTERPRETER) 1980 /* Free all of the parts we allocated above. */ 1981 for (i = real_args = 0; i < orig_nargs; ++i) { 1982 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1983 if (is_64bit) { 1984 tcg_temp_free_internal(args[real_args++]); 1985 tcg_temp_free_internal(args[real_args++]); 1986 } else { 1987 real_args++; 1988 } 1989 } 1990 if (orig_sizemask & 1) { 1991 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1992 Note that describing these as TCGv_i64 eliminates an unnecessary 1993 zero-extension that tcg_gen_concat_i32_i64 would create. 
*/ 1994 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1995 tcg_temp_free_i64(retl); 1996 tcg_temp_free_i64(reth); 1997 } 1998 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1999 for (i = 0; i < nargs; ++i) { 2000 int is_64bit = sizemask & (1 << (i+1)*2); 2001 if (!is_64bit) { 2002 tcg_temp_free_internal(args[i]); 2003 } 2004 } 2005 #endif /* TCG_TARGET_EXTEND_ARGS */ 2006 } 2007 2008 static void tcg_reg_alloc_start(TCGContext *s) 2009 { 2010 int i, n; 2011 2012 for (i = 0, n = s->nb_temps; i < n; i++) { 2013 TCGTemp *ts = &s->temps[i]; 2014 TCGTempVal val = TEMP_VAL_MEM; 2015 2016 switch (ts->kind) { 2017 case TEMP_CONST: 2018 val = TEMP_VAL_CONST; 2019 break; 2020 case TEMP_FIXED: 2021 val = TEMP_VAL_REG; 2022 break; 2023 case TEMP_GLOBAL: 2024 break; 2025 case TEMP_NORMAL: 2026 val = TEMP_VAL_DEAD; 2027 /* fall through */ 2028 case TEMP_LOCAL: 2029 ts->mem_allocated = 0; 2030 break; 2031 default: 2032 g_assert_not_reached(); 2033 } 2034 ts->val_type = val; 2035 } 2036 2037 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2038 } 2039 2040 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2041 TCGTemp *ts) 2042 { 2043 int idx = temp_idx(ts); 2044 2045 switch (ts->kind) { 2046 case TEMP_FIXED: 2047 case TEMP_GLOBAL: 2048 pstrcpy(buf, buf_size, ts->name); 2049 break; 2050 case TEMP_LOCAL: 2051 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2052 break; 2053 case TEMP_NORMAL: 2054 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2055 break; 2056 case TEMP_CONST: 2057 switch (ts->type) { 2058 case TCG_TYPE_I32: 2059 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2060 break; 2061 #if TCG_TARGET_REG_BITS > 32 2062 case TCG_TYPE_I64: 2063 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2064 break; 2065 #endif 2066 case TCG_TYPE_V64: 2067 case TCG_TYPE_V128: 2068 case TCG_TYPE_V256: 2069 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2070 64 << (ts->type - TCG_TYPE_V64), ts->val); 2071 break; 2072 default: 
2073 g_assert_not_reached(); 2074 } 2075 break; 2076 } 2077 return buf; 2078 } 2079 2080 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2081 int buf_size, TCGArg arg) 2082 { 2083 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2084 } 2085 2086 /* Find helper name. */ 2087 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 2088 { 2089 const char *ret = NULL; 2090 if (helper_table) { 2091 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 2092 if (info) { 2093 ret = info->name; 2094 } 2095 } 2096 return ret; 2097 } 2098 2099 static const char * const cond_name[] = 2100 { 2101 [TCG_COND_NEVER] = "never", 2102 [TCG_COND_ALWAYS] = "always", 2103 [TCG_COND_EQ] = "eq", 2104 [TCG_COND_NE] = "ne", 2105 [TCG_COND_LT] = "lt", 2106 [TCG_COND_GE] = "ge", 2107 [TCG_COND_LE] = "le", 2108 [TCG_COND_GT] = "gt", 2109 [TCG_COND_LTU] = "ltu", 2110 [TCG_COND_GEU] = "geu", 2111 [TCG_COND_LEU] = "leu", 2112 [TCG_COND_GTU] = "gtu" 2113 }; 2114 2115 static const char * const ldst_name[] = 2116 { 2117 [MO_UB] = "ub", 2118 [MO_SB] = "sb", 2119 [MO_LEUW] = "leuw", 2120 [MO_LESW] = "lesw", 2121 [MO_LEUL] = "leul", 2122 [MO_LESL] = "lesl", 2123 [MO_LEQ] = "leq", 2124 [MO_BEUW] = "beuw", 2125 [MO_BESW] = "besw", 2126 [MO_BEUL] = "beul", 2127 [MO_BESL] = "besl", 2128 [MO_BEQ] = "beq", 2129 }; 2130 2131 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2132 #ifdef TARGET_ALIGNED_ONLY 2133 [MO_UNALN >> MO_ASHIFT] = "un+", 2134 [MO_ALIGN >> MO_ASHIFT] = "", 2135 #else 2136 [MO_UNALN >> MO_ASHIFT] = "", 2137 [MO_ALIGN >> MO_ASHIFT] = "al+", 2138 #endif 2139 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2140 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2141 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2142 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2143 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2144 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2145 }; 2146 2147 static inline bool tcg_regset_single(TCGRegSet d) 2148 { 2149 return (d & (d - 1)) == 0; 2150 } 
/* Return the index of the lowest set bit in D; D must be non-empty. */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Log the current opcode stream, one op per line.  With HAVE_PREFS,
   also append liveness info (sync/dead) and output register preferences. */
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            /* Pretty-print the first constant arg for known opcodes. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);  /* NOTE: shadows TCGOp *op */
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets print as $Lnn. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining constant args print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            /* Pad to column 40 before appending liveness/pref info. */
            QemuLogFile *logfile;

            rcu_read_lock();
            logfile = qatomic_rcu_read(&qemu_logfile);
            if (logfile) {
                for (; col < 40; ++col) {
                    putc(' ', logfile->fd);
                }
            }
            rcu_read_unlock();
        }

        if (op->life) {
            unsigned life = op->life;

            /* Args 0-1 that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                qemu_log(" sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            /* Args whose value dies at this op. */
            life /= DEAD_ARG;
            if (life) {
                qemu_log(" dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    qemu_log(" pref=");
                } else {
                    qemu_log(",");
                }
                if (set == 0) {
                    qemu_log("none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    qemu_log("all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    qemu_log("%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    qemu_log("%#x", (uint32_t)set);
                } else {
                    qemu_log("%#" PRIx64, (uint64_t)set);
                }
            }
        }

        qemu_log("\n");
    }
}

/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n;

    if (arg_ct->oalias) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        n = ctpop64(arg_ct->regs);
    }
    return TCG_TARGET_NB_REGS - n + 1;
}

/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j;
    TCGArgConstraint *a = def->args_ct;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    /* Simple selection sort over the sort_index permutation. */
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

/* Parse the backend's constraint strings for every opcode and fill in
   args_ct: register sets, constants, output aliases and new-reg flags. */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* Digit constraint: input i aliases output oarg. */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must not overlap any input register. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* Backend-specific constraint letter. */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

/* Unlink OP from the instruction stream, dropping the reference count
   of any label it branches to, and put it on the free list for reuse. */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

/* Allocate a zero-initialized TCGOp for OPC, reusing the free list
   when possible.  The QTAILQ link field is left uninitialized. */
static TCGOp *tcg_op_alloc(TCGOpcode opc)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op;

    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
        op = tcg_malloc(sizeof(TCGOp));
    } else {
        op = QTAILQ_FIRST(&s->free_ops);
        QTAILQ_REMOVE(&s->free_ops, op, link);
    }
    /* Zero everything up to (not including) the list link. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    s->nb_ops++;

    return op;
}

/* Allocate an op for OPC and append it to the current op stream. */
TCGOp *tcg_emit_op(TCGOpcode opc)
{
    TCGOp *op = tcg_op_alloc(opc);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

/* Allocate an op for OPC and insert it immediately before OLD_OP. */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

/* Allocate an op for OPC and insert it immediately after OLD_OP. */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

/* Reachable analysis : remove unreachable code.  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Per-temp liveness state bits used by the liveness passes. */
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory.
*/ 2646 static void la_func_end(TCGContext *s, int ng, int nt) 2647 { 2648 int i; 2649 2650 for (i = 0; i < ng; ++i) { 2651 s->temps[i].state = TS_DEAD | TS_MEM; 2652 la_reset_pref(&s->temps[i]); 2653 } 2654 for (i = ng; i < nt; ++i) { 2655 s->temps[i].state = TS_DEAD; 2656 la_reset_pref(&s->temps[i]); 2657 } 2658 } 2659 2660 /* liveness analysis: end of basic block: all temps are dead, globals 2661 and local temps should be in memory. */ 2662 static void la_bb_end(TCGContext *s, int ng, int nt) 2663 { 2664 int i; 2665 2666 for (i = 0; i < nt; ++i) { 2667 TCGTemp *ts = &s->temps[i]; 2668 int state; 2669 2670 switch (ts->kind) { 2671 case TEMP_FIXED: 2672 case TEMP_GLOBAL: 2673 case TEMP_LOCAL: 2674 state = TS_DEAD | TS_MEM; 2675 break; 2676 case TEMP_NORMAL: 2677 case TEMP_CONST: 2678 state = TS_DEAD; 2679 break; 2680 default: 2681 g_assert_not_reached(); 2682 } 2683 ts->state = state; 2684 la_reset_pref(ts); 2685 } 2686 } 2687 2688 /* liveness analysis: sync globals back to memory. */ 2689 static void la_global_sync(TCGContext *s, int ng) 2690 { 2691 int i; 2692 2693 for (i = 0; i < ng; ++i) { 2694 int state = s->temps[i].state; 2695 s->temps[i].state = state | TS_MEM; 2696 if (state == TS_DEAD) { 2697 /* If the global was previously dead, reset prefs. */ 2698 la_reset_pref(&s->temps[i]); 2699 } 2700 } 2701 } 2702 2703 /* 2704 * liveness analysis: conditional branch: all temps are dead, 2705 * globals and local temps should be synced. 
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            /* Synced to memory, but keep its live register prefs. */
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.  Walks the op list backward, so "preceding"
   opcodes below means earlier in execution order. */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference set per temp, reached via ts->state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = op->args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Insert a load of the indirect global's memory slot
                       into its direct temporary, just before this use. */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead output: store the mov source directly and
                           drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}

#ifdef CONFIG_DEBUG_TCG
/* Debug helper: print the location of every temp and the temp held
   by every host register. */
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" PRIx64, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

/* Debug helper: verify that reg_to_temp[] and each temp's val_type/reg
   agree in both directions; abort on any inconsistency. */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif

/* Assign TS a slot in the TB's spill frame; abort if the frame is full. */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        /* Fixed temps never change state. */
        return;
    case TEMP_GLOBAL:
    case TEMP_LOCAL:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_NORMAL:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = new_type;
}

/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Backend cannot store the constant directly: materialize
               it in a register and fall through to the store below. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        /* Sync to memory and mark free (-1); the value stays reloadable. */
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] is the mandatory set; reg_ct[0] the preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something: second pass takes the first eligible
       register and evicts its current temp via tcg_reg_free(). */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    /* Unreachable: reg_ct[1] was asserted non-empty above. */
    tcg_abort();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: materialize with movi. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* Freshly materialized: no memory slot matches this value. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded: register and memory agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    /* Publish the new register assignment. */
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety; no code is
       emitted here anymore ('allocated_regs' is retained for the
       historical interface). */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.
'allocated_regs' is used in case a 3585 temporary registers needs to be allocated to store a constant. */ 3586 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3587 { 3588 int i, n; 3589 3590 for (i = 0, n = s->nb_globals; i < n; i++) { 3591 temp_save(s, &s->temps[i], allocated_regs); 3592 } 3593 } 3594 3595 /* sync globals to their canonical location and assume they can be 3596 read by the following code. 'allocated_regs' is used in case a 3597 temporary registers needs to be allocated to store a constant. */ 3598 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3599 { 3600 int i, n; 3601 3602 for (i = 0, n = s->nb_globals; i < n; i++) { 3603 TCGTemp *ts = &s->temps[i]; 3604 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3605 || ts->kind == TEMP_FIXED 3606 || ts->mem_coherent); 3607 } 3608 } 3609 3610 /* at the end of a basic block, we assume all temporaries are dead and 3611 all globals are stored at their canonical location. */ 3612 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3613 { 3614 int i; 3615 3616 for (i = s->nb_globals; i < s->nb_temps; i++) { 3617 TCGTemp *ts = &s->temps[i]; 3618 3619 switch (ts->kind) { 3620 case TEMP_LOCAL: 3621 temp_save(s, ts, allocated_regs); 3622 break; 3623 case TEMP_NORMAL: 3624 /* The liveness analysis already ensures that temps are dead. 3625 Keep an tcg_debug_assert for safety. */ 3626 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3627 break; 3628 case TEMP_CONST: 3629 /* Similarly, we should have freed any allocated register. */ 3630 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3631 break; 3632 default: 3633 g_assert_not_reached(); 3634 } 3635 } 3636 3637 save_globals(s, allocated_regs); 3638 } 3639 3640 /* 3641 * At a conditional branch, we assume all temporaries are dead and 3642 * all globals and local temps are synced to their location. 
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_LOCAL:
            /* A local may stay in its register, but must be synced. */
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_NORMAL:
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Constants need no sync; they are rematerialized on use. */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here: the output simply
       becomes a known constant, materialized lazily on use. */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    allocated_regs = s->reserved_regs;
    preferred_regs = op->output_pref[0];
    ots = arg_temp(op->args[0]);    /* output temp */
    ts = arg_temp(op->args[1]);     /* input temp */

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation. */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0)) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        /* Store straight from the source register to the output's slot. */
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
            /* the mov can be suppressed: transfer the source's register
               to the output temp */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, preferred_regs,
                                         ots->indirect_base);
            }
            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
                /*
                 * Cross register class move not supported.
3767 * Store the source register into the destination slot 3768 * and leave the destination temp as TEMP_VAL_MEM. 3769 */ 3770 assert(!temp_readonly(ots)); 3771 if (!ts->mem_allocated) { 3772 temp_allocate_frame(s, ots); 3773 } 3774 tcg_out_st(s, ts->type, ts->reg, 3775 ots->mem_base->reg, ots->mem_offset); 3776 ots->mem_coherent = 1; 3777 temp_free_or_dead(s, ots, -1); 3778 return; 3779 } 3780 } 3781 ots->val_type = TEMP_VAL_REG; 3782 ots->mem_coherent = 0; 3783 s->reg_to_temp[ots->reg] = ots; 3784 if (NEED_SYNC_ARG(0)) { 3785 temp_sync(s, ots, allocated_regs, 0, 0); 3786 } 3787 } 3788 } 3789 3790 /* 3791 * Specialized code generation for INDEX_op_dup_vec. 3792 */ 3793 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3794 { 3795 const TCGLifeData arg_life = op->life; 3796 TCGRegSet dup_out_regs, dup_in_regs; 3797 TCGTemp *its, *ots; 3798 TCGType itype, vtype; 3799 intptr_t endian_fixup; 3800 unsigned vece; 3801 bool ok; 3802 3803 ots = arg_temp(op->args[0]); 3804 its = arg_temp(op->args[1]); 3805 3806 /* ENV should not be modified. */ 3807 tcg_debug_assert(!temp_readonly(ots)); 3808 3809 itype = its->type; 3810 vece = TCGOP_VECE(op); 3811 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3812 3813 if (its->val_type == TEMP_VAL_CONST) { 3814 /* Propagate constant via movi -> dupi. */ 3815 tcg_target_ulong val = its->val; 3816 if (IS_DEAD_ARG(1)) { 3817 temp_dead(s, its); 3818 } 3819 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3820 return; 3821 } 3822 3823 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3824 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3825 3826 /* Allocate the output register now. */ 3827 if (ots->val_type != TEMP_VAL_REG) { 3828 TCGRegSet allocated_regs = s->reserved_regs; 3829 3830 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3831 /* Make sure to not spill the input register. 
             */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts, the low-order element sits at the high
           end of the slot; offset the load accordingly. */
#ifdef HOST_WORDS_BIGENDIAN
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* No dup-from-memory: load the scalar into the vector register
           and dup it register-to-register below. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
 */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Register allocation for a generic TCG opcode: satisfy the input
 * constraints, allocate the outputs, emit the target instruction,
 * then sync or free the outputs per the liveness data.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Process inputs in the constraint-defined order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ?
             */
            /* Spill every caller-saved register before the op. */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output aliased to an input: reuse that input's register. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* A 'new' output must not overlap any input register. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

/*
 * Specialized code generation for INDEX_op_dup2_vec.  Returns true if
 * the op was lowered here, false to fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* vector output */
    itsl = arg_temp(op->args[1]);   /* low 32-bit half */
    itsh = arg_temp(op->args[2]);   /* high 32-bit half */

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec.
     */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Combine the two 32-bit halves into one 64-bit constant. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Find the minimal element size that replicates to 'val'. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        /* Both halves must be in memory for the combined load. */
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
        /* The 64-bit value starts at the host-endian low address. */
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion.
     */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

/* Direction in which outgoing call arguments grow on the stack. */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

/*
 * Register allocation for a helper call: place arguments in registers
 * and stack slots per the host calling convention, spill caller-saved
 * registers, emit the call, and bind the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The callee address and flags follow the in/out arguments. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* nb_regs arguments go in registers, the rest on the stack. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg,
                       TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever occupies the argument register,
                       then move the value into place. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Load the value directly into the required argument
                   register via a single-register set. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read.
       */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        /* Return values arrive in the fixed call-output registers. */
        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    /* Accumulate the per-context profiles into @prof. */
    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof,
                     orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

/* Snapshot only the scalar counters. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

/* Snapshot only the per-opcode counts. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

/* Print the cumulative per-opcode counts for all TCG contexts. */
void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

/* Sum cpu_exec_time over all TCG contexts. */
int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


/*
 * Generate host code for the op list of @s into @tb's buffer.
 * Returns the number of bytes emitted, -1 if the code-buffer high-water
 * mark was hit, or -2 on TB-size/relocation overflow; negative returns
 * let the caller restart translation with a fresh buffer.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        /* Count ops and temps of this TB for the profiler. */
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.
         */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* -1 until the first insn_start op is seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                /* Close out the previous guest instruction. */
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.
                 */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Start-word values are split across two 32-bit args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.
         */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ?
s->tb_count1 : 1) * 100.0); 4677 qemu_printf("avg ops/TB %0.1f max=%d\n", 4678 (double)s->op_count / tb_div_count, s->op_count_max); 4679 qemu_printf("deleted ops/TB %0.2f\n", 4680 (double)s->del_op_count / tb_div_count); 4681 qemu_printf("avg temps/TB %0.2f max=%d\n", 4682 (double)s->temp_count / tb_div_count, s->temp_count_max); 4683 qemu_printf("avg host code/TB %0.1f\n", 4684 (double)s->code_out_len / tb_div_count); 4685 qemu_printf("avg search data/TB %0.1f\n", 4686 (double)s->search_out_len / tb_div_count); 4687 4688 qemu_printf("cycles/op %0.1f\n", 4689 s->op_count ? (double)tot / s->op_count : 0); 4690 qemu_printf("cycles/in byte %0.1f\n", 4691 s->code_in_len ? (double)tot / s->code_in_len : 0); 4692 qemu_printf("cycles/out byte %0.1f\n", 4693 s->code_out_len ? (double)tot / s->code_out_len : 0); 4694 qemu_printf("cycles/search byte %0.1f\n", 4695 s->search_out_len ? (double)tot / s->search_out_len : 0); 4696 if (tot == 0) { 4697 tot = 1; 4698 } 4699 qemu_printf(" gen_interm time %0.1f%%\n", 4700 (double)s->interm_time / tot * 100.0); 4701 qemu_printf(" gen_code time %0.1f%%\n", 4702 (double)s->code_time / tot * 100.0); 4703 qemu_printf("optim./code time %0.1f%%\n", 4704 (double)s->opt_time / (s->code_time ? s->code_time : 1) 4705 * 100.0); 4706 qemu_printf("liveness/code time %0.1f%%\n", 4707 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 4708 qemu_printf("cpu_restore count %" PRId64 "\n", 4709 s->restore_count); 4710 qemu_printf(" avg cycles %0.1f\n", 4711 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 4712 } 4713 #else 4714 void tcg_dump_info(void) 4715 { 4716 qemu_printf("[TCG profiler not compiled]\n"); 4717 } 4718 #endif 4719 4720 #ifdef ELF_HOST_MACHINE 4721 /* In order to use this feature, the backend needs to do three things: 4722 4723 (1) Define ELF_HOST_MACHINE to indicate both what value to 4724 put into the ELF image and to indicate support for the feature. 4725 4726 (2) Define tcg_register_jit. 
   This should create a buffer containing
   the contents of a .debug_frame section that describes the post-
   prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Actions GDB expects to find in jit_descriptor.action_flag when its
   registration breakpoint fires.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file (here: our fake ELF image).
   Entries form a doubly-linked list rooted at the descriptor below.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image, in bytes */
};

/* Root descriptor; GDB locates it by the symbol __jit_debug_descriptor.  */
struct jit_descriptor {
    uint32_t version;           /* interface version; must be 1 */
    uint32_t action_flag;       /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};

/* GDB sets a breakpoint in this function; calling it after updating
   __jit_debug_descriptor notifies the debugger of the change.  The
   noinline attribute plus the empty asm keep the compiler from
   inlining or eliding the (observable) call.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.
*/ 4765 4766 static int find_string(const char *strtab, const char *str) 4767 { 4768 const char *p = strtab + 1; 4769 4770 while (1) { 4771 if (strcmp(p, str) == 0) { 4772 return p - strtab; 4773 } 4774 p += strlen(p) + 1; 4775 } 4776 } 4777 4778 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 4779 const void *debug_frame, 4780 size_t debug_frame_size) 4781 { 4782 struct __attribute__((packed)) DebugInfo { 4783 uint32_t len; 4784 uint16_t version; 4785 uint32_t abbrev; 4786 uint8_t ptr_size; 4787 uint8_t cu_die; 4788 uint16_t cu_lang; 4789 uintptr_t cu_low_pc; 4790 uintptr_t cu_high_pc; 4791 uint8_t fn_die; 4792 char fn_name[16]; 4793 uintptr_t fn_low_pc; 4794 uintptr_t fn_high_pc; 4795 uint8_t cu_eoc; 4796 }; 4797 4798 struct ElfImage { 4799 ElfW(Ehdr) ehdr; 4800 ElfW(Phdr) phdr; 4801 ElfW(Shdr) shdr[7]; 4802 ElfW(Sym) sym[2]; 4803 struct DebugInfo di; 4804 uint8_t da[24]; 4805 char str[80]; 4806 }; 4807 4808 struct ElfImage *img; 4809 4810 static const struct ElfImage img_template = { 4811 .ehdr = { 4812 .e_ident[EI_MAG0] = ELFMAG0, 4813 .e_ident[EI_MAG1] = ELFMAG1, 4814 .e_ident[EI_MAG2] = ELFMAG2, 4815 .e_ident[EI_MAG3] = ELFMAG3, 4816 .e_ident[EI_CLASS] = ELF_CLASS, 4817 .e_ident[EI_DATA] = ELF_DATA, 4818 .e_ident[EI_VERSION] = EV_CURRENT, 4819 .e_type = ET_EXEC, 4820 .e_machine = ELF_HOST_MACHINE, 4821 .e_version = EV_CURRENT, 4822 .e_phoff = offsetof(struct ElfImage, phdr), 4823 .e_shoff = offsetof(struct ElfImage, shdr), 4824 .e_ehsize = sizeof(ElfW(Shdr)), 4825 .e_phentsize = sizeof(ElfW(Phdr)), 4826 .e_phnum = 1, 4827 .e_shentsize = sizeof(ElfW(Shdr)), 4828 .e_shnum = ARRAY_SIZE(img->shdr), 4829 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 4830 #ifdef ELF_HOST_FLAGS 4831 .e_flags = ELF_HOST_FLAGS, 4832 #endif 4833 #ifdef ELF_OSABI 4834 .e_ident[EI_OSABI] = ELF_OSABI, 4835 #endif 4836 }, 4837 .phdr = { 4838 .p_type = PT_LOAD, 4839 .p_flags = PF_X, 4840 }, 4841 .shdr = { 4842 [0] = { .sh_type = SHT_NULL }, 4843 /* Trick: The contents of 
code_gen_buffer are not present in 4844 this fake ELF file; that got allocated elsewhere. Therefore 4845 we mark .text as SHT_NOBITS (similar to .bss) so that readers 4846 will not look for contents. We can record any address. */ 4847 [1] = { /* .text */ 4848 .sh_type = SHT_NOBITS, 4849 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 4850 }, 4851 [2] = { /* .debug_info */ 4852 .sh_type = SHT_PROGBITS, 4853 .sh_offset = offsetof(struct ElfImage, di), 4854 .sh_size = sizeof(struct DebugInfo), 4855 }, 4856 [3] = { /* .debug_abbrev */ 4857 .sh_type = SHT_PROGBITS, 4858 .sh_offset = offsetof(struct ElfImage, da), 4859 .sh_size = sizeof(img->da), 4860 }, 4861 [4] = { /* .debug_frame */ 4862 .sh_type = SHT_PROGBITS, 4863 .sh_offset = sizeof(struct ElfImage), 4864 }, 4865 [5] = { /* .symtab */ 4866 .sh_type = SHT_SYMTAB, 4867 .sh_offset = offsetof(struct ElfImage, sym), 4868 .sh_size = sizeof(img->sym), 4869 .sh_info = 1, 4870 .sh_link = ARRAY_SIZE(img->shdr) - 1, 4871 .sh_entsize = sizeof(ElfW(Sym)), 4872 }, 4873 [6] = { /* .strtab */ 4874 .sh_type = SHT_STRTAB, 4875 .sh_offset = offsetof(struct ElfImage, str), 4876 .sh_size = sizeof(img->str), 4877 } 4878 }, 4879 .sym = { 4880 [1] = { /* code_gen_buffer */ 4881 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 4882 .st_shndx = 1, 4883 } 4884 }, 4885 .di = { 4886 .len = sizeof(struct DebugInfo) - 4, 4887 .version = 2, 4888 .ptr_size = sizeof(void *), 4889 .cu_die = 1, 4890 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 4891 .fn_die = 2, 4892 .fn_name = "code_gen_buffer" 4893 }, 4894 .da = { 4895 1, /* abbrev number (the cu) */ 4896 0x11, 1, /* DW_TAG_compile_unit, has children */ 4897 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 4898 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 4899 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4900 0, 0, /* end of abbrev */ 4901 2, /* abbrev number (the fn) */ 4902 0x2e, 0, /* DW_TAG_subprogram, no children */ 4903 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 4904 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 4905 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4906 0, 0, /* end of abbrev */ 4907 0 /* no more abbrev */ 4908 }, 4909 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 4910 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 4911 }; 4912 4913 /* We only need a single jit entry; statically allocate it. */ 4914 static struct jit_code_entry one_entry; 4915 4916 uintptr_t buf = (uintptr_t)buf_ptr; 4917 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 4918 DebugFrameHeader *dfh; 4919 4920 img = g_malloc(img_size); 4921 *img = img_template; 4922 4923 img->phdr.p_vaddr = buf; 4924 img->phdr.p_paddr = buf; 4925 img->phdr.p_memsz = buf_size; 4926 4927 img->shdr[1].sh_name = find_string(img->str, ".text"); 4928 img->shdr[1].sh_addr = buf; 4929 img->shdr[1].sh_size = buf_size; 4930 4931 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 4932 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 4933 4934 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 4935 img->shdr[4].sh_size = debug_frame_size; 4936 4937 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 4938 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 4939 4940 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 4941 img->sym[1].st_value = buf; 4942 img->sym[1].st_size = buf_size; 4943 4944 img->di.cu_low_pc = buf; 4945 img->di.cu_high_pc = buf + buf_size; 4946 img->di.fn_low_pc = buf; 4947 img->di.fn_high_pc = buf + buf_size; 4948 4949 dfh = (DebugFrameHeader *)(img + 1); 4950 memcpy(dfh, debug_frame, debug_frame_size); 4951 dfh->fde.func_start = buf; 4952 dfh->fde.func_len = buf_size; 4953 4954 #ifdef DEBUG_JIT 4955 /* Enable this block to be able to debug the ELF image file creation. 4956 One can use readelf, objdump, or other inspection utilities. 
*/ 4957 { 4958 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 4959 if (f) { 4960 if (fwrite(img, img_size, 1, f) != img_size) { 4961 /* Avoid stupid unused return value warning for fwrite. */ 4962 } 4963 fclose(f); 4964 } 4965 } 4966 #endif 4967 4968 one_entry.symfile_addr = img; 4969 one_entry.symfile_size = img_size; 4970 4971 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 4972 __jit_debug_descriptor.relevant_entry = &one_entry; 4973 __jit_debug_descriptor.first_entry = &one_entry; 4974 __jit_debug_register_code(); 4975 } 4976 #else 4977 /* No support for the feature. Provide the entry point expected by exec.c, 4978 and implement the internal function we declared earlier. */ 4979 4980 static void tcg_register_jit_int(const void *buf, size_t size, 4981 const void *debug_frame, 4982 size_t debug_frame_size) 4983 { 4984 } 4985 4986 void tcg_register_jit(const void *buf, size_t buf_size) 4987 { 4988 } 4989 #endif /* ELF_HOST_MACHINE */ 4990 4991 #if !TCG_TARGET_MAYBE_vec 4992 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 4993 { 4994 g_assert_not_reached(); 4995 } 4996 #endif 4997