/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc.
*/ 106 static const char *target_parse_constraint(TCGArgConstraint *ct, 107 const char *ct_str, TCGType type); 108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 109 intptr_t arg2); 110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 111 static void tcg_out_movi(TCGContext *s, TCGType type, 112 TCGReg ret, tcg_target_long arg); 113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 114 const int *const_args); 115 #if TCG_TARGET_MAYBE_vec 116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 117 TCGReg dst, TCGReg src); 118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 119 TCGReg dst, TCGReg base, intptr_t offset); 120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 121 TCGReg dst, int64_t arg); 122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 123 unsigned vece, const TCGArg *args, 124 const int *const_args); 125 #else 126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 127 TCGReg dst, TCGReg src) 128 { 129 g_assert_not_reached(); 130 } 131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 132 TCGReg dst, TCGReg base, intptr_t offset) 133 { 134 g_assert_not_reached(); 135 } 136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 137 TCGReg dst, int64_t arg) 138 { 139 g_assert_not_reached(); 140 } 141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, 142 unsigned vece, const TCGArg *args, 143 const int *const_args) 144 { 145 g_assert_not_reached(); 146 } 147 #endif 148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 149 intptr_t arg2); 150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 151 TCGReg base, intptr_t ofs); 152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); 153 static int tcg_target_const_match(tcg_target_long val, TCGType type, 154 const TCGArgConstraint *arg_ct); 155 #ifdef TCG_TARGET_NEED_LDST_LABELS 156 static int tcg_out_ldst_finalize(TCGContext *s); 157 #endif 158 159 #define TCG_HIGHWATER 1024 160 161 static TCGContext **tcg_ctxs; 162 static unsigned int n_tcg_ctxs; 163 TCGv_env cpu_env = 0; 164 const void *tcg_code_gen_epilogue; 165 uintptr_t tcg_splitwx_diff; 166 167 #ifndef CONFIG_TCG_INTERPRETER 168 tcg_prologue_fn *tcg_qemu_tb_exec; 169 #endif 170 171 struct tcg_region_tree { 172 QemuMutex lock; 173 GTree *tree; 174 /* padding to avoid false sharing is computed at run-time */ 175 }; 176 177 /* 178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 179 * dynamically allocate from as demand dictates. Given appropriate region 180 * sizing, this minimizes flushes even when some TCG threads generate a lot 181 * more code than others. 182 */ 183 struct tcg_region_state { 184 QemuMutex lock; 185 186 /* fields set at init time */ 187 void *start; 188 void *start_aligned; 189 void *end; 190 size_t n; 191 size_t size; /* size of one region */ 192 size_t stride; /* .size + guard size */ 193 194 /* fields protected by the lock */ 195 size_t current; /* current region index */ 196 size_t agg_size_full; /* aggregate size of full regions */ 197 }; 198 199 static struct tcg_region_state region; 200 /* 201 * This is an array of struct tcg_region_tree's, with padding. 202 * We use void * to simplify the computation of region_trees[i]; each 203 * struct is found every tree_size bytes. 
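 *
 * Keeping one tree (and one lock) per region lets TB insertions, removals
 * and lookups that touch different regions proceed without contending on a
 * single global lock.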
204 */ 205 static void *region_trees; 206 static size_t tree_size; 207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 208 static TCGRegSet tcg_target_call_clobber_regs; 209 210 #if TCG_TARGET_INSN_UNIT_SIZE == 1 211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 212 { 213 *s->code_ptr++ = v; 214 } 215 216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 217 uint8_t v) 218 { 219 *p = v; 220 } 221 #endif 222 223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 225 { 226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 227 *s->code_ptr++ = v; 228 } else { 229 tcg_insn_unit *p = s->code_ptr; 230 memcpy(p, &v, sizeof(v)); 231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 232 } 233 } 234 235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 236 uint16_t v) 237 { 238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 239 *p = v; 240 } else { 241 memcpy(p, &v, sizeof(v)); 242 } 243 } 244 #endif 245 246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 248 { 249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 250 *s->code_ptr++ = v; 251 } else { 252 tcg_insn_unit *p = s->code_ptr; 253 memcpy(p, &v, sizeof(v)); 254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 255 } 256 } 257 258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 259 uint32_t v) 260 { 261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 262 *p = v; 263 } else { 264 memcpy(p, &v, sizeof(v)); 265 } 266 } 267 #endif 268 269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 271 { 272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 273 *s->code_ptr++ = v; 274 } else { 275 tcg_insn_unit *p = s->code_ptr; 276 memcpy(p, &v, sizeof(v)); 277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 278 } 279 } 280 281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 282 uint64_t v) 283 { 284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 285 *p = v; 286 } else { 287 memcpy(p, &v, sizeof(v)); 288 } 289 } 290 #endif 291 292 /* label relocation processing */ 293 294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 295 TCGLabel *l, intptr_t addend) 296 { 297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 298 299 r->type = type; 300 r->ptr = code_ptr; 301 r->addend = addend; 302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 303 } 304 305 static void tcg_out_label(TCGContext *s, TCGLabel *l) 306 { 307 tcg_debug_assert(!l->has_value); 308 l->has_value = 1; 309 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 310 } 311 312 TCGLabel *gen_new_label(void) 313 { 314 TCGContext *s = tcg_ctx; 315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 316 317 memset(l, 0, sizeof(TCGLabel)); 318 l->id = s->nb_labels++; 319 QSIMPLEQ_INIT(&l->relocs); 320 321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 322 323 return l; 324 } 325 326 static bool tcg_resolve_relocs(TCGContext *s) 327 { 328 TCGLabel *l; 329 330 QSIMPLEQ_FOREACH(l, &s->labels, next) { 331 TCGRelocation *r; 332 uintptr_t value = l->u.value; 333 334 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 336 return false; 337 } 338 } 339 } 340 return true; 341 } 342 343 static void set_jmp_reset_offset(TCGContext *s, int which) 344 { 345 /* 346 * We will check for overflow at the end of the opcode loop in 347 * tcg_gen_code, where we bound 
tcg_current_code_size to UINT16_MAX. 348 */ 349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); 350 } 351 352 #include "tcg-target.c.inc" 353 354 /* compare a pointer @ptr and a tb_tc @s */ 355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) 356 { 357 if (ptr >= s->ptr + s->size) { 358 return 1; 359 } else if (ptr < s->ptr) { 360 return -1; 361 } 362 return 0; 363 } 364 365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp) 366 { 367 const struct tb_tc *a = ap; 368 const struct tb_tc *b = bp; 369 370 /* 371 * When both sizes are set, we know this isn't a lookup. 372 * This is the most likely case: every TB must be inserted; lookups 373 * are a lot less frequent. 374 */ 375 if (likely(a->size && b->size)) { 376 if (a->ptr > b->ptr) { 377 return 1; 378 } else if (a->ptr < b->ptr) { 379 return -1; 380 } 381 /* a->ptr == b->ptr should happen only on deletions */ 382 g_assert(a->size == b->size); 383 return 0; 384 } 385 /* 386 * All lookups have either .size field set to 0. 387 * From the glib sources we see that @ap is always the lookup key. However 388 * the docs provide no guarantee, so we just mark this case as likely. 389 */ 390 if (likely(a->size == 0)) { 391 return ptr_cmp_tb_tc(a->ptr, b); 392 } 393 return ptr_cmp_tb_tc(b->ptr, a); 394 } 395 396 static void tcg_region_trees_init(void) 397 { 398 size_t i; 399 400 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize); 401 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size); 402 for (i = 0; i < region.n; i++) { 403 struct tcg_region_tree *rt = region_trees + i * tree_size; 404 405 qemu_mutex_init(&rt->lock); 406 rt->tree = g_tree_new(tb_tc_cmp); 407 } 408 } 409 410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp) 411 { 412 void *p = tcg_splitwx_to_rw(cp); 413 size_t region_idx; 414 415 if (p < region.start_aligned) { 416 region_idx = 0; 417 } else { 418 ptrdiff_t offset = p - region.start_aligned; 419 420 if (offset > region.stride * (region.n - 1)) { 421 region_idx = region.n - 1; 422 } else { 423 region_idx = offset / region.stride; 424 } 425 } 426 return region_trees + region_idx * tree_size; 427 } 428 429 void tcg_tb_insert(TranslationBlock *tb) 430 { 431 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 432 433 qemu_mutex_lock(&rt->lock); 434 g_tree_insert(rt->tree, &tb->tc, tb); 435 qemu_mutex_unlock(&rt->lock); 436 } 437 438 void tcg_tb_remove(TranslationBlock *tb) 439 { 440 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 441 442 qemu_mutex_lock(&rt->lock); 443 g_tree_remove(rt->tree, &tb->tc); 444 qemu_mutex_unlock(&rt->lock); 445 } 446 447 /* 448 * Find the TB 'tb' such that 449 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size 450 * Return NULL if not found. 
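 * The lookup key below has .size == 0, so tb_tc_cmp (above) falls back to
 * ptr_cmp_tb_tc() and effectively tests tc_ptr against the range
 * [tb->tc.ptr, tb->tc.ptr + tb->tc.size).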
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
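 *
 * The resulting code_gen_buffer layout is roughly:
 *
 *   start  start_aligned
 *   |      | region 0 |guard| region 1 |guard| ... | region n-1 |guard|
 *
 * where the first region also covers the unaligned bytes before
 * start_aligned, and the last region absorbs any leftover pages before the
 * final guard page.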
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g.
 * tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
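 * Guard pages and each region's TCG_HIGHWATER slack are not counted as
 * usable capacity.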
881 * See also: tcg_code_size() 882 */ 883 size_t tcg_code_capacity(void) 884 { 885 size_t guard_size, capacity; 886 887 /* no need for synchronization; these variables are set at init time */ 888 guard_size = region.stride - region.size; 889 capacity = region.end + guard_size - region.start; 890 capacity -= region.n * (guard_size + TCG_HIGHWATER); 891 return capacity; 892 } 893 894 size_t tcg_tb_phys_invalidate_count(void) 895 { 896 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 897 unsigned int i; 898 size_t total = 0; 899 900 for (i = 0; i < n_ctxs; i++) { 901 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 902 903 total += qatomic_read(&s->tb_phys_invalidate_count); 904 } 905 return total; 906 } 907 908 /* pool based memory allocation */ 909 void *tcg_malloc_internal(TCGContext *s, int size) 910 { 911 TCGPool *p; 912 int pool_size; 913 914 if (size > TCG_POOL_CHUNK_SIZE) { 915 /* big malloc: insert a new pool (XXX: could optimize) */ 916 p = g_malloc(sizeof(TCGPool) + size); 917 p->size = size; 918 p->next = s->pool_first_large; 919 s->pool_first_large = p; 920 return p->data; 921 } else { 922 p = s->pool_current; 923 if (!p) { 924 p = s->pool_first; 925 if (!p) 926 goto new_pool; 927 } else { 928 if (!p->next) { 929 new_pool: 930 pool_size = TCG_POOL_CHUNK_SIZE; 931 p = g_malloc(sizeof(TCGPool) + pool_size); 932 p->size = pool_size; 933 p->next = NULL; 934 if (s->pool_current) 935 s->pool_current->next = p; 936 else 937 s->pool_first = p; 938 } else { 939 p = p->next; 940 } 941 } 942 } 943 s->pool_current = p; 944 s->pool_cur = p->data + size; 945 s->pool_end = p->data + p->size; 946 return p->data; 947 } 948 949 void tcg_pool_reset(TCGContext *s) 950 { 951 TCGPool *p, *t; 952 for (p = s->pool_first_large; p; p = t) { 953 t = p->next; 954 g_free(p); 955 } 956 s->pool_first_large = NULL; 957 s->pool_cur = s->pool_end = NULL; 958 s->pool_current = NULL; 959 } 960 961 typedef struct TCGHelperInfo { 962 void *func; 963 const char *name; 964 unsigned flags; 965 unsigned sizemask; 966 } TCGHelperInfo; 967 968 #include "exec/helper-proto.h" 969 970 static const TCGHelperInfo all_helpers[] = { 971 #include "exec/helper-tcg.h" 972 }; 973 static GHashTable *helper_table; 974 975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 976 static void process_op_defs(TCGContext *s); 977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 978 TCGReg reg, const char *name); 979 980 void tcg_context_init(TCGContext *s) 981 { 982 int op, total_args, n, i; 983 TCGOpDef *def; 984 TCGArgConstraint *args_ct; 985 TCGTemp *ts; 986 987 memset(s, 0, sizeof(*s)); 988 s->nb_globals = 0; 989 990 /* Count total number of arguments and allocate the corresponding 991 space */ 992 total_args = 0; 993 for(op = 0; op < NB_OPS; op++) { 994 def = &tcg_op_defs[op]; 995 n = def->nb_iargs + def->nb_oargs; 996 total_args += n; 997 } 998 999 args_ct = g_new0(TCGArgConstraint, total_args); 1000 1001 for(op = 0; op < NB_OPS; op++) { 1002 def = &tcg_op_defs[op]; 1003 def->args_ct = args_ct; 1004 n = def->nb_iargs + def->nb_oargs; 1005 args_ct += n; 1006 } 1007 1008 /* Register helpers. */ 1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. 
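     * Passing NULL for both callbacks selects g_direct_hash/g_direct_equal
     * in GLib, so the helper's function pointer itself is the key.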
     */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer. */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over. The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require. */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.
*/ 1121 { 1122 int result = tcg_out_pool_finalize(s); 1123 tcg_debug_assert(result == 0); 1124 } 1125 #endif 1126 1127 buf1 = s->code_ptr; 1128 #ifndef CONFIG_TCG_INTERPRETER 1129 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0, 1130 tcg_ptr_byte_diff(buf1, buf0)); 1131 #endif 1132 1133 /* Deduct the prologue from the buffer. */ 1134 prologue_size = tcg_current_code_size(s); 1135 s->code_gen_ptr = buf1; 1136 s->code_gen_buffer = buf1; 1137 s->code_buf = buf1; 1138 total_size -= prologue_size; 1139 s->code_gen_buffer_size = total_size; 1140 1141 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size); 1142 1143 #ifdef DEBUG_DISAS 1144 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1145 FILE *logfile = qemu_log_lock(); 1146 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 1147 if (s->data_gen_ptr) { 1148 size_t code_size = s->data_gen_ptr - buf0; 1149 size_t data_size = prologue_size - code_size; 1150 size_t i; 1151 1152 log_disas(buf0, code_size); 1153 1154 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1155 if (sizeof(tcg_target_ulong) == 8) { 1156 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1157 (uintptr_t)s->data_gen_ptr + i, 1158 *(uint64_t *)(s->data_gen_ptr + i)); 1159 } else { 1160 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 1161 (uintptr_t)s->data_gen_ptr + i, 1162 *(uint32_t *)(s->data_gen_ptr + i)); 1163 } 1164 } 1165 } else { 1166 log_disas(buf0, prologue_size); 1167 } 1168 qemu_log("\n"); 1169 qemu_log_flush(); 1170 qemu_log_unlock(logfile); 1171 } 1172 #endif 1173 1174 /* Assert that goto_ptr is implemented completely. */ 1175 if (TCG_TARGET_HAS_goto_ptr) { 1176 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1177 } 1178 } 1179 1180 void tcg_func_start(TCGContext *s) 1181 { 1182 tcg_pool_reset(s); 1183 s->nb_temps = s->nb_globals; 1184 1185 /* No temps have been previously allocated for size or locality. */ 1186 memset(s->free_temps, 0, sizeof(s->free_temps)); 1187 1188 /* No constant temps have been previously allocated. */ 1189 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1190 if (s->const_table[i]) { 1191 g_hash_table_remove_all(s->const_table[i]); 1192 } 1193 } 1194 1195 s->nb_ops = 0; 1196 s->nb_labels = 0; 1197 s->current_frame_offset = s->frame_start; 1198 1199 #ifdef CONFIG_DEBUG_TCG 1200 s->goto_tb_issue_mask = 0; 1201 #endif 1202 1203 QTAILQ_INIT(&s->ops); 1204 QTAILQ_INIT(&s->free_ops); 1205 QSIMPLEQ_INIT(&s->labels); 1206 } 1207 1208 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1209 { 1210 int n = s->nb_temps++; 1211 1212 if (n >= TCG_MAX_TEMPS) { 1213 /* Signal overflow, starting over with fewer guest insns. 
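         * The -2 value is expected to be caught by the sigsetjmp on
         * s->jmp_trans set up by the code driving translation, which then
         * retries with a smaller instruction budget.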
*/ 1214 siglongjmp(s->jmp_trans, -2); 1215 } 1216 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1217 } 1218 1219 static TCGTemp *tcg_global_alloc(TCGContext *s) 1220 { 1221 TCGTemp *ts; 1222 1223 tcg_debug_assert(s->nb_globals == s->nb_temps); 1224 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1225 s->nb_globals++; 1226 ts = tcg_temp_alloc(s); 1227 ts->kind = TEMP_GLOBAL; 1228 1229 return ts; 1230 } 1231 1232 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1233 TCGReg reg, const char *name) 1234 { 1235 TCGTemp *ts; 1236 1237 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1238 tcg_abort(); 1239 } 1240 1241 ts = tcg_global_alloc(s); 1242 ts->base_type = type; 1243 ts->type = type; 1244 ts->kind = TEMP_FIXED; 1245 ts->reg = reg; 1246 ts->name = name; 1247 tcg_regset_set_reg(s->reserved_regs, reg); 1248 1249 return ts; 1250 } 1251 1252 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1253 { 1254 s->frame_start = start; 1255 s->frame_end = start + size; 1256 s->frame_temp 1257 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1258 } 1259 1260 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1261 intptr_t offset, const char *name) 1262 { 1263 TCGContext *s = tcg_ctx; 1264 TCGTemp *base_ts = tcgv_ptr_temp(base); 1265 TCGTemp *ts = tcg_global_alloc(s); 1266 int indirect_reg = 0, bigendian = 0; 1267 #ifdef HOST_WORDS_BIGENDIAN 1268 bigendian = 1; 1269 #endif 1270 1271 switch (base_ts->kind) { 1272 case TEMP_FIXED: 1273 break; 1274 case TEMP_GLOBAL: 1275 /* We do not support double-indirect registers. */ 1276 tcg_debug_assert(!base_ts->indirect_reg); 1277 base_ts->indirect_base = 1; 1278 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1279 ? 2 : 1); 1280 indirect_reg = 1; 1281 break; 1282 default: 1283 g_assert_not_reached(); 1284 } 1285 1286 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1287 TCGTemp *ts2 = tcg_global_alloc(s); 1288 char buf[64]; 1289 1290 ts->base_type = TCG_TYPE_I64; 1291 ts->type = TCG_TYPE_I32; 1292 ts->indirect_reg = indirect_reg; 1293 ts->mem_allocated = 1; 1294 ts->mem_base = base_ts; 1295 ts->mem_offset = offset + bigendian * 4; 1296 pstrcpy(buf, sizeof(buf), name); 1297 pstrcat(buf, sizeof(buf), "_0"); 1298 ts->name = strdup(buf); 1299 1300 tcg_debug_assert(ts2 == ts + 1); 1301 ts2->base_type = TCG_TYPE_I64; 1302 ts2->type = TCG_TYPE_I32; 1303 ts2->indirect_reg = indirect_reg; 1304 ts2->mem_allocated = 1; 1305 ts2->mem_base = base_ts; 1306 ts2->mem_offset = offset + (1 - bigendian) * 4; 1307 pstrcpy(buf, sizeof(buf), name); 1308 pstrcat(buf, sizeof(buf), "_1"); 1309 ts2->name = strdup(buf); 1310 } else { 1311 ts->base_type = type; 1312 ts->type = type; 1313 ts->indirect_reg = indirect_reg; 1314 ts->mem_allocated = 1; 1315 ts->mem_base = base_ts; 1316 ts->mem_offset = offset; 1317 ts->name = name; 1318 } 1319 return ts; 1320 } 1321 1322 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) 1323 { 1324 TCGContext *s = tcg_ctx; 1325 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL; 1326 TCGTemp *ts; 1327 int idx, k; 1328 1329 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 1330 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 1331 if (idx < TCG_MAX_TEMPS) { 1332 /* There is already an available temp with the right type. 
*/ 1333 clear_bit(idx, s->free_temps[k].l); 1334 1335 ts = &s->temps[idx]; 1336 ts->temp_allocated = 1; 1337 tcg_debug_assert(ts->base_type == type); 1338 tcg_debug_assert(ts->kind == kind); 1339 } else { 1340 ts = tcg_temp_alloc(s); 1341 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1342 TCGTemp *ts2 = tcg_temp_alloc(s); 1343 1344 ts->base_type = type; 1345 ts->type = TCG_TYPE_I32; 1346 ts->temp_allocated = 1; 1347 ts->kind = kind; 1348 1349 tcg_debug_assert(ts2 == ts + 1); 1350 ts2->base_type = TCG_TYPE_I64; 1351 ts2->type = TCG_TYPE_I32; 1352 ts2->temp_allocated = 1; 1353 ts2->kind = kind; 1354 } else { 1355 ts->base_type = type; 1356 ts->type = type; 1357 ts->temp_allocated = 1; 1358 ts->kind = kind; 1359 } 1360 } 1361 1362 #if defined(CONFIG_DEBUG_TCG) 1363 s->temps_in_use++; 1364 #endif 1365 return ts; 1366 } 1367 1368 TCGv_vec tcg_temp_new_vec(TCGType type) 1369 { 1370 TCGTemp *t; 1371 1372 #ifdef CONFIG_DEBUG_TCG 1373 switch (type) { 1374 case TCG_TYPE_V64: 1375 assert(TCG_TARGET_HAS_v64); 1376 break; 1377 case TCG_TYPE_V128: 1378 assert(TCG_TARGET_HAS_v128); 1379 break; 1380 case TCG_TYPE_V256: 1381 assert(TCG_TARGET_HAS_v256); 1382 break; 1383 default: 1384 g_assert_not_reached(); 1385 } 1386 #endif 1387 1388 t = tcg_temp_new_internal(type, 0); 1389 return temp_tcgv_vec(t); 1390 } 1391 1392 /* Create a new temp of the same type as an existing temp. */ 1393 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1394 { 1395 TCGTemp *t = tcgv_vec_temp(match); 1396 1397 tcg_debug_assert(t->temp_allocated != 0); 1398 1399 t = tcg_temp_new_internal(t->base_type, 0); 1400 return temp_tcgv_vec(t); 1401 } 1402 1403 void tcg_temp_free_internal(TCGTemp *ts) 1404 { 1405 TCGContext *s = tcg_ctx; 1406 int k, idx; 1407 1408 /* In order to simplify users of tcg_constant_*, silently ignore free. */ 1409 if (ts->kind == TEMP_CONST) { 1410 return; 1411 } 1412 1413 #if defined(CONFIG_DEBUG_TCG) 1414 s->temps_in_use--; 1415 if (s->temps_in_use < 0) { 1416 fprintf(stderr, "More temporaries freed than allocated!\n"); 1417 } 1418 #endif 1419 1420 tcg_debug_assert(ts->kind < TEMP_GLOBAL); 1421 tcg_debug_assert(ts->temp_allocated != 0); 1422 ts->temp_allocated = 0; 1423 1424 idx = temp_idx(ts); 1425 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); 1426 set_bit(idx, s->free_temps[k].l); 1427 } 1428 1429 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 1430 { 1431 TCGContext *s = tcg_ctx; 1432 GHashTable *h = s->const_table[type]; 1433 TCGTemp *ts; 1434 1435 if (h == NULL) { 1436 h = g_hash_table_new(g_int64_hash, g_int64_equal); 1437 s->const_table[type] = h; 1438 } 1439 1440 ts = g_hash_table_lookup(h, &val); 1441 if (ts == NULL) { 1442 ts = tcg_temp_alloc(s); 1443 1444 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1445 TCGTemp *ts2 = tcg_temp_alloc(s); 1446 1447 ts->base_type = TCG_TYPE_I64; 1448 ts->type = TCG_TYPE_I32; 1449 ts->kind = TEMP_CONST; 1450 ts->temp_allocated = 1; 1451 /* 1452 * Retain the full value of the 64-bit constant in the low 1453 * part, so that the hash table works. Actual uses will 1454 * truncate the value to the low part. 
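             * E.g. for val = 0x100000002 the pair is ts->val = the full
             * 64-bit value (the hash key) and ts2->val = val >> 32 = 0x1.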
1455 */ 1456 ts->val = val; 1457 1458 tcg_debug_assert(ts2 == ts + 1); 1459 ts2->base_type = TCG_TYPE_I64; 1460 ts2->type = TCG_TYPE_I32; 1461 ts2->kind = TEMP_CONST; 1462 ts2->temp_allocated = 1; 1463 ts2->val = val >> 32; 1464 } else { 1465 ts->base_type = type; 1466 ts->type = type; 1467 ts->kind = TEMP_CONST; 1468 ts->temp_allocated = 1; 1469 ts->val = val; 1470 } 1471 g_hash_table_insert(h, &ts->val, ts); 1472 } 1473 1474 return ts; 1475 } 1476 1477 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 1478 { 1479 val = dup_const(vece, val); 1480 return temp_tcgv_vec(tcg_constant_internal(type, val)); 1481 } 1482 1483 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 1484 { 1485 TCGTemp *t = tcgv_vec_temp(match); 1486 1487 tcg_debug_assert(t->temp_allocated != 0); 1488 return tcg_constant_vec(t->base_type, vece, val); 1489 } 1490 1491 TCGv_i32 tcg_const_i32(int32_t val) 1492 { 1493 TCGv_i32 t0; 1494 t0 = tcg_temp_new_i32(); 1495 tcg_gen_movi_i32(t0, val); 1496 return t0; 1497 } 1498 1499 TCGv_i64 tcg_const_i64(int64_t val) 1500 { 1501 TCGv_i64 t0; 1502 t0 = tcg_temp_new_i64(); 1503 tcg_gen_movi_i64(t0, val); 1504 return t0; 1505 } 1506 1507 TCGv_i32 tcg_const_local_i32(int32_t val) 1508 { 1509 TCGv_i32 t0; 1510 t0 = tcg_temp_local_new_i32(); 1511 tcg_gen_movi_i32(t0, val); 1512 return t0; 1513 } 1514 1515 TCGv_i64 tcg_const_local_i64(int64_t val) 1516 { 1517 TCGv_i64 t0; 1518 t0 = tcg_temp_local_new_i64(); 1519 tcg_gen_movi_i64(t0, val); 1520 return t0; 1521 } 1522 1523 #if defined(CONFIG_DEBUG_TCG) 1524 void tcg_clear_temp_count(void) 1525 { 1526 TCGContext *s = tcg_ctx; 1527 s->temps_in_use = 0; 1528 } 1529 1530 int tcg_check_temp_count(void) 1531 { 1532 TCGContext *s = tcg_ctx; 1533 if (s->temps_in_use) { 1534 /* Clear the count so that we don't give another 1535 * warning immediately next time around. 1536 */ 1537 s->temps_in_use = 0; 1538 return 1; 1539 } 1540 return 0; 1541 } 1542 #endif 1543 1544 /* Return true if OP may appear in the opcode stream. 1545 Test the runtime variable that controls each opcode. 
*/ 1546 bool tcg_op_supported(TCGOpcode op) 1547 { 1548 const bool have_vec 1549 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1550 1551 switch (op) { 1552 case INDEX_op_discard: 1553 case INDEX_op_set_label: 1554 case INDEX_op_call: 1555 case INDEX_op_br: 1556 case INDEX_op_mb: 1557 case INDEX_op_insn_start: 1558 case INDEX_op_exit_tb: 1559 case INDEX_op_goto_tb: 1560 case INDEX_op_qemu_ld_i32: 1561 case INDEX_op_qemu_st_i32: 1562 case INDEX_op_qemu_ld_i64: 1563 case INDEX_op_qemu_st_i64: 1564 return true; 1565 1566 case INDEX_op_qemu_st8_i32: 1567 return TCG_TARGET_HAS_qemu_st8_i32; 1568 1569 case INDEX_op_goto_ptr: 1570 return TCG_TARGET_HAS_goto_ptr; 1571 1572 case INDEX_op_mov_i32: 1573 case INDEX_op_setcond_i32: 1574 case INDEX_op_brcond_i32: 1575 case INDEX_op_ld8u_i32: 1576 case INDEX_op_ld8s_i32: 1577 case INDEX_op_ld16u_i32: 1578 case INDEX_op_ld16s_i32: 1579 case INDEX_op_ld_i32: 1580 case INDEX_op_st8_i32: 1581 case INDEX_op_st16_i32: 1582 case INDEX_op_st_i32: 1583 case INDEX_op_add_i32: 1584 case INDEX_op_sub_i32: 1585 case INDEX_op_mul_i32: 1586 case INDEX_op_and_i32: 1587 case INDEX_op_or_i32: 1588 case INDEX_op_xor_i32: 1589 case INDEX_op_shl_i32: 1590 case INDEX_op_shr_i32: 1591 case INDEX_op_sar_i32: 1592 return true; 1593 1594 case INDEX_op_movcond_i32: 1595 return TCG_TARGET_HAS_movcond_i32; 1596 case INDEX_op_div_i32: 1597 case INDEX_op_divu_i32: 1598 return TCG_TARGET_HAS_div_i32; 1599 case INDEX_op_rem_i32: 1600 case INDEX_op_remu_i32: 1601 return TCG_TARGET_HAS_rem_i32; 1602 case INDEX_op_div2_i32: 1603 case INDEX_op_divu2_i32: 1604 return TCG_TARGET_HAS_div2_i32; 1605 case INDEX_op_rotl_i32: 1606 case INDEX_op_rotr_i32: 1607 return TCG_TARGET_HAS_rot_i32; 1608 case INDEX_op_deposit_i32: 1609 return TCG_TARGET_HAS_deposit_i32; 1610 case INDEX_op_extract_i32: 1611 return TCG_TARGET_HAS_extract_i32; 1612 case INDEX_op_sextract_i32: 1613 return TCG_TARGET_HAS_sextract_i32; 1614 case INDEX_op_extract2_i32: 1615 return TCG_TARGET_HAS_extract2_i32; 1616 case INDEX_op_add2_i32: 1617 return TCG_TARGET_HAS_add2_i32; 1618 case INDEX_op_sub2_i32: 1619 return TCG_TARGET_HAS_sub2_i32; 1620 case INDEX_op_mulu2_i32: 1621 return TCG_TARGET_HAS_mulu2_i32; 1622 case INDEX_op_muls2_i32: 1623 return TCG_TARGET_HAS_muls2_i32; 1624 case INDEX_op_muluh_i32: 1625 return TCG_TARGET_HAS_muluh_i32; 1626 case INDEX_op_mulsh_i32: 1627 return TCG_TARGET_HAS_mulsh_i32; 1628 case INDEX_op_ext8s_i32: 1629 return TCG_TARGET_HAS_ext8s_i32; 1630 case INDEX_op_ext16s_i32: 1631 return TCG_TARGET_HAS_ext16s_i32; 1632 case INDEX_op_ext8u_i32: 1633 return TCG_TARGET_HAS_ext8u_i32; 1634 case INDEX_op_ext16u_i32: 1635 return TCG_TARGET_HAS_ext16u_i32; 1636 case INDEX_op_bswap16_i32: 1637 return TCG_TARGET_HAS_bswap16_i32; 1638 case INDEX_op_bswap32_i32: 1639 return TCG_TARGET_HAS_bswap32_i32; 1640 case INDEX_op_not_i32: 1641 return TCG_TARGET_HAS_not_i32; 1642 case INDEX_op_neg_i32: 1643 return TCG_TARGET_HAS_neg_i32; 1644 case INDEX_op_andc_i32: 1645 return TCG_TARGET_HAS_andc_i32; 1646 case INDEX_op_orc_i32: 1647 return TCG_TARGET_HAS_orc_i32; 1648 case INDEX_op_eqv_i32: 1649 return TCG_TARGET_HAS_eqv_i32; 1650 case INDEX_op_nand_i32: 1651 return TCG_TARGET_HAS_nand_i32; 1652 case INDEX_op_nor_i32: 1653 return TCG_TARGET_HAS_nor_i32; 1654 case INDEX_op_clz_i32: 1655 return TCG_TARGET_HAS_clz_i32; 1656 case INDEX_op_ctz_i32: 1657 return TCG_TARGET_HAS_ctz_i32; 1658 case INDEX_op_ctpop_i32: 1659 return TCG_TARGET_HAS_ctpop_i32; 1660 1661 case INDEX_op_brcond2_i32: 1662 case 
INDEX_op_setcond2_i32: 1663 return TCG_TARGET_REG_BITS == 32; 1664 1665 case INDEX_op_mov_i64: 1666 case INDEX_op_setcond_i64: 1667 case INDEX_op_brcond_i64: 1668 case INDEX_op_ld8u_i64: 1669 case INDEX_op_ld8s_i64: 1670 case INDEX_op_ld16u_i64: 1671 case INDEX_op_ld16s_i64: 1672 case INDEX_op_ld32u_i64: 1673 case INDEX_op_ld32s_i64: 1674 case INDEX_op_ld_i64: 1675 case INDEX_op_st8_i64: 1676 case INDEX_op_st16_i64: 1677 case INDEX_op_st32_i64: 1678 case INDEX_op_st_i64: 1679 case INDEX_op_add_i64: 1680 case INDEX_op_sub_i64: 1681 case INDEX_op_mul_i64: 1682 case INDEX_op_and_i64: 1683 case INDEX_op_or_i64: 1684 case INDEX_op_xor_i64: 1685 case INDEX_op_shl_i64: 1686 case INDEX_op_shr_i64: 1687 case INDEX_op_sar_i64: 1688 case INDEX_op_ext_i32_i64: 1689 case INDEX_op_extu_i32_i64: 1690 return TCG_TARGET_REG_BITS == 64; 1691 1692 case INDEX_op_movcond_i64: 1693 return TCG_TARGET_HAS_movcond_i64; 1694 case INDEX_op_div_i64: 1695 case INDEX_op_divu_i64: 1696 return TCG_TARGET_HAS_div_i64; 1697 case INDEX_op_rem_i64: 1698 case INDEX_op_remu_i64: 1699 return TCG_TARGET_HAS_rem_i64; 1700 case INDEX_op_div2_i64: 1701 case INDEX_op_divu2_i64: 1702 return TCG_TARGET_HAS_div2_i64; 1703 case INDEX_op_rotl_i64: 1704 case INDEX_op_rotr_i64: 1705 return TCG_TARGET_HAS_rot_i64; 1706 case INDEX_op_deposit_i64: 1707 return TCG_TARGET_HAS_deposit_i64; 1708 case INDEX_op_extract_i64: 1709 return TCG_TARGET_HAS_extract_i64; 1710 case INDEX_op_sextract_i64: 1711 return TCG_TARGET_HAS_sextract_i64; 1712 case INDEX_op_extract2_i64: 1713 return TCG_TARGET_HAS_extract2_i64; 1714 case INDEX_op_extrl_i64_i32: 1715 return TCG_TARGET_HAS_extrl_i64_i32; 1716 case INDEX_op_extrh_i64_i32: 1717 return TCG_TARGET_HAS_extrh_i64_i32; 1718 case INDEX_op_ext8s_i64: 1719 return TCG_TARGET_HAS_ext8s_i64; 1720 case INDEX_op_ext16s_i64: 1721 return TCG_TARGET_HAS_ext16s_i64; 1722 case INDEX_op_ext32s_i64: 1723 return TCG_TARGET_HAS_ext32s_i64; 1724 case INDEX_op_ext8u_i64: 1725 return TCG_TARGET_HAS_ext8u_i64; 1726 case INDEX_op_ext16u_i64: 1727 return TCG_TARGET_HAS_ext16u_i64; 1728 case INDEX_op_ext32u_i64: 1729 return TCG_TARGET_HAS_ext32u_i64; 1730 case INDEX_op_bswap16_i64: 1731 return TCG_TARGET_HAS_bswap16_i64; 1732 case INDEX_op_bswap32_i64: 1733 return TCG_TARGET_HAS_bswap32_i64; 1734 case INDEX_op_bswap64_i64: 1735 return TCG_TARGET_HAS_bswap64_i64; 1736 case INDEX_op_not_i64: 1737 return TCG_TARGET_HAS_not_i64; 1738 case INDEX_op_neg_i64: 1739 return TCG_TARGET_HAS_neg_i64; 1740 case INDEX_op_andc_i64: 1741 return TCG_TARGET_HAS_andc_i64; 1742 case INDEX_op_orc_i64: 1743 return TCG_TARGET_HAS_orc_i64; 1744 case INDEX_op_eqv_i64: 1745 return TCG_TARGET_HAS_eqv_i64; 1746 case INDEX_op_nand_i64: 1747 return TCG_TARGET_HAS_nand_i64; 1748 case INDEX_op_nor_i64: 1749 return TCG_TARGET_HAS_nor_i64; 1750 case INDEX_op_clz_i64: 1751 return TCG_TARGET_HAS_clz_i64; 1752 case INDEX_op_ctz_i64: 1753 return TCG_TARGET_HAS_ctz_i64; 1754 case INDEX_op_ctpop_i64: 1755 return TCG_TARGET_HAS_ctpop_i64; 1756 case INDEX_op_add2_i64: 1757 return TCG_TARGET_HAS_add2_i64; 1758 case INDEX_op_sub2_i64: 1759 return TCG_TARGET_HAS_sub2_i64; 1760 case INDEX_op_mulu2_i64: 1761 return TCG_TARGET_HAS_mulu2_i64; 1762 case INDEX_op_muls2_i64: 1763 return TCG_TARGET_HAS_muls2_i64; 1764 case INDEX_op_muluh_i64: 1765 return TCG_TARGET_HAS_muluh_i64; 1766 case INDEX_op_mulsh_i64: 1767 return TCG_TARGET_HAS_mulsh_i64; 1768 1769 case INDEX_op_mov_vec: 1770 case INDEX_op_dup_vec: 1771 case INDEX_op_dupm_vec: 1772 case INDEX_op_ld_vec: 1773 case 
INDEX_op_st_vec: 1774 case INDEX_op_add_vec: 1775 case INDEX_op_sub_vec: 1776 case INDEX_op_and_vec: 1777 case INDEX_op_or_vec: 1778 case INDEX_op_xor_vec: 1779 case INDEX_op_cmp_vec: 1780 return have_vec; 1781 case INDEX_op_dup2_vec: 1782 return have_vec && TCG_TARGET_REG_BITS == 32; 1783 case INDEX_op_not_vec: 1784 return have_vec && TCG_TARGET_HAS_not_vec; 1785 case INDEX_op_neg_vec: 1786 return have_vec && TCG_TARGET_HAS_neg_vec; 1787 case INDEX_op_abs_vec: 1788 return have_vec && TCG_TARGET_HAS_abs_vec; 1789 case INDEX_op_andc_vec: 1790 return have_vec && TCG_TARGET_HAS_andc_vec; 1791 case INDEX_op_orc_vec: 1792 return have_vec && TCG_TARGET_HAS_orc_vec; 1793 case INDEX_op_mul_vec: 1794 return have_vec && TCG_TARGET_HAS_mul_vec; 1795 case INDEX_op_shli_vec: 1796 case INDEX_op_shri_vec: 1797 case INDEX_op_sari_vec: 1798 return have_vec && TCG_TARGET_HAS_shi_vec; 1799 case INDEX_op_shls_vec: 1800 case INDEX_op_shrs_vec: 1801 case INDEX_op_sars_vec: 1802 return have_vec && TCG_TARGET_HAS_shs_vec; 1803 case INDEX_op_shlv_vec: 1804 case INDEX_op_shrv_vec: 1805 case INDEX_op_sarv_vec: 1806 return have_vec && TCG_TARGET_HAS_shv_vec; 1807 case INDEX_op_rotli_vec: 1808 return have_vec && TCG_TARGET_HAS_roti_vec; 1809 case INDEX_op_rotls_vec: 1810 return have_vec && TCG_TARGET_HAS_rots_vec; 1811 case INDEX_op_rotlv_vec: 1812 case INDEX_op_rotrv_vec: 1813 return have_vec && TCG_TARGET_HAS_rotv_vec; 1814 case INDEX_op_ssadd_vec: 1815 case INDEX_op_usadd_vec: 1816 case INDEX_op_sssub_vec: 1817 case INDEX_op_ussub_vec: 1818 return have_vec && TCG_TARGET_HAS_sat_vec; 1819 case INDEX_op_smin_vec: 1820 case INDEX_op_umin_vec: 1821 case INDEX_op_smax_vec: 1822 case INDEX_op_umax_vec: 1823 return have_vec && TCG_TARGET_HAS_minmax_vec; 1824 case INDEX_op_bitsel_vec: 1825 return have_vec && TCG_TARGET_HAS_bitsel_vec; 1826 case INDEX_op_cmpsel_vec: 1827 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 1828 1829 default: 1830 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 1831 return true; 1832 } 1833 } 1834 1835 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1836 and endian swap. Maybe it would be better to do the alignment 1837 and endian swap in tcg_reg_alloc_call(). */ 1838 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1839 { 1840 int i, real_args, nb_rets, pi; 1841 unsigned sizemask, flags; 1842 TCGHelperInfo *info; 1843 TCGOp *op; 1844 1845 info = g_hash_table_lookup(helper_table, (gpointer)func); 1846 flags = info->flags; 1847 sizemask = info->sizemask; 1848 1849 #ifdef CONFIG_PLUGIN 1850 /* detect non-plugin helpers */ 1851 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) { 1852 tcg_ctx->plugin_insn->calls_helpers = true; 1853 } 1854 #endif 1855 1856 #if defined(__sparc__) && !defined(__arch64__) \ 1857 && !defined(CONFIG_TCG_INTERPRETER) 1858 /* We have 64-bit values in one register, but need to pass as two 1859 separate parameters. Split them. 
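       Each 64-bit argument is broken up with tcg_gen_extr_i64_i32() into
       two i32 temps, placed in split_args[] high half first.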
*/ 1860 int orig_sizemask = sizemask; 1861 int orig_nargs = nargs; 1862 TCGv_i64 retl, reth; 1863 TCGTemp *split_args[MAX_OPC_PARAM]; 1864 1865 retl = NULL; 1866 reth = NULL; 1867 if (sizemask != 0) { 1868 for (i = real_args = 0; i < nargs; ++i) { 1869 int is_64bit = sizemask & (1 << (i+1)*2); 1870 if (is_64bit) { 1871 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1872 TCGv_i32 h = tcg_temp_new_i32(); 1873 TCGv_i32 l = tcg_temp_new_i32(); 1874 tcg_gen_extr_i64_i32(l, h, orig); 1875 split_args[real_args++] = tcgv_i32_temp(h); 1876 split_args[real_args++] = tcgv_i32_temp(l); 1877 } else { 1878 split_args[real_args++] = args[i]; 1879 } 1880 } 1881 nargs = real_args; 1882 args = split_args; 1883 sizemask = 0; 1884 } 1885 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1886 for (i = 0; i < nargs; ++i) { 1887 int is_64bit = sizemask & (1 << (i+1)*2); 1888 int is_signed = sizemask & (2 << (i+1)*2); 1889 if (!is_64bit) { 1890 TCGv_i64 temp = tcg_temp_new_i64(); 1891 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1892 if (is_signed) { 1893 tcg_gen_ext32s_i64(temp, orig); 1894 } else { 1895 tcg_gen_ext32u_i64(temp, orig); 1896 } 1897 args[i] = tcgv_i64_temp(temp); 1898 } 1899 } 1900 #endif /* TCG_TARGET_EXTEND_ARGS */ 1901 1902 op = tcg_emit_op(INDEX_op_call); 1903 1904 pi = 0; 1905 if (ret != NULL) { 1906 #if defined(__sparc__) && !defined(__arch64__) \ 1907 && !defined(CONFIG_TCG_INTERPRETER) 1908 if (orig_sizemask & 1) { 1909 /* The 32-bit ABI is going to return the 64-bit value in 1910 the %o0/%o1 register pair. Prepare for this by using 1911 two return temporaries, and reassemble below. */ 1912 retl = tcg_temp_new_i64(); 1913 reth = tcg_temp_new_i64(); 1914 op->args[pi++] = tcgv_i64_arg(reth); 1915 op->args[pi++] = tcgv_i64_arg(retl); 1916 nb_rets = 2; 1917 } else { 1918 op->args[pi++] = temp_arg(ret); 1919 nb_rets = 1; 1920 } 1921 #else 1922 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1923 #ifdef HOST_WORDS_BIGENDIAN 1924 op->args[pi++] = temp_arg(ret + 1); 1925 op->args[pi++] = temp_arg(ret); 1926 #else 1927 op->args[pi++] = temp_arg(ret); 1928 op->args[pi++] = temp_arg(ret + 1); 1929 #endif 1930 nb_rets = 2; 1931 } else { 1932 op->args[pi++] = temp_arg(ret); 1933 nb_rets = 1; 1934 } 1935 #endif 1936 } else { 1937 nb_rets = 0; 1938 } 1939 TCGOP_CALLO(op) = nb_rets; 1940 1941 real_args = 0; 1942 for (i = 0; i < nargs; i++) { 1943 int is_64bit = sizemask & (1 << (i+1)*2); 1944 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1945 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1946 /* some targets want aligned 64 bit args */ 1947 if (real_args & 1) { 1948 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1949 real_args++; 1950 } 1951 #endif 1952 /* If stack grows up, then we will be placing successive 1953 arguments at lower addresses, which means we need to 1954 reverse the order compared to how we would normally 1955 treat either big or little-endian. For those arguments 1956 that will wind up in registers, this still works for 1957 HPPA (the only current STACK_GROWSUP target) since the 1958 argument registers are *also* allocated in decreasing 1959 order. If another such target is added, this logic may 1960 have to get more complicated to differentiate between 1961 stack arguments and register arguments. 
*/ 1962 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1963 op->args[pi++] = temp_arg(args[i] + 1); 1964 op->args[pi++] = temp_arg(args[i]); 1965 #else 1966 op->args[pi++] = temp_arg(args[i]); 1967 op->args[pi++] = temp_arg(args[i] + 1); 1968 #endif 1969 real_args += 2; 1970 continue; 1971 } 1972 1973 op->args[pi++] = temp_arg(args[i]); 1974 real_args++; 1975 } 1976 op->args[pi++] = (uintptr_t)func; 1977 op->args[pi++] = flags; 1978 TCGOP_CALLI(op) = real_args; 1979 1980 /* Make sure the fields didn't overflow. */ 1981 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 1982 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1983 1984 #if defined(__sparc__) && !defined(__arch64__) \ 1985 && !defined(CONFIG_TCG_INTERPRETER) 1986 /* Free all of the parts we allocated above. */ 1987 for (i = real_args = 0; i < orig_nargs; ++i) { 1988 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1989 if (is_64bit) { 1990 tcg_temp_free_internal(args[real_args++]); 1991 tcg_temp_free_internal(args[real_args++]); 1992 } else { 1993 real_args++; 1994 } 1995 } 1996 if (orig_sizemask & 1) { 1997 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1998 Note that describing these as TCGv_i64 eliminates an unnecessary 1999 zero-extension that tcg_gen_concat_i32_i64 would create. */ 2000 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 2001 tcg_temp_free_i64(retl); 2002 tcg_temp_free_i64(reth); 2003 } 2004 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 2005 for (i = 0; i < nargs; ++i) { 2006 int is_64bit = sizemask & (1 << (i+1)*2); 2007 if (!is_64bit) { 2008 tcg_temp_free_internal(args[i]); 2009 } 2010 } 2011 #endif /* TCG_TARGET_EXTEND_ARGS */ 2012 } 2013 2014 static void tcg_reg_alloc_start(TCGContext *s) 2015 { 2016 int i, n; 2017 2018 for (i = 0, n = s->nb_temps; i < n; i++) { 2019 TCGTemp *ts = &s->temps[i]; 2020 TCGTempVal val = TEMP_VAL_MEM; 2021 2022 switch (ts->kind) { 2023 case TEMP_CONST: 2024 val = TEMP_VAL_CONST; 2025 break; 2026 case TEMP_FIXED: 2027 val = TEMP_VAL_REG; 2028 break; 2029 case TEMP_GLOBAL: 2030 break; 2031 case TEMP_NORMAL: 2032 val = TEMP_VAL_DEAD; 2033 /* fall through */ 2034 case TEMP_LOCAL: 2035 ts->mem_allocated = 0; 2036 break; 2037 default: 2038 g_assert_not_reached(); 2039 } 2040 ts->val_type = val; 2041 } 2042 2043 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2044 } 2045 2046 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2047 TCGTemp *ts) 2048 { 2049 int idx = temp_idx(ts); 2050 2051 switch (ts->kind) { 2052 case TEMP_FIXED: 2053 case TEMP_GLOBAL: 2054 pstrcpy(buf, buf_size, ts->name); 2055 break; 2056 case TEMP_LOCAL: 2057 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2058 break; 2059 case TEMP_NORMAL: 2060 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2061 break; 2062 case TEMP_CONST: 2063 switch (ts->type) { 2064 case TCG_TYPE_I32: 2065 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2066 break; 2067 #if TCG_TARGET_REG_BITS > 32 2068 case TCG_TYPE_I64: 2069 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2070 break; 2071 #endif 2072 case TCG_TYPE_V64: 2073 case TCG_TYPE_V128: 2074 case TCG_TYPE_V256: 2075 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2076 64 << (ts->type - TCG_TYPE_V64), ts->val); 2077 break; 2078 default: 2079 g_assert_not_reached(); 2080 } 2081 break; 2082 } 2083 return buf; 2084 } 2085 2086 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2087 int buf_size, TCGArg arg) 2088 { 2089 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2090 } 
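/*
 * Worked example (hypothetical values, for illustration only): with
 * s->nb_globals == 3, a TEMP_NORMAL temp at index 5 is rendered as
 * "tmp2" and a TEMP_LOCAL temp at the same index as "loc2"; fixed and
 * global temps print their own name (e.g. "env"), a 32-bit constant
 * 0x2a prints as "$0x2a", and a V128 constant prints with a width
 * prefix such as "v128$0x2a".
 */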
2091 2092 /* Find helper name. */ 2093 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 2094 { 2095 const char *ret = NULL; 2096 if (helper_table) { 2097 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 2098 if (info) { 2099 ret = info->name; 2100 } 2101 } 2102 return ret; 2103 } 2104 2105 static const char * const cond_name[] = 2106 { 2107 [TCG_COND_NEVER] = "never", 2108 [TCG_COND_ALWAYS] = "always", 2109 [TCG_COND_EQ] = "eq", 2110 [TCG_COND_NE] = "ne", 2111 [TCG_COND_LT] = "lt", 2112 [TCG_COND_GE] = "ge", 2113 [TCG_COND_LE] = "le", 2114 [TCG_COND_GT] = "gt", 2115 [TCG_COND_LTU] = "ltu", 2116 [TCG_COND_GEU] = "geu", 2117 [TCG_COND_LEU] = "leu", 2118 [TCG_COND_GTU] = "gtu" 2119 }; 2120 2121 static const char * const ldst_name[] = 2122 { 2123 [MO_UB] = "ub", 2124 [MO_SB] = "sb", 2125 [MO_LEUW] = "leuw", 2126 [MO_LESW] = "lesw", 2127 [MO_LEUL] = "leul", 2128 [MO_LESL] = "lesl", 2129 [MO_LEQ] = "leq", 2130 [MO_BEUW] = "beuw", 2131 [MO_BESW] = "besw", 2132 [MO_BEUL] = "beul", 2133 [MO_BESL] = "besl", 2134 [MO_BEQ] = "beq", 2135 }; 2136 2137 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2138 #ifdef TARGET_ALIGNED_ONLY 2139 [MO_UNALN >> MO_ASHIFT] = "un+", 2140 [MO_ALIGN >> MO_ASHIFT] = "", 2141 #else 2142 [MO_UNALN >> MO_ASHIFT] = "", 2143 [MO_ALIGN >> MO_ASHIFT] = "al+", 2144 #endif 2145 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2146 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2147 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2148 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2149 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2150 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2151 }; 2152 2153 static inline bool tcg_regset_single(TCGRegSet d) 2154 { 2155 return (d & (d - 1)) == 0; 2156 } 2157 2158 static inline TCGReg tcg_regset_first(TCGRegSet d) 2159 { 2160 if (TCG_TARGET_NB_REGS <= 32) { 2161 return ctz32(d); 2162 } else { 2163 return ctz64(d); 2164 } 2165 } 2166 2167 static void tcg_dump_ops(TCGContext *s, bool have_prefs) 2168 { 2169 char buf[128]; 2170 TCGOp *op; 2171 2172 QTAILQ_FOREACH(op, &s->ops, link) { 2173 int i, k, nb_oargs, nb_iargs, nb_cargs; 2174 const TCGOpDef *def; 2175 TCGOpcode c; 2176 int col = 0; 2177 2178 c = op->opc; 2179 def = &tcg_op_defs[c]; 2180 2181 if (c == INDEX_op_insn_start) { 2182 nb_oargs = 0; 2183 col += qemu_log("\n ----"); 2184 2185 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2186 target_ulong a; 2187 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2188 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2189 #else 2190 a = op->args[i]; 2191 #endif 2192 col += qemu_log(" " TARGET_FMT_lx, a); 2193 } 2194 } else if (c == INDEX_op_call) { 2195 /* variable number of arguments */ 2196 nb_oargs = TCGOP_CALLO(op); 2197 nb_iargs = TCGOP_CALLI(op); 2198 nb_cargs = def->nb_cargs; 2199 2200 /* function name, flags, out args */ 2201 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 2202 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 2203 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 2204 for (i = 0; i < nb_oargs; i++) { 2205 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2206 op->args[i])); 2207 } 2208 for (i = 0; i < nb_iargs; i++) { 2209 TCGArg arg = op->args[nb_oargs + i]; 2210 const char *t = "<dummy>"; 2211 if (arg != TCG_CALL_DUMMY_ARG) { 2212 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2213 } 2214 col += qemu_log(",%s", t); 2215 } 2216 } else { 2217 col += qemu_log(" %s ", def->name); 2218 2219 nb_oargs = def->nb_oargs; 2220 nb_iargs = def->nb_iargs; 2221 nb_cargs = def->nb_cargs; 2222 2223 if 
(def->flags & TCG_OPF_VECTOR) { 2224 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 2225 8 << TCGOP_VECE(op)); 2226 } 2227 2228 k = 0; 2229 for (i = 0; i < nb_oargs; i++) { 2230 if (k != 0) { 2231 col += qemu_log(","); 2232 } 2233 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2234 op->args[k++])); 2235 } 2236 for (i = 0; i < nb_iargs; i++) { 2237 if (k != 0) { 2238 col += qemu_log(","); 2239 } 2240 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2241 op->args[k++])); 2242 } 2243 switch (c) { 2244 case INDEX_op_brcond_i32: 2245 case INDEX_op_setcond_i32: 2246 case INDEX_op_movcond_i32: 2247 case INDEX_op_brcond2_i32: 2248 case INDEX_op_setcond2_i32: 2249 case INDEX_op_brcond_i64: 2250 case INDEX_op_setcond_i64: 2251 case INDEX_op_movcond_i64: 2252 case INDEX_op_cmp_vec: 2253 case INDEX_op_cmpsel_vec: 2254 if (op->args[k] < ARRAY_SIZE(cond_name) 2255 && cond_name[op->args[k]]) { 2256 col += qemu_log(",%s", cond_name[op->args[k++]]); 2257 } else { 2258 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2259 } 2260 i = 1; 2261 break; 2262 case INDEX_op_qemu_ld_i32: 2263 case INDEX_op_qemu_st_i32: 2264 case INDEX_op_qemu_st8_i32: 2265 case INDEX_op_qemu_ld_i64: 2266 case INDEX_op_qemu_st_i64: 2267 { 2268 TCGMemOpIdx oi = op->args[k++]; 2269 MemOp op = get_memop(oi); 2270 unsigned ix = get_mmuidx(oi); 2271 2272 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2273 col += qemu_log(",$0x%x,%u", op, ix); 2274 } else { 2275 const char *s_al, *s_op; 2276 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2277 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2278 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2279 } 2280 i = 1; 2281 } 2282 break; 2283 default: 2284 i = 0; 2285 break; 2286 } 2287 switch (c) { 2288 case INDEX_op_set_label: 2289 case INDEX_op_br: 2290 case INDEX_op_brcond_i32: 2291 case INDEX_op_brcond_i64: 2292 case INDEX_op_brcond2_i32: 2293 col += qemu_log("%s$L%d", k ? "," : "", 2294 arg_label(op->args[k])->id); 2295 i++, k++; 2296 break; 2297 default: 2298 break; 2299 } 2300 for (; i < nb_cargs; i++, k++) { 2301 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2302 } 2303 } 2304 2305 if (have_prefs || op->life) { 2306 2307 QemuLogFile *logfile; 2308 2309 rcu_read_lock(); 2310 logfile = qatomic_rcu_read(&qemu_logfile); 2311 if (logfile) { 2312 for (; col < 40; ++col) { 2313 putc(' ', logfile->fd); 2314 } 2315 } 2316 rcu_read_unlock(); 2317 } 2318 2319 if (op->life) { 2320 unsigned life = op->life; 2321 2322 if (life & (SYNC_ARG * 3)) { 2323 qemu_log(" sync:"); 2324 for (i = 0; i < 2; ++i) { 2325 if (life & (SYNC_ARG << i)) { 2326 qemu_log(" %d", i); 2327 } 2328 } 2329 } 2330 life /= DEAD_ARG; 2331 if (life) { 2332 qemu_log(" dead:"); 2333 for (i = 0; life; ++i, life >>= 1) { 2334 if (life & 1) { 2335 qemu_log(" %d", i); 2336 } 2337 } 2338 } 2339 } 2340 2341 if (have_prefs) { 2342 for (i = 0; i < nb_oargs; ++i) { 2343 TCGRegSet set = op->output_pref[i]; 2344 2345 if (i == 0) { 2346 qemu_log(" pref="); 2347 } else { 2348 qemu_log(","); 2349 } 2350 if (set == 0) { 2351 qemu_log("none"); 2352 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2353 qemu_log("all"); 2354 #ifdef CONFIG_DEBUG_TCG 2355 } else if (tcg_regset_single(set)) { 2356 TCGReg reg = tcg_regset_first(set); 2357 qemu_log("%s", tcg_target_reg_names[reg]); 2358 #endif 2359 } else if (TCG_TARGET_NB_REGS <= 32) { 2360 qemu_log("%#x", (uint32_t)set); 2361 } else { 2362 qemu_log("%#" PRIx64, (uint64_t)set); 2363 } 2364 } 2365 } 2366 2367 qemu_log("\n"); 2368 } 2369 } 2370 2371 /* we give more priority to constraints with less registers */ 2372 static int get_constraint_priority(const TCGOpDef *def, int k) 2373 { 2374 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2375 int n; 2376 2377 if (arg_ct->oalias) { 2378 /* an alias is equivalent to a single register */ 2379 n = 1; 2380 } else { 2381 n = ctpop64(arg_ct->regs); 2382 } 2383 return TCG_TARGET_NB_REGS - n + 1; 2384 } 2385 2386 /* sort from highest priority to lowest */ 2387 static void sort_constraints(TCGOpDef *def, int start, int n) 2388 { 2389 int i, j; 2390 TCGArgConstraint *a = def->args_ct; 2391 2392 for (i = 0; i < n; i++) { 2393 a[start + i].sort_index = start + i; 2394 } 2395 if (n <= 1) { 2396 return; 2397 } 2398 for (i = 0; i < n - 1; i++) { 2399 for (j = i + 1; j < n; j++) { 2400 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2401 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2402 if (p1 < p2) { 2403 int tmp = a[start + i].sort_index; 2404 a[start + i].sort_index = a[start + j].sort_index; 2405 a[start + j].sort_index = tmp; 2406 } 2407 } 2408 } 2409 } 2410 2411 static void process_op_defs(TCGContext *s) 2412 { 2413 TCGOpcode op; 2414 2415 for (op = 0; op < NB_OPS; op++) { 2416 TCGOpDef *def = &tcg_op_defs[op]; 2417 const TCGTargetOpDef *tdefs; 2418 TCGType type; 2419 int i, nb_args; 2420 2421 if (def->flags & TCG_OPF_NOT_PRESENT) { 2422 continue; 2423 } 2424 2425 nb_args = def->nb_iargs + def->nb_oargs; 2426 if (nb_args == 0) { 2427 continue; 2428 } 2429 2430 tdefs = tcg_target_op_def(op); 2431 /* Missing TCGTargetOpDef entry. */ 2432 tcg_debug_assert(tdefs != NULL); 2433 2434 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 2435 for (i = 0; i < nb_args; i++) { 2436 const char *ct_str = tdefs->args_ct_str[i]; 2437 /* Incomplete TCGTargetOpDef entry. */ 2438 tcg_debug_assert(ct_str != NULL); 2439 2440 while (*ct_str != '\0') { 2441 switch(*ct_str) { 2442 case '0' ... 
'9': 2443 { 2444 int oarg = *ct_str - '0'; 2445 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2446 tcg_debug_assert(oarg < def->nb_oargs); 2447 tcg_debug_assert(def->args_ct[oarg].regs != 0); 2448 def->args_ct[i] = def->args_ct[oarg]; 2449 /* The output sets oalias. */ 2450 def->args_ct[oarg].oalias = true; 2451 def->args_ct[oarg].alias_index = i; 2452 /* The input sets ialias. */ 2453 def->args_ct[i].ialias = true; 2454 def->args_ct[i].alias_index = oarg; 2455 } 2456 ct_str++; 2457 break; 2458 case '&': 2459 def->args_ct[i].newreg = true; 2460 ct_str++; 2461 break; 2462 case 'i': 2463 def->args_ct[i].ct |= TCG_CT_CONST; 2464 ct_str++; 2465 break; 2466 default: 2467 ct_str = target_parse_constraint(&def->args_ct[i], 2468 ct_str, type); 2469 /* Typo in TCGTargetOpDef constraint. */ 2470 tcg_debug_assert(ct_str != NULL); 2471 } 2472 } 2473 } 2474 2475 /* TCGTargetOpDef entry with too much information? */ 2476 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2477 2478 /* sort the constraints (XXX: this is just an heuristic) */ 2479 sort_constraints(def, 0, def->nb_oargs); 2480 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2481 } 2482 } 2483 2484 void tcg_op_remove(TCGContext *s, TCGOp *op) 2485 { 2486 TCGLabel *label; 2487 2488 switch (op->opc) { 2489 case INDEX_op_br: 2490 label = arg_label(op->args[0]); 2491 label->refs--; 2492 break; 2493 case INDEX_op_brcond_i32: 2494 case INDEX_op_brcond_i64: 2495 label = arg_label(op->args[3]); 2496 label->refs--; 2497 break; 2498 case INDEX_op_brcond2_i32: 2499 label = arg_label(op->args[5]); 2500 label->refs--; 2501 break; 2502 default: 2503 break; 2504 } 2505 2506 QTAILQ_REMOVE(&s->ops, op, link); 2507 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2508 s->nb_ops--; 2509 2510 #ifdef CONFIG_PROFILER 2511 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2512 #endif 2513 } 2514 2515 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2516 { 2517 TCGContext *s = tcg_ctx; 2518 TCGOp *op; 2519 2520 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2521 op = tcg_malloc(sizeof(TCGOp)); 2522 } else { 2523 op = QTAILQ_FIRST(&s->free_ops); 2524 QTAILQ_REMOVE(&s->free_ops, op, link); 2525 } 2526 memset(op, 0, offsetof(TCGOp, link)); 2527 op->opc = opc; 2528 s->nb_ops++; 2529 2530 return op; 2531 } 2532 2533 TCGOp *tcg_emit_op(TCGOpcode opc) 2534 { 2535 TCGOp *op = tcg_op_alloc(opc); 2536 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2537 return op; 2538 } 2539 2540 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2541 { 2542 TCGOp *new_op = tcg_op_alloc(opc); 2543 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2544 return new_op; 2545 } 2546 2547 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2548 { 2549 TCGOp *new_op = tcg_op_alloc(opc); 2550 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2551 return new_op; 2552 } 2553 2554 /* Reachable analysis : remove unreachable code. */ 2555 static void reachable_code_pass(TCGContext *s) 2556 { 2557 TCGOp *op, *op_next; 2558 bool dead = false; 2559 2560 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2561 bool remove = dead; 2562 TCGLabel *label; 2563 int call_flags; 2564 2565 switch (op->opc) { 2566 case INDEX_op_set_label: 2567 label = arg_label(op->args[0]); 2568 if (label->refs == 0) { 2569 /* 2570 * While there is an occasional backward branch, virtually 2571 * all branches generated by the translators are forward. 
2572 * Which means that generally we will have already removed 2573 * all references to the label that will be, and there is 2574 * little to be gained by iterating. 2575 */ 2576 remove = true; 2577 } else { 2578 /* Once we see a label, insns become live again. */ 2579 dead = false; 2580 remove = false; 2581 2582 /* 2583 * Optimization can fold conditional branches to unconditional. 2584 * If we find a label with one reference which is preceded by 2585 * an unconditional branch to it, remove both. This needed to 2586 * wait until the dead code in between them was removed. 2587 */ 2588 if (label->refs == 1) { 2589 TCGOp *op_prev = QTAILQ_PREV(op, link); 2590 if (op_prev->opc == INDEX_op_br && 2591 label == arg_label(op_prev->args[0])) { 2592 tcg_op_remove(s, op_prev); 2593 remove = true; 2594 } 2595 } 2596 } 2597 break; 2598 2599 case INDEX_op_br: 2600 case INDEX_op_exit_tb: 2601 case INDEX_op_goto_ptr: 2602 /* Unconditional branches; everything following is dead. */ 2603 dead = true; 2604 break; 2605 2606 case INDEX_op_call: 2607 /* Notice noreturn helper calls, raising exceptions. */ 2608 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; 2609 if (call_flags & TCG_CALL_NO_RETURN) { 2610 dead = true; 2611 } 2612 break; 2613 2614 case INDEX_op_insn_start: 2615 /* Never remove -- we need to keep these for unwind. */ 2616 remove = false; 2617 break; 2618 2619 default: 2620 break; 2621 } 2622 2623 if (remove) { 2624 tcg_op_remove(s, op); 2625 } 2626 } 2627 } 2628 2629 #define TS_DEAD 1 2630 #define TS_MEM 2 2631 2632 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2633 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2634 2635 /* For liveness_pass_1, the register preferences for a given temp. */ 2636 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2637 { 2638 return ts->state_ptr; 2639 } 2640 2641 /* For liveness_pass_1, reset the preferences for a given temp to the 2642 * maximal regset for its type. 2643 */ 2644 static inline void la_reset_pref(TCGTemp *ts) 2645 { 2646 *la_temp_pref(ts) 2647 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2648 } 2649 2650 /* liveness analysis: end of function: all temps are dead, and globals 2651 should be in memory. */ 2652 static void la_func_end(TCGContext *s, int ng, int nt) 2653 { 2654 int i; 2655 2656 for (i = 0; i < ng; ++i) { 2657 s->temps[i].state = TS_DEAD | TS_MEM; 2658 la_reset_pref(&s->temps[i]); 2659 } 2660 for (i = ng; i < nt; ++i) { 2661 s->temps[i].state = TS_DEAD; 2662 la_reset_pref(&s->temps[i]); 2663 } 2664 } 2665 2666 /* liveness analysis: end of basic block: all temps are dead, globals 2667 and local temps should be in memory. */ 2668 static void la_bb_end(TCGContext *s, int ng, int nt) 2669 { 2670 int i; 2671 2672 for (i = 0; i < nt; ++i) { 2673 TCGTemp *ts = &s->temps[i]; 2674 int state; 2675 2676 switch (ts->kind) { 2677 case TEMP_FIXED: 2678 case TEMP_GLOBAL: 2679 case TEMP_LOCAL: 2680 state = TS_DEAD | TS_MEM; 2681 break; 2682 case TEMP_NORMAL: 2683 case TEMP_CONST: 2684 state = TS_DEAD; 2685 break; 2686 default: 2687 g_assert_not_reached(); 2688 } 2689 ts->state = state; 2690 la_reset_pref(ts); 2691 } 2692 } 2693 2694 /* liveness analysis: sync globals back to memory. */ 2695 static void la_global_sync(TCGContext *s, int ng) 2696 { 2697 int i; 2698 2699 for (i = 0; i < ng; ++i) { 2700 int state = s->temps[i].state; 2701 s->temps[i].state = state | TS_MEM; 2702 if (state == TS_DEAD) { 2703 /* If the global was previously dead, reset prefs. 
*/ 2704 la_reset_pref(&s->temps[i]); 2705 } 2706 } 2707 } 2708 2709 /* 2710 * liveness analysis: conditional branch: all temps are dead, 2711 * globals and local temps should be synced. 2712 */ 2713 static void la_bb_sync(TCGContext *s, int ng, int nt) 2714 { 2715 la_global_sync(s, ng); 2716 2717 for (int i = ng; i < nt; ++i) { 2718 TCGTemp *ts = &s->temps[i]; 2719 int state; 2720 2721 switch (ts->kind) { 2722 case TEMP_LOCAL: 2723 state = ts->state; 2724 ts->state = state | TS_MEM; 2725 if (state != TS_DEAD) { 2726 continue; 2727 } 2728 break; 2729 case TEMP_NORMAL: 2730 s->temps[i].state = TS_DEAD; 2731 break; 2732 case TEMP_CONST: 2733 continue; 2734 default: 2735 g_assert_not_reached(); 2736 } 2737 la_reset_pref(&s->temps[i]); 2738 } 2739 } 2740 2741 /* liveness analysis: sync globals back to memory and kill. */ 2742 static void la_global_kill(TCGContext *s, int ng) 2743 { 2744 int i; 2745 2746 for (i = 0; i < ng; i++) { 2747 s->temps[i].state = TS_DEAD | TS_MEM; 2748 la_reset_pref(&s->temps[i]); 2749 } 2750 } 2751 2752 /* liveness analysis: note live globals crossing calls. */ 2753 static void la_cross_call(TCGContext *s, int nt) 2754 { 2755 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2756 int i; 2757 2758 for (i = 0; i < nt; i++) { 2759 TCGTemp *ts = &s->temps[i]; 2760 if (!(ts->state & TS_DEAD)) { 2761 TCGRegSet *pset = la_temp_pref(ts); 2762 TCGRegSet set = *pset; 2763 2764 set &= mask; 2765 /* If the combination is not possible, restart. */ 2766 if (set == 0) { 2767 set = tcg_target_available_regs[ts->type] & mask; 2768 } 2769 *pset = set; 2770 } 2771 } 2772 } 2773 2774 /* Liveness analysis : update the opc_arg_life array to tell if a 2775 given input arguments is dead. Instructions updating dead 2776 temporaries are removed. */ 2777 static void liveness_pass_1(TCGContext *s) 2778 { 2779 int nb_globals = s->nb_globals; 2780 int nb_temps = s->nb_temps; 2781 TCGOp *op, *op_prev; 2782 TCGRegSet *prefs; 2783 int i; 2784 2785 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2786 for (i = 0; i < nb_temps; ++i) { 2787 s->temps[i].state_ptr = prefs + i; 2788 } 2789 2790 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2791 la_func_end(s, nb_globals, nb_temps); 2792 2793 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2794 int nb_iargs, nb_oargs; 2795 TCGOpcode opc_new, opc_new2; 2796 bool have_opc_new2; 2797 TCGLifeData arg_life = 0; 2798 TCGTemp *ts; 2799 TCGOpcode opc = op->opc; 2800 const TCGOpDef *def = &tcg_op_defs[opc]; 2801 2802 switch (opc) { 2803 case INDEX_op_call: 2804 { 2805 int call_flags; 2806 int nb_call_regs; 2807 2808 nb_oargs = TCGOP_CALLO(op); 2809 nb_iargs = TCGOP_CALLI(op); 2810 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2811 2812 /* pure functions can be removed if their result is unused */ 2813 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2814 for (i = 0; i < nb_oargs; i++) { 2815 ts = arg_temp(op->args[i]); 2816 if (ts->state != TS_DEAD) { 2817 goto do_not_remove_call; 2818 } 2819 } 2820 goto do_remove; 2821 } 2822 do_not_remove_call: 2823 2824 /* Output args are dead. */ 2825 for (i = 0; i < nb_oargs; i++) { 2826 ts = arg_temp(op->args[i]); 2827 if (ts->state & TS_DEAD) { 2828 arg_life |= DEAD_ARG << i; 2829 } 2830 if (ts->state & TS_MEM) { 2831 arg_life |= SYNC_ARG << i; 2832 } 2833 ts->state = TS_DEAD; 2834 la_reset_pref(ts); 2835 2836 /* Not used -- it will be tcg_target_call_oarg_regs[i]. 
*/ 2837 op->output_pref[i] = 0; 2838 } 2839 2840 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2841 TCG_CALL_NO_READ_GLOBALS))) { 2842 la_global_kill(s, nb_globals); 2843 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2844 la_global_sync(s, nb_globals); 2845 } 2846 2847 /* Record arguments that die in this helper. */ 2848 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2849 ts = arg_temp(op->args[i]); 2850 if (ts && ts->state & TS_DEAD) { 2851 arg_life |= DEAD_ARG << i; 2852 } 2853 } 2854 2855 /* For all live registers, remove call-clobbered prefs. */ 2856 la_cross_call(s, nb_temps); 2857 2858 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2859 2860 /* Input arguments are live for preceding opcodes. */ 2861 for (i = 0; i < nb_iargs; i++) { 2862 ts = arg_temp(op->args[i + nb_oargs]); 2863 if (ts && ts->state & TS_DEAD) { 2864 /* For those arguments that die, and will be allocated 2865 * in registers, clear the register set for that arg, 2866 * to be filled in below. For args that will be on 2867 * the stack, reset to any available reg. 2868 */ 2869 *la_temp_pref(ts) 2870 = (i < nb_call_regs ? 0 : 2871 tcg_target_available_regs[ts->type]); 2872 ts->state &= ~TS_DEAD; 2873 } 2874 } 2875 2876 /* For each input argument, add its input register to prefs. 2877 If a temp is used once, this produces a single set bit. */ 2878 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2879 ts = arg_temp(op->args[i + nb_oargs]); 2880 if (ts) { 2881 tcg_regset_set_reg(*la_temp_pref(ts), 2882 tcg_target_call_iarg_regs[i]); 2883 } 2884 } 2885 } 2886 break; 2887 case INDEX_op_insn_start: 2888 break; 2889 case INDEX_op_discard: 2890 /* mark the temporary as dead */ 2891 ts = arg_temp(op->args[0]); 2892 ts->state = TS_DEAD; 2893 la_reset_pref(ts); 2894 break; 2895 2896 case INDEX_op_add2_i32: 2897 opc_new = INDEX_op_add_i32; 2898 goto do_addsub2; 2899 case INDEX_op_sub2_i32: 2900 opc_new = INDEX_op_sub_i32; 2901 goto do_addsub2; 2902 case INDEX_op_add2_i64: 2903 opc_new = INDEX_op_add_i64; 2904 goto do_addsub2; 2905 case INDEX_op_sub2_i64: 2906 opc_new = INDEX_op_sub_i64; 2907 do_addsub2: 2908 nb_iargs = 4; 2909 nb_oargs = 2; 2910 /* Test if the high part of the operation is dead, but not 2911 the low part. The result can be optimized to a simple 2912 add or sub. This happens often for x86_64 guest when the 2913 cpu mode is set to 32 bit. */ 2914 if (arg_temp(op->args[1])->state == TS_DEAD) { 2915 if (arg_temp(op->args[0])->state == TS_DEAD) { 2916 goto do_remove; 2917 } 2918 /* Replace the opcode and adjust the args in place, 2919 leaving 3 unused args at the end. */ 2920 op->opc = opc = opc_new; 2921 op->args[1] = op->args[2]; 2922 op->args[2] = op->args[4]; 2923 /* Fall through and mark the single-word operation live. 
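   Illustrative case: an add2_i32 with operands rl,rh,al,ah,bl,bh whose
   high output rh is dead while rl is still live has just been rewritten
   above into add_i32 rl,al,bl, by copying args[2] and args[4] over
   args[1] and args[2].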
*/ 2924 nb_iargs = 2; 2925 nb_oargs = 1; 2926 } 2927 goto do_not_remove; 2928 2929 case INDEX_op_mulu2_i32: 2930 opc_new = INDEX_op_mul_i32; 2931 opc_new2 = INDEX_op_muluh_i32; 2932 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2933 goto do_mul2; 2934 case INDEX_op_muls2_i32: 2935 opc_new = INDEX_op_mul_i32; 2936 opc_new2 = INDEX_op_mulsh_i32; 2937 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2938 goto do_mul2; 2939 case INDEX_op_mulu2_i64: 2940 opc_new = INDEX_op_mul_i64; 2941 opc_new2 = INDEX_op_muluh_i64; 2942 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2943 goto do_mul2; 2944 case INDEX_op_muls2_i64: 2945 opc_new = INDEX_op_mul_i64; 2946 opc_new2 = INDEX_op_mulsh_i64; 2947 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2948 goto do_mul2; 2949 do_mul2: 2950 nb_iargs = 2; 2951 nb_oargs = 2; 2952 if (arg_temp(op->args[1])->state == TS_DEAD) { 2953 if (arg_temp(op->args[0])->state == TS_DEAD) { 2954 /* Both parts of the operation are dead. */ 2955 goto do_remove; 2956 } 2957 /* The high part of the operation is dead; generate the low. */ 2958 op->opc = opc = opc_new; 2959 op->args[1] = op->args[2]; 2960 op->args[2] = op->args[3]; 2961 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2962 /* The low part of the operation is dead; generate the high. */ 2963 op->opc = opc = opc_new2; 2964 op->args[0] = op->args[1]; 2965 op->args[1] = op->args[2]; 2966 op->args[2] = op->args[3]; 2967 } else { 2968 goto do_not_remove; 2969 } 2970 /* Mark the single-word operation live. */ 2971 nb_oargs = 1; 2972 goto do_not_remove; 2973 2974 default: 2975 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2976 nb_iargs = def->nb_iargs; 2977 nb_oargs = def->nb_oargs; 2978 2979 /* Test if the operation can be removed because all 2980 its outputs are dead. We assume that nb_oargs == 0 2981 implies side effects */ 2982 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2983 for (i = 0; i < nb_oargs; i++) { 2984 if (arg_temp(op->args[i])->state != TS_DEAD) { 2985 goto do_not_remove; 2986 } 2987 } 2988 goto do_remove; 2989 } 2990 goto do_not_remove; 2991 2992 do_remove: 2993 tcg_op_remove(s, op); 2994 break; 2995 2996 do_not_remove: 2997 for (i = 0; i < nb_oargs; i++) { 2998 ts = arg_temp(op->args[i]); 2999 3000 /* Remember the preference of the uses that followed. */ 3001 op->output_pref[i] = *la_temp_pref(ts); 3002 3003 /* Output args are dead. */ 3004 if (ts->state & TS_DEAD) { 3005 arg_life |= DEAD_ARG << i; 3006 } 3007 if (ts->state & TS_MEM) { 3008 arg_life |= SYNC_ARG << i; 3009 } 3010 ts->state = TS_DEAD; 3011 la_reset_pref(ts); 3012 } 3013 3014 /* If end of basic block, update. */ 3015 if (def->flags & TCG_OPF_BB_EXIT) { 3016 la_func_end(s, nb_globals, nb_temps); 3017 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3018 la_bb_sync(s, nb_globals, nb_temps); 3019 } else if (def->flags & TCG_OPF_BB_END) { 3020 la_bb_end(s, nb_globals, nb_temps); 3021 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3022 la_global_sync(s, nb_globals); 3023 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3024 la_cross_call(s, nb_temps); 3025 } 3026 } 3027 3028 /* Record arguments that die in this opcode. */ 3029 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3030 ts = arg_temp(op->args[i]); 3031 if (ts->state & TS_DEAD) { 3032 arg_life |= DEAD_ARG << i; 3033 } 3034 } 3035 3036 /* Input arguments are live for preceding opcodes. 
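   Because this pass walks the op list in reverse, clearing TS_DEAD here
   makes the temp appear live to the ops visited next, i.e. the ops that
   come earlier in the translation block.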
*/ 3037 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3038 ts = arg_temp(op->args[i]); 3039 if (ts->state & TS_DEAD) { 3040 /* For operands that were dead, initially allow 3041 all regs for the type. */ 3042 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3043 ts->state &= ~TS_DEAD; 3044 } 3045 } 3046 3047 /* Incorporate constraints for this operand. */ 3048 switch (opc) { 3049 case INDEX_op_mov_i32: 3050 case INDEX_op_mov_i64: 3051 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3052 have proper constraints. That said, special case 3053 moves to propagate preferences backward. */ 3054 if (IS_DEAD_ARG(1)) { 3055 *la_temp_pref(arg_temp(op->args[0])) 3056 = *la_temp_pref(arg_temp(op->args[1])); 3057 } 3058 break; 3059 3060 default: 3061 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3062 const TCGArgConstraint *ct = &def->args_ct[i]; 3063 TCGRegSet set, *pset; 3064 3065 ts = arg_temp(op->args[i]); 3066 pset = la_temp_pref(ts); 3067 set = *pset; 3068 3069 set &= ct->regs; 3070 if (ct->ialias) { 3071 set &= op->output_pref[ct->alias_index]; 3072 } 3073 /* If the combination is not possible, restart. */ 3074 if (set == 0) { 3075 set = ct->regs; 3076 } 3077 *pset = set; 3078 } 3079 break; 3080 } 3081 break; 3082 } 3083 op->life = arg_life; 3084 } 3085 } 3086 3087 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3088 static bool liveness_pass_2(TCGContext *s) 3089 { 3090 int nb_globals = s->nb_globals; 3091 int nb_temps, i; 3092 bool changes = false; 3093 TCGOp *op, *op_next; 3094 3095 /* Create a temporary for each indirect global. */ 3096 for (i = 0; i < nb_globals; ++i) { 3097 TCGTemp *its = &s->temps[i]; 3098 if (its->indirect_reg) { 3099 TCGTemp *dts = tcg_temp_alloc(s); 3100 dts->type = its->type; 3101 dts->base_type = its->base_type; 3102 its->state_ptr = dts; 3103 } else { 3104 its->state_ptr = NULL; 3105 } 3106 /* All globals begin dead. */ 3107 its->state = TS_DEAD; 3108 } 3109 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3110 TCGTemp *its = &s->temps[i]; 3111 its->state_ptr = NULL; 3112 its->state = TS_DEAD; 3113 } 3114 3115 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3116 TCGOpcode opc = op->opc; 3117 const TCGOpDef *def = &tcg_op_defs[opc]; 3118 TCGLifeData arg_life = op->life; 3119 int nb_iargs, nb_oargs, call_flags; 3120 TCGTemp *arg_ts, *dir_ts; 3121 3122 if (opc == INDEX_op_call) { 3123 nb_oargs = TCGOP_CALLO(op); 3124 nb_iargs = TCGOP_CALLI(op); 3125 call_flags = op->args[nb_oargs + nb_iargs + 1]; 3126 } else { 3127 nb_iargs = def->nb_iargs; 3128 nb_oargs = def->nb_oargs; 3129 3130 /* Set flags similar to how calls require. */ 3131 if (def->flags & TCG_OPF_COND_BRANCH) { 3132 /* Like reading globals: sync_globals */ 3133 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3134 } else if (def->flags & TCG_OPF_BB_END) { 3135 /* Like writing globals: save_globals */ 3136 call_flags = 0; 3137 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3138 /* Like reading globals: sync_globals */ 3139 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3140 } else { 3141 /* No effect on globals. */ 3142 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3143 TCG_CALL_NO_WRITE_GLOBALS); 3144 } 3145 } 3146 3147 /* Make sure that input arguments are available. */ 3148 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3149 arg_ts = arg_temp(op->args[i]); 3150 if (arg_ts) { 3151 dir_ts = arg_ts->state_ptr; 3152 if (dir_ts && arg_ts->state == TS_DEAD) { 3153 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3154 ? 
INDEX_op_ld_i32 3155 : INDEX_op_ld_i64); 3156 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 3157 3158 lop->args[0] = temp_arg(dir_ts); 3159 lop->args[1] = temp_arg(arg_ts->mem_base); 3160 lop->args[2] = arg_ts->mem_offset; 3161 3162 /* Loaded, but synced with memory. */ 3163 arg_ts->state = TS_MEM; 3164 } 3165 } 3166 } 3167 3168 /* Perform input replacement, and mark inputs that became dead. 3169 No action is required except keeping temp_state up to date 3170 so that we reload when needed. */ 3171 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3172 arg_ts = arg_temp(op->args[i]); 3173 if (arg_ts) { 3174 dir_ts = arg_ts->state_ptr; 3175 if (dir_ts) { 3176 op->args[i] = temp_arg(dir_ts); 3177 changes = true; 3178 if (IS_DEAD_ARG(i)) { 3179 arg_ts->state = TS_DEAD; 3180 } 3181 } 3182 } 3183 } 3184 3185 /* Liveness analysis should ensure that the following are 3186 all correct, for call sites and basic block end points. */ 3187 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3188 /* Nothing to do */ 3189 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3190 for (i = 0; i < nb_globals; ++i) { 3191 /* Liveness should see that globals are synced back, 3192 that is, either TS_DEAD or TS_MEM. */ 3193 arg_ts = &s->temps[i]; 3194 tcg_debug_assert(arg_ts->state_ptr == 0 3195 || arg_ts->state != 0); 3196 } 3197 } else { 3198 for (i = 0; i < nb_globals; ++i) { 3199 /* Liveness should see that globals are saved back, 3200 that is, TS_DEAD, waiting to be reloaded. */ 3201 arg_ts = &s->temps[i]; 3202 tcg_debug_assert(arg_ts->state_ptr == 0 3203 || arg_ts->state == TS_DEAD); 3204 } 3205 } 3206 3207 /* Outputs become available. */ 3208 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3209 arg_ts = arg_temp(op->args[0]); 3210 dir_ts = arg_ts->state_ptr; 3211 if (dir_ts) { 3212 op->args[0] = temp_arg(dir_ts); 3213 changes = true; 3214 3215 /* The output is now live and modified. */ 3216 arg_ts->state = 0; 3217 3218 if (NEED_SYNC_ARG(0)) { 3219 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3220 ? INDEX_op_st_i32 3221 : INDEX_op_st_i64); 3222 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3223 TCGTemp *out_ts = dir_ts; 3224 3225 if (IS_DEAD_ARG(0)) { 3226 out_ts = arg_temp(op->args[1]); 3227 arg_ts->state = TS_DEAD; 3228 tcg_op_remove(s, op); 3229 } else { 3230 arg_ts->state = TS_MEM; 3231 } 3232 3233 sop->args[0] = temp_arg(out_ts); 3234 sop->args[1] = temp_arg(arg_ts->mem_base); 3235 sop->args[2] = arg_ts->mem_offset; 3236 } else { 3237 tcg_debug_assert(!IS_DEAD_ARG(0)); 3238 } 3239 } 3240 } else { 3241 for (i = 0; i < nb_oargs; i++) { 3242 arg_ts = arg_temp(op->args[i]); 3243 dir_ts = arg_ts->state_ptr; 3244 if (!dir_ts) { 3245 continue; 3246 } 3247 op->args[i] = temp_arg(dir_ts); 3248 changes = true; 3249 3250 /* The output is now live and modified. */ 3251 arg_ts->state = 0; 3252 3253 /* Sync outputs upon their last write. */ 3254 if (NEED_SYNC_ARG(i)) { 3255 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3256 ? INDEX_op_st_i32 3257 : INDEX_op_st_i64); 3258 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3259 3260 sop->args[0] = temp_arg(dir_ts); 3261 sop->args[1] = temp_arg(arg_ts->mem_base); 3262 sop->args[2] = arg_ts->mem_offset; 3263 3264 arg_ts->state = TS_MEM; 3265 } 3266 /* Drop outputs that are dead. 
*/ 3267 if (IS_DEAD_ARG(i)) { 3268 arg_ts->state = TS_DEAD; 3269 } 3270 } 3271 } 3272 } 3273 3274 return changes; 3275 } 3276 3277 #ifdef CONFIG_DEBUG_TCG 3278 static void dump_regs(TCGContext *s) 3279 { 3280 TCGTemp *ts; 3281 int i; 3282 char buf[64]; 3283 3284 for(i = 0; i < s->nb_temps; i++) { 3285 ts = &s->temps[i]; 3286 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3287 switch(ts->val_type) { 3288 case TEMP_VAL_REG: 3289 printf("%s", tcg_target_reg_names[ts->reg]); 3290 break; 3291 case TEMP_VAL_MEM: 3292 printf("%d(%s)", (int)ts->mem_offset, 3293 tcg_target_reg_names[ts->mem_base->reg]); 3294 break; 3295 case TEMP_VAL_CONST: 3296 printf("$0x%" PRIx64, ts->val); 3297 break; 3298 case TEMP_VAL_DEAD: 3299 printf("D"); 3300 break; 3301 default: 3302 printf("???"); 3303 break; 3304 } 3305 printf("\n"); 3306 } 3307 3308 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3309 if (s->reg_to_temp[i] != NULL) { 3310 printf("%s: %s\n", 3311 tcg_target_reg_names[i], 3312 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3313 } 3314 } 3315 } 3316 3317 static void check_regs(TCGContext *s) 3318 { 3319 int reg; 3320 int k; 3321 TCGTemp *ts; 3322 char buf[64]; 3323 3324 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3325 ts = s->reg_to_temp[reg]; 3326 if (ts != NULL) { 3327 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3328 printf("Inconsistency for register %s:\n", 3329 tcg_target_reg_names[reg]); 3330 goto fail; 3331 } 3332 } 3333 } 3334 for (k = 0; k < s->nb_temps; k++) { 3335 ts = &s->temps[k]; 3336 if (ts->val_type == TEMP_VAL_REG 3337 && ts->kind != TEMP_FIXED 3338 && s->reg_to_temp[ts->reg] != ts) { 3339 printf("Inconsistency for temp %s:\n", 3340 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3341 fail: 3342 printf("reg state:\n"); 3343 dump_regs(s); 3344 tcg_abort(); 3345 } 3346 } 3347 } 3348 #endif 3349 3350 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3351 { 3352 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3353 /* Sparc64 stack is accessed with offset of 2047 */ 3354 s->current_frame_offset = (s->current_frame_offset + 3355 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3356 ~(sizeof(tcg_target_long) - 1); 3357 #endif 3358 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3359 s->frame_end) { 3360 tcg_abort(); 3361 } 3362 ts->mem_offset = s->current_frame_offset; 3363 ts->mem_base = s->frame_temp; 3364 ts->mem_allocated = 1; 3365 s->current_frame_offset += sizeof(tcg_target_long); 3366 } 3367 3368 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3369 3370 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3371 mark it free; otherwise mark it dead. */ 3372 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3373 { 3374 TCGTempVal new_type; 3375 3376 switch (ts->kind) { 3377 case TEMP_FIXED: 3378 return; 3379 case TEMP_GLOBAL: 3380 case TEMP_LOCAL: 3381 new_type = TEMP_VAL_MEM; 3382 break; 3383 case TEMP_NORMAL: 3384 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3385 break; 3386 case TEMP_CONST: 3387 new_type = TEMP_VAL_CONST; 3388 break; 3389 default: 3390 g_assert_not_reached(); 3391 } 3392 if (ts->val_type == TEMP_VAL_REG) { 3393 s->reg_to_temp[ts->reg] = NULL; 3394 } 3395 ts->val_type = new_type; 3396 } 3397 3398 /* Mark a temporary as dead. */ 3399 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3400 { 3401 temp_free_or_dead(s, ts, 1); 3402 } 3403 3404 /* Sync a temporary to memory. 
'allocated_regs' is used in case a temporary 3405 registers needs to be allocated to store a constant. If 'free_or_dead' 3406 is non-zero, subsequently release the temporary; if it is positive, the 3407 temp is dead; if it is negative, the temp is free. */ 3408 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3409 TCGRegSet preferred_regs, int free_or_dead) 3410 { 3411 if (!temp_readonly(ts) && !ts->mem_coherent) { 3412 if (!ts->mem_allocated) { 3413 temp_allocate_frame(s, ts); 3414 } 3415 switch (ts->val_type) { 3416 case TEMP_VAL_CONST: 3417 /* If we're going to free the temp immediately, then we won't 3418 require it later in a register, so attempt to store the 3419 constant to memory directly. */ 3420 if (free_or_dead 3421 && tcg_out_sti(s, ts->type, ts->val, 3422 ts->mem_base->reg, ts->mem_offset)) { 3423 break; 3424 } 3425 temp_load(s, ts, tcg_target_available_regs[ts->type], 3426 allocated_regs, preferred_regs); 3427 /* fallthrough */ 3428 3429 case TEMP_VAL_REG: 3430 tcg_out_st(s, ts->type, ts->reg, 3431 ts->mem_base->reg, ts->mem_offset); 3432 break; 3433 3434 case TEMP_VAL_MEM: 3435 break; 3436 3437 case TEMP_VAL_DEAD: 3438 default: 3439 tcg_abort(); 3440 } 3441 ts->mem_coherent = 1; 3442 } 3443 if (free_or_dead) { 3444 temp_free_or_dead(s, ts, free_or_dead); 3445 } 3446 } 3447 3448 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3449 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3450 { 3451 TCGTemp *ts = s->reg_to_temp[reg]; 3452 if (ts != NULL) { 3453 temp_sync(s, ts, allocated_regs, 0, -1); 3454 } 3455 } 3456 3457 /** 3458 * tcg_reg_alloc: 3459 * @required_regs: Set of registers in which we must allocate. 3460 * @allocated_regs: Set of registers which must be avoided. 3461 * @preferred_regs: Set of registers we should prefer. 3462 * @rev: True if we search the registers in "indirect" order. 3463 * 3464 * The allocated register must be in @required_regs & ~@allocated_regs, 3465 * but if we can put it in @preferred_regs we may save a move later. 3466 */ 3467 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3468 TCGRegSet allocated_regs, 3469 TCGRegSet preferred_regs, bool rev) 3470 { 3471 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3472 TCGRegSet reg_ct[2]; 3473 const int *order; 3474 3475 reg_ct[1] = required_regs & ~allocated_regs; 3476 tcg_debug_assert(reg_ct[1] != 0); 3477 reg_ct[0] = reg_ct[1] & preferred_regs; 3478 3479 /* Skip the preferred_regs option if it cannot be satisfied, 3480 or if the preference made no difference. */ 3481 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3482 3483 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3484 3485 /* Try free registers, preferences first. */ 3486 for (j = f; j < 2; j++) { 3487 TCGRegSet set = reg_ct[j]; 3488 3489 if (tcg_regset_single(set)) { 3490 /* One register in the set. */ 3491 TCGReg reg = tcg_regset_first(set); 3492 if (s->reg_to_temp[reg] == NULL) { 3493 return reg; 3494 } 3495 } else { 3496 for (i = 0; i < n; i++) { 3497 TCGReg reg = order[i]; 3498 if (s->reg_to_temp[reg] == NULL && 3499 tcg_regset_test_reg(set, reg)) { 3500 return reg; 3501 } 3502 } 3503 } 3504 } 3505 3506 /* We must spill something. */ 3507 for (j = f; j < 2; j++) { 3508 TCGRegSet set = reg_ct[j]; 3509 3510 if (tcg_regset_single(set)) { 3511 /* One register in the set. 
*/
3512 TCGReg reg = tcg_regset_first(set);
3513 tcg_reg_free(s, reg, allocated_regs);
3514 return reg;
3515 } else {
3516 for (i = 0; i < n; i++) {
3517 TCGReg reg = order[i];
3518 if (tcg_regset_test_reg(set, reg)) {
3519 tcg_reg_free(s, reg, allocated_regs);
3520 return reg;
3521 }
3522 }
3523 }
3524 }
3525
3526 tcg_abort();
3527 }
3528
3529 /* Make sure the temporary is in a register. If needed, allocate the register
3530 from DESIRED while avoiding ALLOCATED. */
3531 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3532 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3533 {
3534 TCGReg reg;
3535
3536 switch (ts->val_type) {
3537 case TEMP_VAL_REG:
3538 return;
3539 case TEMP_VAL_CONST:
3540 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3541 preferred_regs, ts->indirect_base);
3542 if (ts->type <= TCG_TYPE_I64) {
3543 tcg_out_movi(s, ts->type, reg, ts->val);
3544 } else {
3545 uint64_t val = ts->val;
3546 MemOp vece = MO_64;
3547
3548 /*
3549 * Find the minimal vector element that matches the constant.
3550 * The targets will, in general, have to do this search anyway,
3551 * so do it generically here.
3552 */
3553 if (val == dup_const(MO_8, val)) {
3554 vece = MO_8;
3555 } else if (val == dup_const(MO_16, val)) {
3556 vece = MO_16;
3557 } else if (val == dup_const(MO_32, val)) {
3558 vece = MO_32;
3559 }
3560
3561 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3562 }
3563 ts->mem_coherent = 0;
3564 break;
3565 case TEMP_VAL_MEM:
3566 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3567 preferred_regs, ts->indirect_base);
3568 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3569 ts->mem_coherent = 1;
3570 break;
3571 case TEMP_VAL_DEAD:
3572 default:
3573 tcg_abort();
3574 }
3575 ts->reg = reg;
3576 ts->val_type = TEMP_VAL_REG;
3577 s->reg_to_temp[reg] = ts;
3578 }
3579
3580 /* Save a temporary to memory. 'allocated_regs' is used in case a
3581 temporary register needs to be allocated to store a constant. */
3582 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3583 {
3584 /* The liveness analysis already ensures that globals are back
3585 in memory. Keep a tcg_debug_assert for safety. */
3586 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3587 }
3588
3589 /* save globals to their canonical location and assume they can be
3590 modified by the following code. 'allocated_regs' is used in case a
3591 temporary register needs to be allocated to store a constant. */
3592 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3593 {
3594 int i, n;
3595
3596 for (i = 0, n = s->nb_globals; i < n; i++) {
3597 temp_save(s, &s->temps[i], allocated_regs);
3598 }
3599 }
3600
3601 /* sync globals to their canonical location and assume they can be
3602 read by the following code. 'allocated_regs' is used in case a
3603 temporary register needs to be allocated to store a constant. */
3604 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3605 {
3606 int i, n;
3607
3608 for (i = 0, n = s->nb_globals; i < n; i++) {
3609 TCGTemp *ts = &s->temps[i];
3610 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3611 || ts->kind == TEMP_FIXED
3612 || ts->mem_coherent);
3613 }
3614 }
3615
3616 /* at the end of a basic block, we assume all temporaries are dead and
3617 all globals are stored at their canonical location.
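   Concretely, the function below saves TEMP_LOCAL temps back to their
   stack slots, asserts that TEMP_NORMAL temps are already dead and that
   TEMP_CONST temps hold no register, and then calls save_globals().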
*/ 3618 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3619 { 3620 int i; 3621 3622 for (i = s->nb_globals; i < s->nb_temps; i++) { 3623 TCGTemp *ts = &s->temps[i]; 3624 3625 switch (ts->kind) { 3626 case TEMP_LOCAL: 3627 temp_save(s, ts, allocated_regs); 3628 break; 3629 case TEMP_NORMAL: 3630 /* The liveness analysis already ensures that temps are dead. 3631 Keep an tcg_debug_assert for safety. */ 3632 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3633 break; 3634 case TEMP_CONST: 3635 /* Similarly, we should have freed any allocated register. */ 3636 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3637 break; 3638 default: 3639 g_assert_not_reached(); 3640 } 3641 } 3642 3643 save_globals(s, allocated_regs); 3644 } 3645 3646 /* 3647 * At a conditional branch, we assume all temporaries are dead and 3648 * all globals and local temps are synced to their location. 3649 */ 3650 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3651 { 3652 sync_globals(s, allocated_regs); 3653 3654 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3655 TCGTemp *ts = &s->temps[i]; 3656 /* 3657 * The liveness analysis already ensures that temps are dead. 3658 * Keep tcg_debug_asserts for safety. 3659 */ 3660 switch (ts->kind) { 3661 case TEMP_LOCAL: 3662 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3663 break; 3664 case TEMP_NORMAL: 3665 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3666 break; 3667 case TEMP_CONST: 3668 break; 3669 default: 3670 g_assert_not_reached(); 3671 } 3672 } 3673 } 3674 3675 /* 3676 * Specialized code generation for INDEX_op_mov_* with a constant. 3677 */ 3678 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3679 tcg_target_ulong val, TCGLifeData arg_life, 3680 TCGRegSet preferred_regs) 3681 { 3682 /* ENV should not be modified. */ 3683 tcg_debug_assert(!temp_readonly(ots)); 3684 3685 /* The movi is not explicitly generated here. */ 3686 if (ots->val_type == TEMP_VAL_REG) { 3687 s->reg_to_temp[ots->reg] = NULL; 3688 } 3689 ots->val_type = TEMP_VAL_CONST; 3690 ots->val = val; 3691 ots->mem_coherent = 0; 3692 if (NEED_SYNC_ARG(0)) { 3693 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3694 } else if (IS_DEAD_ARG(0)) { 3695 temp_dead(s, ots); 3696 } 3697 } 3698 3699 /* 3700 * Specialized code generation for INDEX_op_mov_*. 3701 */ 3702 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3703 { 3704 const TCGLifeData arg_life = op->life; 3705 TCGRegSet allocated_regs, preferred_regs; 3706 TCGTemp *ts, *ots; 3707 TCGType otype, itype; 3708 3709 allocated_regs = s->reserved_regs; 3710 preferred_regs = op->output_pref[0]; 3711 ots = arg_temp(op->args[0]); 3712 ts = arg_temp(op->args[1]); 3713 3714 /* ENV should not be modified. */ 3715 tcg_debug_assert(!temp_readonly(ots)); 3716 3717 /* Note that otype != itype for no-op truncation. */ 3718 otype = ots->type; 3719 itype = ts->type; 3720 3721 if (ts->val_type == TEMP_VAL_CONST) { 3722 /* propagate constant or generate sti */ 3723 tcg_target_ulong val = ts->val; 3724 if (IS_DEAD_ARG(1)) { 3725 temp_dead(s, ts); 3726 } 3727 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3728 return; 3729 } 3730 3731 /* If the source value is in memory we're going to be forced 3732 to have it in a register in order to perform the copy. Copy 3733 the SOURCE value into its own register first, that way we 3734 don't have to reload SOURCE the next time it is used. 
*/ 3735 if (ts->val_type == TEMP_VAL_MEM) { 3736 temp_load(s, ts, tcg_target_available_regs[itype], 3737 allocated_regs, preferred_regs); 3738 } 3739 3740 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3741 if (IS_DEAD_ARG(0)) { 3742 /* mov to a non-saved dead register makes no sense (even with 3743 liveness analysis disabled). */ 3744 tcg_debug_assert(NEED_SYNC_ARG(0)); 3745 if (!ots->mem_allocated) { 3746 temp_allocate_frame(s, ots); 3747 } 3748 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3749 if (IS_DEAD_ARG(1)) { 3750 temp_dead(s, ts); 3751 } 3752 temp_dead(s, ots); 3753 } else { 3754 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3755 /* the mov can be suppressed */ 3756 if (ots->val_type == TEMP_VAL_REG) { 3757 s->reg_to_temp[ots->reg] = NULL; 3758 } 3759 ots->reg = ts->reg; 3760 temp_dead(s, ts); 3761 } else { 3762 if (ots->val_type != TEMP_VAL_REG) { 3763 /* When allocating a new register, make sure to not spill the 3764 input one. */ 3765 tcg_regset_set_reg(allocated_regs, ts->reg); 3766 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3767 allocated_regs, preferred_regs, 3768 ots->indirect_base); 3769 } 3770 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3771 /* 3772 * Cross register class move not supported. 3773 * Store the source register into the destination slot 3774 * and leave the destination temp as TEMP_VAL_MEM. 3775 */ 3776 assert(!temp_readonly(ots)); 3777 if (!ts->mem_allocated) { 3778 temp_allocate_frame(s, ots); 3779 } 3780 tcg_out_st(s, ts->type, ts->reg, 3781 ots->mem_base->reg, ots->mem_offset); 3782 ots->mem_coherent = 1; 3783 temp_free_or_dead(s, ots, -1); 3784 return; 3785 } 3786 } 3787 ots->val_type = TEMP_VAL_REG; 3788 ots->mem_coherent = 0; 3789 s->reg_to_temp[ots->reg] = ots; 3790 if (NEED_SYNC_ARG(0)) { 3791 temp_sync(s, ots, allocated_regs, 0, 0); 3792 } 3793 } 3794 } 3795 3796 /* 3797 * Specialized code generation for INDEX_op_dup_vec. 3798 */ 3799 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3800 { 3801 const TCGLifeData arg_life = op->life; 3802 TCGRegSet dup_out_regs, dup_in_regs; 3803 TCGTemp *its, *ots; 3804 TCGType itype, vtype; 3805 intptr_t endian_fixup; 3806 unsigned vece; 3807 bool ok; 3808 3809 ots = arg_temp(op->args[0]); 3810 its = arg_temp(op->args[1]); 3811 3812 /* ENV should not be modified. */ 3813 tcg_debug_assert(!temp_readonly(ots)); 3814 3815 itype = its->type; 3816 vece = TCGOP_VECE(op); 3817 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3818 3819 if (its->val_type == TEMP_VAL_CONST) { 3820 /* Propagate constant via movi -> dupi. */ 3821 tcg_target_ulong val = its->val; 3822 if (IS_DEAD_ARG(1)) { 3823 temp_dead(s, its); 3824 } 3825 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3826 return; 3827 } 3828 3829 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3830 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3831 3832 /* Allocate the output register now. */ 3833 if (ots->val_type != TEMP_VAL_REG) { 3834 TCGRegSet allocated_regs = s->reserved_regs; 3835 3836 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3837 /* Make sure to not spill the input register. 
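   Adding its->reg to allocated_regs below keeps tcg_reg_alloc() from
   choosing that register for the output and spilling the live input.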
*/
3838 tcg_regset_set_reg(allocated_regs, its->reg);
3839 }
3840 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3841 op->output_pref[0], ots->indirect_base);
3842 ots->val_type = TEMP_VAL_REG;
3843 ots->mem_coherent = 0;
3844 s->reg_to_temp[ots->reg] = ots;
3845 }
3846
3847 switch (its->val_type) {
3848 case TEMP_VAL_REG:
3849 /*
3850 * The dup constraints must be broad, covering all possible VECE.
3851 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3852 * to fail, indicating that extra moves are required for that case.
3853 */
3854 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3855 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3856 goto done;
3857 }
3858 /* Try again from memory or a vector input register. */
3859 }
3860 if (!its->mem_coherent) {
3861 /*
3862 * The input register is not synced, and so an extra store
3863 * would be required to use memory. Attempt an integer-vector
3864 * register move first. We do not have a TCGRegSet for this.
3865 */
3866 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3867 break;
3868 }
3869 /* Sync the temp back to its slot and load from there. */
3870 temp_sync(s, its, s->reserved_regs, 0, 0);
3871 }
3872 /* fall through */
3873
3874 case TEMP_VAL_MEM:
3875 #ifdef HOST_WORDS_BIGENDIAN
3876 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3877 endian_fixup -= 1 << vece;
3878 #else
3879 endian_fixup = 0;
3880 #endif
3881 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3882 its->mem_offset + endian_fixup)) {
3883 goto done;
3884 }
3885 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3886 break;
3887
3888 default:
3889 g_assert_not_reached();
3890 }
3891
3892 /* We now have a vector input register, so dup must succeed. */
3893 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3894 tcg_debug_assert(ok);
3895
3896 done:
3897 if (IS_DEAD_ARG(1)) {
3898 temp_dead(s, its);
3899 }
3900 if (NEED_SYNC_ARG(0)) {
3901 temp_sync(s, ots, s->reserved_regs, 0, 0);
3902 }
3903 if (IS_DEAD_ARG(0)) {
3904 temp_dead(s, ots);
3905 }
3906 }
3907
3908 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3909 {
3910 const TCGLifeData arg_life = op->life;
3911 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3912 TCGRegSet i_allocated_regs;
3913 TCGRegSet o_allocated_regs;
3914 int i, k, nb_iargs, nb_oargs;
3915 TCGReg reg;
3916 TCGArg arg;
3917 const TCGArgConstraint *arg_ct;
3918 TCGTemp *ts;
3919 TCGArg new_args[TCG_MAX_OP_ARGS];
3920 int const_args[TCG_MAX_OP_ARGS];
3921
3922 nb_oargs = def->nb_oargs;
3923 nb_iargs = def->nb_iargs;
3924
3925 /* copy constants */
3926 memcpy(new_args + nb_oargs + nb_iargs,
3927 op->args + nb_oargs + nb_iargs,
3928 sizeof(TCGArg) * def->nb_cargs);
3929
3930 i_allocated_regs = s->reserved_regs;
3931 o_allocated_regs = s->reserved_regs;
3932
3933 /* satisfy input constraints */
3934 for (k = 0; k < nb_iargs; k++) {
3935 TCGRegSet i_preferred_regs, o_preferred_regs;
3936
3937 i = def->args_ct[nb_oargs + k].sort_index;
3938 arg = op->args[i];
3939 arg_ct = &def->args_ct[i];
3940 ts = arg_temp(arg);
3941
3942 if (ts->val_type == TEMP_VAL_CONST
3943 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3944 /* constant is OK for instruction */
3945 const_args[i] = 1;
3946 new_args[i] = ts->val;
3947 continue;
3948 }
3949
3950 i_preferred_regs = o_preferred_regs = 0;
3951 if (arg_ct->ialias) {
3952 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3953
3954 /*
3955 * If the input is readonly, then it cannot also be an
3956 * output and
aliased to itself. If the input is not 3957 * dead after the instruction, we must allocate a new 3958 * register and move it. 3959 */ 3960 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 3961 goto allocate_in_reg; 3962 } 3963 3964 /* 3965 * Check if the current register has already been allocated 3966 * for another input aliased to an output. 3967 */ 3968 if (ts->val_type == TEMP_VAL_REG) { 3969 reg = ts->reg; 3970 for (int k2 = 0; k2 < k; k2++) { 3971 int i2 = def->args_ct[nb_oargs + k2].sort_index; 3972 if (def->args_ct[i2].ialias && reg == new_args[i2]) { 3973 goto allocate_in_reg; 3974 } 3975 } 3976 } 3977 i_preferred_regs = o_preferred_regs; 3978 } 3979 3980 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); 3981 reg = ts->reg; 3982 3983 if (!tcg_regset_test_reg(arg_ct->regs, reg)) { 3984 allocate_in_reg: 3985 /* 3986 * Allocate a new register matching the constraint 3987 * and move the temporary register into it. 3988 */ 3989 temp_load(s, ts, tcg_target_available_regs[ts->type], 3990 i_allocated_regs, 0); 3991 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, 3992 o_preferred_regs, ts->indirect_base); 3993 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3994 /* 3995 * Cross register class move not supported. Sync the 3996 * temp back to its slot and load from there. 3997 */ 3998 temp_sync(s, ts, i_allocated_regs, 0, 0); 3999 tcg_out_ld(s, ts->type, reg, 4000 ts->mem_base->reg, ts->mem_offset); 4001 } 4002 } 4003 new_args[i] = reg; 4004 const_args[i] = 0; 4005 tcg_regset_set_reg(i_allocated_regs, reg); 4006 } 4007 4008 /* mark dead temporaries and free the associated registers */ 4009 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4010 if (IS_DEAD_ARG(i)) { 4011 temp_dead(s, arg_temp(op->args[i])); 4012 } 4013 } 4014 4015 if (def->flags & TCG_OPF_COND_BRANCH) { 4016 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4017 } else if (def->flags & TCG_OPF_BB_END) { 4018 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4019 } else { 4020 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4021 /* XXX: permit generic clobber register list ? */ 4022 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4023 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4024 tcg_reg_free(s, i, i_allocated_regs); 4025 } 4026 } 4027 } 4028 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4029 /* sync globals if the op has side effects and might trigger 4030 an exception. */ 4031 sync_globals(s, i_allocated_regs); 4032 } 4033 4034 /* satisfy the output constraints */ 4035 for(k = 0; k < nb_oargs; k++) { 4036 i = def->args_ct[k].sort_index; 4037 arg = op->args[i]; 4038 arg_ct = &def->args_ct[i]; 4039 ts = arg_temp(arg); 4040 4041 /* ENV should not be modified. */ 4042 tcg_debug_assert(!temp_readonly(ts)); 4043 4044 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4045 reg = new_args[arg_ct->alias_index]; 4046 } else if (arg_ct->newreg) { 4047 reg = tcg_reg_alloc(s, arg_ct->regs, 4048 i_allocated_regs | o_allocated_regs, 4049 op->output_pref[k], ts->indirect_base); 4050 } else { 4051 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4052 op->output_pref[k], ts->indirect_base); 4053 } 4054 tcg_regset_set_reg(o_allocated_regs, reg); 4055 if (ts->val_type == TEMP_VAL_REG) { 4056 s->reg_to_temp[ts->reg] = NULL; 4057 } 4058 ts->val_type = TEMP_VAL_REG; 4059 ts->reg = reg; 4060 /* 4061 * Temp value is modified, so the value kept in memory is 4062 * potentially not the same. 
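 * Clearing mem_coherent below forces a later temp_sync() to store the
 * register back instead of assuming the stack slot is still up to date.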

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
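
/*
 * Example of the constant promotion performed by tcg_reg_alloc_dup2()
 * below (illustrative values): a dup2_vec of the 32-bit constants
 * 0x01010101 / 0x01010101 forms the 64-bit value 0x0101010101010101,
 * which equals dup_const(MO_8, 0x01), so it is emitted as a single byte
 * splat (dupi_vec with vece == MO_8) instead of a two-register dup2.
 */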

static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
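
/*
 * Illustrative example of the stack-argument layout computed below: with
 * ARRAY_SIZE(tcg_target_call_iarg_regs) == 6 and a helper taking 8
 * register-sized arguments on a downward-growing stack, arguments 6 and 7
 * are stored at TCG_TARGET_CALL_STACK_OFFSET + 0 and + 8 (for a 64-bit
 * tcg_target_long) before the first 6 arguments are loaded into the
 * calling-convention registers.
 */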

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for (i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

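/*
 * Usage note (illustrative): profiling data is kept per TCGContext, of
 * which there is one per translating thread (e.g. one per vCPU thread
 * under MTTCG).  tcg_profile_snapshot(&prof, true, false) therefore sums
 * the scalar counters across all contexts into a zeroed @prof, while
 * tcg_profile_snapshot(&prof, false, true) aggregates only the
 * per-opcode table used by tcg_dump_op_count().
 */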
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif
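
    /*
     * Summary of the IR passes run below: tcg_optimize() performs
     * constant folding and similar simplifications (when
     * USE_TCG_OPTIMIZATIONS is defined), reachable_code_pass() removes
     * ops that can never execute, liveness_pass_1() computes the
     * dead/sync flags consumed by the register allocator, and, only when
     * indirect temps are in use, liveness_pass_2() rewrites indirect
     * temps as direct ones, after which liveness is recomputed.
     */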

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
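
/*
 * Return-value convention of tcg_gen_code(), for reference: on success
 * the result is the number of bytes of host code emitted.  A negative
 * value means translation must be retried by the caller: -1 when the
 * host code buffer ran past the high-water mark, and -2 when the block
 * became too large for the 16-bit insn-offset table or relocations could
 * not be resolved.  An illustrative (hypothetical) caller:
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb);
 *     if (gen_code_size < 0) {
 *         goto retry_with_smaller_tb_or_flushed_buffer;
 *     }
 */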

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif
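
/*
 * Practical effect of the GDB hooks below (illustrative): when
 * ELF_HOST_MACHINE is defined, a gdb attached to QEMU receives a small
 * in-memory ELF image describing code_gen_buffer, so a backtrace that
 * passes through generated code typically shows a code_gen_buffer ()
 * frame instead of "??", and the registered .debug_frame lets gdb unwind
 * through it.
 */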

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;
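
    /*
     * When DEBUG_JIT is defined, the block below dumps the synthesized
     * image so it can be inspected with standard tools, for example:
     *
     *     readelf -S /tmp/qemu.jit
     *     readelf --debug-dump=frames /tmp/qemu.jit
     *
     * (illustrative commands; the dump path comes from the code below).
     */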
#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif