/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
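
/*
 * Note (editorial sketch, not functional code): the two structs above mirror
 * the DWARF .debug_frame CIE and FDE record layout; tcg_register_jit_int(),
 * defined later in this file, packages the backend-provided debug_frame data
 * into a small in-memory ELF image so that GDB's JIT registration interface
 * can unwind through generated code.
 */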
/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
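/*
 * For illustration (matching tcg_region_trees_init() and
 * tc_ptr_to_region_tree() below): with tree_size rounded up to the dcache
 * line size, the i-th tree lives at
 *     (struct tcg_region_tree *)(region_trees + i * tree_size)
 * so that trees belonging to different regions never share a cache line.
 */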
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
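
/*
 * To summarize the flow above: a branch emitted while its TCGLabel still has
 * no value records a TCGRelocation via tcg_out_reloc(); once the whole TB has
 * been emitted and every label has been given a value, tcg_resolve_relocs()
 * walks those records and lets the backend's patch_reloc() rewrite each
 * branch with the now-known target address.
 */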
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}
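
/*
 * Usage sketch (illustrative, 'host_pc' is a hypothetical value): given a
 * host code address, e.g. the return address captured when an exception is
 * raised from generated code, the owning TB is recovered with
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 * which only needs to search the tree of the region containing host_pc.
 */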
/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
583 */ 584 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 585 { 586 return tcg_region_alloc__locked(s); 587 } 588 589 /* Call from a safe-work context */ 590 void tcg_region_reset_all(void) 591 { 592 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 593 unsigned int i; 594 595 qemu_mutex_lock(®ion.lock); 596 region.current = 0; 597 region.agg_size_full = 0; 598 599 for (i = 0; i < n_ctxs; i++) { 600 TCGContext *s = atomic_read(&tcg_ctxs[i]); 601 bool err = tcg_region_initial_alloc__locked(s); 602 603 g_assert(!err); 604 } 605 qemu_mutex_unlock(®ion.lock); 606 607 tcg_region_tree_reset_all(); 608 } 609 610 #ifdef CONFIG_USER_ONLY 611 static size_t tcg_n_regions(void) 612 { 613 return 1; 614 } 615 #else 616 /* 617 * It is likely that some vCPUs will translate more code than others, so we 618 * first try to set more regions than max_cpus, with those regions being of 619 * reasonable size. If that's not possible we make do by evenly dividing 620 * the code_gen_buffer among the vCPUs. 621 */ 622 static size_t tcg_n_regions(void) 623 { 624 size_t i; 625 626 /* Use a single region if all we have is one vCPU thread */ 627 #if !defined(CONFIG_USER_ONLY) 628 MachineState *ms = MACHINE(qdev_get_machine()); 629 unsigned int max_cpus = ms->smp.max_cpus; 630 #endif 631 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 632 return 1; 633 } 634 635 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 636 for (i = 8; i > 0; i--) { 637 size_t regions_per_thread = i; 638 size_t region_size; 639 640 region_size = tcg_init_ctx.code_gen_buffer_size; 641 region_size /= max_cpus * regions_per_thread; 642 643 if (region_size >= 2 * 1024u * 1024) { 644 return max_cpus * regions_per_thread; 645 } 646 } 647 /* If we can't, then just allocate one region per vCPU thread */ 648 return max_cpus; 649 } 650 #endif 651 652 /* 653 * Initializes region partitioning. 654 * 655 * Called at init time from the parent thread (i.e. the one calling 656 * tcg_context_init), after the target's TCG globals have been set. 657 * 658 * Region partitioning works by splitting code_gen_buffer into separate regions, 659 * and then assigning regions to TCG threads so that the threads can translate 660 * code in parallel without synchronization. 661 * 662 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 663 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 664 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 665 * must have been parsed before calling this function, since it calls 666 * qemu_tcg_mttcg_enabled(). 667 * 668 * In user-mode we use a single region. Having multiple regions in user-mode 669 * is not supported, because the number of vCPU threads (recall that each thread 670 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 671 * OS, and usually this number is huge (tens of thousands is not uncommon). 672 * Thus, given this large bound on the number of vCPU threads and the fact 673 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 674 * that the availability of at least one region per vCPU thread. 675 * 676 * However, this user-mode limitation is unlikely to be a significant problem 677 * in practice. Multi-threaded guests share most if not all of their translated 678 * code, which makes parallel code generation less appealing than in softmmu. 
679 */ 680 void tcg_region_init(void) 681 { 682 void *buf = tcg_init_ctx.code_gen_buffer; 683 void *aligned; 684 size_t size = tcg_init_ctx.code_gen_buffer_size; 685 size_t page_size = qemu_real_host_page_size; 686 size_t region_size; 687 size_t n_regions; 688 size_t i; 689 690 n_regions = tcg_n_regions(); 691 692 /* The first region will be 'aligned - buf' bytes larger than the others */ 693 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 694 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 695 /* 696 * Make region_size a multiple of page_size, using aligned as the start. 697 * As a result of this we might end up with a few extra pages at the end of 698 * the buffer; we will assign those to the last region. 699 */ 700 region_size = (size - (aligned - buf)) / n_regions; 701 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 702 703 /* A region must have at least 2 pages; one code, one guard */ 704 g_assert(region_size >= 2 * page_size); 705 706 /* init the region struct */ 707 qemu_mutex_init(®ion.lock); 708 region.n = n_regions; 709 region.size = region_size - page_size; 710 region.stride = region_size; 711 region.start = buf; 712 region.start_aligned = aligned; 713 /* page-align the end, since its last page will be a guard page */ 714 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 715 /* account for that last guard page */ 716 region.end -= page_size; 717 718 /* set guard pages */ 719 for (i = 0; i < region.n; i++) { 720 void *start, *end; 721 int rc; 722 723 tcg_region_bounds(i, &start, &end); 724 rc = qemu_mprotect_none(end, page_size); 725 g_assert(!rc); 726 } 727 728 tcg_region_trees_init(); 729 730 /* In user-mode we support only one ctx, so do the initial allocation now */ 731 #ifdef CONFIG_USER_ONLY 732 { 733 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 734 735 g_assert(!err); 736 } 737 #endif 738 } 739 740 /* 741 * All TCG threads except the parent (i.e. the one that called tcg_context_init 742 * and registered the target's TCG globals) must register with this function 743 * before initiating translation. 744 * 745 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 746 * of tcg_region_init() for the reasoning behind this. 747 * 748 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 749 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 750 * is not used anymore for translation once this function is called. 751 * 752 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 753 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 754 */ 755 #ifdef CONFIG_USER_ONLY 756 void tcg_register_thread(void) 757 { 758 tcg_ctx = &tcg_init_ctx; 759 } 760 #else 761 void tcg_register_thread(void) 762 { 763 MachineState *ms = MACHINE(qdev_get_machine()); 764 TCGContext *s = g_malloc(sizeof(*s)); 765 unsigned int i, n; 766 bool err; 767 768 *s = tcg_init_ctx; 769 770 /* Relink mem_base. 
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"
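
/*
 * helper-proto.h declares every helper; helper-tcg.h (included just below)
 * expands the same DEF_HELPER_* macros into one TCGHelperInfo entry per
 * helper, so that tcg_gen_callN() and tcg_find_helper() can look a helper up
 * by its function pointer via helper_table.
 */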
"exec/helper-proto.h" 910 911 static const TCGHelperInfo all_helpers[] = { 912 #include "exec/helper-tcg.h" 913 }; 914 static GHashTable *helper_table; 915 916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 917 static void process_op_defs(TCGContext *s); 918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 919 TCGReg reg, const char *name); 920 921 void tcg_context_init(TCGContext *s) 922 { 923 int op, total_args, n, i; 924 TCGOpDef *def; 925 TCGArgConstraint *args_ct; 926 int *sorted_args; 927 TCGTemp *ts; 928 929 memset(s, 0, sizeof(*s)); 930 s->nb_globals = 0; 931 932 /* Count total number of arguments and allocate the corresponding 933 space */ 934 total_args = 0; 935 for(op = 0; op < NB_OPS; op++) { 936 def = &tcg_op_defs[op]; 937 n = def->nb_iargs + def->nb_oargs; 938 total_args += n; 939 } 940 941 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 942 sorted_args = g_malloc(sizeof(int) * total_args); 943 944 for(op = 0; op < NB_OPS; op++) { 945 def = &tcg_op_defs[op]; 946 def->args_ct = args_ct; 947 def->sorted_args = sorted_args; 948 n = def->nb_iargs + def->nb_oargs; 949 sorted_args += n; 950 args_ct += n; 951 } 952 953 /* Register helpers. */ 954 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 955 helper_table = g_hash_table_new(NULL, NULL); 956 957 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 958 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 959 (gpointer)&all_helpers[i]); 960 } 961 962 tcg_target_init(s); 963 process_op_defs(s); 964 965 /* Reverse the order of the saved registers, assuming they're all at 966 the start of tcg_target_reg_alloc_order. */ 967 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 968 int r = tcg_target_reg_alloc_order[n]; 969 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 970 break; 971 } 972 } 973 for (i = 0; i < n; ++i) { 974 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 975 } 976 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 977 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 978 } 979 980 tcg_ctx = s; 981 /* 982 * In user-mode we simply share the init context among threads, since we 983 * use a single region. See the documentation tcg_region_init() for the 984 * reasoning behind this. 985 * In softmmu we will have at most max_cpus TCG threads. 986 */ 987 #ifdef CONFIG_USER_ONLY 988 tcg_ctxs = &tcg_ctx; 989 n_tcg_ctxs = 1; 990 #else 991 MachineState *ms = MACHINE(qdev_get_machine()); 992 unsigned int max_cpus = ms->smp.max_cpus; 993 tcg_ctxs = g_new(TCGContext *, max_cpus); 994 #endif 995 996 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 997 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 998 cpu_env = temp_tcgv_ptr(ts); 999 } 1000 1001 /* 1002 * Allocate TBs right before their corresponding translated code, making 1003 * sure that TBs and code are on different cache lines. 
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
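    /*
     * free_temps[] is indexed by base type, with a parallel set of entries
     * (offset by TCG_TYPE_COUNT) for local temps; see the
     *     k = type + (temp_local ? TCG_TYPE_COUNT : 0)
     * computation in tcg_temp_new_internal()/tcg_temp_free_internal() below.
     */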
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
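/*
 * For example: if a backend leaves TCG_TARGET_HAS_ctpop_i32 at 0, asking
 * about INDEX_op_ctpop_i32 here returns false, and the generic expansion in
 * tcg-op.c is emitted instead of the native opcode.
 */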
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
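    /*
     * Reminder of the sizemask layout used throughout this function (it is
     * built by the DEF_HELPER_* machinery): bit 0 is set when the return
     * value is 64-bit; for argument i, bit (i+1)*2 marks a 64-bit argument
     * and bit (i+1)*2 + 1 marks a signed one.
     */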
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;
    TCGTemp *ts;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
    }
    for (n = s->nb_temps; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    if (ts->temp_global) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 2016 8 << TCGOP_VECE(op)); 2017 } 2018 2019 k = 0; 2020 for (i = 0; i < nb_oargs; i++) { 2021 if (k != 0) { 2022 col += qemu_log(","); 2023 } 2024 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2025 op->args[k++])); 2026 } 2027 for (i = 0; i < nb_iargs; i++) { 2028 if (k != 0) { 2029 col += qemu_log(","); 2030 } 2031 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2032 op->args[k++])); 2033 } 2034 switch (c) { 2035 case INDEX_op_brcond_i32: 2036 case INDEX_op_setcond_i32: 2037 case INDEX_op_movcond_i32: 2038 case INDEX_op_brcond2_i32: 2039 case INDEX_op_setcond2_i32: 2040 case INDEX_op_brcond_i64: 2041 case INDEX_op_setcond_i64: 2042 case INDEX_op_movcond_i64: 2043 case INDEX_op_cmp_vec: 2044 case INDEX_op_cmpsel_vec: 2045 if (op->args[k] < ARRAY_SIZE(cond_name) 2046 && cond_name[op->args[k]]) { 2047 col += qemu_log(",%s", cond_name[op->args[k++]]); 2048 } else { 2049 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2050 } 2051 i = 1; 2052 break; 2053 case INDEX_op_qemu_ld_i32: 2054 case INDEX_op_qemu_st_i32: 2055 case INDEX_op_qemu_ld_i64: 2056 case INDEX_op_qemu_st_i64: 2057 { 2058 TCGMemOpIdx oi = op->args[k++]; 2059 TCGMemOp op = get_memop(oi); 2060 unsigned ix = get_mmuidx(oi); 2061 2062 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2063 col += qemu_log(",$0x%x,%u", op, ix); 2064 } else { 2065 const char *s_al, *s_op; 2066 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2067 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2068 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2069 } 2070 i = 1; 2071 } 2072 break; 2073 default: 2074 i = 0; 2075 break; 2076 } 2077 switch (c) { 2078 case INDEX_op_set_label: 2079 case INDEX_op_br: 2080 case INDEX_op_brcond_i32: 2081 case INDEX_op_brcond_i64: 2082 case INDEX_op_brcond2_i32: 2083 col += qemu_log("%s$L%d", k ? "," : "", 2084 arg_label(op->args[k])->id); 2085 i++, k++; 2086 break; 2087 default: 2088 break; 2089 } 2090 for (; i < nb_cargs; i++, k++) { 2091 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2092 } 2093 } 2094 2095 if (have_prefs || op->life) { 2096 for (; col < 40; ++col) { 2097 putc(' ', qemu_logfile); 2098 } 2099 } 2100 2101 if (op->life) { 2102 unsigned life = op->life; 2103 2104 if (life & (SYNC_ARG * 3)) { 2105 qemu_log(" sync:"); 2106 for (i = 0; i < 2; ++i) { 2107 if (life & (SYNC_ARG << i)) { 2108 qemu_log(" %d", i); 2109 } 2110 } 2111 } 2112 life /= DEAD_ARG; 2113 if (life) { 2114 qemu_log(" dead:"); 2115 for (i = 0; life; ++i, life >>= 1) { 2116 if (life & 1) { 2117 qemu_log(" %d", i); 2118 } 2119 } 2120 } 2121 } 2122 2123 if (have_prefs) { 2124 for (i = 0; i < nb_oargs; ++i) { 2125 TCGRegSet set = op->output_pref[i]; 2126 2127 if (i == 0) { 2128 qemu_log(" pref="); 2129 } else { 2130 qemu_log(","); 2131 } 2132 if (set == 0) { 2133 qemu_log("none"); 2134 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2135 qemu_log("all"); 2136 #ifdef CONFIG_DEBUG_TCG 2137 } else if (tcg_regset_single(set)) { 2138 TCGReg reg = tcg_regset_first(set); 2139 qemu_log("%s", tcg_target_reg_names[reg]); 2140 #endif 2141 } else if (TCG_TARGET_NB_REGS <= 32) { 2142 qemu_log("%#x", (uint32_t)set); 2143 } else { 2144 qemu_log("%#" PRIx64, (uint64_t)set); 2145 } 2146 } 2147 } 2148 2149 qemu_log("\n"); 2150 } 2151 } 2152 2153 /* we give more priority to constraints with less registers */ 2154 static int get_constraint_priority(const TCGOpDef *def, int k) 2155 { 2156 const TCGArgConstraint *arg_ct; 2157 2158 int i, n; 2159 arg_ct = &def->args_ct[k]; 2160 if (arg_ct->ct & TCG_CT_ALIAS) { 2161 /* an alias is equivalent to a single register */ 2162 n = 1; 2163 } else { 2164 if (!(arg_ct->ct & TCG_CT_REG)) 2165 return 0; 2166 n = 0; 2167 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2168 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 2169 n++; 2170 } 2171 } 2172 return TCG_TARGET_NB_REGS - n + 1; 2173 } 2174 2175 /* sort from highest priority to lowest */ 2176 static void sort_constraints(TCGOpDef *def, int start, int n) 2177 { 2178 int i, j, p1, p2, tmp; 2179 2180 for(i = 0; i < n; i++) 2181 def->sorted_args[start + i] = start + i; 2182 if (n <= 1) 2183 return; 2184 for(i = 0; i < n - 1; i++) { 2185 for(j = i + 1; j < n; j++) { 2186 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 2187 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 2188 if (p1 < p2) { 2189 tmp = def->sorted_args[start + i]; 2190 def->sorted_args[start + i] = def->sorted_args[start + j]; 2191 def->sorted_args[start + j] = tmp; 2192 } 2193 } 2194 } 2195 } 2196 2197 static void process_op_defs(TCGContext *s) 2198 { 2199 TCGOpcode op; 2200 2201 for (op = 0; op < NB_OPS; op++) { 2202 TCGOpDef *def = &tcg_op_defs[op]; 2203 const TCGTargetOpDef *tdefs; 2204 TCGType type; 2205 int i, nb_args; 2206 2207 if (def->flags & TCG_OPF_NOT_PRESENT) { 2208 continue; 2209 } 2210 2211 nb_args = def->nb_iargs + def->nb_oargs; 2212 if (nb_args == 0) { 2213 continue; 2214 } 2215 2216 tdefs = tcg_target_op_def(op); 2217 /* Missing TCGTargetOpDef entry. */ 2218 tcg_debug_assert(tdefs != NULL); 2219 2220 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 2221 for (i = 0; i < nb_args; i++) { 2222 const char *ct_str = tdefs->args_ct_str[i]; 2223 /* Incomplete TCGTargetOpDef entry. */ 2224 tcg_debug_assert(ct_str != NULL); 2225 2226 def->args_ct[i].u.regs = 0; 2227 def->args_ct[i].ct = 0; 2228 while (*ct_str != '\0') { 2229 switch(*ct_str) { 2230 case '0' ... 
'9': 2231 { 2232 int oarg = *ct_str - '0'; 2233 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2234 tcg_debug_assert(oarg < def->nb_oargs); 2235 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 2236 /* TCG_CT_ALIAS is for the output arguments. 2237 The input is tagged with TCG_CT_IALIAS. */ 2238 def->args_ct[i] = def->args_ct[oarg]; 2239 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 2240 def->args_ct[oarg].alias_index = i; 2241 def->args_ct[i].ct |= TCG_CT_IALIAS; 2242 def->args_ct[i].alias_index = oarg; 2243 } 2244 ct_str++; 2245 break; 2246 case '&': 2247 def->args_ct[i].ct |= TCG_CT_NEWREG; 2248 ct_str++; 2249 break; 2250 case 'i': 2251 def->args_ct[i].ct |= TCG_CT_CONST; 2252 ct_str++; 2253 break; 2254 default: 2255 ct_str = target_parse_constraint(&def->args_ct[i], 2256 ct_str, type); 2257 /* Typo in TCGTargetOpDef constraint. */ 2258 tcg_debug_assert(ct_str != NULL); 2259 } 2260 } 2261 } 2262 2263 /* TCGTargetOpDef entry with too much information? */ 2264 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2265 2266 /* sort the constraints (XXX: this is just an heuristic) */ 2267 sort_constraints(def, 0, def->nb_oargs); 2268 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2269 } 2270 } 2271 2272 void tcg_op_remove(TCGContext *s, TCGOp *op) 2273 { 2274 TCGLabel *label; 2275 2276 switch (op->opc) { 2277 case INDEX_op_br: 2278 label = arg_label(op->args[0]); 2279 label->refs--; 2280 break; 2281 case INDEX_op_brcond_i32: 2282 case INDEX_op_brcond_i64: 2283 label = arg_label(op->args[3]); 2284 label->refs--; 2285 break; 2286 case INDEX_op_brcond2_i32: 2287 label = arg_label(op->args[5]); 2288 label->refs--; 2289 break; 2290 default: 2291 break; 2292 } 2293 2294 QTAILQ_REMOVE(&s->ops, op, link); 2295 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2296 s->nb_ops--; 2297 2298 #ifdef CONFIG_PROFILER 2299 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2300 #endif 2301 } 2302 2303 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2304 { 2305 TCGContext *s = tcg_ctx; 2306 TCGOp *op; 2307 2308 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2309 op = tcg_malloc(sizeof(TCGOp)); 2310 } else { 2311 op = QTAILQ_FIRST(&s->free_ops); 2312 QTAILQ_REMOVE(&s->free_ops, op, link); 2313 } 2314 memset(op, 0, offsetof(TCGOp, link)); 2315 op->opc = opc; 2316 s->nb_ops++; 2317 2318 return op; 2319 } 2320 2321 TCGOp *tcg_emit_op(TCGOpcode opc) 2322 { 2323 TCGOp *op = tcg_op_alloc(opc); 2324 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2325 return op; 2326 } 2327 2328 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2329 { 2330 TCGOp *new_op = tcg_op_alloc(opc); 2331 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2332 return new_op; 2333 } 2334 2335 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2336 { 2337 TCGOp *new_op = tcg_op_alloc(opc); 2338 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2339 return new_op; 2340 } 2341 2342 /* Reachable analysis : remove unreachable code. */ 2343 static void reachable_code_pass(TCGContext *s) 2344 { 2345 TCGOp *op, *op_next; 2346 bool dead = false; 2347 2348 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2349 bool remove = dead; 2350 TCGLabel *label; 2351 int call_flags; 2352 2353 switch (op->opc) { 2354 case INDEX_op_set_label: 2355 label = arg_label(op->args[0]); 2356 if (label->refs == 0) { 2357 /* 2358 * While there is an occasional backward branch, virtually 2359 * all branches generated by the translators are forward. 
2360 * Which means that generally we will have already removed 2361 * all references to the label that will be, and there is 2362 * little to be gained by iterating. 2363 */ 2364 remove = true; 2365 } else { 2366 /* Once we see a label, insns become live again. */ 2367 dead = false; 2368 remove = false; 2369 2370 /* 2371 * Optimization can fold conditional branches to unconditional. 2372 * If we find a label with one reference which is preceded by 2373 * an unconditional branch to it, remove both. This needed to 2374 * wait until the dead code in between them was removed. 2375 */ 2376 if (label->refs == 1) { 2377 TCGOp *op_prev = QTAILQ_PREV(op, link); 2378 if (op_prev->opc == INDEX_op_br && 2379 label == arg_label(op_prev->args[0])) { 2380 tcg_op_remove(s, op_prev); 2381 remove = true; 2382 } 2383 } 2384 } 2385 break; 2386 2387 case INDEX_op_br: 2388 case INDEX_op_exit_tb: 2389 case INDEX_op_goto_ptr: 2390 /* Unconditional branches; everything following is dead. */ 2391 dead = true; 2392 break; 2393 2394 case INDEX_op_call: 2395 /* Notice noreturn helper calls, raising exceptions. */ 2396 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; 2397 if (call_flags & TCG_CALL_NO_RETURN) { 2398 dead = true; 2399 } 2400 break; 2401 2402 case INDEX_op_insn_start: 2403 /* Never remove -- we need to keep these for unwind. */ 2404 remove = false; 2405 break; 2406 2407 default: 2408 break; 2409 } 2410 2411 if (remove) { 2412 tcg_op_remove(s, op); 2413 } 2414 } 2415 } 2416 2417 #define TS_DEAD 1 2418 #define TS_MEM 2 2419 2420 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2421 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2422 2423 /* For liveness_pass_1, the register preferences for a given temp. */ 2424 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2425 { 2426 return ts->state_ptr; 2427 } 2428 2429 /* For liveness_pass_1, reset the preferences for a given temp to the 2430 * maximal regset for its type. 2431 */ 2432 static inline void la_reset_pref(TCGTemp *ts) 2433 { 2434 *la_temp_pref(ts) 2435 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2436 } 2437 2438 /* liveness analysis: end of function: all temps are dead, and globals 2439 should be in memory. */ 2440 static void la_func_end(TCGContext *s, int ng, int nt) 2441 { 2442 int i; 2443 2444 for (i = 0; i < ng; ++i) { 2445 s->temps[i].state = TS_DEAD | TS_MEM; 2446 la_reset_pref(&s->temps[i]); 2447 } 2448 for (i = ng; i < nt; ++i) { 2449 s->temps[i].state = TS_DEAD; 2450 la_reset_pref(&s->temps[i]); 2451 } 2452 } 2453 2454 /* liveness analysis: end of basic block: all temps are dead, globals 2455 and local temps should be in memory. */ 2456 static void la_bb_end(TCGContext *s, int ng, int nt) 2457 { 2458 int i; 2459 2460 for (i = 0; i < ng; ++i) { 2461 s->temps[i].state = TS_DEAD | TS_MEM; 2462 la_reset_pref(&s->temps[i]); 2463 } 2464 for (i = ng; i < nt; ++i) { 2465 s->temps[i].state = (s->temps[i].temp_local 2466 ? TS_DEAD | TS_MEM 2467 : TS_DEAD); 2468 la_reset_pref(&s->temps[i]); 2469 } 2470 } 2471 2472 /* liveness analysis: sync globals back to memory. */ 2473 static void la_global_sync(TCGContext *s, int ng) 2474 { 2475 int i; 2476 2477 for (i = 0; i < ng; ++i) { 2478 int state = s->temps[i].state; 2479 s->temps[i].state = state | TS_MEM; 2480 if (state == TS_DEAD) { 2481 /* If the global was previously dead, reset prefs. */ 2482 la_reset_pref(&s->temps[i]); 2483 } 2484 } 2485 } 2486 2487 /* liveness analysis: sync globals back to memory and kill. 
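   Every global becomes TS_DEAD | TS_MEM, so the op that defines it
   (earlier in the TB, processed later in this backward walk) will be
   asked to sync the value to its memory slot and may treat the
   register copy as dead.  Used below for helper calls that may both
   read and write globals.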
*/ 2488 static void la_global_kill(TCGContext *s, int ng) 2489 { 2490 int i; 2491 2492 for (i = 0; i < ng; i++) { 2493 s->temps[i].state = TS_DEAD | TS_MEM; 2494 la_reset_pref(&s->temps[i]); 2495 } 2496 } 2497 2498 /* liveness analysis: note live globals crossing calls. */ 2499 static void la_cross_call(TCGContext *s, int nt) 2500 { 2501 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2502 int i; 2503 2504 for (i = 0; i < nt; i++) { 2505 TCGTemp *ts = &s->temps[i]; 2506 if (!(ts->state & TS_DEAD)) { 2507 TCGRegSet *pset = la_temp_pref(ts); 2508 TCGRegSet set = *pset; 2509 2510 set &= mask; 2511 /* If the combination is not possible, restart. */ 2512 if (set == 0) { 2513 set = tcg_target_available_regs[ts->type] & mask; 2514 } 2515 *pset = set; 2516 } 2517 } 2518 } 2519 2520 /* Liveness analysis : update the opc_arg_life array to tell if a 2521 given input arguments is dead. Instructions updating dead 2522 temporaries are removed. */ 2523 static void liveness_pass_1(TCGContext *s) 2524 { 2525 int nb_globals = s->nb_globals; 2526 int nb_temps = s->nb_temps; 2527 TCGOp *op, *op_prev; 2528 TCGRegSet *prefs; 2529 int i; 2530 2531 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2532 for (i = 0; i < nb_temps; ++i) { 2533 s->temps[i].state_ptr = prefs + i; 2534 } 2535 2536 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2537 la_func_end(s, nb_globals, nb_temps); 2538 2539 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2540 int nb_iargs, nb_oargs; 2541 TCGOpcode opc_new, opc_new2; 2542 bool have_opc_new2; 2543 TCGLifeData arg_life = 0; 2544 TCGTemp *ts; 2545 TCGOpcode opc = op->opc; 2546 const TCGOpDef *def = &tcg_op_defs[opc]; 2547 2548 switch (opc) { 2549 case INDEX_op_call: 2550 { 2551 int call_flags; 2552 int nb_call_regs; 2553 2554 nb_oargs = TCGOP_CALLO(op); 2555 nb_iargs = TCGOP_CALLI(op); 2556 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2557 2558 /* pure functions can be removed if their result is unused */ 2559 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2560 for (i = 0; i < nb_oargs; i++) { 2561 ts = arg_temp(op->args[i]); 2562 if (ts->state != TS_DEAD) { 2563 goto do_not_remove_call; 2564 } 2565 } 2566 goto do_remove; 2567 } 2568 do_not_remove_call: 2569 2570 /* Output args are dead. */ 2571 for (i = 0; i < nb_oargs; i++) { 2572 ts = arg_temp(op->args[i]); 2573 if (ts->state & TS_DEAD) { 2574 arg_life |= DEAD_ARG << i; 2575 } 2576 if (ts->state & TS_MEM) { 2577 arg_life |= SYNC_ARG << i; 2578 } 2579 ts->state = TS_DEAD; 2580 la_reset_pref(ts); 2581 2582 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2583 op->output_pref[i] = 0; 2584 } 2585 2586 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2587 TCG_CALL_NO_READ_GLOBALS))) { 2588 la_global_kill(s, nb_globals); 2589 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2590 la_global_sync(s, nb_globals); 2591 } 2592 2593 /* Record arguments that die in this helper. */ 2594 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2595 ts = arg_temp(op->args[i]); 2596 if (ts && ts->state & TS_DEAD) { 2597 arg_life |= DEAD_ARG << i; 2598 } 2599 } 2600 2601 /* For all live registers, remove call-clobbered prefs. */ 2602 la_cross_call(s, nb_temps); 2603 2604 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2605 2606 /* Input arguments are live for preceding opcodes. 
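   Since the walk is in reverse, a use here makes the temp live for the
   code generated earlier in the TB.  Arguments that die here and will
   be passed in registers have their preference cleared so the loop
   below can seed it with the fixed tcg_target_call_iarg_regs[i] slot;
   stack arguments fall back to any register of the right type.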
*/ 2607 for (i = 0; i < nb_iargs; i++) { 2608 ts = arg_temp(op->args[i + nb_oargs]); 2609 if (ts && ts->state & TS_DEAD) { 2610 /* For those arguments that die, and will be allocated 2611 * in registers, clear the register set for that arg, 2612 * to be filled in below. For args that will be on 2613 * the stack, reset to any available reg. 2614 */ 2615 *la_temp_pref(ts) 2616 = (i < nb_call_regs ? 0 : 2617 tcg_target_available_regs[ts->type]); 2618 ts->state &= ~TS_DEAD; 2619 } 2620 } 2621 2622 /* For each input argument, add its input register to prefs. 2623 If a temp is used once, this produces a single set bit. */ 2624 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2625 ts = arg_temp(op->args[i + nb_oargs]); 2626 if (ts) { 2627 tcg_regset_set_reg(*la_temp_pref(ts), 2628 tcg_target_call_iarg_regs[i]); 2629 } 2630 } 2631 } 2632 break; 2633 case INDEX_op_insn_start: 2634 break; 2635 case INDEX_op_discard: 2636 /* mark the temporary as dead */ 2637 ts = arg_temp(op->args[0]); 2638 ts->state = TS_DEAD; 2639 la_reset_pref(ts); 2640 break; 2641 2642 case INDEX_op_add2_i32: 2643 opc_new = INDEX_op_add_i32; 2644 goto do_addsub2; 2645 case INDEX_op_sub2_i32: 2646 opc_new = INDEX_op_sub_i32; 2647 goto do_addsub2; 2648 case INDEX_op_add2_i64: 2649 opc_new = INDEX_op_add_i64; 2650 goto do_addsub2; 2651 case INDEX_op_sub2_i64: 2652 opc_new = INDEX_op_sub_i64; 2653 do_addsub2: 2654 nb_iargs = 4; 2655 nb_oargs = 2; 2656 /* Test if the high part of the operation is dead, but not 2657 the low part. The result can be optimized to a simple 2658 add or sub. This happens often for x86_64 guest when the 2659 cpu mode is set to 32 bit. */ 2660 if (arg_temp(op->args[1])->state == TS_DEAD) { 2661 if (arg_temp(op->args[0])->state == TS_DEAD) { 2662 goto do_remove; 2663 } 2664 /* Replace the opcode and adjust the args in place, 2665 leaving 3 unused args at the end. */ 2666 op->opc = opc = opc_new; 2667 op->args[1] = op->args[2]; 2668 op->args[2] = op->args[4]; 2669 /* Fall through and mark the single-word operation live. */ 2670 nb_iargs = 2; 2671 nb_oargs = 1; 2672 } 2673 goto do_not_remove; 2674 2675 case INDEX_op_mulu2_i32: 2676 opc_new = INDEX_op_mul_i32; 2677 opc_new2 = INDEX_op_muluh_i32; 2678 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2679 goto do_mul2; 2680 case INDEX_op_muls2_i32: 2681 opc_new = INDEX_op_mul_i32; 2682 opc_new2 = INDEX_op_mulsh_i32; 2683 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2684 goto do_mul2; 2685 case INDEX_op_mulu2_i64: 2686 opc_new = INDEX_op_mul_i64; 2687 opc_new2 = INDEX_op_muluh_i64; 2688 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2689 goto do_mul2; 2690 case INDEX_op_muls2_i64: 2691 opc_new = INDEX_op_mul_i64; 2692 opc_new2 = INDEX_op_mulsh_i64; 2693 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2694 goto do_mul2; 2695 do_mul2: 2696 nb_iargs = 2; 2697 nb_oargs = 2; 2698 if (arg_temp(op->args[1])->state == TS_DEAD) { 2699 if (arg_temp(op->args[0])->state == TS_DEAD) { 2700 /* Both parts of the operation are dead. */ 2701 goto do_remove; 2702 } 2703 /* The high part of the operation is dead; generate the low. */ 2704 op->opc = opc = opc_new; 2705 op->args[1] = op->args[2]; 2706 op->args[2] = op->args[3]; 2707 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2708 /* The low part of the operation is dead; generate the high. */ 2709 op->opc = opc = opc_new2; 2710 op->args[0] = op->args[1]; 2711 op->args[1] = op->args[2]; 2712 op->args[2] = op->args[3]; 2713 } else { 2714 goto do_not_remove; 2715 } 2716 /* Mark the single-word operation live. 
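   The op was rewritten above to its single-output form, roughly:
     mulu2_i32 lo,hi,a,b  with hi dead  ->  mul_i32 lo,a,b
     mulu2_i32 lo,hi,a,b  with lo dead  ->  muluh_i32 hi,a,b
   so only one output remains to be processed by do_not_remove.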
*/ 2717 nb_oargs = 1; 2718 goto do_not_remove; 2719 2720 default: 2721 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2722 nb_iargs = def->nb_iargs; 2723 nb_oargs = def->nb_oargs; 2724 2725 /* Test if the operation can be removed because all 2726 its outputs are dead. We assume that nb_oargs == 0 2727 implies side effects */ 2728 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2729 for (i = 0; i < nb_oargs; i++) { 2730 if (arg_temp(op->args[i])->state != TS_DEAD) { 2731 goto do_not_remove; 2732 } 2733 } 2734 goto do_remove; 2735 } 2736 goto do_not_remove; 2737 2738 do_remove: 2739 tcg_op_remove(s, op); 2740 break; 2741 2742 do_not_remove: 2743 for (i = 0; i < nb_oargs; i++) { 2744 ts = arg_temp(op->args[i]); 2745 2746 /* Remember the preference of the uses that followed. */ 2747 op->output_pref[i] = *la_temp_pref(ts); 2748 2749 /* Output args are dead. */ 2750 if (ts->state & TS_DEAD) { 2751 arg_life |= DEAD_ARG << i; 2752 } 2753 if (ts->state & TS_MEM) { 2754 arg_life |= SYNC_ARG << i; 2755 } 2756 ts->state = TS_DEAD; 2757 la_reset_pref(ts); 2758 } 2759 2760 /* If end of basic block, update. */ 2761 if (def->flags & TCG_OPF_BB_EXIT) { 2762 la_func_end(s, nb_globals, nb_temps); 2763 } else if (def->flags & TCG_OPF_BB_END) { 2764 la_bb_end(s, nb_globals, nb_temps); 2765 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2766 la_global_sync(s, nb_globals); 2767 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2768 la_cross_call(s, nb_temps); 2769 } 2770 } 2771 2772 /* Record arguments that die in this opcode. */ 2773 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2774 ts = arg_temp(op->args[i]); 2775 if (ts->state & TS_DEAD) { 2776 arg_life |= DEAD_ARG << i; 2777 } 2778 } 2779 2780 /* Input arguments are live for preceding opcodes. */ 2781 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2782 ts = arg_temp(op->args[i]); 2783 if (ts->state & TS_DEAD) { 2784 /* For operands that were dead, initially allow 2785 all regs for the type. */ 2786 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2787 ts->state &= ~TS_DEAD; 2788 } 2789 } 2790 2791 /* Incorporate constraints for this operand. */ 2792 switch (opc) { 2793 case INDEX_op_mov_i32: 2794 case INDEX_op_mov_i64: 2795 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2796 have proper constraints. That said, special case 2797 moves to propagate preferences backward. */ 2798 if (IS_DEAD_ARG(1)) { 2799 *la_temp_pref(arg_temp(op->args[0])) 2800 = *la_temp_pref(arg_temp(op->args[1])); 2801 } 2802 break; 2803 2804 default: 2805 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2806 const TCGArgConstraint *ct = &def->args_ct[i]; 2807 TCGRegSet set, *pset; 2808 2809 ts = arg_temp(op->args[i]); 2810 pset = la_temp_pref(ts); 2811 set = *pset; 2812 2813 set &= ct->u.regs; 2814 if (ct->ct & TCG_CT_IALIAS) { 2815 set &= op->output_pref[ct->alias_index]; 2816 } 2817 /* If the combination is not possible, restart. */ 2818 if (set == 0) { 2819 set = ct->u.regs; 2820 } 2821 *pset = set; 2822 } 2823 break; 2824 } 2825 break; 2826 } 2827 op->life = arg_life; 2828 } 2829 } 2830 2831 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2832 static bool liveness_pass_2(TCGContext *s) 2833 { 2834 int nb_globals = s->nb_globals; 2835 int nb_temps, i; 2836 bool changes = false; 2837 TCGOp *op, *op_next; 2838 2839 /* Create a temporary for each indirect global. 
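   Each indirect global gets a shadow "direct" temp of the same type,
   remembered via state_ptr; the loop over ops below rewrites uses to
   that direct temp and inserts explicit ld/st ops against
   mem_base/mem_offset, so the indirect global itself is only ever
   accessed through memory.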
*/ 2840 for (i = 0; i < nb_globals; ++i) { 2841 TCGTemp *its = &s->temps[i]; 2842 if (its->indirect_reg) { 2843 TCGTemp *dts = tcg_temp_alloc(s); 2844 dts->type = its->type; 2845 dts->base_type = its->base_type; 2846 its->state_ptr = dts; 2847 } else { 2848 its->state_ptr = NULL; 2849 } 2850 /* All globals begin dead. */ 2851 its->state = TS_DEAD; 2852 } 2853 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2854 TCGTemp *its = &s->temps[i]; 2855 its->state_ptr = NULL; 2856 its->state = TS_DEAD; 2857 } 2858 2859 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2860 TCGOpcode opc = op->opc; 2861 const TCGOpDef *def = &tcg_op_defs[opc]; 2862 TCGLifeData arg_life = op->life; 2863 int nb_iargs, nb_oargs, call_flags; 2864 TCGTemp *arg_ts, *dir_ts; 2865 2866 if (opc == INDEX_op_call) { 2867 nb_oargs = TCGOP_CALLO(op); 2868 nb_iargs = TCGOP_CALLI(op); 2869 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2870 } else { 2871 nb_iargs = def->nb_iargs; 2872 nb_oargs = def->nb_oargs; 2873 2874 /* Set flags similar to how calls require. */ 2875 if (def->flags & TCG_OPF_BB_END) { 2876 /* Like writing globals: save_globals */ 2877 call_flags = 0; 2878 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2879 /* Like reading globals: sync_globals */ 2880 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2881 } else { 2882 /* No effect on globals. */ 2883 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2884 TCG_CALL_NO_WRITE_GLOBALS); 2885 } 2886 } 2887 2888 /* Make sure that input arguments are available. */ 2889 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2890 arg_ts = arg_temp(op->args[i]); 2891 if (arg_ts) { 2892 dir_ts = arg_ts->state_ptr; 2893 if (dir_ts && arg_ts->state == TS_DEAD) { 2894 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2895 ? INDEX_op_ld_i32 2896 : INDEX_op_ld_i64); 2897 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2898 2899 lop->args[0] = temp_arg(dir_ts); 2900 lop->args[1] = temp_arg(arg_ts->mem_base); 2901 lop->args[2] = arg_ts->mem_offset; 2902 2903 /* Loaded, but synced with memory. */ 2904 arg_ts->state = TS_MEM; 2905 } 2906 } 2907 } 2908 2909 /* Perform input replacement, and mark inputs that became dead. 2910 No action is required except keeping temp_state up to date 2911 so that we reload when needed. */ 2912 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2913 arg_ts = arg_temp(op->args[i]); 2914 if (arg_ts) { 2915 dir_ts = arg_ts->state_ptr; 2916 if (dir_ts) { 2917 op->args[i] = temp_arg(dir_ts); 2918 changes = true; 2919 if (IS_DEAD_ARG(i)) { 2920 arg_ts->state = TS_DEAD; 2921 } 2922 } 2923 } 2924 } 2925 2926 /* Liveness analysis should ensure that the following are 2927 all correct, for call sites and basic block end points. */ 2928 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2929 /* Nothing to do */ 2930 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2931 for (i = 0; i < nb_globals; ++i) { 2932 /* Liveness should see that globals are synced back, 2933 that is, either TS_DEAD or TS_MEM. */ 2934 arg_ts = &s->temps[i]; 2935 tcg_debug_assert(arg_ts->state_ptr == 0 2936 || arg_ts->state != 0); 2937 } 2938 } else { 2939 for (i = 0; i < nb_globals; ++i) { 2940 /* Liveness should see that globals are saved back, 2941 that is, TS_DEAD, waiting to be reloaded. */ 2942 arg_ts = &s->temps[i]; 2943 tcg_debug_assert(arg_ts->state_ptr == 0 2944 || arg_ts->state == TS_DEAD); 2945 } 2946 } 2947 2948 /* Outputs become available. 
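   An output written by the op now lives in the direct temp (state 0);
   if liveness requested a sync, a store back to the global's memory
   slot is appended immediately after the op, and outputs that are dead
   simply revert to TS_DEAD.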
*/ 2949 for (i = 0; i < nb_oargs; i++) { 2950 arg_ts = arg_temp(op->args[i]); 2951 dir_ts = arg_ts->state_ptr; 2952 if (!dir_ts) { 2953 continue; 2954 } 2955 op->args[i] = temp_arg(dir_ts); 2956 changes = true; 2957 2958 /* The output is now live and modified. */ 2959 arg_ts->state = 0; 2960 2961 /* Sync outputs upon their last write. */ 2962 if (NEED_SYNC_ARG(i)) { 2963 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2964 ? INDEX_op_st_i32 2965 : INDEX_op_st_i64); 2966 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2967 2968 sop->args[0] = temp_arg(dir_ts); 2969 sop->args[1] = temp_arg(arg_ts->mem_base); 2970 sop->args[2] = arg_ts->mem_offset; 2971 2972 arg_ts->state = TS_MEM; 2973 } 2974 /* Drop outputs that are dead. */ 2975 if (IS_DEAD_ARG(i)) { 2976 arg_ts->state = TS_DEAD; 2977 } 2978 } 2979 } 2980 2981 return changes; 2982 } 2983 2984 #ifdef CONFIG_DEBUG_TCG 2985 static void dump_regs(TCGContext *s) 2986 { 2987 TCGTemp *ts; 2988 int i; 2989 char buf[64]; 2990 2991 for(i = 0; i < s->nb_temps; i++) { 2992 ts = &s->temps[i]; 2993 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2994 switch(ts->val_type) { 2995 case TEMP_VAL_REG: 2996 printf("%s", tcg_target_reg_names[ts->reg]); 2997 break; 2998 case TEMP_VAL_MEM: 2999 printf("%d(%s)", (int)ts->mem_offset, 3000 tcg_target_reg_names[ts->mem_base->reg]); 3001 break; 3002 case TEMP_VAL_CONST: 3003 printf("$0x%" TCG_PRIlx, ts->val); 3004 break; 3005 case TEMP_VAL_DEAD: 3006 printf("D"); 3007 break; 3008 default: 3009 printf("???"); 3010 break; 3011 } 3012 printf("\n"); 3013 } 3014 3015 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3016 if (s->reg_to_temp[i] != NULL) { 3017 printf("%s: %s\n", 3018 tcg_target_reg_names[i], 3019 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3020 } 3021 } 3022 } 3023 3024 static void check_regs(TCGContext *s) 3025 { 3026 int reg; 3027 int k; 3028 TCGTemp *ts; 3029 char buf[64]; 3030 3031 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3032 ts = s->reg_to_temp[reg]; 3033 if (ts != NULL) { 3034 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3035 printf("Inconsistency for register %s:\n", 3036 tcg_target_reg_names[reg]); 3037 goto fail; 3038 } 3039 } 3040 } 3041 for (k = 0; k < s->nb_temps; k++) { 3042 ts = &s->temps[k]; 3043 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 3044 && s->reg_to_temp[ts->reg] != ts) { 3045 printf("Inconsistency for temp %s:\n", 3046 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3047 fail: 3048 printf("reg state:\n"); 3049 dump_regs(s); 3050 tcg_abort(); 3051 } 3052 } 3053 } 3054 #endif 3055 3056 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3057 { 3058 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3059 /* Sparc64 stack is accessed with offset of 2047 */ 3060 s->current_frame_offset = (s->current_frame_offset + 3061 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3062 ~(sizeof(tcg_target_long) - 1); 3063 #endif 3064 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3065 s->frame_end) { 3066 tcg_abort(); 3067 } 3068 ts->mem_offset = s->current_frame_offset; 3069 ts->mem_base = s->frame_temp; 3070 ts->mem_allocated = 1; 3071 s->current_frame_offset += sizeof(tcg_target_long); 3072 } 3073 3074 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3075 3076 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3077 mark it free; otherwise mark it dead. 
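   "Free" (and any local or global temp) keeps TEMP_VAL_MEM, meaning
   the value is still available from its memory slot; a plain temp
   marked dead becomes TEMP_VAL_DEAD.  Fixed-register temps are left
   untouched.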
*/ 3078 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3079 { 3080 if (ts->fixed_reg) { 3081 return; 3082 } 3083 if (ts->val_type == TEMP_VAL_REG) { 3084 s->reg_to_temp[ts->reg] = NULL; 3085 } 3086 ts->val_type = (free_or_dead < 0 3087 || ts->temp_local 3088 || ts->temp_global 3089 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 3090 } 3091 3092 /* Mark a temporary as dead. */ 3093 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3094 { 3095 temp_free_or_dead(s, ts, 1); 3096 } 3097 3098 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3099 registers needs to be allocated to store a constant. If 'free_or_dead' 3100 is non-zero, subsequently release the temporary; if it is positive, the 3101 temp is dead; if it is negative, the temp is free. */ 3102 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3103 TCGRegSet preferred_regs, int free_or_dead) 3104 { 3105 if (ts->fixed_reg) { 3106 return; 3107 } 3108 if (!ts->mem_coherent) { 3109 if (!ts->mem_allocated) { 3110 temp_allocate_frame(s, ts); 3111 } 3112 switch (ts->val_type) { 3113 case TEMP_VAL_CONST: 3114 /* If we're going to free the temp immediately, then we won't 3115 require it later in a register, so attempt to store the 3116 constant to memory directly. */ 3117 if (free_or_dead 3118 && tcg_out_sti(s, ts->type, ts->val, 3119 ts->mem_base->reg, ts->mem_offset)) { 3120 break; 3121 } 3122 temp_load(s, ts, tcg_target_available_regs[ts->type], 3123 allocated_regs, preferred_regs); 3124 /* fallthrough */ 3125 3126 case TEMP_VAL_REG: 3127 tcg_out_st(s, ts->type, ts->reg, 3128 ts->mem_base->reg, ts->mem_offset); 3129 break; 3130 3131 case TEMP_VAL_MEM: 3132 break; 3133 3134 case TEMP_VAL_DEAD: 3135 default: 3136 tcg_abort(); 3137 } 3138 ts->mem_coherent = 1; 3139 } 3140 if (free_or_dead) { 3141 temp_free_or_dead(s, ts, free_or_dead); 3142 } 3143 } 3144 3145 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3146 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3147 { 3148 TCGTemp *ts = s->reg_to_temp[reg]; 3149 if (ts != NULL) { 3150 temp_sync(s, ts, allocated_regs, 0, -1); 3151 } 3152 } 3153 3154 /** 3155 * tcg_reg_alloc: 3156 * @required_regs: Set of registers in which we must allocate. 3157 * @allocated_regs: Set of registers which must be avoided. 3158 * @preferred_regs: Set of registers we should prefer. 3159 * @rev: True if we search the registers in "indirect" order. 3160 * 3161 * The allocated register must be in @required_regs & ~@allocated_regs, 3162 * but if we can put it in @preferred_regs we may save a move later. 3163 */ 3164 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3165 TCGRegSet allocated_regs, 3166 TCGRegSet preferred_regs, bool rev) 3167 { 3168 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3169 TCGRegSet reg_ct[2]; 3170 const int *order; 3171 3172 reg_ct[1] = required_regs & ~allocated_regs; 3173 tcg_debug_assert(reg_ct[1] != 0); 3174 reg_ct[0] = reg_ct[1] & preferred_regs; 3175 3176 /* Skip the preferred_regs option if it cannot be satisfied, 3177 or if the preference made no difference. */ 3178 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3179 3180 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3181 3182 /* Try free registers, preferences first. */ 3183 for (j = f; j < 2; j++) { 3184 TCGRegSet set = reg_ct[j]; 3185 3186 if (tcg_regset_single(set)) { 3187 /* One register in the set. 
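   A single-bit set (e.g. a specific call register) is tested directly;
   if that register is currently assigned to a temp, we fall through so
   the spill loop further down can free one.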
*/ 3188 TCGReg reg = tcg_regset_first(set); 3189 if (s->reg_to_temp[reg] == NULL) { 3190 return reg; 3191 } 3192 } else { 3193 for (i = 0; i < n; i++) { 3194 TCGReg reg = order[i]; 3195 if (s->reg_to_temp[reg] == NULL && 3196 tcg_regset_test_reg(set, reg)) { 3197 return reg; 3198 } 3199 } 3200 } 3201 } 3202 3203 /* We must spill something. */ 3204 for (j = f; j < 2; j++) { 3205 TCGRegSet set = reg_ct[j]; 3206 3207 if (tcg_regset_single(set)) { 3208 /* One register in the set. */ 3209 TCGReg reg = tcg_regset_first(set); 3210 tcg_reg_free(s, reg, allocated_regs); 3211 return reg; 3212 } else { 3213 for (i = 0; i < n; i++) { 3214 TCGReg reg = order[i]; 3215 if (tcg_regset_test_reg(set, reg)) { 3216 tcg_reg_free(s, reg, allocated_regs); 3217 return reg; 3218 } 3219 } 3220 } 3221 } 3222 3223 tcg_abort(); 3224 } 3225 3226 /* Make sure the temporary is in a register. If needed, allocate the register 3227 from DESIRED while avoiding ALLOCATED. */ 3228 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3229 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3230 { 3231 TCGReg reg; 3232 3233 switch (ts->val_type) { 3234 case TEMP_VAL_REG: 3235 return; 3236 case TEMP_VAL_CONST: 3237 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3238 preferred_regs, ts->indirect_base); 3239 tcg_out_movi(s, ts->type, reg, ts->val); 3240 ts->mem_coherent = 0; 3241 break; 3242 case TEMP_VAL_MEM: 3243 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3244 preferred_regs, ts->indirect_base); 3245 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3246 ts->mem_coherent = 1; 3247 break; 3248 case TEMP_VAL_DEAD: 3249 default: 3250 tcg_abort(); 3251 } 3252 ts->reg = reg; 3253 ts->val_type = TEMP_VAL_REG; 3254 s->reg_to_temp[reg] = ts; 3255 } 3256 3257 /* Save a temporary to memory. 'allocated_regs' is used in case a 3258 temporary registers needs to be allocated to store a constant. */ 3259 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3260 { 3261 /* The liveness analysis already ensures that globals are back 3262 in memory. Keep an tcg_debug_assert for safety. */ 3263 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 3264 } 3265 3266 /* save globals to their canonical location and assume they can be 3267 modified be the following code. 'allocated_regs' is used in case a 3268 temporary registers needs to be allocated to store a constant. */ 3269 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3270 { 3271 int i, n; 3272 3273 for (i = 0, n = s->nb_globals; i < n; i++) { 3274 temp_save(s, &s->temps[i], allocated_regs); 3275 } 3276 } 3277 3278 /* sync globals to their canonical location and assume they can be 3279 read by the following code. 'allocated_regs' is used in case a 3280 temporary registers needs to be allocated to store a constant. */ 3281 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3282 { 3283 int i, n; 3284 3285 for (i = 0, n = s->nb_globals; i < n; i++) { 3286 TCGTemp *ts = &s->temps[i]; 3287 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3288 || ts->fixed_reg 3289 || ts->mem_coherent); 3290 } 3291 } 3292 3293 /* at the end of a basic block, we assume all temporaries are dead and 3294 all globals are stored at their canonical location. 
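   Concretely: local temps are handed to temp_save(), every other temp
   is expected to be TEMP_VAL_DEAD already (liveness guarantees this),
   and save_globals() takes care of the globals before control leaves
   the block.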
*/ 3295 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3296 { 3297 int i; 3298 3299 for (i = s->nb_globals; i < s->nb_temps; i++) { 3300 TCGTemp *ts = &s->temps[i]; 3301 if (ts->temp_local) { 3302 temp_save(s, ts, allocated_regs); 3303 } else { 3304 /* The liveness analysis already ensures that temps are dead. 3305 Keep an tcg_debug_assert for safety. */ 3306 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3307 } 3308 } 3309 3310 save_globals(s, allocated_regs); 3311 } 3312 3313 /* 3314 * Specialized code generation for INDEX_op_movi_*. 3315 */ 3316 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3317 tcg_target_ulong val, TCGLifeData arg_life, 3318 TCGRegSet preferred_regs) 3319 { 3320 /* ENV should not be modified. */ 3321 tcg_debug_assert(!ots->fixed_reg); 3322 3323 /* The movi is not explicitly generated here. */ 3324 if (ots->val_type == TEMP_VAL_REG) { 3325 s->reg_to_temp[ots->reg] = NULL; 3326 } 3327 ots->val_type = TEMP_VAL_CONST; 3328 ots->val = val; 3329 ots->mem_coherent = 0; 3330 if (NEED_SYNC_ARG(0)) { 3331 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3332 } else if (IS_DEAD_ARG(0)) { 3333 temp_dead(s, ots); 3334 } 3335 } 3336 3337 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 3338 { 3339 TCGTemp *ots = arg_temp(op->args[0]); 3340 tcg_target_ulong val = op->args[1]; 3341 3342 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); 3343 } 3344 3345 /* 3346 * Specialized code generation for INDEX_op_mov_*. 3347 */ 3348 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3349 { 3350 const TCGLifeData arg_life = op->life; 3351 TCGRegSet allocated_regs, preferred_regs; 3352 TCGTemp *ts, *ots; 3353 TCGType otype, itype; 3354 3355 allocated_regs = s->reserved_regs; 3356 preferred_regs = op->output_pref[0]; 3357 ots = arg_temp(op->args[0]); 3358 ts = arg_temp(op->args[1]); 3359 3360 /* ENV should not be modified. */ 3361 tcg_debug_assert(!ots->fixed_reg); 3362 3363 /* Note that otype != itype for no-op truncation. */ 3364 otype = ots->type; 3365 itype = ts->type; 3366 3367 if (ts->val_type == TEMP_VAL_CONST) { 3368 /* propagate constant or generate sti */ 3369 tcg_target_ulong val = ts->val; 3370 if (IS_DEAD_ARG(1)) { 3371 temp_dead(s, ts); 3372 } 3373 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3374 return; 3375 } 3376 3377 /* If the source value is in memory we're going to be forced 3378 to have it in a register in order to perform the copy. Copy 3379 the SOURCE value into its own register first, that way we 3380 don't have to reload SOURCE the next time it is used. */ 3381 if (ts->val_type == TEMP_VAL_MEM) { 3382 temp_load(s, ts, tcg_target_available_regs[itype], 3383 allocated_regs, preferred_regs); 3384 } 3385 3386 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3387 if (IS_DEAD_ARG(0)) { 3388 /* mov to a non-saved dead register makes no sense (even with 3389 liveness analysis disabled). 
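   so a dead output must at least need a sync; store the source
   register straight into the output's memory slot and mark the output
   (and, if it also dies, the input) dead, skipping the
   register-to-register copy entirely.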
*/ 3390 tcg_debug_assert(NEED_SYNC_ARG(0)); 3391 if (!ots->mem_allocated) { 3392 temp_allocate_frame(s, ots); 3393 } 3394 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3395 if (IS_DEAD_ARG(1)) { 3396 temp_dead(s, ts); 3397 } 3398 temp_dead(s, ots); 3399 } else { 3400 if (IS_DEAD_ARG(1) && !ts->fixed_reg) { 3401 /* the mov can be suppressed */ 3402 if (ots->val_type == TEMP_VAL_REG) { 3403 s->reg_to_temp[ots->reg] = NULL; 3404 } 3405 ots->reg = ts->reg; 3406 temp_dead(s, ts); 3407 } else { 3408 if (ots->val_type != TEMP_VAL_REG) { 3409 /* When allocating a new register, make sure to not spill the 3410 input one. */ 3411 tcg_regset_set_reg(allocated_regs, ts->reg); 3412 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3413 allocated_regs, preferred_regs, 3414 ots->indirect_base); 3415 } 3416 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3417 /* 3418 * Cross register class move not supported. 3419 * Store the source register into the destination slot 3420 * and leave the destination temp as TEMP_VAL_MEM. 3421 */ 3422 assert(!ots->fixed_reg); 3423 if (!ts->mem_allocated) { 3424 temp_allocate_frame(s, ots); 3425 } 3426 tcg_out_st(s, ts->type, ts->reg, 3427 ots->mem_base->reg, ots->mem_offset); 3428 ots->mem_coherent = 1; 3429 temp_free_or_dead(s, ots, -1); 3430 return; 3431 } 3432 } 3433 ots->val_type = TEMP_VAL_REG; 3434 ots->mem_coherent = 0; 3435 s->reg_to_temp[ots->reg] = ots; 3436 if (NEED_SYNC_ARG(0)) { 3437 temp_sync(s, ots, allocated_regs, 0, 0); 3438 } 3439 } 3440 } 3441 3442 /* 3443 * Specialized code generation for INDEX_op_dup_vec. 3444 */ 3445 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3446 { 3447 const TCGLifeData arg_life = op->life; 3448 TCGRegSet dup_out_regs, dup_in_regs; 3449 TCGTemp *its, *ots; 3450 TCGType itype, vtype; 3451 intptr_t endian_fixup; 3452 unsigned vece; 3453 bool ok; 3454 3455 ots = arg_temp(op->args[0]); 3456 its = arg_temp(op->args[1]); 3457 3458 /* ENV should not be modified. */ 3459 tcg_debug_assert(!ots->fixed_reg); 3460 3461 itype = its->type; 3462 vece = TCGOP_VECE(op); 3463 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3464 3465 if (its->val_type == TEMP_VAL_CONST) { 3466 /* Propagate constant via movi -> dupi. */ 3467 tcg_target_ulong val = its->val; 3468 if (IS_DEAD_ARG(1)) { 3469 temp_dead(s, its); 3470 } 3471 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3472 return; 3473 } 3474 3475 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs; 3476 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs; 3477 3478 /* Allocate the output register now. */ 3479 if (ots->val_type != TEMP_VAL_REG) { 3480 TCGRegSet allocated_regs = s->reserved_regs; 3481 3482 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3483 /* Make sure to not spill the input register. */ 3484 tcg_regset_set_reg(allocated_regs, its->reg); 3485 } 3486 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3487 op->output_pref[0], ots->indirect_base); 3488 ots->val_type = TEMP_VAL_REG; 3489 ots->mem_coherent = 0; 3490 s->reg_to_temp[ots->reg] = ots; 3491 } 3492 3493 switch (its->val_type) { 3494 case TEMP_VAL_REG: 3495 /* 3496 * The dup constriaints must be broad, covering all possible VECE. 3497 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 3498 * to fail, indicating that extra moves are required for that case. 
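         * The fallback order below is, in effect: dup directly from a
         * suitable vector register; else try an integer-to-vector move
         * into the output and dup it onto itself; else sync to memory
         * and use dupm; and as a last resort a plain load into the
         * output followed by the final dup, which must then succeed.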
3499 */ 3500 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3501 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3502 goto done; 3503 } 3504 /* Try again from memory or a vector input register. */ 3505 } 3506 if (!its->mem_coherent) { 3507 /* 3508 * The input register is not synced, and so an extra store 3509 * would be required to use memory. Attempt an integer-vector 3510 * register move first. We do not have a TCGRegSet for this. 3511 */ 3512 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 3513 break; 3514 } 3515 /* Sync the temp back to its slot and load from there. */ 3516 temp_sync(s, its, s->reserved_regs, 0, 0); 3517 } 3518 /* fall through */ 3519 3520 case TEMP_VAL_MEM: 3521 #ifdef HOST_WORDS_BIGENDIAN 3522 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; 3523 endian_fixup -= 1 << vece; 3524 #else 3525 endian_fixup = 0; 3526 #endif 3527 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 3528 its->mem_offset + endian_fixup)) { 3529 goto done; 3530 } 3531 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 3532 break; 3533 3534 default: 3535 g_assert_not_reached(); 3536 } 3537 3538 /* We now have a vector input register, so dup must succeed. */ 3539 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 3540 tcg_debug_assert(ok); 3541 3542 done: 3543 if (IS_DEAD_ARG(1)) { 3544 temp_dead(s, its); 3545 } 3546 if (NEED_SYNC_ARG(0)) { 3547 temp_sync(s, ots, s->reserved_regs, 0, 0); 3548 } 3549 if (IS_DEAD_ARG(0)) { 3550 temp_dead(s, ots); 3551 } 3552 } 3553 3554 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3555 { 3556 const TCGLifeData arg_life = op->life; 3557 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3558 TCGRegSet i_allocated_regs; 3559 TCGRegSet o_allocated_regs; 3560 int i, k, nb_iargs, nb_oargs; 3561 TCGReg reg; 3562 TCGArg arg; 3563 const TCGArgConstraint *arg_ct; 3564 TCGTemp *ts; 3565 TCGArg new_args[TCG_MAX_OP_ARGS]; 3566 int const_args[TCG_MAX_OP_ARGS]; 3567 3568 nb_oargs = def->nb_oargs; 3569 nb_iargs = def->nb_iargs; 3570 3571 /* copy constants */ 3572 memcpy(new_args + nb_oargs + nb_iargs, 3573 op->args + nb_oargs + nb_iargs, 3574 sizeof(TCGArg) * def->nb_cargs); 3575 3576 i_allocated_regs = s->reserved_regs; 3577 o_allocated_regs = s->reserved_regs; 3578 3579 /* satisfy input constraints */ 3580 for (k = 0; k < nb_iargs; k++) { 3581 TCGRegSet i_preferred_regs, o_preferred_regs; 3582 3583 i = def->sorted_args[nb_oargs + k]; 3584 arg = op->args[i]; 3585 arg_ct = &def->args_ct[i]; 3586 ts = arg_temp(arg); 3587 3588 if (ts->val_type == TEMP_VAL_CONST 3589 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 3590 /* constant is OK for instruction */ 3591 const_args[i] = 1; 3592 new_args[i] = ts->val; 3593 continue; 3594 } 3595 3596 i_preferred_regs = o_preferred_regs = 0; 3597 if (arg_ct->ct & TCG_CT_IALIAS) { 3598 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3599 if (ts->fixed_reg) { 3600 /* if fixed register, we must allocate a new register 3601 if the alias is not the same register */ 3602 if (arg != op->args[arg_ct->alias_index]) { 3603 goto allocate_in_reg; 3604 } 3605 } else { 3606 /* if the input is aliased to an output and if it is 3607 not dead after the instruction, we must allocate 3608 a new register and move it */ 3609 if (!IS_DEAD_ARG(i)) { 3610 goto allocate_in_reg; 3611 } 3612 3613 /* check if the current register has already been allocated 3614 for another input aliased to an output */ 3615 if (ts->val_type == TEMP_VAL_REG) { 3616 int k2, i2; 3617 reg = ts->reg; 3618 for 
(k2 = 0 ; k2 < k ; k2++) { 3619 i2 = def->sorted_args[nb_oargs + k2]; 3620 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 3621 reg == new_args[i2]) { 3622 goto allocate_in_reg; 3623 } 3624 } 3625 } 3626 i_preferred_regs = o_preferred_regs; 3627 } 3628 } 3629 3630 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); 3631 reg = ts->reg; 3632 3633 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 3634 /* nothing to do : the constraint is satisfied */ 3635 } else { 3636 allocate_in_reg: 3637 /* allocate a new register matching the constraint 3638 and move the temporary register into it */ 3639 temp_load(s, ts, tcg_target_available_regs[ts->type], 3640 i_allocated_regs, 0); 3641 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 3642 o_preferred_regs, ts->indirect_base); 3643 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3644 /* 3645 * Cross register class move not supported. Sync the 3646 * temp back to its slot and load from there. 3647 */ 3648 temp_sync(s, ts, i_allocated_regs, 0, 0); 3649 tcg_out_ld(s, ts->type, reg, 3650 ts->mem_base->reg, ts->mem_offset); 3651 } 3652 } 3653 new_args[i] = reg; 3654 const_args[i] = 0; 3655 tcg_regset_set_reg(i_allocated_regs, reg); 3656 } 3657 3658 /* mark dead temporaries and free the associated registers */ 3659 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3660 if (IS_DEAD_ARG(i)) { 3661 temp_dead(s, arg_temp(op->args[i])); 3662 } 3663 } 3664 3665 if (def->flags & TCG_OPF_BB_END) { 3666 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3667 } else { 3668 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3669 /* XXX: permit generic clobber register list ? */ 3670 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3671 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3672 tcg_reg_free(s, i, i_allocated_regs); 3673 } 3674 } 3675 } 3676 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3677 /* sync globals if the op has side effects and might trigger 3678 an exception. */ 3679 sync_globals(s, i_allocated_regs); 3680 } 3681 3682 /* satisfy the output constraints */ 3683 for(k = 0; k < nb_oargs; k++) { 3684 i = def->sorted_args[k]; 3685 arg = op->args[i]; 3686 arg_ct = &def->args_ct[i]; 3687 ts = arg_temp(arg); 3688 3689 /* ENV should not be modified. */ 3690 tcg_debug_assert(!ts->fixed_reg); 3691 3692 if ((arg_ct->ct & TCG_CT_ALIAS) 3693 && !const_args[arg_ct->alias_index]) { 3694 reg = new_args[arg_ct->alias_index]; 3695 } else if (arg_ct->ct & TCG_CT_NEWREG) { 3696 reg = tcg_reg_alloc(s, arg_ct->u.regs, 3697 i_allocated_regs | o_allocated_regs, 3698 op->output_pref[k], ts->indirect_base); 3699 } else { 3700 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 3701 op->output_pref[k], ts->indirect_base); 3702 } 3703 tcg_regset_set_reg(o_allocated_regs, reg); 3704 if (ts->val_type == TEMP_VAL_REG) { 3705 s->reg_to_temp[ts->reg] = NULL; 3706 } 3707 ts->val_type = TEMP_VAL_REG; 3708 ts->reg = reg; 3709 /* 3710 * Temp value is modified, so the value kept in memory is 3711 * potentially not the same. 3712 */ 3713 ts->mem_coherent = 0; 3714 s->reg_to_temp[reg] = ts; 3715 new_args[i] = reg; 3716 } 3717 } 3718 3719 /* emit instruction */ 3720 if (def->flags & TCG_OPF_VECTOR) { 3721 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3722 new_args, const_args); 3723 } else { 3724 tcg_out_op(s, op->opc, new_args, const_args); 3725 } 3726 3727 /* move the outputs in the correct register if needed */ 3728 for(i = 0; i < nb_oargs; i++) { 3729 ts = arg_temp(op->args[i]); 3730 3731 /* ENV should not be modified. 
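   env is expected to be the only fixed-register temp, so asserting
   !fixed_reg enforces this; each real output is then synced back to
   memory and/or marked dead according to the liveness bits attached
   to the op.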
*/ 3732 tcg_debug_assert(!ts->fixed_reg); 3733 3734 if (NEED_SYNC_ARG(i)) { 3735 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3736 } else if (IS_DEAD_ARG(i)) { 3737 temp_dead(s, ts); 3738 } 3739 } 3740 } 3741 3742 #ifdef TCG_TARGET_STACK_GROWSUP 3743 #define STACK_DIR(x) (-(x)) 3744 #else 3745 #define STACK_DIR(x) (x) 3746 #endif 3747 3748 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3749 { 3750 const int nb_oargs = TCGOP_CALLO(op); 3751 const int nb_iargs = TCGOP_CALLI(op); 3752 const TCGLifeData arg_life = op->life; 3753 int flags, nb_regs, i; 3754 TCGReg reg; 3755 TCGArg arg; 3756 TCGTemp *ts; 3757 intptr_t stack_offset; 3758 size_t call_stack_size; 3759 tcg_insn_unit *func_addr; 3760 int allocate_args; 3761 TCGRegSet allocated_regs; 3762 3763 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 3764 flags = op->args[nb_oargs + nb_iargs + 1]; 3765 3766 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3767 if (nb_regs > nb_iargs) { 3768 nb_regs = nb_iargs; 3769 } 3770 3771 /* assign stack slots first */ 3772 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3773 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 3774 ~(TCG_TARGET_STACK_ALIGN - 1); 3775 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3776 if (allocate_args) { 3777 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3778 preallocate call stack */ 3779 tcg_abort(); 3780 } 3781 3782 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3783 for (i = nb_regs; i < nb_iargs; i++) { 3784 arg = op->args[nb_oargs + i]; 3785 #ifdef TCG_TARGET_STACK_GROWSUP 3786 stack_offset -= sizeof(tcg_target_long); 3787 #endif 3788 if (arg != TCG_CALL_DUMMY_ARG) { 3789 ts = arg_temp(arg); 3790 temp_load(s, ts, tcg_target_available_regs[ts->type], 3791 s->reserved_regs, 0); 3792 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3793 } 3794 #ifndef TCG_TARGET_STACK_GROWSUP 3795 stack_offset += sizeof(tcg_target_long); 3796 #endif 3797 } 3798 3799 /* assign input registers */ 3800 allocated_regs = s->reserved_regs; 3801 for (i = 0; i < nb_regs; i++) { 3802 arg = op->args[nb_oargs + i]; 3803 if (arg != TCG_CALL_DUMMY_ARG) { 3804 ts = arg_temp(arg); 3805 reg = tcg_target_call_iarg_regs[i]; 3806 3807 if (ts->val_type == TEMP_VAL_REG) { 3808 if (ts->reg != reg) { 3809 tcg_reg_free(s, reg, allocated_regs); 3810 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3811 /* 3812 * Cross register class move not supported. Sync the 3813 * temp back to its slot and load from there. 3814 */ 3815 temp_sync(s, ts, allocated_regs, 0, 0); 3816 tcg_out_ld(s, ts->type, reg, 3817 ts->mem_base->reg, ts->mem_offset); 3818 } 3819 } 3820 } else { 3821 TCGRegSet arg_set = 0; 3822 3823 tcg_reg_free(s, reg, allocated_regs); 3824 tcg_regset_set_reg(arg_set, reg); 3825 temp_load(s, ts, arg_set, allocated_regs, 0); 3826 } 3827 3828 tcg_regset_set_reg(allocated_regs, reg); 3829 } 3830 } 3831 3832 /* mark dead temporaries and free the associated registers */ 3833 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3834 if (IS_DEAD_ARG(i)) { 3835 temp_dead(s, arg_temp(op->args[i])); 3836 } 3837 } 3838 3839 /* clobber call registers */ 3840 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3841 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3842 tcg_reg_free(s, i, allocated_regs); 3843 } 3844 } 3845 3846 /* Save globals if they might be written by the helper, sync them if 3847 they might be read. 
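   In flag terms: TCG_CALL_NO_READ_GLOBALS means nothing to do,
   TCG_CALL_NO_WRITE_GLOBALS means sync_globals() only, and anything
   else gets the full save_globals() treatment.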
*/ 3848 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3849 /* Nothing to do */ 3850 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3851 sync_globals(s, allocated_regs); 3852 } else { 3853 save_globals(s, allocated_regs); 3854 } 3855 3856 tcg_out_call(s, func_addr); 3857 3858 /* assign output registers and emit moves if needed */ 3859 for(i = 0; i < nb_oargs; i++) { 3860 arg = op->args[i]; 3861 ts = arg_temp(arg); 3862 3863 /* ENV should not be modified. */ 3864 tcg_debug_assert(!ts->fixed_reg); 3865 3866 reg = tcg_target_call_oarg_regs[i]; 3867 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3868 if (ts->val_type == TEMP_VAL_REG) { 3869 s->reg_to_temp[ts->reg] = NULL; 3870 } 3871 ts->val_type = TEMP_VAL_REG; 3872 ts->reg = reg; 3873 ts->mem_coherent = 0; 3874 s->reg_to_temp[reg] = ts; 3875 if (NEED_SYNC_ARG(i)) { 3876 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 3877 } else if (IS_DEAD_ARG(i)) { 3878 temp_dead(s, ts); 3879 } 3880 } 3881 } 3882 3883 #ifdef CONFIG_PROFILER 3884 3885 /* avoid copy/paste errors */ 3886 #define PROF_ADD(to, from, field) \ 3887 do { \ 3888 (to)->field += atomic_read(&((from)->field)); \ 3889 } while (0) 3890 3891 #define PROF_MAX(to, from, field) \ 3892 do { \ 3893 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3894 if (val__ > (to)->field) { \ 3895 (to)->field = val__; \ 3896 } \ 3897 } while (0) 3898 3899 /* Pass in a zero'ed @prof */ 3900 static inline 3901 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3902 { 3903 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3904 unsigned int i; 3905 3906 for (i = 0; i < n_ctxs; i++) { 3907 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3908 const TCGProfile *orig = &s->prof; 3909 3910 if (counters) { 3911 PROF_ADD(prof, orig, cpu_exec_time); 3912 PROF_ADD(prof, orig, tb_count1); 3913 PROF_ADD(prof, orig, tb_count); 3914 PROF_ADD(prof, orig, op_count); 3915 PROF_MAX(prof, orig, op_count_max); 3916 PROF_ADD(prof, orig, temp_count); 3917 PROF_MAX(prof, orig, temp_count_max); 3918 PROF_ADD(prof, orig, del_op_count); 3919 PROF_ADD(prof, orig, code_in_len); 3920 PROF_ADD(prof, orig, code_out_len); 3921 PROF_ADD(prof, orig, search_out_len); 3922 PROF_ADD(prof, orig, interm_time); 3923 PROF_ADD(prof, orig, code_time); 3924 PROF_ADD(prof, orig, la_time); 3925 PROF_ADD(prof, orig, opt_time); 3926 PROF_ADD(prof, orig, restore_count); 3927 PROF_ADD(prof, orig, restore_time); 3928 } 3929 if (table) { 3930 int i; 3931 3932 for (i = 0; i < NB_OPS; i++) { 3933 PROF_ADD(prof, orig, table_op_count[i]); 3934 } 3935 } 3936 } 3937 } 3938 3939 #undef PROF_ADD 3940 #undef PROF_MAX 3941 3942 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3943 { 3944 tcg_profile_snapshot(prof, true, false); 3945 } 3946 3947 static void tcg_profile_snapshot_table(TCGProfile *prof) 3948 { 3949 tcg_profile_snapshot(prof, false, true); 3950 } 3951 3952 void tcg_dump_op_count(void) 3953 { 3954 TCGProfile prof = {}; 3955 int i; 3956 3957 tcg_profile_snapshot_table(&prof); 3958 for (i = 0; i < NB_OPS; i++) { 3959 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, 3960 prof.table_op_count[i]); 3961 } 3962 } 3963 3964 int64_t tcg_cpu_exec_time(void) 3965 { 3966 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3967 unsigned int i; 3968 int64_t ret = 0; 3969 3970 for (i = 0; i < n_ctxs; i++) { 3971 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 3972 const TCGProfile *prof = &s->prof; 3973 3974 ret += atomic_read(&prof->cpu_exec_time); 3975 } 3976 return ret; 3977 } 3978 #else 3979 void tcg_dump_op_count(void) 3980 { 
3981 qemu_printf("[TCG profiler not compiled]\n"); 3982 } 3983 3984 int64_t tcg_cpu_exec_time(void) 3985 { 3986 error_report("%s: TCG profiler not compiled", __func__); 3987 exit(EXIT_FAILURE); 3988 } 3989 #endif 3990 3991 3992 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3993 { 3994 #ifdef CONFIG_PROFILER 3995 TCGProfile *prof = &s->prof; 3996 #endif 3997 int i, num_insns; 3998 TCGOp *op; 3999 4000 #ifdef CONFIG_PROFILER 4001 { 4002 int n = 0; 4003 4004 QTAILQ_FOREACH(op, &s->ops, link) { 4005 n++; 4006 } 4007 atomic_set(&prof->op_count, prof->op_count + n); 4008 if (n > prof->op_count_max) { 4009 atomic_set(&prof->op_count_max, n); 4010 } 4011 4012 n = s->nb_temps; 4013 atomic_set(&prof->temp_count, prof->temp_count + n); 4014 if (n > prof->temp_count_max) { 4015 atomic_set(&prof->temp_count_max, n); 4016 } 4017 } 4018 #endif 4019 4020 #ifdef DEBUG_DISAS 4021 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4022 && qemu_log_in_addr_range(tb->pc))) { 4023 qemu_log_lock(); 4024 qemu_log("OP:\n"); 4025 tcg_dump_ops(s, false); 4026 qemu_log("\n"); 4027 qemu_log_unlock(); 4028 } 4029 #endif 4030 4031 #ifdef CONFIG_DEBUG_TCG 4032 /* Ensure all labels referenced have been emitted. */ 4033 { 4034 TCGLabel *l; 4035 bool error = false; 4036 4037 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4038 if (unlikely(!l->present) && l->refs) { 4039 qemu_log_mask(CPU_LOG_TB_OP, 4040 "$L%d referenced but not present.\n", l->id); 4041 error = true; 4042 } 4043 } 4044 assert(!error); 4045 } 4046 #endif 4047 4048 #ifdef CONFIG_PROFILER 4049 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4050 #endif 4051 4052 #ifdef USE_TCG_OPTIMIZATIONS 4053 tcg_optimize(s); 4054 #endif 4055 4056 #ifdef CONFIG_PROFILER 4057 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4058 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4059 #endif 4060 4061 reachable_code_pass(s); 4062 liveness_pass_1(s); 4063 4064 if (s->nb_indirects > 0) { 4065 #ifdef DEBUG_DISAS 4066 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4067 && qemu_log_in_addr_range(tb->pc))) { 4068 qemu_log_lock(); 4069 qemu_log("OP before indirect lowering:\n"); 4070 tcg_dump_ops(s, false); 4071 qemu_log("\n"); 4072 qemu_log_unlock(); 4073 } 4074 #endif 4075 /* Replace indirect temps with direct temps. */ 4076 if (liveness_pass_2(s)) { 4077 /* If changes were made, re-run liveness. 
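   Pass 2 rewrote arguments to the new direct temps and inserted extra
   ld/st ops, so the life data recorded on each op is now stale; run
   pass 1 again over the modified op list.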
    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
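        /*
         * Note: this check and the TB-size check just below both signal
         * failure with a negative return value; the expectation is that
         * the caller of tcg_gen_code() treats any negative result as
         * "out of room", flushes the code buffer and retries the
         * translation.
         */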
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
 */
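
/*
 * As an illustrative sketch (not taken from any particular backend): a
 * host backend that defines ELF_HOST_MACHINE would typically build a
 * static .debug_frame blob describing the frame layout established by
 * tcg_target_qemu_prologue(), and forward it to tcg_register_jit_int():
 *
 *     // hypothetical backend code, e.g. in its tcg-target.inc.c
 *     static const uint8_t debug_frame[] = {
 *         // host-specific contents: one CIE followed by one FDE giving
 *         // the CFA rules for the prologue
 *     };
 *
 *     void tcg_register_jit(void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              debug_frame, sizeof(debug_frame));
 *     }
 *
 * tcg_register_jit_int() below then wraps the unwind info in a minimal
 * in-memory ELF image and registers it with GDB.
 */
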

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
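        /*
         * The seven section headers initialised below are, in order:
         * [0] the mandatory SHT_NULL entry, [1] .text (covering
         * code_gen_buffer), [2] .debug_info, [3] .debug_abbrev,
         * [4] .debug_frame (appended after the ElfImage), [5] .symtab and
         * [6] .strtab, which also serves as the section string table.
         */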
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;
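
    /*
     * When DEBUG_JIT is defined, the block below also dumps the finished
     * image to /tmp/qemu.jit, where it can be inspected with standard ELF
     * tools, for example:
     *
     *     readelf --all /tmp/qemu.jit
     *     objdump --dwarf /tmp/qemu.jit
     *
     * (commands given for illustration only).
     */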
#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif