1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/cutils.h" 34 #include "qemu/host-utils.h" 35 #include "qemu/timer.h" 36 37 /* Note: the long term plan is to reduce the dependencies on the QEMU 38 CPU definitions. Currently they are used for qemu_ld/st 39 instructions */ 40 #define NO_CPU_IO_DEFS 41 #include "cpu.h" 42 43 #include "exec/cpu-common.h" 44 #include "exec/exec-all.h" 45 46 #include "tcg-op.h" 47 48 #if UINTPTR_MAX == UINT32_MAX 49 # define ELF_CLASS ELFCLASS32 50 #else 51 # define ELF_CLASS ELFCLASS64 52 #endif 53 #ifdef HOST_WORDS_BIGENDIAN 54 # define ELF_DATA ELFDATA2MSB 55 #else 56 # define ELF_DATA ELFDATA2LSB 57 #endif 58 59 #include "elf.h" 60 #include "exec/log.h" 61 #include "sysemu/sysemu.h" 62 63 /* Forward declarations for functions declared in tcg-target.inc.c and 64 used here. */ 65 static void tcg_target_init(TCGContext *s); 66 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); 67 static void tcg_target_qemu_prologue(TCGContext *s); 68 static void patch_reloc(tcg_insn_unit *code_ptr, int type, 69 intptr_t value, intptr_t addend); 70 71 /* The CIE and FDE header definitions will be common to all hosts. */ 72 typedef struct { 73 uint32_t len __attribute__((aligned((sizeof(void *))))); 74 uint32_t id; 75 uint8_t version; 76 char augmentation[1]; 77 uint8_t code_align; 78 uint8_t data_align; 79 uint8_t return_column; 80 } DebugFrameCIE; 81 82 typedef struct QEMU_PACKED { 83 uint32_t len __attribute__((aligned((sizeof(void *))))); 84 uint32_t cie_offset; 85 uintptr_t func_start; 86 uintptr_t func_len; 87 } DebugFrameFDEHeader; 88 89 typedef struct QEMU_PACKED { 90 DebugFrameCIE cie; 91 DebugFrameFDEHeader fde; 92 } DebugFrameHeader; 93 94 static void tcg_register_jit_int(void *buf, size_t size, 95 const void *debug_frame, 96 size_t debug_frame_size) 97 __attribute__((unused)); 98 99 /* Forward declarations for functions declared and used in tcg-target.inc.c. 
*/ 100 static const char *target_parse_constraint(TCGArgConstraint *ct, 101 const char *ct_str, TCGType type); 102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 103 intptr_t arg2); 104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 105 static void tcg_out_movi(TCGContext *s, TCGType type, 106 TCGReg ret, tcg_target_long arg); 107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 108 const int *const_args); 109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 110 intptr_t arg2); 111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 112 TCGReg base, intptr_t ofs); 113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 114 static int tcg_target_const_match(tcg_target_long val, TCGType type, 115 const TCGArgConstraint *arg_ct); 116 #ifdef TCG_TARGET_NEED_LDST_LABELS 117 static bool tcg_out_ldst_finalize(TCGContext *s); 118 #endif 119 120 #define TCG_HIGHWATER 1024 121 122 static TCGContext **tcg_ctxs; 123 static unsigned int n_tcg_ctxs; 124 TCGv_env cpu_env = 0; 125 126 /* 127 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 128 * dynamically allocate from as demand dictates. Given appropriate region 129 * sizing, this minimizes flushes even when some TCG threads generate a lot 130 * more code than others. 131 */ 132 struct tcg_region_state { 133 QemuMutex lock; 134 135 /* fields set at init time */ 136 void *start; 137 void *start_aligned; 138 void *end; 139 size_t n; 140 size_t size; /* size of one region */ 141 size_t stride; /* .size + guard size */ 142 143 /* fields protected by the lock */ 144 size_t current; /* current region index */ 145 size_t agg_size_full; /* aggregate size of full regions */ 146 }; 147 148 static struct tcg_region_state region; 149 150 static TCGRegSet tcg_target_available_regs[2]; 151 static TCGRegSet tcg_target_call_clobber_regs; 152 153 #if TCG_TARGET_INSN_UNIT_SIZE == 1 154 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 155 { 156 *s->code_ptr++ = v; 157 } 158 159 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 160 uint8_t v) 161 { 162 *p = v; 163 } 164 #endif 165 166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 168 { 169 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 170 *s->code_ptr++ = v; 171 } else { 172 tcg_insn_unit *p = s->code_ptr; 173 memcpy(p, &v, sizeof(v)); 174 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 175 } 176 } 177 178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 179 uint16_t v) 180 { 181 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 182 *p = v; 183 } else { 184 memcpy(p, &v, sizeof(v)); 185 } 186 } 187 #endif 188 189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 191 { 192 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 193 *s->code_ptr++ = v; 194 } else { 195 tcg_insn_unit *p = s->code_ptr; 196 memcpy(p, &v, sizeof(v)); 197 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 198 } 199 } 200 201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 202 uint32_t v) 203 { 204 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 205 *p = v; 206 } else { 207 memcpy(p, &v, sizeof(v)); 208 } 209 } 210 #endif 211 212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 214 { 215 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 
216 *s->code_ptr++ = v; 217 } else { 218 tcg_insn_unit *p = s->code_ptr; 219 memcpy(p, &v, sizeof(v)); 220 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 221 } 222 } 223 224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 225 uint64_t v) 226 { 227 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 228 *p = v; 229 } else { 230 memcpy(p, &v, sizeof(v)); 231 } 232 } 233 #endif 234 235 /* label relocation processing */ 236 237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 238 TCGLabel *l, intptr_t addend) 239 { 240 TCGRelocation *r; 241 242 if (l->has_value) { 243 /* FIXME: This may break relocations on RISC targets that 244 modify instruction fields in place. The caller may not have 245 written the initial value. */ 246 patch_reloc(code_ptr, type, l->u.value, addend); 247 } else { 248 /* add a new relocation entry */ 249 r = tcg_malloc(sizeof(TCGRelocation)); 250 r->type = type; 251 r->ptr = code_ptr; 252 r->addend = addend; 253 r->next = l->u.first_reloc; 254 l->u.first_reloc = r; 255 } 256 } 257 258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 259 { 260 intptr_t value = (intptr_t)ptr; 261 TCGRelocation *r; 262 263 tcg_debug_assert(!l->has_value); 264 265 for (r = l->u.first_reloc; r != NULL; r = r->next) { 266 patch_reloc(r->ptr, r->type, value, r->addend); 267 } 268 269 l->has_value = 1; 270 l->u.value_ptr = ptr; 271 } 272 273 TCGLabel *gen_new_label(void) 274 { 275 TCGContext *s = tcg_ctx; 276 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 277 278 *l = (TCGLabel){ 279 .id = s->nb_labels++ 280 }; 281 282 return l; 283 } 284 285 #include "tcg-target.inc.c" 286 287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 288 { 289 void *start, *end; 290 291 start = region.start_aligned + curr_region * region.stride; 292 end = start + region.size; 293 294 if (curr_region == 0) { 295 start = region.start; 296 } 297 if (curr_region == region.n - 1) { 298 end = region.end; 299 } 300 301 *pstart = start; 302 *pend = end; 303 } 304 305 static void tcg_region_assign(TCGContext *s, size_t curr_region) 306 { 307 void *start, *end; 308 309 tcg_region_bounds(curr_region, &start, &end); 310 311 s->code_gen_buffer = start; 312 s->code_gen_ptr = start; 313 s->code_gen_buffer_size = end - start; 314 s->code_gen_highwater = end - TCG_HIGHWATER; 315 } 316 317 static bool tcg_region_alloc__locked(TCGContext *s) 318 { 319 if (region.current == region.n) { 320 return true; 321 } 322 tcg_region_assign(s, region.current); 323 region.current++; 324 return false; 325 } 326 327 /* 328 * Request a new region once the one in use has filled up. 329 * Returns true on error. 330 */ 331 static bool tcg_region_alloc(TCGContext *s) 332 { 333 bool err; 334 /* read the region size now; alloc__locked will overwrite it on success */ 335 size_t size_full = s->code_gen_buffer_size; 336 337 qemu_mutex_lock(®ion.lock); 338 err = tcg_region_alloc__locked(s); 339 if (!err) { 340 region.agg_size_full += size_full - TCG_HIGHWATER; 341 } 342 qemu_mutex_unlock(®ion.lock); 343 return err; 344 } 345 346 /* 347 * Perform a context's first region allocation. 348 * This function does _not_ increment region.agg_size_full. 
349 */ 350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 351 { 352 return tcg_region_alloc__locked(s); 353 } 354 355 /* Call from a safe-work context */ 356 void tcg_region_reset_all(void) 357 { 358 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 359 unsigned int i; 360 361 qemu_mutex_lock(®ion.lock); 362 region.current = 0; 363 region.agg_size_full = 0; 364 365 for (i = 0; i < n_ctxs; i++) { 366 TCGContext *s = atomic_read(&tcg_ctxs[i]); 367 bool err = tcg_region_initial_alloc__locked(s); 368 369 g_assert(!err); 370 } 371 qemu_mutex_unlock(®ion.lock); 372 } 373 374 #ifdef CONFIG_USER_ONLY 375 static size_t tcg_n_regions(void) 376 { 377 return 1; 378 } 379 #else 380 /* 381 * It is likely that some vCPUs will translate more code than others, so we 382 * first try to set more regions than max_cpus, with those regions being of 383 * reasonable size. If that's not possible we make do by evenly dividing 384 * the code_gen_buffer among the vCPUs. 385 */ 386 static size_t tcg_n_regions(void) 387 { 388 size_t i; 389 390 /* Use a single region if all we have is one vCPU thread */ 391 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 392 return 1; 393 } 394 395 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 396 for (i = 8; i > 0; i--) { 397 size_t regions_per_thread = i; 398 size_t region_size; 399 400 region_size = tcg_init_ctx.code_gen_buffer_size; 401 region_size /= max_cpus * regions_per_thread; 402 403 if (region_size >= 2 * 1024u * 1024) { 404 return max_cpus * regions_per_thread; 405 } 406 } 407 /* If we can't, then just allocate one region per vCPU thread */ 408 return max_cpus; 409 } 410 #endif 411 412 /* 413 * Initializes region partitioning. 414 * 415 * Called at init time from the parent thread (i.e. the one calling 416 * tcg_context_init), after the target's TCG globals have been set. 417 * 418 * Region partitioning works by splitting code_gen_buffer into separate regions, 419 * and then assigning regions to TCG threads so that the threads can translate 420 * code in parallel without synchronization. 421 * 422 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 423 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 424 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 425 * must have been parsed before calling this function, since it calls 426 * qemu_tcg_mttcg_enabled(). 427 * 428 * In user-mode we use a single region. Having multiple regions in user-mode 429 * is not supported, because the number of vCPU threads (recall that each thread 430 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 431 * OS, and usually this number is huge (tens of thousands is not uncommon). 432 * Thus, given this large bound on the number of vCPU threads and the fact 433 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 434 * that the availability of at least one region per vCPU thread. 435 * 436 * However, this user-mode limitation is unlikely to be a significant problem 437 * in practice. Multi-threaded guests share most if not all of their translated 438 * code, which makes parallel code generation less appealing than in softmmu. 
439 */ 440 void tcg_region_init(void) 441 { 442 void *buf = tcg_init_ctx.code_gen_buffer; 443 void *aligned; 444 size_t size = tcg_init_ctx.code_gen_buffer_size; 445 size_t page_size = qemu_real_host_page_size; 446 size_t region_size; 447 size_t n_regions; 448 size_t i; 449 450 n_regions = tcg_n_regions(); 451 452 /* The first region will be 'aligned - buf' bytes larger than the others */ 453 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 454 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 455 /* 456 * Make region_size a multiple of page_size, using aligned as the start. 457 * As a result of this we might end up with a few extra pages at the end of 458 * the buffer; we will assign those to the last region. 459 */ 460 region_size = (size - (aligned - buf)) / n_regions; 461 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 462 463 /* A region must have at least 2 pages; one code, one guard */ 464 g_assert(region_size >= 2 * page_size); 465 466 /* init the region struct */ 467 qemu_mutex_init(®ion.lock); 468 region.n = n_regions; 469 region.size = region_size - page_size; 470 region.stride = region_size; 471 region.start = buf; 472 region.start_aligned = aligned; 473 /* page-align the end, since its last page will be a guard page */ 474 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 475 /* account for that last guard page */ 476 region.end -= page_size; 477 478 /* set guard pages */ 479 for (i = 0; i < region.n; i++) { 480 void *start, *end; 481 int rc; 482 483 tcg_region_bounds(i, &start, &end); 484 rc = qemu_mprotect_none(end, page_size); 485 g_assert(!rc); 486 } 487 488 /* In user-mode we support only one ctx, so do the initial allocation now */ 489 #ifdef CONFIG_USER_ONLY 490 { 491 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 492 493 g_assert(!err); 494 } 495 #endif 496 } 497 498 /* 499 * All TCG threads except the parent (i.e. the one that called tcg_context_init 500 * and registered the target's TCG globals) must register with this function 501 * before initiating translation. 502 * 503 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 504 * of tcg_region_init() for the reasoning behind this. 505 * 506 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 507 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 508 * is not used anymore for translation once this function is called. 509 * 510 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 511 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 512 */ 513 #ifdef CONFIG_USER_ONLY 514 void tcg_register_thread(void) 515 { 516 tcg_ctx = &tcg_init_ctx; 517 } 518 #else 519 void tcg_register_thread(void) 520 { 521 TCGContext *s = g_malloc(sizeof(*s)); 522 unsigned int i, n; 523 bool err; 524 525 *s = tcg_init_ctx; 526 527 /* Relink mem_base. 
*/ 528 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 529 if (tcg_init_ctx.temps[i].mem_base) { 530 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 531 tcg_debug_assert(b >= 0 && b < n); 532 s->temps[i].mem_base = &s->temps[b]; 533 } 534 } 535 536 /* Claim an entry in tcg_ctxs */ 537 n = atomic_fetch_inc(&n_tcg_ctxs); 538 g_assert(n < max_cpus); 539 atomic_set(&tcg_ctxs[n], s); 540 541 tcg_ctx = s; 542 qemu_mutex_lock(®ion.lock); 543 err = tcg_region_initial_alloc__locked(tcg_ctx); 544 g_assert(!err); 545 qemu_mutex_unlock(®ion.lock); 546 } 547 #endif /* !CONFIG_USER_ONLY */ 548 549 /* 550 * Returns the size (in bytes) of all translated code (i.e. from all regions) 551 * currently in the cache. 552 * See also: tcg_code_capacity() 553 * Do not confuse with tcg_current_code_size(); that one applies to a single 554 * TCG context. 555 */ 556 size_t tcg_code_size(void) 557 { 558 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 559 unsigned int i; 560 size_t total; 561 562 qemu_mutex_lock(®ion.lock); 563 total = region.agg_size_full; 564 for (i = 0; i < n_ctxs; i++) { 565 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 566 size_t size; 567 568 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 569 g_assert(size <= s->code_gen_buffer_size); 570 total += size; 571 } 572 qemu_mutex_unlock(®ion.lock); 573 return total; 574 } 575 576 /* 577 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 578 * regions. 579 * See also: tcg_code_size() 580 */ 581 size_t tcg_code_capacity(void) 582 { 583 size_t guard_size, capacity; 584 585 /* no need for synchronization; these variables are set at init time */ 586 guard_size = region.stride - region.size; 587 capacity = region.end + guard_size - region.start; 588 capacity -= region.n * (guard_size + TCG_HIGHWATER); 589 return capacity; 590 } 591 592 /* pool based memory allocation */ 593 void *tcg_malloc_internal(TCGContext *s, int size) 594 { 595 TCGPool *p; 596 int pool_size; 597 598 if (size > TCG_POOL_CHUNK_SIZE) { 599 /* big malloc: insert a new pool (XXX: could optimize) */ 600 p = g_malloc(sizeof(TCGPool) + size); 601 p->size = size; 602 p->next = s->pool_first_large; 603 s->pool_first_large = p; 604 return p->data; 605 } else { 606 p = s->pool_current; 607 if (!p) { 608 p = s->pool_first; 609 if (!p) 610 goto new_pool; 611 } else { 612 if (!p->next) { 613 new_pool: 614 pool_size = TCG_POOL_CHUNK_SIZE; 615 p = g_malloc(sizeof(TCGPool) + pool_size); 616 p->size = pool_size; 617 p->next = NULL; 618 if (s->pool_current) 619 s->pool_current->next = p; 620 else 621 s->pool_first = p; 622 } else { 623 p = p->next; 624 } 625 } 626 } 627 s->pool_current = p; 628 s->pool_cur = p->data + size; 629 s->pool_end = p->data + p->size; 630 return p->data; 631 } 632 633 void tcg_pool_reset(TCGContext *s) 634 { 635 TCGPool *p, *t; 636 for (p = s->pool_first_large; p; p = t) { 637 t = p->next; 638 g_free(p); 639 } 640 s->pool_first_large = NULL; 641 s->pool_cur = s->pool_end = NULL; 642 s->pool_current = NULL; 643 } 644 645 typedef struct TCGHelperInfo { 646 void *func; 647 const char *name; 648 unsigned flags; 649 unsigned sizemask; 650 } TCGHelperInfo; 651 652 #include "exec/helper-proto.h" 653 654 static const TCGHelperInfo all_helpers[] = { 655 #include "exec/helper-tcg.h" 656 }; 657 static GHashTable *helper_table; 658 659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 660 static void process_op_defs(TCGContext *s); 661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, 
TCGType type, 662 TCGReg reg, const char *name); 663 664 void tcg_context_init(TCGContext *s) 665 { 666 int op, total_args, n, i; 667 TCGOpDef *def; 668 TCGArgConstraint *args_ct; 669 int *sorted_args; 670 TCGTemp *ts; 671 672 memset(s, 0, sizeof(*s)); 673 s->nb_globals = 0; 674 675 /* Count total number of arguments and allocate the corresponding 676 space */ 677 total_args = 0; 678 for(op = 0; op < NB_OPS; op++) { 679 def = &tcg_op_defs[op]; 680 n = def->nb_iargs + def->nb_oargs; 681 total_args += n; 682 } 683 684 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 685 sorted_args = g_malloc(sizeof(int) * total_args); 686 687 for(op = 0; op < NB_OPS; op++) { 688 def = &tcg_op_defs[op]; 689 def->args_ct = args_ct; 690 def->sorted_args = sorted_args; 691 n = def->nb_iargs + def->nb_oargs; 692 sorted_args += n; 693 args_ct += n; 694 } 695 696 /* Register helpers. */ 697 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 698 helper_table = g_hash_table_new(NULL, NULL); 699 700 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 701 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 702 (gpointer)&all_helpers[i]); 703 } 704 705 tcg_target_init(s); 706 process_op_defs(s); 707 708 /* Reverse the order of the saved registers, assuming they're all at 709 the start of tcg_target_reg_alloc_order. */ 710 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 711 int r = tcg_target_reg_alloc_order[n]; 712 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 713 break; 714 } 715 } 716 for (i = 0; i < n; ++i) { 717 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 718 } 719 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 720 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 721 } 722 723 tcg_ctx = s; 724 /* 725 * In user-mode we simply share the init context among threads, since we 726 * use a single region. See the documentation tcg_region_init() for the 727 * reasoning behind this. 728 * In softmmu we will have at most max_cpus TCG threads. 729 */ 730 #ifdef CONFIG_USER_ONLY 731 tcg_ctxs = &tcg_ctx; 732 n_tcg_ctxs = 1; 733 #else 734 tcg_ctxs = g_new(TCGContext *, max_cpus); 735 #endif 736 737 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 738 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 739 cpu_env = temp_tcgv_ptr(ts); 740 } 741 742 /* 743 * Allocate TBs right before their corresponding translated code, making 744 * sure that TBs and code are on different cache lines. 745 */ 746 TranslationBlock *tcg_tb_alloc(TCGContext *s) 747 { 748 uintptr_t align = qemu_icache_linesize; 749 TranslationBlock *tb; 750 void *next; 751 752 retry: 753 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 754 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 755 756 if (unlikely(next > s->code_gen_highwater)) { 757 if (tcg_region_alloc(s)) { 758 return NULL; 759 } 760 goto retry; 761 } 762 atomic_set(&s->code_gen_ptr, next); 763 s->data_gen_ptr = NULL; 764 return tb; 765 } 766 767 void tcg_prologue_init(TCGContext *s) 768 { 769 size_t prologue_size, total_size; 770 void *buf0, *buf1; 771 772 /* Put the prologue at the beginning of code_gen_buffer. */ 773 buf0 = s->code_gen_buffer; 774 total_size = s->code_gen_buffer_size; 775 s->code_ptr = buf0; 776 s->code_buf = buf0; 777 s->data_gen_ptr = NULL; 778 s->code_gen_prologue = buf0; 779 780 /* Compute a high-water mark, at which we voluntarily flush the buffer 781 and start over. 
The size here is arbitrary, significantly larger 782 than we expect the code generation for any one opcode to require. */ 783 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 784 785 #ifdef TCG_TARGET_NEED_POOL_LABELS 786 s->pool_labels = NULL; 787 #endif 788 789 /* Generate the prologue. */ 790 tcg_target_qemu_prologue(s); 791 792 #ifdef TCG_TARGET_NEED_POOL_LABELS 793 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 794 { 795 bool ok = tcg_out_pool_finalize(s); 796 tcg_debug_assert(ok); 797 } 798 #endif 799 800 buf1 = s->code_ptr; 801 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 802 803 /* Deduct the prologue from the buffer. */ 804 prologue_size = tcg_current_code_size(s); 805 s->code_gen_ptr = buf1; 806 s->code_gen_buffer = buf1; 807 s->code_buf = buf1; 808 total_size -= prologue_size; 809 s->code_gen_buffer_size = total_size; 810 811 tcg_register_jit(s->code_gen_buffer, total_size); 812 813 #ifdef DEBUG_DISAS 814 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 815 qemu_log_lock(); 816 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 817 if (s->data_gen_ptr) { 818 size_t code_size = s->data_gen_ptr - buf0; 819 size_t data_size = prologue_size - code_size; 820 size_t i; 821 822 log_disas(buf0, code_size); 823 824 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 825 if (sizeof(tcg_target_ulong) == 8) { 826 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 827 (uintptr_t)s->data_gen_ptr + i, 828 *(uint64_t *)(s->data_gen_ptr + i)); 829 } else { 830 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 831 (uintptr_t)s->data_gen_ptr + i, 832 *(uint32_t *)(s->data_gen_ptr + i)); 833 } 834 } 835 } else { 836 log_disas(buf0, prologue_size); 837 } 838 qemu_log("\n"); 839 qemu_log_flush(); 840 qemu_log_unlock(); 841 } 842 #endif 843 844 /* Assert that goto_ptr is implemented completely. */ 845 if (TCG_TARGET_HAS_goto_ptr) { 846 tcg_debug_assert(s->code_gen_epilogue != NULL); 847 } 848 } 849 850 void tcg_func_start(TCGContext *s) 851 { 852 tcg_pool_reset(s); 853 s->nb_temps = s->nb_globals; 854 855 /* No temps have been previously allocated for size or locality. 
*/ 856 memset(s->free_temps, 0, sizeof(s->free_temps)); 857 858 s->nb_labels = 0; 859 s->current_frame_offset = s->frame_start; 860 861 #ifdef CONFIG_DEBUG_TCG 862 s->goto_tb_issue_mask = 0; 863 #endif 864 865 QTAILQ_INIT(&s->ops); 866 QTAILQ_INIT(&s->free_ops); 867 } 868 869 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 870 { 871 int n = s->nb_temps++; 872 tcg_debug_assert(n < TCG_MAX_TEMPS); 873 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 874 } 875 876 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 877 { 878 TCGTemp *ts; 879 880 tcg_debug_assert(s->nb_globals == s->nb_temps); 881 s->nb_globals++; 882 ts = tcg_temp_alloc(s); 883 ts->temp_global = 1; 884 885 return ts; 886 } 887 888 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 889 TCGReg reg, const char *name) 890 { 891 TCGTemp *ts; 892 893 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 894 tcg_abort(); 895 } 896 897 ts = tcg_global_alloc(s); 898 ts->base_type = type; 899 ts->type = type; 900 ts->fixed_reg = 1; 901 ts->reg = reg; 902 ts->name = name; 903 tcg_regset_set_reg(s->reserved_regs, reg); 904 905 return ts; 906 } 907 908 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 909 { 910 s->frame_start = start; 911 s->frame_end = start + size; 912 s->frame_temp 913 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 914 } 915 916 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 917 intptr_t offset, const char *name) 918 { 919 TCGContext *s = tcg_ctx; 920 TCGTemp *base_ts = tcgv_ptr_temp(base); 921 TCGTemp *ts = tcg_global_alloc(s); 922 int indirect_reg = 0, bigendian = 0; 923 #ifdef HOST_WORDS_BIGENDIAN 924 bigendian = 1; 925 #endif 926 927 if (!base_ts->fixed_reg) { 928 /* We do not support double-indirect registers. */ 929 tcg_debug_assert(!base_ts->indirect_reg); 930 base_ts->indirect_base = 1; 931 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 932 ? 2 : 1); 933 indirect_reg = 1; 934 } 935 936 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 937 TCGTemp *ts2 = tcg_global_alloc(s); 938 char buf[64]; 939 940 ts->base_type = TCG_TYPE_I64; 941 ts->type = TCG_TYPE_I32; 942 ts->indirect_reg = indirect_reg; 943 ts->mem_allocated = 1; 944 ts->mem_base = base_ts; 945 ts->mem_offset = offset + bigendian * 4; 946 pstrcpy(buf, sizeof(buf), name); 947 pstrcat(buf, sizeof(buf), "_0"); 948 ts->name = strdup(buf); 949 950 tcg_debug_assert(ts2 == ts + 1); 951 ts2->base_type = TCG_TYPE_I64; 952 ts2->type = TCG_TYPE_I32; 953 ts2->indirect_reg = indirect_reg; 954 ts2->mem_allocated = 1; 955 ts2->mem_base = base_ts; 956 ts2->mem_offset = offset + (1 - bigendian) * 4; 957 pstrcpy(buf, sizeof(buf), name); 958 pstrcat(buf, sizeof(buf), "_1"); 959 ts2->name = strdup(buf); 960 } else { 961 ts->base_type = type; 962 ts->type = type; 963 ts->indirect_reg = indirect_reg; 964 ts->mem_allocated = 1; 965 ts->mem_base = base_ts; 966 ts->mem_offset = offset; 967 ts->name = name; 968 } 969 return ts; 970 } 971 972 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local) 973 { 974 TCGContext *s = tcg_ctx; 975 TCGTemp *ts; 976 int idx, k; 977 978 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 979 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 980 if (idx < TCG_MAX_TEMPS) { 981 /* There is already an available temp with the right type. 
*/ 982 clear_bit(idx, s->free_temps[k].l); 983 984 ts = &s->temps[idx]; 985 ts->temp_allocated = 1; 986 tcg_debug_assert(ts->base_type == type); 987 tcg_debug_assert(ts->temp_local == temp_local); 988 } else { 989 ts = tcg_temp_alloc(s); 990 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 991 TCGTemp *ts2 = tcg_temp_alloc(s); 992 993 ts->base_type = type; 994 ts->type = TCG_TYPE_I32; 995 ts->temp_allocated = 1; 996 ts->temp_local = temp_local; 997 998 tcg_debug_assert(ts2 == ts + 1); 999 ts2->base_type = TCG_TYPE_I64; 1000 ts2->type = TCG_TYPE_I32; 1001 ts2->temp_allocated = 1; 1002 ts2->temp_local = temp_local; 1003 } else { 1004 ts->base_type = type; 1005 ts->type = type; 1006 ts->temp_allocated = 1; 1007 ts->temp_local = temp_local; 1008 } 1009 } 1010 1011 #if defined(CONFIG_DEBUG_TCG) 1012 s->temps_in_use++; 1013 #endif 1014 return ts; 1015 } 1016 1017 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 1018 { 1019 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 1020 return temp_tcgv_i32(t); 1021 } 1022 1023 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 1024 { 1025 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 1026 return temp_tcgv_i64(t); 1027 } 1028 1029 static void tcg_temp_free_internal(TCGTemp *ts) 1030 { 1031 TCGContext *s = tcg_ctx; 1032 int k, idx; 1033 1034 #if defined(CONFIG_DEBUG_TCG) 1035 s->temps_in_use--; 1036 if (s->temps_in_use < 0) { 1037 fprintf(stderr, "More temporaries freed than allocated!\n"); 1038 } 1039 #endif 1040 1041 tcg_debug_assert(ts->temp_global == 0); 1042 tcg_debug_assert(ts->temp_allocated != 0); 1043 ts->temp_allocated = 0; 1044 1045 idx = temp_idx(ts); 1046 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 1047 set_bit(idx, s->free_temps[k].l); 1048 } 1049 1050 void tcg_temp_free_i32(TCGv_i32 arg) 1051 { 1052 tcg_temp_free_internal(tcgv_i32_temp(arg)); 1053 } 1054 1055 void tcg_temp_free_i64(TCGv_i64 arg) 1056 { 1057 tcg_temp_free_internal(tcgv_i64_temp(arg)); 1058 } 1059 1060 TCGv_i32 tcg_const_i32(int32_t val) 1061 { 1062 TCGv_i32 t0; 1063 t0 = tcg_temp_new_i32(); 1064 tcg_gen_movi_i32(t0, val); 1065 return t0; 1066 } 1067 1068 TCGv_i64 tcg_const_i64(int64_t val) 1069 { 1070 TCGv_i64 t0; 1071 t0 = tcg_temp_new_i64(); 1072 tcg_gen_movi_i64(t0, val); 1073 return t0; 1074 } 1075 1076 TCGv_i32 tcg_const_local_i32(int32_t val) 1077 { 1078 TCGv_i32 t0; 1079 t0 = tcg_temp_local_new_i32(); 1080 tcg_gen_movi_i32(t0, val); 1081 return t0; 1082 } 1083 1084 TCGv_i64 tcg_const_local_i64(int64_t val) 1085 { 1086 TCGv_i64 t0; 1087 t0 = tcg_temp_local_new_i64(); 1088 tcg_gen_movi_i64(t0, val); 1089 return t0; 1090 } 1091 1092 #if defined(CONFIG_DEBUG_TCG) 1093 void tcg_clear_temp_count(void) 1094 { 1095 TCGContext *s = tcg_ctx; 1096 s->temps_in_use = 0; 1097 } 1098 1099 int tcg_check_temp_count(void) 1100 { 1101 TCGContext *s = tcg_ctx; 1102 if (s->temps_in_use) { 1103 /* Clear the count so that we don't give another 1104 * warning immediately next time around. 1105 */ 1106 s->temps_in_use = 0; 1107 return 1; 1108 } 1109 return 0; 1110 } 1111 #endif 1112 1113 /* Return true if OP may appear in the opcode stream. 1114 Test the runtime variable that controls each opcode. 
*/ 1115 bool tcg_op_supported(TCGOpcode op) 1116 { 1117 switch (op) { 1118 case INDEX_op_discard: 1119 case INDEX_op_set_label: 1120 case INDEX_op_call: 1121 case INDEX_op_br: 1122 case INDEX_op_mb: 1123 case INDEX_op_insn_start: 1124 case INDEX_op_exit_tb: 1125 case INDEX_op_goto_tb: 1126 case INDEX_op_qemu_ld_i32: 1127 case INDEX_op_qemu_st_i32: 1128 case INDEX_op_qemu_ld_i64: 1129 case INDEX_op_qemu_st_i64: 1130 return true; 1131 1132 case INDEX_op_goto_ptr: 1133 return TCG_TARGET_HAS_goto_ptr; 1134 1135 case INDEX_op_mov_i32: 1136 case INDEX_op_movi_i32: 1137 case INDEX_op_setcond_i32: 1138 case INDEX_op_brcond_i32: 1139 case INDEX_op_ld8u_i32: 1140 case INDEX_op_ld8s_i32: 1141 case INDEX_op_ld16u_i32: 1142 case INDEX_op_ld16s_i32: 1143 case INDEX_op_ld_i32: 1144 case INDEX_op_st8_i32: 1145 case INDEX_op_st16_i32: 1146 case INDEX_op_st_i32: 1147 case INDEX_op_add_i32: 1148 case INDEX_op_sub_i32: 1149 case INDEX_op_mul_i32: 1150 case INDEX_op_and_i32: 1151 case INDEX_op_or_i32: 1152 case INDEX_op_xor_i32: 1153 case INDEX_op_shl_i32: 1154 case INDEX_op_shr_i32: 1155 case INDEX_op_sar_i32: 1156 return true; 1157 1158 case INDEX_op_movcond_i32: 1159 return TCG_TARGET_HAS_movcond_i32; 1160 case INDEX_op_div_i32: 1161 case INDEX_op_divu_i32: 1162 return TCG_TARGET_HAS_div_i32; 1163 case INDEX_op_rem_i32: 1164 case INDEX_op_remu_i32: 1165 return TCG_TARGET_HAS_rem_i32; 1166 case INDEX_op_div2_i32: 1167 case INDEX_op_divu2_i32: 1168 return TCG_TARGET_HAS_div2_i32; 1169 case INDEX_op_rotl_i32: 1170 case INDEX_op_rotr_i32: 1171 return TCG_TARGET_HAS_rot_i32; 1172 case INDEX_op_deposit_i32: 1173 return TCG_TARGET_HAS_deposit_i32; 1174 case INDEX_op_extract_i32: 1175 return TCG_TARGET_HAS_extract_i32; 1176 case INDEX_op_sextract_i32: 1177 return TCG_TARGET_HAS_sextract_i32; 1178 case INDEX_op_add2_i32: 1179 return TCG_TARGET_HAS_add2_i32; 1180 case INDEX_op_sub2_i32: 1181 return TCG_TARGET_HAS_sub2_i32; 1182 case INDEX_op_mulu2_i32: 1183 return TCG_TARGET_HAS_mulu2_i32; 1184 case INDEX_op_muls2_i32: 1185 return TCG_TARGET_HAS_muls2_i32; 1186 case INDEX_op_muluh_i32: 1187 return TCG_TARGET_HAS_muluh_i32; 1188 case INDEX_op_mulsh_i32: 1189 return TCG_TARGET_HAS_mulsh_i32; 1190 case INDEX_op_ext8s_i32: 1191 return TCG_TARGET_HAS_ext8s_i32; 1192 case INDEX_op_ext16s_i32: 1193 return TCG_TARGET_HAS_ext16s_i32; 1194 case INDEX_op_ext8u_i32: 1195 return TCG_TARGET_HAS_ext8u_i32; 1196 case INDEX_op_ext16u_i32: 1197 return TCG_TARGET_HAS_ext16u_i32; 1198 case INDEX_op_bswap16_i32: 1199 return TCG_TARGET_HAS_bswap16_i32; 1200 case INDEX_op_bswap32_i32: 1201 return TCG_TARGET_HAS_bswap32_i32; 1202 case INDEX_op_not_i32: 1203 return TCG_TARGET_HAS_not_i32; 1204 case INDEX_op_neg_i32: 1205 return TCG_TARGET_HAS_neg_i32; 1206 case INDEX_op_andc_i32: 1207 return TCG_TARGET_HAS_andc_i32; 1208 case INDEX_op_orc_i32: 1209 return TCG_TARGET_HAS_orc_i32; 1210 case INDEX_op_eqv_i32: 1211 return TCG_TARGET_HAS_eqv_i32; 1212 case INDEX_op_nand_i32: 1213 return TCG_TARGET_HAS_nand_i32; 1214 case INDEX_op_nor_i32: 1215 return TCG_TARGET_HAS_nor_i32; 1216 case INDEX_op_clz_i32: 1217 return TCG_TARGET_HAS_clz_i32; 1218 case INDEX_op_ctz_i32: 1219 return TCG_TARGET_HAS_ctz_i32; 1220 case INDEX_op_ctpop_i32: 1221 return TCG_TARGET_HAS_ctpop_i32; 1222 1223 case INDEX_op_brcond2_i32: 1224 case INDEX_op_setcond2_i32: 1225 return TCG_TARGET_REG_BITS == 32; 1226 1227 case INDEX_op_mov_i64: 1228 case INDEX_op_movi_i64: 1229 case INDEX_op_setcond_i64: 1230 case INDEX_op_brcond_i64: 1231 case INDEX_op_ld8u_i64: 1232 case 
INDEX_op_ld8s_i64: 1233 case INDEX_op_ld16u_i64: 1234 case INDEX_op_ld16s_i64: 1235 case INDEX_op_ld32u_i64: 1236 case INDEX_op_ld32s_i64: 1237 case INDEX_op_ld_i64: 1238 case INDEX_op_st8_i64: 1239 case INDEX_op_st16_i64: 1240 case INDEX_op_st32_i64: 1241 case INDEX_op_st_i64: 1242 case INDEX_op_add_i64: 1243 case INDEX_op_sub_i64: 1244 case INDEX_op_mul_i64: 1245 case INDEX_op_and_i64: 1246 case INDEX_op_or_i64: 1247 case INDEX_op_xor_i64: 1248 case INDEX_op_shl_i64: 1249 case INDEX_op_shr_i64: 1250 case INDEX_op_sar_i64: 1251 case INDEX_op_ext_i32_i64: 1252 case INDEX_op_extu_i32_i64: 1253 return TCG_TARGET_REG_BITS == 64; 1254 1255 case INDEX_op_movcond_i64: 1256 return TCG_TARGET_HAS_movcond_i64; 1257 case INDEX_op_div_i64: 1258 case INDEX_op_divu_i64: 1259 return TCG_TARGET_HAS_div_i64; 1260 case INDEX_op_rem_i64: 1261 case INDEX_op_remu_i64: 1262 return TCG_TARGET_HAS_rem_i64; 1263 case INDEX_op_div2_i64: 1264 case INDEX_op_divu2_i64: 1265 return TCG_TARGET_HAS_div2_i64; 1266 case INDEX_op_rotl_i64: 1267 case INDEX_op_rotr_i64: 1268 return TCG_TARGET_HAS_rot_i64; 1269 case INDEX_op_deposit_i64: 1270 return TCG_TARGET_HAS_deposit_i64; 1271 case INDEX_op_extract_i64: 1272 return TCG_TARGET_HAS_extract_i64; 1273 case INDEX_op_sextract_i64: 1274 return TCG_TARGET_HAS_sextract_i64; 1275 case INDEX_op_extrl_i64_i32: 1276 return TCG_TARGET_HAS_extrl_i64_i32; 1277 case INDEX_op_extrh_i64_i32: 1278 return TCG_TARGET_HAS_extrh_i64_i32; 1279 case INDEX_op_ext8s_i64: 1280 return TCG_TARGET_HAS_ext8s_i64; 1281 case INDEX_op_ext16s_i64: 1282 return TCG_TARGET_HAS_ext16s_i64; 1283 case INDEX_op_ext32s_i64: 1284 return TCG_TARGET_HAS_ext32s_i64; 1285 case INDEX_op_ext8u_i64: 1286 return TCG_TARGET_HAS_ext8u_i64; 1287 case INDEX_op_ext16u_i64: 1288 return TCG_TARGET_HAS_ext16u_i64; 1289 case INDEX_op_ext32u_i64: 1290 return TCG_TARGET_HAS_ext32u_i64; 1291 case INDEX_op_bswap16_i64: 1292 return TCG_TARGET_HAS_bswap16_i64; 1293 case INDEX_op_bswap32_i64: 1294 return TCG_TARGET_HAS_bswap32_i64; 1295 case INDEX_op_bswap64_i64: 1296 return TCG_TARGET_HAS_bswap64_i64; 1297 case INDEX_op_not_i64: 1298 return TCG_TARGET_HAS_not_i64; 1299 case INDEX_op_neg_i64: 1300 return TCG_TARGET_HAS_neg_i64; 1301 case INDEX_op_andc_i64: 1302 return TCG_TARGET_HAS_andc_i64; 1303 case INDEX_op_orc_i64: 1304 return TCG_TARGET_HAS_orc_i64; 1305 case INDEX_op_eqv_i64: 1306 return TCG_TARGET_HAS_eqv_i64; 1307 case INDEX_op_nand_i64: 1308 return TCG_TARGET_HAS_nand_i64; 1309 case INDEX_op_nor_i64: 1310 return TCG_TARGET_HAS_nor_i64; 1311 case INDEX_op_clz_i64: 1312 return TCG_TARGET_HAS_clz_i64; 1313 case INDEX_op_ctz_i64: 1314 return TCG_TARGET_HAS_ctz_i64; 1315 case INDEX_op_ctpop_i64: 1316 return TCG_TARGET_HAS_ctpop_i64; 1317 case INDEX_op_add2_i64: 1318 return TCG_TARGET_HAS_add2_i64; 1319 case INDEX_op_sub2_i64: 1320 return TCG_TARGET_HAS_sub2_i64; 1321 case INDEX_op_mulu2_i64: 1322 return TCG_TARGET_HAS_mulu2_i64; 1323 case INDEX_op_muls2_i64: 1324 return TCG_TARGET_HAS_muls2_i64; 1325 case INDEX_op_muluh_i64: 1326 return TCG_TARGET_HAS_muluh_i64; 1327 case INDEX_op_mulsh_i64: 1328 return TCG_TARGET_HAS_mulsh_i64; 1329 1330 case NB_OPS: 1331 break; 1332 } 1333 g_assert_not_reached(); 1334 } 1335 1336 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1337 and endian swap. Maybe it would be better to do the alignment 1338 and endian swap in tcg_reg_alloc_call(). 
*/ 1339 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1340 { 1341 int i, real_args, nb_rets, pi; 1342 unsigned sizemask, flags; 1343 TCGHelperInfo *info; 1344 TCGOp *op; 1345 1346 info = g_hash_table_lookup(helper_table, (gpointer)func); 1347 flags = info->flags; 1348 sizemask = info->sizemask; 1349 1350 #if defined(__sparc__) && !defined(__arch64__) \ 1351 && !defined(CONFIG_TCG_INTERPRETER) 1352 /* We have 64-bit values in one register, but need to pass as two 1353 separate parameters. Split them. */ 1354 int orig_sizemask = sizemask; 1355 int orig_nargs = nargs; 1356 TCGv_i64 retl, reth; 1357 TCGTemp *split_args[MAX_OPC_PARAM]; 1358 1359 retl = NULL; 1360 reth = NULL; 1361 if (sizemask != 0) { 1362 for (i = real_args = 0; i < nargs; ++i) { 1363 int is_64bit = sizemask & (1 << (i+1)*2); 1364 if (is_64bit) { 1365 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1366 TCGv_i32 h = tcg_temp_new_i32(); 1367 TCGv_i32 l = tcg_temp_new_i32(); 1368 tcg_gen_extr_i64_i32(l, h, orig); 1369 split_args[real_args++] = tcgv_i32_temp(h); 1370 split_args[real_args++] = tcgv_i32_temp(l); 1371 } else { 1372 split_args[real_args++] = args[i]; 1373 } 1374 } 1375 nargs = real_args; 1376 args = split_args; 1377 sizemask = 0; 1378 } 1379 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1380 for (i = 0; i < nargs; ++i) { 1381 int is_64bit = sizemask & (1 << (i+1)*2); 1382 int is_signed = sizemask & (2 << (i+1)*2); 1383 if (!is_64bit) { 1384 TCGv_i64 temp = tcg_temp_new_i64(); 1385 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1386 if (is_signed) { 1387 tcg_gen_ext32s_i64(temp, orig); 1388 } else { 1389 tcg_gen_ext32u_i64(temp, orig); 1390 } 1391 args[i] = tcgv_i64_temp(temp); 1392 } 1393 } 1394 #endif /* TCG_TARGET_EXTEND_ARGS */ 1395 1396 op = tcg_emit_op(INDEX_op_call); 1397 1398 pi = 0; 1399 if (ret != NULL) { 1400 #if defined(__sparc__) && !defined(__arch64__) \ 1401 && !defined(CONFIG_TCG_INTERPRETER) 1402 if (orig_sizemask & 1) { 1403 /* The 32-bit ABI is going to return the 64-bit value in 1404 the %o0/%o1 register pair. Prepare for this by using 1405 two return temporaries, and reassemble below. */ 1406 retl = tcg_temp_new_i64(); 1407 reth = tcg_temp_new_i64(); 1408 op->args[pi++] = tcgv_i64_arg(reth); 1409 op->args[pi++] = tcgv_i64_arg(retl); 1410 nb_rets = 2; 1411 } else { 1412 op->args[pi++] = temp_arg(ret); 1413 nb_rets = 1; 1414 } 1415 #else 1416 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1417 #ifdef HOST_WORDS_BIGENDIAN 1418 op->args[pi++] = temp_arg(ret + 1); 1419 op->args[pi++] = temp_arg(ret); 1420 #else 1421 op->args[pi++] = temp_arg(ret); 1422 op->args[pi++] = temp_arg(ret + 1); 1423 #endif 1424 nb_rets = 2; 1425 } else { 1426 op->args[pi++] = temp_arg(ret); 1427 nb_rets = 1; 1428 } 1429 #endif 1430 } else { 1431 nb_rets = 0; 1432 } 1433 TCGOP_CALLO(op) = nb_rets; 1434 1435 real_args = 0; 1436 for (i = 0; i < nargs; i++) { 1437 int is_64bit = sizemask & (1 << (i+1)*2); 1438 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1439 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1440 /* some targets want aligned 64 bit args */ 1441 if (real_args & 1) { 1442 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1443 real_args++; 1444 } 1445 #endif 1446 /* If stack grows up, then we will be placing successive 1447 arguments at lower addresses, which means we need to 1448 reverse the order compared to how we would normally 1449 treat either big or little-endian. 
For those arguments 1450 that will wind up in registers, this still works for 1451 HPPA (the only current STACK_GROWSUP target) since the 1452 argument registers are *also* allocated in decreasing 1453 order. If another such target is added, this logic may 1454 have to get more complicated to differentiate between 1455 stack arguments and register arguments. */ 1456 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1457 op->args[pi++] = temp_arg(args[i] + 1); 1458 op->args[pi++] = temp_arg(args[i]); 1459 #else 1460 op->args[pi++] = temp_arg(args[i]); 1461 op->args[pi++] = temp_arg(args[i] + 1); 1462 #endif 1463 real_args += 2; 1464 continue; 1465 } 1466 1467 op->args[pi++] = temp_arg(args[i]); 1468 real_args++; 1469 } 1470 op->args[pi++] = (uintptr_t)func; 1471 op->args[pi++] = flags; 1472 TCGOP_CALLI(op) = real_args; 1473 1474 /* Make sure the fields didn't overflow. */ 1475 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 1476 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1477 1478 #if defined(__sparc__) && !defined(__arch64__) \ 1479 && !defined(CONFIG_TCG_INTERPRETER) 1480 /* Free all of the parts we allocated above. */ 1481 for (i = real_args = 0; i < orig_nargs; ++i) { 1482 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1483 if (is_64bit) { 1484 tcg_temp_free_internal(args[real_args++]); 1485 tcg_temp_free_internal(args[real_args++]); 1486 } else { 1487 real_args++; 1488 } 1489 } 1490 if (orig_sizemask & 1) { 1491 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1492 Note that describing these as TCGv_i64 eliminates an unnecessary 1493 zero-extension that tcg_gen_concat_i32_i64 would create. */ 1494 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1495 tcg_temp_free_i64(retl); 1496 tcg_temp_free_i64(reth); 1497 } 1498 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1499 for (i = 0; i < nargs; ++i) { 1500 int is_64bit = sizemask & (1 << (i+1)*2); 1501 if (!is_64bit) { 1502 tcg_temp_free_internal(args[i]); 1503 } 1504 } 1505 #endif /* TCG_TARGET_EXTEND_ARGS */ 1506 } 1507 1508 static void tcg_reg_alloc_start(TCGContext *s) 1509 { 1510 int i, n; 1511 TCGTemp *ts; 1512 1513 for (i = 0, n = s->nb_globals; i < n; i++) { 1514 ts = &s->temps[i]; 1515 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM); 1516 } 1517 for (n = s->nb_temps; i < n; i++) { 1518 ts = &s->temps[i]; 1519 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1520 ts->mem_allocated = 0; 1521 ts->fixed_reg = 0; 1522 } 1523 1524 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 1525 } 1526 1527 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 1528 TCGTemp *ts) 1529 { 1530 int idx = temp_idx(ts); 1531 1532 if (ts->temp_global) { 1533 pstrcpy(buf, buf_size, ts->name); 1534 } else if (ts->temp_local) { 1535 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 1536 } else { 1537 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 1538 } 1539 return buf; 1540 } 1541 1542 static char *tcg_get_arg_str(TCGContext *s, char *buf, 1543 int buf_size, TCGArg arg) 1544 { 1545 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 1546 } 1547 1548 /* Find helper name. 
*/ 1549 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 1550 { 1551 const char *ret = NULL; 1552 if (helper_table) { 1553 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 1554 if (info) { 1555 ret = info->name; 1556 } 1557 } 1558 return ret; 1559 } 1560 1561 static const char * const cond_name[] = 1562 { 1563 [TCG_COND_NEVER] = "never", 1564 [TCG_COND_ALWAYS] = "always", 1565 [TCG_COND_EQ] = "eq", 1566 [TCG_COND_NE] = "ne", 1567 [TCG_COND_LT] = "lt", 1568 [TCG_COND_GE] = "ge", 1569 [TCG_COND_LE] = "le", 1570 [TCG_COND_GT] = "gt", 1571 [TCG_COND_LTU] = "ltu", 1572 [TCG_COND_GEU] = "geu", 1573 [TCG_COND_LEU] = "leu", 1574 [TCG_COND_GTU] = "gtu" 1575 }; 1576 1577 static const char * const ldst_name[] = 1578 { 1579 [MO_UB] = "ub", 1580 [MO_SB] = "sb", 1581 [MO_LEUW] = "leuw", 1582 [MO_LESW] = "lesw", 1583 [MO_LEUL] = "leul", 1584 [MO_LESL] = "lesl", 1585 [MO_LEQ] = "leq", 1586 [MO_BEUW] = "beuw", 1587 [MO_BESW] = "besw", 1588 [MO_BEUL] = "beul", 1589 [MO_BESL] = "besl", 1590 [MO_BEQ] = "beq", 1591 }; 1592 1593 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1594 #ifdef ALIGNED_ONLY 1595 [MO_UNALN >> MO_ASHIFT] = "un+", 1596 [MO_ALIGN >> MO_ASHIFT] = "", 1597 #else 1598 [MO_UNALN >> MO_ASHIFT] = "", 1599 [MO_ALIGN >> MO_ASHIFT] = "al+", 1600 #endif 1601 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1602 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1603 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1604 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1605 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1606 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1607 }; 1608 1609 void tcg_dump_ops(TCGContext *s) 1610 { 1611 char buf[128]; 1612 TCGOp *op; 1613 1614 QTAILQ_FOREACH(op, &s->ops, link) { 1615 int i, k, nb_oargs, nb_iargs, nb_cargs; 1616 const TCGOpDef *def; 1617 TCGOpcode c; 1618 int col = 0; 1619 1620 c = op->opc; 1621 def = &tcg_op_defs[c]; 1622 1623 if (c == INDEX_op_insn_start) { 1624 col += qemu_log("\n ----"); 1625 1626 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1627 target_ulong a; 1628 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1629 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 1630 #else 1631 a = op->args[i]; 1632 #endif 1633 col += qemu_log(" " TARGET_FMT_lx, a); 1634 } 1635 } else if (c == INDEX_op_call) { 1636 /* variable number of arguments */ 1637 nb_oargs = TCGOP_CALLO(op); 1638 nb_iargs = TCGOP_CALLI(op); 1639 nb_cargs = def->nb_cargs; 1640 1641 /* function name, flags, out args */ 1642 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1643 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 1644 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 1645 for (i = 0; i < nb_oargs; i++) { 1646 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 1647 op->args[i])); 1648 } 1649 for (i = 0; i < nb_iargs; i++) { 1650 TCGArg arg = op->args[nb_oargs + i]; 1651 const char *t = "<dummy>"; 1652 if (arg != TCG_CALL_DUMMY_ARG) { 1653 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 1654 } 1655 col += qemu_log(",%s", t); 1656 } 1657 } else { 1658 col += qemu_log(" %s ", def->name); 1659 1660 nb_oargs = def->nb_oargs; 1661 nb_iargs = def->nb_iargs; 1662 nb_cargs = def->nb_cargs; 1663 1664 k = 0; 1665 for (i = 0; i < nb_oargs; i++) { 1666 if (k != 0) { 1667 col += qemu_log(","); 1668 } 1669 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1670 op->args[k++])); 1671 } 1672 for (i = 0; i < nb_iargs; i++) { 1673 if (k != 0) { 1674 col += qemu_log(","); 1675 } 1676 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1677 op->args[k++])); 
1678 } 1679 switch (c) { 1680 case INDEX_op_brcond_i32: 1681 case INDEX_op_setcond_i32: 1682 case INDEX_op_movcond_i32: 1683 case INDEX_op_brcond2_i32: 1684 case INDEX_op_setcond2_i32: 1685 case INDEX_op_brcond_i64: 1686 case INDEX_op_setcond_i64: 1687 case INDEX_op_movcond_i64: 1688 if (op->args[k] < ARRAY_SIZE(cond_name) 1689 && cond_name[op->args[k]]) { 1690 col += qemu_log(",%s", cond_name[op->args[k++]]); 1691 } else { 1692 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 1693 } 1694 i = 1; 1695 break; 1696 case INDEX_op_qemu_ld_i32: 1697 case INDEX_op_qemu_st_i32: 1698 case INDEX_op_qemu_ld_i64: 1699 case INDEX_op_qemu_st_i64: 1700 { 1701 TCGMemOpIdx oi = op->args[k++]; 1702 TCGMemOp op = get_memop(oi); 1703 unsigned ix = get_mmuidx(oi); 1704 1705 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1706 col += qemu_log(",$0x%x,%u", op, ix); 1707 } else { 1708 const char *s_al, *s_op; 1709 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1710 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1711 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1712 } 1713 i = 1; 1714 } 1715 break; 1716 default: 1717 i = 0; 1718 break; 1719 } 1720 switch (c) { 1721 case INDEX_op_set_label: 1722 case INDEX_op_br: 1723 case INDEX_op_brcond_i32: 1724 case INDEX_op_brcond_i64: 1725 case INDEX_op_brcond2_i32: 1726 col += qemu_log("%s$L%d", k ? "," : "", 1727 arg_label(op->args[k])->id); 1728 i++, k++; 1729 break; 1730 default: 1731 break; 1732 } 1733 for (; i < nb_cargs; i++, k++) { 1734 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]); 1735 } 1736 } 1737 if (op->life) { 1738 unsigned life = op->life; 1739 1740 for (; col < 48; ++col) { 1741 putc(' ', qemu_logfile); 1742 } 1743 1744 if (life & (SYNC_ARG * 3)) { 1745 qemu_log(" sync:"); 1746 for (i = 0; i < 2; ++i) { 1747 if (life & (SYNC_ARG << i)) { 1748 qemu_log(" %d", i); 1749 } 1750 } 1751 } 1752 life /= DEAD_ARG; 1753 if (life) { 1754 qemu_log(" dead:"); 1755 for (i = 0; life; ++i, life >>= 1) { 1756 if (life & 1) { 1757 qemu_log(" %d", i); 1758 } 1759 } 1760 } 1761 } 1762 qemu_log("\n"); 1763 } 1764 } 1765 1766 /* we give more priority to constraints with less registers */ 1767 static int get_constraint_priority(const TCGOpDef *def, int k) 1768 { 1769 const TCGArgConstraint *arg_ct; 1770 1771 int i, n; 1772 arg_ct = &def->args_ct[k]; 1773 if (arg_ct->ct & TCG_CT_ALIAS) { 1774 /* an alias is equivalent to a single register */ 1775 n = 1; 1776 } else { 1777 if (!(arg_ct->ct & TCG_CT_REG)) 1778 return 0; 1779 n = 0; 1780 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1781 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1782 n++; 1783 } 1784 } 1785 return TCG_TARGET_NB_REGS - n + 1; 1786 } 1787 1788 /* sort from highest priority to lowest */ 1789 static void sort_constraints(TCGOpDef *def, int start, int n) 1790 { 1791 int i, j, p1, p2, tmp; 1792 1793 for(i = 0; i < n; i++) 1794 def->sorted_args[start + i] = start + i; 1795 if (n <= 1) 1796 return; 1797 for(i = 0; i < n - 1; i++) { 1798 for(j = i + 1; j < n; j++) { 1799 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1800 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1801 if (p1 < p2) { 1802 tmp = def->sorted_args[start + i]; 1803 def->sorted_args[start + i] = def->sorted_args[start + j]; 1804 def->sorted_args[start + j] = tmp; 1805 } 1806 } 1807 } 1808 } 1809 1810 static void process_op_defs(TCGContext *s) 1811 { 1812 TCGOpcode op; 1813 1814 for (op = 0; op < NB_OPS; op++) { 1815 TCGOpDef *def = &tcg_op_defs[op]; 1816 const TCGTargetOpDef *tdefs; 1817 TCGType type; 1818 
int i, nb_args; 1819 1820 if (def->flags & TCG_OPF_NOT_PRESENT) { 1821 continue; 1822 } 1823 1824 nb_args = def->nb_iargs + def->nb_oargs; 1825 if (nb_args == 0) { 1826 continue; 1827 } 1828 1829 tdefs = tcg_target_op_def(op); 1830 /* Missing TCGTargetOpDef entry. */ 1831 tcg_debug_assert(tdefs != NULL); 1832 1833 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1834 for (i = 0; i < nb_args; i++) { 1835 const char *ct_str = tdefs->args_ct_str[i]; 1836 /* Incomplete TCGTargetOpDef entry. */ 1837 tcg_debug_assert(ct_str != NULL); 1838 1839 def->args_ct[i].u.regs = 0; 1840 def->args_ct[i].ct = 0; 1841 while (*ct_str != '\0') { 1842 switch(*ct_str) { 1843 case '0' ... '9': 1844 { 1845 int oarg = *ct_str - '0'; 1846 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1847 tcg_debug_assert(oarg < def->nb_oargs); 1848 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1849 /* TCG_CT_ALIAS is for the output arguments. 1850 The input is tagged with TCG_CT_IALIAS. */ 1851 def->args_ct[i] = def->args_ct[oarg]; 1852 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1853 def->args_ct[oarg].alias_index = i; 1854 def->args_ct[i].ct |= TCG_CT_IALIAS; 1855 def->args_ct[i].alias_index = oarg; 1856 } 1857 ct_str++; 1858 break; 1859 case '&': 1860 def->args_ct[i].ct |= TCG_CT_NEWREG; 1861 ct_str++; 1862 break; 1863 case 'i': 1864 def->args_ct[i].ct |= TCG_CT_CONST; 1865 ct_str++; 1866 break; 1867 default: 1868 ct_str = target_parse_constraint(&def->args_ct[i], 1869 ct_str, type); 1870 /* Typo in TCGTargetOpDef constraint. */ 1871 tcg_debug_assert(ct_str != NULL); 1872 } 1873 } 1874 } 1875 1876 /* TCGTargetOpDef entry with too much information? */ 1877 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1878 1879 /* sort the constraints (XXX: this is just an heuristic) */ 1880 sort_constraints(def, 0, def->nb_oargs); 1881 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1882 } 1883 } 1884 1885 void tcg_op_remove(TCGContext *s, TCGOp *op) 1886 { 1887 QTAILQ_REMOVE(&s->ops, op, link); 1888 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 1889 1890 #ifdef CONFIG_PROFILER 1891 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 1892 #endif 1893 } 1894 1895 static TCGOp *tcg_op_alloc(TCGOpcode opc) 1896 { 1897 TCGContext *s = tcg_ctx; 1898 TCGOp *op; 1899 1900 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 1901 op = tcg_malloc(sizeof(TCGOp)); 1902 } else { 1903 op = QTAILQ_FIRST(&s->free_ops); 1904 QTAILQ_REMOVE(&s->free_ops, op, link); 1905 } 1906 memset(op, 0, offsetof(TCGOp, link)); 1907 op->opc = opc; 1908 1909 return op; 1910 } 1911 1912 TCGOp *tcg_emit_op(TCGOpcode opc) 1913 { 1914 TCGOp *op = tcg_op_alloc(opc); 1915 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 1916 return op; 1917 } 1918 1919 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1920 TCGOpcode opc, int nargs) 1921 { 1922 TCGOp *new_op = tcg_op_alloc(opc); 1923 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 1924 return new_op; 1925 } 1926 1927 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1928 TCGOpcode opc, int nargs) 1929 { 1930 TCGOp *new_op = tcg_op_alloc(opc); 1931 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 1932 return new_op; 1933 } 1934 1935 #define TS_DEAD 1 1936 #define TS_MEM 2 1937 1938 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1939 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1940 1941 /* liveness analysis: end of function: all temps are dead, and globals 1942 should be in memory. 
*/ 1943 static void tcg_la_func_end(TCGContext *s) 1944 { 1945 int ng = s->nb_globals; 1946 int nt = s->nb_temps; 1947 int i; 1948 1949 for (i = 0; i < ng; ++i) { 1950 s->temps[i].state = TS_DEAD | TS_MEM; 1951 } 1952 for (i = ng; i < nt; ++i) { 1953 s->temps[i].state = TS_DEAD; 1954 } 1955 } 1956 1957 /* liveness analysis: end of basic block: all temps are dead, globals 1958 and local temps should be in memory. */ 1959 static void tcg_la_bb_end(TCGContext *s) 1960 { 1961 int ng = s->nb_globals; 1962 int nt = s->nb_temps; 1963 int i; 1964 1965 for (i = 0; i < ng; ++i) { 1966 s->temps[i].state = TS_DEAD | TS_MEM; 1967 } 1968 for (i = ng; i < nt; ++i) { 1969 s->temps[i].state = (s->temps[i].temp_local 1970 ? TS_DEAD | TS_MEM 1971 : TS_DEAD); 1972 } 1973 } 1974 1975 /* Liveness analysis : update the opc_arg_life array to tell if a 1976 given input arguments is dead. Instructions updating dead 1977 temporaries are removed. */ 1978 static void liveness_pass_1(TCGContext *s) 1979 { 1980 int nb_globals = s->nb_globals; 1981 TCGOp *op, *op_prev; 1982 1983 tcg_la_func_end(s); 1984 1985 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) { 1986 int i, nb_iargs, nb_oargs; 1987 TCGOpcode opc_new, opc_new2; 1988 bool have_opc_new2; 1989 TCGLifeData arg_life = 0; 1990 TCGTemp *arg_ts; 1991 TCGOpcode opc = op->opc; 1992 const TCGOpDef *def = &tcg_op_defs[opc]; 1993 1994 switch (opc) { 1995 case INDEX_op_call: 1996 { 1997 int call_flags; 1998 1999 nb_oargs = TCGOP_CALLO(op); 2000 nb_iargs = TCGOP_CALLI(op); 2001 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2002 2003 /* pure functions can be removed if their result is unused */ 2004 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2005 for (i = 0; i < nb_oargs; i++) { 2006 arg_ts = arg_temp(op->args[i]); 2007 if (arg_ts->state != TS_DEAD) { 2008 goto do_not_remove_call; 2009 } 2010 } 2011 goto do_remove; 2012 } else { 2013 do_not_remove_call: 2014 2015 /* output args are dead */ 2016 for (i = 0; i < nb_oargs; i++) { 2017 arg_ts = arg_temp(op->args[i]); 2018 if (arg_ts->state & TS_DEAD) { 2019 arg_life |= DEAD_ARG << i; 2020 } 2021 if (arg_ts->state & TS_MEM) { 2022 arg_life |= SYNC_ARG << i; 2023 } 2024 arg_ts->state = TS_DEAD; 2025 } 2026 2027 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2028 TCG_CALL_NO_READ_GLOBALS))) { 2029 /* globals should go back to memory */ 2030 for (i = 0; i < nb_globals; i++) { 2031 s->temps[i].state = TS_DEAD | TS_MEM; 2032 } 2033 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2034 /* globals should be synced to memory */ 2035 for (i = 0; i < nb_globals; i++) { 2036 s->temps[i].state |= TS_MEM; 2037 } 2038 } 2039 2040 /* record arguments that die in this helper */ 2041 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2042 arg_ts = arg_temp(op->args[i]); 2043 if (arg_ts && arg_ts->state & TS_DEAD) { 2044 arg_life |= DEAD_ARG << i; 2045 } 2046 } 2047 /* input arguments are live for preceding opcodes */ 2048 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2049 arg_ts = arg_temp(op->args[i]); 2050 if (arg_ts) { 2051 arg_ts->state &= ~TS_DEAD; 2052 } 2053 } 2054 } 2055 } 2056 break; 2057 case INDEX_op_insn_start: 2058 break; 2059 case INDEX_op_discard: 2060 /* mark the temporary as dead */ 2061 arg_temp(op->args[0])->state = TS_DEAD; 2062 break; 2063 2064 case INDEX_op_add2_i32: 2065 opc_new = INDEX_op_add_i32; 2066 goto do_addsub2; 2067 case INDEX_op_sub2_i32: 2068 opc_new = INDEX_op_sub_i32; 2069 goto do_addsub2; 2070 case INDEX_op_add2_i64: 2071 opc_new = INDEX_op_add_i64; 2072 goto do_addsub2; 2073 
case INDEX_op_sub2_i64: 2074 opc_new = INDEX_op_sub_i64; 2075 do_addsub2: 2076 nb_iargs = 4; 2077 nb_oargs = 2; 2078 /* Test if the high part of the operation is dead, but not 2079 the low part. The result can be optimized to a simple 2080 add or sub. This happens often for x86_64 guest when the 2081 cpu mode is set to 32 bit. */ 2082 if (arg_temp(op->args[1])->state == TS_DEAD) { 2083 if (arg_temp(op->args[0])->state == TS_DEAD) { 2084 goto do_remove; 2085 } 2086 /* Replace the opcode and adjust the args in place, 2087 leaving 3 unused args at the end. */ 2088 op->opc = opc = opc_new; 2089 op->args[1] = op->args[2]; 2090 op->args[2] = op->args[4]; 2091 /* Fall through and mark the single-word operation live. */ 2092 nb_iargs = 2; 2093 nb_oargs = 1; 2094 } 2095 goto do_not_remove; 2096 2097 case INDEX_op_mulu2_i32: 2098 opc_new = INDEX_op_mul_i32; 2099 opc_new2 = INDEX_op_muluh_i32; 2100 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2101 goto do_mul2; 2102 case INDEX_op_muls2_i32: 2103 opc_new = INDEX_op_mul_i32; 2104 opc_new2 = INDEX_op_mulsh_i32; 2105 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2106 goto do_mul2; 2107 case INDEX_op_mulu2_i64: 2108 opc_new = INDEX_op_mul_i64; 2109 opc_new2 = INDEX_op_muluh_i64; 2110 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2111 goto do_mul2; 2112 case INDEX_op_muls2_i64: 2113 opc_new = INDEX_op_mul_i64; 2114 opc_new2 = INDEX_op_mulsh_i64; 2115 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2116 goto do_mul2; 2117 do_mul2: 2118 nb_iargs = 2; 2119 nb_oargs = 2; 2120 if (arg_temp(op->args[1])->state == TS_DEAD) { 2121 if (arg_temp(op->args[0])->state == TS_DEAD) { 2122 /* Both parts of the operation are dead. */ 2123 goto do_remove; 2124 } 2125 /* The high part of the operation is dead; generate the low. */ 2126 op->opc = opc = opc_new; 2127 op->args[1] = op->args[2]; 2128 op->args[2] = op->args[3]; 2129 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2130 /* The low part of the operation is dead; generate the high. */ 2131 op->opc = opc = opc_new2; 2132 op->args[0] = op->args[1]; 2133 op->args[1] = op->args[2]; 2134 op->args[2] = op->args[3]; 2135 } else { 2136 goto do_not_remove; 2137 } 2138 /* Mark the single-word operation live. */ 2139 nb_oargs = 1; 2140 goto do_not_remove; 2141 2142 default: 2143 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2144 nb_iargs = def->nb_iargs; 2145 nb_oargs = def->nb_oargs; 2146 2147 /* Test if the operation can be removed because all 2148 its outputs are dead. 
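           Ops flagged TCG_OPF_SIDE_EFFECTS are never removed here, even
           when every output is dead.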
We assume that nb_oargs == 0 2149 implies side effects */ 2150 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2151 for (i = 0; i < nb_oargs; i++) { 2152 if (arg_temp(op->args[i])->state != TS_DEAD) { 2153 goto do_not_remove; 2154 } 2155 } 2156 do_remove: 2157 tcg_op_remove(s, op); 2158 } else { 2159 do_not_remove: 2160 /* output args are dead */ 2161 for (i = 0; i < nb_oargs; i++) { 2162 arg_ts = arg_temp(op->args[i]); 2163 if (arg_ts->state & TS_DEAD) { 2164 arg_life |= DEAD_ARG << i; 2165 } 2166 if (arg_ts->state & TS_MEM) { 2167 arg_life |= SYNC_ARG << i; 2168 } 2169 arg_ts->state = TS_DEAD; 2170 } 2171 2172 /* if end of basic block, update */ 2173 if (def->flags & TCG_OPF_BB_END) { 2174 tcg_la_bb_end(s); 2175 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2176 /* globals should be synced to memory */ 2177 for (i = 0; i < nb_globals; i++) { 2178 s->temps[i].state |= TS_MEM; 2179 } 2180 } 2181 2182 /* record arguments that die in this opcode */ 2183 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2184 arg_ts = arg_temp(op->args[i]); 2185 if (arg_ts->state & TS_DEAD) { 2186 arg_life |= DEAD_ARG << i; 2187 } 2188 } 2189 /* input arguments are live for preceding opcodes */ 2190 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2191 arg_temp(op->args[i])->state &= ~TS_DEAD; 2192 } 2193 } 2194 break; 2195 } 2196 op->life = arg_life; 2197 } 2198 } 2199 2200 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2201 static bool liveness_pass_2(TCGContext *s) 2202 { 2203 int nb_globals = s->nb_globals; 2204 int nb_temps, i; 2205 bool changes = false; 2206 TCGOp *op, *op_next; 2207 2208 /* Create a temporary for each indirect global. */ 2209 for (i = 0; i < nb_globals; ++i) { 2210 TCGTemp *its = &s->temps[i]; 2211 if (its->indirect_reg) { 2212 TCGTemp *dts = tcg_temp_alloc(s); 2213 dts->type = its->type; 2214 dts->base_type = its->base_type; 2215 its->state_ptr = dts; 2216 } else { 2217 its->state_ptr = NULL; 2218 } 2219 /* All globals begin dead. */ 2220 its->state = TS_DEAD; 2221 } 2222 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2223 TCGTemp *its = &s->temps[i]; 2224 its->state_ptr = NULL; 2225 its->state = TS_DEAD; 2226 } 2227 2228 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2229 TCGOpcode opc = op->opc; 2230 const TCGOpDef *def = &tcg_op_defs[opc]; 2231 TCGLifeData arg_life = op->life; 2232 int nb_iargs, nb_oargs, call_flags; 2233 TCGTemp *arg_ts, *dir_ts; 2234 2235 if (opc == INDEX_op_call) { 2236 nb_oargs = TCGOP_CALLO(op); 2237 nb_iargs = TCGOP_CALLI(op); 2238 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2239 } else { 2240 nb_iargs = def->nb_iargs; 2241 nb_oargs = def->nb_oargs; 2242 2243 /* Set flags similar to how calls require. */ 2244 if (def->flags & TCG_OPF_BB_END) { 2245 /* Like writing globals: save_globals */ 2246 call_flags = 0; 2247 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2248 /* Like reading globals: sync_globals */ 2249 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2250 } else { 2251 /* No effect on globals. */ 2252 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2253 TCG_CALL_NO_WRITE_GLOBALS); 2254 } 2255 } 2256 2257 /* Make sure that input arguments are available. */ 2258 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2259 arg_ts = arg_temp(op->args[i]); 2260 if (arg_ts) { 2261 dir_ts = arg_ts->state_ptr; 2262 if (dir_ts && arg_ts->state == TS_DEAD) { 2263 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2264 ? 
INDEX_op_ld_i32 2265 : INDEX_op_ld_i64); 2266 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 2267 2268 lop->args[0] = temp_arg(dir_ts); 2269 lop->args[1] = temp_arg(arg_ts->mem_base); 2270 lop->args[2] = arg_ts->mem_offset; 2271 2272 /* Loaded, but synced with memory. */ 2273 arg_ts->state = TS_MEM; 2274 } 2275 } 2276 } 2277 2278 /* Perform input replacement, and mark inputs that became dead. 2279 No action is required except keeping temp_state up to date 2280 so that we reload when needed. */ 2281 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2282 arg_ts = arg_temp(op->args[i]); 2283 if (arg_ts) { 2284 dir_ts = arg_ts->state_ptr; 2285 if (dir_ts) { 2286 op->args[i] = temp_arg(dir_ts); 2287 changes = true; 2288 if (IS_DEAD_ARG(i)) { 2289 arg_ts->state = TS_DEAD; 2290 } 2291 } 2292 } 2293 } 2294 2295 /* Liveness analysis should ensure that the following are 2296 all correct, for call sites and basic block end points. */ 2297 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2298 /* Nothing to do */ 2299 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2300 for (i = 0; i < nb_globals; ++i) { 2301 /* Liveness should see that globals are synced back, 2302 that is, either TS_DEAD or TS_MEM. */ 2303 arg_ts = &s->temps[i]; 2304 tcg_debug_assert(arg_ts->state_ptr == 0 2305 || arg_ts->state != 0); 2306 } 2307 } else { 2308 for (i = 0; i < nb_globals; ++i) { 2309 /* Liveness should see that globals are saved back, 2310 that is, TS_DEAD, waiting to be reloaded. */ 2311 arg_ts = &s->temps[i]; 2312 tcg_debug_assert(arg_ts->state_ptr == 0 2313 || arg_ts->state == TS_DEAD); 2314 } 2315 } 2316 2317 /* Outputs become available. */ 2318 for (i = 0; i < nb_oargs; i++) { 2319 arg_ts = arg_temp(op->args[i]); 2320 dir_ts = arg_ts->state_ptr; 2321 if (!dir_ts) { 2322 continue; 2323 } 2324 op->args[i] = temp_arg(dir_ts); 2325 changes = true; 2326 2327 /* The output is now live and modified. */ 2328 arg_ts->state = 0; 2329 2330 /* Sync outputs upon their last write. */ 2331 if (NEED_SYNC_ARG(i)) { 2332 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2333 ? INDEX_op_st_i32 2334 : INDEX_op_st_i64); 2335 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 2336 2337 sop->args[0] = temp_arg(dir_ts); 2338 sop->args[1] = temp_arg(arg_ts->mem_base); 2339 sop->args[2] = arg_ts->mem_offset; 2340 2341 arg_ts->state = TS_MEM; 2342 } 2343 /* Drop outputs that are dead. 
*/ 2344 if (IS_DEAD_ARG(i)) { 2345 arg_ts->state = TS_DEAD; 2346 } 2347 } 2348 } 2349 2350 return changes; 2351 } 2352 2353 #ifdef CONFIG_DEBUG_TCG 2354 static void dump_regs(TCGContext *s) 2355 { 2356 TCGTemp *ts; 2357 int i; 2358 char buf[64]; 2359 2360 for(i = 0; i < s->nb_temps; i++) { 2361 ts = &s->temps[i]; 2362 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2363 switch(ts->val_type) { 2364 case TEMP_VAL_REG: 2365 printf("%s", tcg_target_reg_names[ts->reg]); 2366 break; 2367 case TEMP_VAL_MEM: 2368 printf("%d(%s)", (int)ts->mem_offset, 2369 tcg_target_reg_names[ts->mem_base->reg]); 2370 break; 2371 case TEMP_VAL_CONST: 2372 printf("$0x%" TCG_PRIlx, ts->val); 2373 break; 2374 case TEMP_VAL_DEAD: 2375 printf("D"); 2376 break; 2377 default: 2378 printf("???"); 2379 break; 2380 } 2381 printf("\n"); 2382 } 2383 2384 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2385 if (s->reg_to_temp[i] != NULL) { 2386 printf("%s: %s\n", 2387 tcg_target_reg_names[i], 2388 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2389 } 2390 } 2391 } 2392 2393 static void check_regs(TCGContext *s) 2394 { 2395 int reg; 2396 int k; 2397 TCGTemp *ts; 2398 char buf[64]; 2399 2400 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2401 ts = s->reg_to_temp[reg]; 2402 if (ts != NULL) { 2403 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2404 printf("Inconsistency for register %s:\n", 2405 tcg_target_reg_names[reg]); 2406 goto fail; 2407 } 2408 } 2409 } 2410 for (k = 0; k < s->nb_temps; k++) { 2411 ts = &s->temps[k]; 2412 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 2413 && s->reg_to_temp[ts->reg] != ts) { 2414 printf("Inconsistency for temp %s:\n", 2415 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2416 fail: 2417 printf("reg state:\n"); 2418 dump_regs(s); 2419 tcg_abort(); 2420 } 2421 } 2422 } 2423 #endif 2424 2425 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 2426 { 2427 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 2428 /* Sparc64 stack is accessed with offset of 2047 */ 2429 s->current_frame_offset = (s->current_frame_offset + 2430 (tcg_target_long)sizeof(tcg_target_long) - 1) & 2431 ~(sizeof(tcg_target_long) - 1); 2432 #endif 2433 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 2434 s->frame_end) { 2435 tcg_abort(); 2436 } 2437 ts->mem_offset = s->current_frame_offset; 2438 ts->mem_base = s->frame_temp; 2439 ts->mem_allocated = 1; 2440 s->current_frame_offset += sizeof(tcg_target_long); 2441 } 2442 2443 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 2444 2445 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 2446 mark it free; otherwise mark it dead. */ 2447 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 2448 { 2449 if (ts->fixed_reg) { 2450 return; 2451 } 2452 if (ts->val_type == TEMP_VAL_REG) { 2453 s->reg_to_temp[ts->reg] = NULL; 2454 } 2455 ts->val_type = (free_or_dead < 0 2456 || ts->temp_local 2457 || ts->temp_global 2458 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 2459 } 2460 2461 /* Mark a temporary as dead. */ 2462 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 2463 { 2464 temp_free_or_dead(s, ts, 1); 2465 } 2466 2467 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 2468 registers needs to be allocated to store a constant. If 'free_or_dead' 2469 is non-zero, subsequently release the temporary; if it is positive, the 2470 temp is dead; if it is negative, the temp is free. 
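   A constant that is about to be released may be stored to its memory
   slot directly with tcg_out_sti, avoiding the allocation of a scratch
   register just for the store.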
*/ 2471 static void temp_sync(TCGContext *s, TCGTemp *ts, 2472 TCGRegSet allocated_regs, int free_or_dead) 2473 { 2474 if (ts->fixed_reg) { 2475 return; 2476 } 2477 if (!ts->mem_coherent) { 2478 if (!ts->mem_allocated) { 2479 temp_allocate_frame(s, ts); 2480 } 2481 switch (ts->val_type) { 2482 case TEMP_VAL_CONST: 2483 /* If we're going to free the temp immediately, then we won't 2484 require it later in a register, so attempt to store the 2485 constant to memory directly. */ 2486 if (free_or_dead 2487 && tcg_out_sti(s, ts->type, ts->val, 2488 ts->mem_base->reg, ts->mem_offset)) { 2489 break; 2490 } 2491 temp_load(s, ts, tcg_target_available_regs[ts->type], 2492 allocated_regs); 2493 /* fallthrough */ 2494 2495 case TEMP_VAL_REG: 2496 tcg_out_st(s, ts->type, ts->reg, 2497 ts->mem_base->reg, ts->mem_offset); 2498 break; 2499 2500 case TEMP_VAL_MEM: 2501 break; 2502 2503 case TEMP_VAL_DEAD: 2504 default: 2505 tcg_abort(); 2506 } 2507 ts->mem_coherent = 1; 2508 } 2509 if (free_or_dead) { 2510 temp_free_or_dead(s, ts, free_or_dead); 2511 } 2512 } 2513 2514 /* free register 'reg' by spilling the corresponding temporary if necessary */ 2515 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 2516 { 2517 TCGTemp *ts = s->reg_to_temp[reg]; 2518 if (ts != NULL) { 2519 temp_sync(s, ts, allocated_regs, -1); 2520 } 2521 } 2522 2523 /* Allocate a register belonging to reg1 & ~reg2 */ 2524 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 2525 TCGRegSet allocated_regs, bool rev) 2526 { 2527 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 2528 const int *order; 2529 TCGReg reg; 2530 TCGRegSet reg_ct; 2531 2532 reg_ct = desired_regs & ~allocated_regs; 2533 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 2534 2535 /* first try free registers */ 2536 for(i = 0; i < n; i++) { 2537 reg = order[i]; 2538 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2539 return reg; 2540 } 2541 2542 /* XXX: do better spill choice */ 2543 for(i = 0; i < n; i++) { 2544 reg = order[i]; 2545 if (tcg_regset_test_reg(reg_ct, reg)) { 2546 tcg_reg_free(s, reg, allocated_regs); 2547 return reg; 2548 } 2549 } 2550 2551 tcg_abort(); 2552 } 2553 2554 /* Make sure the temporary is in a register. If needed, allocate the register 2555 from DESIRED while avoiding ALLOCATED. */ 2556 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 2557 TCGRegSet allocated_regs) 2558 { 2559 TCGReg reg; 2560 2561 switch (ts->val_type) { 2562 case TEMP_VAL_REG: 2563 return; 2564 case TEMP_VAL_CONST: 2565 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2566 tcg_out_movi(s, ts->type, reg, ts->val); 2567 ts->mem_coherent = 0; 2568 break; 2569 case TEMP_VAL_MEM: 2570 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2571 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2572 ts->mem_coherent = 1; 2573 break; 2574 case TEMP_VAL_DEAD: 2575 default: 2576 tcg_abort(); 2577 } 2578 ts->reg = reg; 2579 ts->val_type = TEMP_VAL_REG; 2580 s->reg_to_temp[reg] = ts; 2581 } 2582 2583 /* Save a temporary to memory. 'allocated_regs' is used in case a 2584 temporary registers needs to be allocated to store a constant. */ 2585 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2586 { 2587 /* The liveness analysis already ensures that globals are back 2588 in memory. Keep an tcg_debug_assert for safety. 
*/ 2589 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2590 } 2591 2592 /* save globals to their canonical location and assume they can be 2593 modified be the following code. 'allocated_regs' is used in case a 2594 temporary registers needs to be allocated to store a constant. */ 2595 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2596 { 2597 int i, n; 2598 2599 for (i = 0, n = s->nb_globals; i < n; i++) { 2600 temp_save(s, &s->temps[i], allocated_regs); 2601 } 2602 } 2603 2604 /* sync globals to their canonical location and assume they can be 2605 read by the following code. 'allocated_regs' is used in case a 2606 temporary registers needs to be allocated to store a constant. */ 2607 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2608 { 2609 int i, n; 2610 2611 for (i = 0, n = s->nb_globals; i < n; i++) { 2612 TCGTemp *ts = &s->temps[i]; 2613 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2614 || ts->fixed_reg 2615 || ts->mem_coherent); 2616 } 2617 } 2618 2619 /* at the end of a basic block, we assume all temporaries are dead and 2620 all globals are stored at their canonical location. */ 2621 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2622 { 2623 int i; 2624 2625 for (i = s->nb_globals; i < s->nb_temps; i++) { 2626 TCGTemp *ts = &s->temps[i]; 2627 if (ts->temp_local) { 2628 temp_save(s, ts, allocated_regs); 2629 } else { 2630 /* The liveness analysis already ensures that temps are dead. 2631 Keep an tcg_debug_assert for safety. */ 2632 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2633 } 2634 } 2635 2636 save_globals(s, allocated_regs); 2637 } 2638 2639 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2640 tcg_target_ulong val, TCGLifeData arg_life) 2641 { 2642 if (ots->fixed_reg) { 2643 /* For fixed registers, we do not do any constant propagation. */ 2644 tcg_out_movi(s, ots->type, ots->reg, val); 2645 return; 2646 } 2647 2648 /* The movi is not explicitly generated here. */ 2649 if (ots->val_type == TEMP_VAL_REG) { 2650 s->reg_to_temp[ots->reg] = NULL; 2651 } 2652 ots->val_type = TEMP_VAL_CONST; 2653 ots->val = val; 2654 ots->mem_coherent = 0; 2655 if (NEED_SYNC_ARG(0)) { 2656 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2657 } else if (IS_DEAD_ARG(0)) { 2658 temp_dead(s, ots); 2659 } 2660 } 2661 2662 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 2663 { 2664 TCGTemp *ots = arg_temp(op->args[0]); 2665 tcg_target_ulong val = op->args[1]; 2666 2667 tcg_reg_alloc_do_movi(s, ots, val, op->life); 2668 } 2669 2670 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 2671 { 2672 const TCGLifeData arg_life = op->life; 2673 TCGRegSet allocated_regs; 2674 TCGTemp *ts, *ots; 2675 TCGType otype, itype; 2676 2677 allocated_regs = s->reserved_regs; 2678 ots = arg_temp(op->args[0]); 2679 ts = arg_temp(op->args[1]); 2680 2681 /* Note that otype != itype for no-op truncation. */ 2682 otype = ots->type; 2683 itype = ts->type; 2684 2685 if (ts->val_type == TEMP_VAL_CONST) { 2686 /* propagate constant or generate sti */ 2687 tcg_target_ulong val = ts->val; 2688 if (IS_DEAD_ARG(1)) { 2689 temp_dead(s, ts); 2690 } 2691 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2692 return; 2693 } 2694 2695 /* If the source value is in memory we're going to be forced 2696 to have it in a register in order to perform the copy. Copy 2697 the SOURCE value into its own register first, that way we 2698 don't have to reload SOURCE the next time it is used. 
*/ 2699 if (ts->val_type == TEMP_VAL_MEM) { 2700 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2701 } 2702 2703 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2704 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2705 /* mov to a non-saved dead register makes no sense (even with 2706 liveness analysis disabled). */ 2707 tcg_debug_assert(NEED_SYNC_ARG(0)); 2708 if (!ots->mem_allocated) { 2709 temp_allocate_frame(s, ots); 2710 } 2711 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2712 if (IS_DEAD_ARG(1)) { 2713 temp_dead(s, ts); 2714 } 2715 temp_dead(s, ots); 2716 } else { 2717 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2718 /* the mov can be suppressed */ 2719 if (ots->val_type == TEMP_VAL_REG) { 2720 s->reg_to_temp[ots->reg] = NULL; 2721 } 2722 ots->reg = ts->reg; 2723 temp_dead(s, ts); 2724 } else { 2725 if (ots->val_type != TEMP_VAL_REG) { 2726 /* When allocating a new register, make sure to not spill the 2727 input one. */ 2728 tcg_regset_set_reg(allocated_regs, ts->reg); 2729 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2730 allocated_regs, ots->indirect_base); 2731 } 2732 tcg_out_mov(s, otype, ots->reg, ts->reg); 2733 } 2734 ots->val_type = TEMP_VAL_REG; 2735 ots->mem_coherent = 0; 2736 s->reg_to_temp[ots->reg] = ots; 2737 if (NEED_SYNC_ARG(0)) { 2738 temp_sync(s, ots, allocated_regs, 0); 2739 } 2740 } 2741 } 2742 2743 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 2744 { 2745 const TCGLifeData arg_life = op->life; 2746 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 2747 TCGRegSet i_allocated_regs; 2748 TCGRegSet o_allocated_regs; 2749 int i, k, nb_iargs, nb_oargs; 2750 TCGReg reg; 2751 TCGArg arg; 2752 const TCGArgConstraint *arg_ct; 2753 TCGTemp *ts; 2754 TCGArg new_args[TCG_MAX_OP_ARGS]; 2755 int const_args[TCG_MAX_OP_ARGS]; 2756 2757 nb_oargs = def->nb_oargs; 2758 nb_iargs = def->nb_iargs; 2759 2760 /* copy constants */ 2761 memcpy(new_args + nb_oargs + nb_iargs, 2762 op->args + nb_oargs + nb_iargs, 2763 sizeof(TCGArg) * def->nb_cargs); 2764 2765 i_allocated_regs = s->reserved_regs; 2766 o_allocated_regs = s->reserved_regs; 2767 2768 /* satisfy input constraints */ 2769 for (k = 0; k < nb_iargs; k++) { 2770 i = def->sorted_args[nb_oargs + k]; 2771 arg = op->args[i]; 2772 arg_ct = &def->args_ct[i]; 2773 ts = arg_temp(arg); 2774 2775 if (ts->val_type == TEMP_VAL_CONST 2776 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2777 /* constant is OK for instruction */ 2778 const_args[i] = 1; 2779 new_args[i] = ts->val; 2780 goto iarg_end; 2781 } 2782 2783 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2784 2785 if (arg_ct->ct & TCG_CT_IALIAS) { 2786 if (ts->fixed_reg) { 2787 /* if fixed register, we must allocate a new register 2788 if the alias is not the same register */ 2789 if (arg != op->args[arg_ct->alias_index]) 2790 goto allocate_in_reg; 2791 } else { 2792 /* if the input is aliased to an output and if it is 2793 not dead after the instruction, we must allocate 2794 a new register and move it */ 2795 if (!IS_DEAD_ARG(i)) { 2796 goto allocate_in_reg; 2797 } 2798 /* check if the current register has already been allocated 2799 for another input aliased to an output */ 2800 int k2, i2; 2801 for (k2 = 0 ; k2 < k ; k2++) { 2802 i2 = def->sorted_args[nb_oargs + k2]; 2803 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2804 (new_args[i2] == ts->reg)) { 2805 goto allocate_in_reg; 2806 } 2807 } 2808 } 2809 } 2810 reg = ts->reg; 2811 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2812 /* 
nothing to do : the constraint is satisfied */ 2813 } else { 2814 allocate_in_reg: 2815 /* allocate a new register matching the constraint 2816 and move the temporary register into it */ 2817 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2818 ts->indirect_base); 2819 tcg_out_mov(s, ts->type, reg, ts->reg); 2820 } 2821 new_args[i] = reg; 2822 const_args[i] = 0; 2823 tcg_regset_set_reg(i_allocated_regs, reg); 2824 iarg_end: ; 2825 } 2826 2827 /* mark dead temporaries and free the associated registers */ 2828 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2829 if (IS_DEAD_ARG(i)) { 2830 temp_dead(s, arg_temp(op->args[i])); 2831 } 2832 } 2833 2834 if (def->flags & TCG_OPF_BB_END) { 2835 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2836 } else { 2837 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2838 /* XXX: permit generic clobber register list ? */ 2839 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2840 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2841 tcg_reg_free(s, i, i_allocated_regs); 2842 } 2843 } 2844 } 2845 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2846 /* sync globals if the op has side effects and might trigger 2847 an exception. */ 2848 sync_globals(s, i_allocated_regs); 2849 } 2850 2851 /* satisfy the output constraints */ 2852 for(k = 0; k < nb_oargs; k++) { 2853 i = def->sorted_args[k]; 2854 arg = op->args[i]; 2855 arg_ct = &def->args_ct[i]; 2856 ts = arg_temp(arg); 2857 if ((arg_ct->ct & TCG_CT_ALIAS) 2858 && !const_args[arg_ct->alias_index]) { 2859 reg = new_args[arg_ct->alias_index]; 2860 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2861 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2862 i_allocated_regs | o_allocated_regs, 2863 ts->indirect_base); 2864 } else { 2865 /* if fixed register, we try to use it */ 2866 reg = ts->reg; 2867 if (ts->fixed_reg && 2868 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2869 goto oarg_end; 2870 } 2871 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2872 ts->indirect_base); 2873 } 2874 tcg_regset_set_reg(o_allocated_regs, reg); 2875 /* if a fixed register is used, then a move will be done afterwards */ 2876 if (!ts->fixed_reg) { 2877 if (ts->val_type == TEMP_VAL_REG) { 2878 s->reg_to_temp[ts->reg] = NULL; 2879 } 2880 ts->val_type = TEMP_VAL_REG; 2881 ts->reg = reg; 2882 /* temp value is modified, so the value kept in memory is 2883 potentially not the same */ 2884 ts->mem_coherent = 0; 2885 s->reg_to_temp[reg] = ts; 2886 } 2887 oarg_end: 2888 new_args[i] = reg; 2889 } 2890 } 2891 2892 /* emit instruction */ 2893 tcg_out_op(s, op->opc, new_args, const_args); 2894 2895 /* move the outputs in the correct register if needed */ 2896 for(i = 0; i < nb_oargs; i++) { 2897 ts = arg_temp(op->args[i]); 2898 reg = new_args[i]; 2899 if (ts->fixed_reg && ts->reg != reg) { 2900 tcg_out_mov(s, ts->type, ts->reg, reg); 2901 } 2902 if (NEED_SYNC_ARG(i)) { 2903 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2904 } else if (IS_DEAD_ARG(i)) { 2905 temp_dead(s, ts); 2906 } 2907 } 2908 } 2909 2910 #ifdef TCG_TARGET_STACK_GROWSUP 2911 #define STACK_DIR(x) (-(x)) 2912 #else 2913 #define STACK_DIR(x) (x) 2914 #endif 2915 2916 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 2917 { 2918 const int nb_oargs = TCGOP_CALLO(op); 2919 const int nb_iargs = TCGOP_CALLI(op); 2920 const TCGLifeData arg_life = op->life; 2921 int flags, nb_regs, i; 2922 TCGReg reg; 2923 TCGArg arg; 2924 TCGTemp *ts; 2925 intptr_t stack_offset; 2926 size_t call_stack_size; 2927 tcg_insn_unit *func_addr; 2928 int allocate_args; 2929 TCGRegSet allocated_regs; 2930 2931 
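    /* Lowering a call proceeds in several steps: read the helper address
       and the call flags from the op; place the arguments that do not fit
       in tcg_target_call_iarg_regs onto the stack; load the remaining
       arguments into the fixed argument registers; free the temporaries
       that die here and spill every call-clobbered register; save or sync
       the globals according to the TCG_CALL_NO_*_GLOBALS flags; finally
       emit the call and bind the outputs to tcg_target_call_oarg_regs. */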
func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 2932 flags = op->args[nb_oargs + nb_iargs + 1]; 2933 2934 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2935 if (nb_regs > nb_iargs) { 2936 nb_regs = nb_iargs; 2937 } 2938 2939 /* assign stack slots first */ 2940 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2941 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2942 ~(TCG_TARGET_STACK_ALIGN - 1); 2943 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2944 if (allocate_args) { 2945 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2946 preallocate call stack */ 2947 tcg_abort(); 2948 } 2949 2950 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2951 for (i = nb_regs; i < nb_iargs; i++) { 2952 arg = op->args[nb_oargs + i]; 2953 #ifdef TCG_TARGET_STACK_GROWSUP 2954 stack_offset -= sizeof(tcg_target_long); 2955 #endif 2956 if (arg != TCG_CALL_DUMMY_ARG) { 2957 ts = arg_temp(arg); 2958 temp_load(s, ts, tcg_target_available_regs[ts->type], 2959 s->reserved_regs); 2960 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2961 } 2962 #ifndef TCG_TARGET_STACK_GROWSUP 2963 stack_offset += sizeof(tcg_target_long); 2964 #endif 2965 } 2966 2967 /* assign input registers */ 2968 allocated_regs = s->reserved_regs; 2969 for (i = 0; i < nb_regs; i++) { 2970 arg = op->args[nb_oargs + i]; 2971 if (arg != TCG_CALL_DUMMY_ARG) { 2972 ts = arg_temp(arg); 2973 reg = tcg_target_call_iarg_regs[i]; 2974 tcg_reg_free(s, reg, allocated_regs); 2975 2976 if (ts->val_type == TEMP_VAL_REG) { 2977 if (ts->reg != reg) { 2978 tcg_out_mov(s, ts->type, reg, ts->reg); 2979 } 2980 } else { 2981 TCGRegSet arg_set = 0; 2982 2983 tcg_regset_set_reg(arg_set, reg); 2984 temp_load(s, ts, arg_set, allocated_regs); 2985 } 2986 2987 tcg_regset_set_reg(allocated_regs, reg); 2988 } 2989 } 2990 2991 /* mark dead temporaries and free the associated registers */ 2992 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2993 if (IS_DEAD_ARG(i)) { 2994 temp_dead(s, arg_temp(op->args[i])); 2995 } 2996 } 2997 2998 /* clobber call registers */ 2999 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3000 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3001 tcg_reg_free(s, i, allocated_regs); 3002 } 3003 } 3004 3005 /* Save globals if they might be written by the helper, sync them if 3006 they might be read. 
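       With TCG_CALL_NO_READ_GLOBALS there is nothing to check; with
       TCG_CALL_NO_WRITE_GLOBALS it is enough that any register-resident
       global is coherent with its memory slot; otherwise every global
       must already have been written back to memory.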
*/ 3007 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3008 /* Nothing to do */ 3009 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3010 sync_globals(s, allocated_regs); 3011 } else { 3012 save_globals(s, allocated_regs); 3013 } 3014 3015 tcg_out_call(s, func_addr); 3016 3017 /* assign output registers and emit moves if needed */ 3018 for(i = 0; i < nb_oargs; i++) { 3019 arg = op->args[i]; 3020 ts = arg_temp(arg); 3021 reg = tcg_target_call_oarg_regs[i]; 3022 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3023 3024 if (ts->fixed_reg) { 3025 if (ts->reg != reg) { 3026 tcg_out_mov(s, ts->type, ts->reg, reg); 3027 } 3028 } else { 3029 if (ts->val_type == TEMP_VAL_REG) { 3030 s->reg_to_temp[ts->reg] = NULL; 3031 } 3032 ts->val_type = TEMP_VAL_REG; 3033 ts->reg = reg; 3034 ts->mem_coherent = 0; 3035 s->reg_to_temp[reg] = ts; 3036 if (NEED_SYNC_ARG(i)) { 3037 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 3038 } else if (IS_DEAD_ARG(i)) { 3039 temp_dead(s, ts); 3040 } 3041 } 3042 } 3043 } 3044 3045 #ifdef CONFIG_PROFILER 3046 3047 /* avoid copy/paste errors */ 3048 #define PROF_ADD(to, from, field) \ 3049 do { \ 3050 (to)->field += atomic_read(&((from)->field)); \ 3051 } while (0) 3052 3053 #define PROF_MAX(to, from, field) \ 3054 do { \ 3055 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3056 if (val__ > (to)->field) { \ 3057 (to)->field = val__; \ 3058 } \ 3059 } while (0) 3060 3061 /* Pass in a zero'ed @prof */ 3062 static inline 3063 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3064 { 3065 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3066 unsigned int i; 3067 3068 for (i = 0; i < n_ctxs; i++) { 3069 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3070 const TCGProfile *orig = &s->prof; 3071 3072 if (counters) { 3073 PROF_ADD(prof, orig, tb_count1); 3074 PROF_ADD(prof, orig, tb_count); 3075 PROF_ADD(prof, orig, op_count); 3076 PROF_MAX(prof, orig, op_count_max); 3077 PROF_ADD(prof, orig, temp_count); 3078 PROF_MAX(prof, orig, temp_count_max); 3079 PROF_ADD(prof, orig, del_op_count); 3080 PROF_ADD(prof, orig, code_in_len); 3081 PROF_ADD(prof, orig, code_out_len); 3082 PROF_ADD(prof, orig, search_out_len); 3083 PROF_ADD(prof, orig, interm_time); 3084 PROF_ADD(prof, orig, code_time); 3085 PROF_ADD(prof, orig, la_time); 3086 PROF_ADD(prof, orig, opt_time); 3087 PROF_ADD(prof, orig, restore_count); 3088 PROF_ADD(prof, orig, restore_time); 3089 } 3090 if (table) { 3091 int i; 3092 3093 for (i = 0; i < NB_OPS; i++) { 3094 PROF_ADD(prof, orig, table_op_count[i]); 3095 } 3096 } 3097 } 3098 } 3099 3100 #undef PROF_ADD 3101 #undef PROF_MAX 3102 3103 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3104 { 3105 tcg_profile_snapshot(prof, true, false); 3106 } 3107 3108 static void tcg_profile_snapshot_table(TCGProfile *prof) 3109 { 3110 tcg_profile_snapshot(prof, false, true); 3111 } 3112 3113 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3114 { 3115 TCGProfile prof = {}; 3116 int i; 3117 3118 tcg_profile_snapshot_table(&prof); 3119 for (i = 0; i < NB_OPS; i++) { 3120 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 3121 prof.table_op_count[i]); 3122 } 3123 } 3124 #else 3125 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3126 { 3127 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3128 } 3129 #endif 3130 3131 3132 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3133 { 3134 #ifdef CONFIG_PROFILER 3135 TCGProfile *prof = &s->prof; 3136 #endif 3137 int i, num_insns; 3138 TCGOp *op; 3139 3140 #ifdef CONFIG_PROFILER 3141 
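    /* Count the ops and temporaries of this TB and fold them into the
       profiler statistics. */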
{ 3142 int n; 3143 3144 QTAILQ_FOREACH(op, &s->ops, link) { 3145 n++; 3146 } 3147 atomic_set(&prof->op_count, prof->op_count + n); 3148 if (n > prof->op_count_max) { 3149 atomic_set(&prof->op_count_max, n); 3150 } 3151 3152 n = s->nb_temps; 3153 atomic_set(&prof->temp_count, prof->temp_count + n); 3154 if (n > prof->temp_count_max) { 3155 atomic_set(&prof->temp_count_max, n); 3156 } 3157 } 3158 #endif 3159 3160 #ifdef DEBUG_DISAS 3161 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3162 && qemu_log_in_addr_range(tb->pc))) { 3163 qemu_log_lock(); 3164 qemu_log("OP:\n"); 3165 tcg_dump_ops(s); 3166 qemu_log("\n"); 3167 qemu_log_unlock(); 3168 } 3169 #endif 3170 3171 #ifdef CONFIG_PROFILER 3172 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3173 #endif 3174 3175 #ifdef USE_TCG_OPTIMIZATIONS 3176 tcg_optimize(s); 3177 #endif 3178 3179 #ifdef CONFIG_PROFILER 3180 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3181 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3182 #endif 3183 3184 liveness_pass_1(s); 3185 3186 if (s->nb_indirects > 0) { 3187 #ifdef DEBUG_DISAS 3188 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3189 && qemu_log_in_addr_range(tb->pc))) { 3190 qemu_log_lock(); 3191 qemu_log("OP before indirect lowering:\n"); 3192 tcg_dump_ops(s); 3193 qemu_log("\n"); 3194 qemu_log_unlock(); 3195 } 3196 #endif 3197 /* Replace indirect temps with direct temps. */ 3198 if (liveness_pass_2(s)) { 3199 /* If changes were made, re-run liveness. */ 3200 liveness_pass_1(s); 3201 } 3202 } 3203 3204 #ifdef CONFIG_PROFILER 3205 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3206 #endif 3207 3208 #ifdef DEBUG_DISAS 3209 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3210 && qemu_log_in_addr_range(tb->pc))) { 3211 qemu_log_lock(); 3212 qemu_log("OP after optimization and liveness analysis:\n"); 3213 tcg_dump_ops(s); 3214 qemu_log("\n"); 3215 qemu_log_unlock(); 3216 } 3217 #endif 3218 3219 tcg_reg_alloc_start(s); 3220 3221 s->code_buf = tb->tc.ptr; 3222 s->code_ptr = tb->tc.ptr; 3223 3224 #ifdef TCG_TARGET_NEED_LDST_LABELS 3225 s->ldst_labels = NULL; 3226 #endif 3227 #ifdef TCG_TARGET_NEED_POOL_LABELS 3228 s->pool_labels = NULL; 3229 #endif 3230 3231 num_insns = -1; 3232 QTAILQ_FOREACH(op, &s->ops, link) { 3233 TCGOpcode opc = op->opc; 3234 3235 #ifdef CONFIG_PROFILER 3236 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3237 #endif 3238 3239 switch (opc) { 3240 case INDEX_op_mov_i32: 3241 case INDEX_op_mov_i64: 3242 tcg_reg_alloc_mov(s, op); 3243 break; 3244 case INDEX_op_movi_i32: 3245 case INDEX_op_movi_i64: 3246 tcg_reg_alloc_movi(s, op); 3247 break; 3248 case INDEX_op_insn_start: 3249 if (num_insns >= 0) { 3250 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3251 } 3252 num_insns++; 3253 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3254 target_ulong a; 3255 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3256 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3257 #else 3258 a = op->args[i]; 3259 #endif 3260 s->gen_insn_data[num_insns][i] = a; 3261 } 3262 break; 3263 case INDEX_op_discard: 3264 temp_dead(s, arg_temp(op->args[0])); 3265 break; 3266 case INDEX_op_set_label: 3267 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3268 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3269 break; 3270 case INDEX_op_call: 3271 tcg_reg_alloc_call(s, op); 3272 break; 3273 default: 3274 /* Sanity check that we've not introduced any unhandled opcodes. 
*/ 3275 tcg_debug_assert(tcg_op_supported(opc)); 3276 /* Note: in order to speed up the code, it would be much 3277 faster to have specialized register allocator functions for 3278 some common argument patterns */ 3279 tcg_reg_alloc_op(s, op); 3280 break; 3281 } 3282 #ifdef CONFIG_DEBUG_TCG 3283 check_regs(s); 3284 #endif 3285 /* Test for (pending) buffer overflow. The assumption is that any 3286 one operation beginning below the high water mark cannot overrun 3287 the buffer completely. Thus we can test for overflow after 3288 generating code without having to check during generation. */ 3289 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 3290 return -1; 3291 } 3292 } 3293 tcg_debug_assert(num_insns >= 0); 3294 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3295 3296 /* Generate TB finalization at the end of block */ 3297 #ifdef TCG_TARGET_NEED_LDST_LABELS 3298 if (!tcg_out_ldst_finalize(s)) { 3299 return -1; 3300 } 3301 #endif 3302 #ifdef TCG_TARGET_NEED_POOL_LABELS 3303 if (!tcg_out_pool_finalize(s)) { 3304 return -1; 3305 } 3306 #endif 3307 3308 /* flush instruction cache */ 3309 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 3310 3311 return tcg_current_code_size(s); 3312 } 3313 3314 #ifdef CONFIG_PROFILER 3315 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3316 { 3317 TCGProfile prof = {}; 3318 const TCGProfile *s; 3319 int64_t tb_count; 3320 int64_t tb_div_count; 3321 int64_t tot; 3322 3323 tcg_profile_snapshot_counters(&prof); 3324 s = &prof; 3325 tb_count = s->tb_count; 3326 tb_div_count = tb_count ? tb_count : 1; 3327 tot = s->interm_time + s->code_time; 3328 3329 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 3330 tot, tot / 2.4e9); 3331 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 3332 tb_count, s->tb_count1 - tb_count, 3333 (double)(s->tb_count1 - s->tb_count) 3334 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 3335 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 3336 (double)s->op_count / tb_div_count, s->op_count_max); 3337 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 3338 (double)s->del_op_count / tb_div_count); 3339 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 3340 (double)s->temp_count / tb_div_count, s->temp_count_max); 3341 cpu_fprintf(f, "avg host code/TB %0.1f\n", 3342 (double)s->code_out_len / tb_div_count); 3343 cpu_fprintf(f, "avg search data/TB %0.1f\n", 3344 (double)s->search_out_len / tb_div_count); 3345 3346 cpu_fprintf(f, "cycles/op %0.1f\n", 3347 s->op_count ? (double)tot / s->op_count : 0); 3348 cpu_fprintf(f, "cycles/in byte %0.1f\n", 3349 s->code_in_len ? (double)tot / s->code_in_len : 0); 3350 cpu_fprintf(f, "cycles/out byte %0.1f\n", 3351 s->code_out_len ? (double)tot / s->code_out_len : 0); 3352 cpu_fprintf(f, "cycles/search byte %0.1f\n", 3353 s->search_out_len ? (double)tot / s->search_out_len : 0); 3354 if (tot == 0) { 3355 tot = 1; 3356 } 3357 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 3358 (double)s->interm_time / tot * 100.0); 3359 cpu_fprintf(f, " gen_code time %0.1f%%\n", 3360 (double)s->code_time / tot * 100.0); 3361 cpu_fprintf(f, "optim./code time %0.1f%%\n", 3362 (double)s->opt_time / (s->code_time ? s->code_time : 1) 3363 * 100.0); 3364 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 3365 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 3366 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 3367 s->restore_count); 3368 cpu_fprintf(f, " avg cycles %0.1f\n", 3369 s->restore_count ? 
(double)s->restore_time / s->restore_count : 0); 3370 } 3371 #else 3372 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3373 { 3374 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3375 } 3376 #endif 3377 3378 #ifdef ELF_HOST_MACHINE 3379 /* In order to use this feature, the backend needs to do three things: 3380 3381 (1) Define ELF_HOST_MACHINE to indicate both what value to 3382 put into the ELF image and to indicate support for the feature. 3383 3384 (2) Define tcg_register_jit. This should create a buffer containing 3385 the contents of a .debug_frame section that describes the post- 3386 prologue unwind info for the tcg machine. 3387 3388 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 3389 */ 3390 3391 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 3392 typedef enum { 3393 JIT_NOACTION = 0, 3394 JIT_REGISTER_FN, 3395 JIT_UNREGISTER_FN 3396 } jit_actions_t; 3397 3398 struct jit_code_entry { 3399 struct jit_code_entry *next_entry; 3400 struct jit_code_entry *prev_entry; 3401 const void *symfile_addr; 3402 uint64_t symfile_size; 3403 }; 3404 3405 struct jit_descriptor { 3406 uint32_t version; 3407 uint32_t action_flag; 3408 struct jit_code_entry *relevant_entry; 3409 struct jit_code_entry *first_entry; 3410 }; 3411 3412 void __jit_debug_register_code(void) __attribute__((noinline)); 3413 void __jit_debug_register_code(void) 3414 { 3415 asm(""); 3416 } 3417 3418 /* Must statically initialize the version, because GDB may check 3419 the version before we can set it. */ 3420 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 3421 3422 /* End GDB interface. */ 3423 3424 static int find_string(const char *strtab, const char *str) 3425 { 3426 const char *p = strtab + 1; 3427 3428 while (1) { 3429 if (strcmp(p, str) == 0) { 3430 return p - strtab; 3431 } 3432 p += strlen(p) + 1; 3433 } 3434 } 3435 3436 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 3437 const void *debug_frame, 3438 size_t debug_frame_size) 3439 { 3440 struct __attribute__((packed)) DebugInfo { 3441 uint32_t len; 3442 uint16_t version; 3443 uint32_t abbrev; 3444 uint8_t ptr_size; 3445 uint8_t cu_die; 3446 uint16_t cu_lang; 3447 uintptr_t cu_low_pc; 3448 uintptr_t cu_high_pc; 3449 uint8_t fn_die; 3450 char fn_name[16]; 3451 uintptr_t fn_low_pc; 3452 uintptr_t fn_high_pc; 3453 uint8_t cu_eoc; 3454 }; 3455 3456 struct ElfImage { 3457 ElfW(Ehdr) ehdr; 3458 ElfW(Phdr) phdr; 3459 ElfW(Shdr) shdr[7]; 3460 ElfW(Sym) sym[2]; 3461 struct DebugInfo di; 3462 uint8_t da[24]; 3463 char str[80]; 3464 }; 3465 3466 struct ElfImage *img; 3467 3468 static const struct ElfImage img_template = { 3469 .ehdr = { 3470 .e_ident[EI_MAG0] = ELFMAG0, 3471 .e_ident[EI_MAG1] = ELFMAG1, 3472 .e_ident[EI_MAG2] = ELFMAG2, 3473 .e_ident[EI_MAG3] = ELFMAG3, 3474 .e_ident[EI_CLASS] = ELF_CLASS, 3475 .e_ident[EI_DATA] = ELF_DATA, 3476 .e_ident[EI_VERSION] = EV_CURRENT, 3477 .e_type = ET_EXEC, 3478 .e_machine = ELF_HOST_MACHINE, 3479 .e_version = EV_CURRENT, 3480 .e_phoff = offsetof(struct ElfImage, phdr), 3481 .e_shoff = offsetof(struct ElfImage, shdr), 3482 .e_ehsize = sizeof(ElfW(Shdr)), 3483 .e_phentsize = sizeof(ElfW(Phdr)), 3484 .e_phnum = 1, 3485 .e_shentsize = sizeof(ElfW(Shdr)), 3486 .e_shnum = ARRAY_SIZE(img->shdr), 3487 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 3488 #ifdef ELF_HOST_FLAGS 3489 .e_flags = ELF_HOST_FLAGS, 3490 #endif 3491 #ifdef ELF_OSABI 3492 .e_ident[EI_OSABI] = ELF_OSABI, 3493 #endif 3494 }, 3495 .phdr = { 3496 .p_type = PT_LOAD, 3497 .p_flags = PF_X, 3498 }, 3499 
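        /* The single executable PT_LOAD segment describes code_gen_buffer;
           its address and size are filled in at registration time below. */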
.shdr = { 3500 [0] = { .sh_type = SHT_NULL }, 3501 /* Trick: The contents of code_gen_buffer are not present in 3502 this fake ELF file; that got allocated elsewhere. Therefore 3503 we mark .text as SHT_NOBITS (similar to .bss) so that readers 3504 will not look for contents. We can record any address. */ 3505 [1] = { /* .text */ 3506 .sh_type = SHT_NOBITS, 3507 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 3508 }, 3509 [2] = { /* .debug_info */ 3510 .sh_type = SHT_PROGBITS, 3511 .sh_offset = offsetof(struct ElfImage, di), 3512 .sh_size = sizeof(struct DebugInfo), 3513 }, 3514 [3] = { /* .debug_abbrev */ 3515 .sh_type = SHT_PROGBITS, 3516 .sh_offset = offsetof(struct ElfImage, da), 3517 .sh_size = sizeof(img->da), 3518 }, 3519 [4] = { /* .debug_frame */ 3520 .sh_type = SHT_PROGBITS, 3521 .sh_offset = sizeof(struct ElfImage), 3522 }, 3523 [5] = { /* .symtab */ 3524 .sh_type = SHT_SYMTAB, 3525 .sh_offset = offsetof(struct ElfImage, sym), 3526 .sh_size = sizeof(img->sym), 3527 .sh_info = 1, 3528 .sh_link = ARRAY_SIZE(img->shdr) - 1, 3529 .sh_entsize = sizeof(ElfW(Sym)), 3530 }, 3531 [6] = { /* .strtab */ 3532 .sh_type = SHT_STRTAB, 3533 .sh_offset = offsetof(struct ElfImage, str), 3534 .sh_size = sizeof(img->str), 3535 } 3536 }, 3537 .sym = { 3538 [1] = { /* code_gen_buffer */ 3539 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 3540 .st_shndx = 1, 3541 } 3542 }, 3543 .di = { 3544 .len = sizeof(struct DebugInfo) - 4, 3545 .version = 2, 3546 .ptr_size = sizeof(void *), 3547 .cu_die = 1, 3548 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 3549 .fn_die = 2, 3550 .fn_name = "code_gen_buffer" 3551 }, 3552 .da = { 3553 1, /* abbrev number (the cu) */ 3554 0x11, 1, /* DW_TAG_compile_unit, has children */ 3555 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 3556 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3557 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3558 0, 0, /* end of abbrev */ 3559 2, /* abbrev number (the fn) */ 3560 0x2e, 0, /* DW_TAG_subprogram, no children */ 3561 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 3562 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3563 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3564 0, 0, /* end of abbrev */ 3565 0 /* no more abbrev */ 3566 }, 3567 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 3568 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 3569 }; 3570 3571 /* We only need a single jit entry; statically allocate it. 
*/ 3572 static struct jit_code_entry one_entry; 3573 3574 uintptr_t buf = (uintptr_t)buf_ptr; 3575 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 3576 DebugFrameHeader *dfh; 3577 3578 img = g_malloc(img_size); 3579 *img = img_template; 3580 3581 img->phdr.p_vaddr = buf; 3582 img->phdr.p_paddr = buf; 3583 img->phdr.p_memsz = buf_size; 3584 3585 img->shdr[1].sh_name = find_string(img->str, ".text"); 3586 img->shdr[1].sh_addr = buf; 3587 img->shdr[1].sh_size = buf_size; 3588 3589 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 3590 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 3591 3592 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 3593 img->shdr[4].sh_size = debug_frame_size; 3594 3595 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 3596 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 3597 3598 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 3599 img->sym[1].st_value = buf; 3600 img->sym[1].st_size = buf_size; 3601 3602 img->di.cu_low_pc = buf; 3603 img->di.cu_high_pc = buf + buf_size; 3604 img->di.fn_low_pc = buf; 3605 img->di.fn_high_pc = buf + buf_size; 3606 3607 dfh = (DebugFrameHeader *)(img + 1); 3608 memcpy(dfh, debug_frame, debug_frame_size); 3609 dfh->fde.func_start = buf; 3610 dfh->fde.func_len = buf_size; 3611 3612 #ifdef DEBUG_JIT 3613 /* Enable this block to be able to debug the ELF image file creation. 3614 One can use readelf, objdump, or other inspection utilities. */ 3615 { 3616 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 3617 if (f) { 3618 if (fwrite(img, img_size, 1, f) != img_size) { 3619 /* Avoid stupid unused return value warning for fwrite. */ 3620 } 3621 fclose(f); 3622 } 3623 } 3624 #endif 3625 3626 one_entry.symfile_addr = img; 3627 one_entry.symfile_size = img_size; 3628 3629 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 3630 __jit_debug_descriptor.relevant_entry = &one_entry; 3631 __jit_debug_descriptor.first_entry = &one_entry; 3632 __jit_debug_register_code(); 3633 } 3634 #else 3635 /* No support for the feature. Provide the entry point expected by exec.c, 3636 and implement the internal function we declared earlier. */ 3637 3638 static void tcg_register_jit_int(void *buf, size_t size, 3639 const void *debug_frame, 3640 size_t debug_frame_size) 3641 { 3642 } 3643 3644 void tcg_register_jit(void *buf, size_t buf_size) 3645 { 3646 } 3647 #endif /* ELF_HOST_MACHINE */ 3648
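/*
 * Usage sketch (illustrative only; the real code lives in each backend's
 * tcg-target.inc.c): a host that defines ELF_HOST_MACHINE provides a
 * static .debug_frame image describing its prologue and forwards it to
 * tcg_register_jit_int(), roughly:
 *
 *     void tcg_register_jit(void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */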