/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA  ELFDATA2MSB
#else
# define ELF_DATA  ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
*/ 100 static const char *target_parse_constraint(TCGArgConstraint *ct, 101 const char *ct_str, TCGType type); 102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 103 intptr_t arg2); 104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 105 static void tcg_out_movi(TCGContext *s, TCGType type, 106 TCGReg ret, tcg_target_long arg); 107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 108 const int *const_args); 109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 110 intptr_t arg2); 111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 112 TCGReg base, intptr_t ofs); 113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 114 static int tcg_target_const_match(tcg_target_long val, TCGType type, 115 const TCGArgConstraint *arg_ct); 116 #ifdef TCG_TARGET_NEED_LDST_LABELS 117 static bool tcg_out_ldst_finalize(TCGContext *s); 118 #endif 119 120 #define TCG_HIGHWATER 1024 121 122 static TCGContext **tcg_ctxs; 123 static unsigned int n_tcg_ctxs; 124 TCGv_env cpu_env = 0; 125 126 /* 127 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 128 * dynamically allocate from as demand dictates. Given appropriate region 129 * sizing, this minimizes flushes even when some TCG threads generate a lot 130 * more code than others. 131 */ 132 struct tcg_region_state { 133 QemuMutex lock; 134 135 /* fields set at init time */ 136 void *start; 137 void *start_aligned; 138 void *end; 139 size_t n; 140 size_t size; /* size of one region */ 141 size_t stride; /* .size + guard size */ 142 143 /* fields protected by the lock */ 144 size_t current; /* current region index */ 145 size_t agg_size_full; /* aggregate size of full regions */ 146 }; 147 148 static struct tcg_region_state region; 149 150 static TCGRegSet tcg_target_available_regs[2]; 151 static TCGRegSet tcg_target_call_clobber_regs; 152 153 #if TCG_TARGET_INSN_UNIT_SIZE == 1 154 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 155 { 156 *s->code_ptr++ = v; 157 } 158 159 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 160 uint8_t v) 161 { 162 *p = v; 163 } 164 #endif 165 166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 168 { 169 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 170 *s->code_ptr++ = v; 171 } else { 172 tcg_insn_unit *p = s->code_ptr; 173 memcpy(p, &v, sizeof(v)); 174 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 175 } 176 } 177 178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 179 uint16_t v) 180 { 181 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 182 *p = v; 183 } else { 184 memcpy(p, &v, sizeof(v)); 185 } 186 } 187 #endif 188 189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 191 { 192 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 193 *s->code_ptr++ = v; 194 } else { 195 tcg_insn_unit *p = s->code_ptr; 196 memcpy(p, &v, sizeof(v)); 197 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 198 } 199 } 200 201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 202 uint32_t v) 203 { 204 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 205 *p = v; 206 } else { 207 memcpy(p, &v, sizeof(v)); 208 } 209 } 210 #endif 211 212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 214 { 215 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 
216 *s->code_ptr++ = v; 217 } else { 218 tcg_insn_unit *p = s->code_ptr; 219 memcpy(p, &v, sizeof(v)); 220 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 221 } 222 } 223 224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 225 uint64_t v) 226 { 227 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 228 *p = v; 229 } else { 230 memcpy(p, &v, sizeof(v)); 231 } 232 } 233 #endif 234 235 /* label relocation processing */ 236 237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 238 TCGLabel *l, intptr_t addend) 239 { 240 TCGRelocation *r; 241 242 if (l->has_value) { 243 /* FIXME: This may break relocations on RISC targets that 244 modify instruction fields in place. The caller may not have 245 written the initial value. */ 246 patch_reloc(code_ptr, type, l->u.value, addend); 247 } else { 248 /* add a new relocation entry */ 249 r = tcg_malloc(sizeof(TCGRelocation)); 250 r->type = type; 251 r->ptr = code_ptr; 252 r->addend = addend; 253 r->next = l->u.first_reloc; 254 l->u.first_reloc = r; 255 } 256 } 257 258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 259 { 260 intptr_t value = (intptr_t)ptr; 261 TCGRelocation *r; 262 263 tcg_debug_assert(!l->has_value); 264 265 for (r = l->u.first_reloc; r != NULL; r = r->next) { 266 patch_reloc(r->ptr, r->type, value, r->addend); 267 } 268 269 l->has_value = 1; 270 l->u.value_ptr = ptr; 271 } 272 273 TCGLabel *gen_new_label(void) 274 { 275 TCGContext *s = tcg_ctx; 276 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 277 278 *l = (TCGLabel){ 279 .id = s->nb_labels++ 280 }; 281 282 return l; 283 } 284 285 #include "tcg-target.inc.c" 286 287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 288 { 289 void *start, *end; 290 291 start = region.start_aligned + curr_region * region.stride; 292 end = start + region.size; 293 294 if (curr_region == 0) { 295 start = region.start; 296 } 297 if (curr_region == region.n - 1) { 298 end = region.end; 299 } 300 301 *pstart = start; 302 *pend = end; 303 } 304 305 static void tcg_region_assign(TCGContext *s, size_t curr_region) 306 { 307 void *start, *end; 308 309 tcg_region_bounds(curr_region, &start, &end); 310 311 s->code_gen_buffer = start; 312 s->code_gen_ptr = start; 313 s->code_gen_buffer_size = end - start; 314 s->code_gen_highwater = end - TCG_HIGHWATER; 315 } 316 317 static bool tcg_region_alloc__locked(TCGContext *s) 318 { 319 if (region.current == region.n) { 320 return true; 321 } 322 tcg_region_assign(s, region.current); 323 region.current++; 324 return false; 325 } 326 327 /* 328 * Request a new region once the one in use has filled up. 329 * Returns true on error. 330 */ 331 static bool tcg_region_alloc(TCGContext *s) 332 { 333 bool err; 334 /* read the region size now; alloc__locked will overwrite it on success */ 335 size_t size_full = s->code_gen_buffer_size; 336 337 qemu_mutex_lock(®ion.lock); 338 err = tcg_region_alloc__locked(s); 339 if (!err) { 340 region.agg_size_full += size_full - TCG_HIGHWATER; 341 } 342 qemu_mutex_unlock(®ion.lock); 343 return err; 344 } 345 346 /* 347 * Perform a context's first region allocation. 348 * This function does _not_ increment region.agg_size_full. 
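 * (On a context's very first allocation there is no previously filled region
 * to account for; tcg_region_alloc() is what accounts a region's size into
 * agg_size_full once it fills up.)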
349 */ 350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 351 { 352 return tcg_region_alloc__locked(s); 353 } 354 355 /* Call from a safe-work context */ 356 void tcg_region_reset_all(void) 357 { 358 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 359 unsigned int i; 360 361 qemu_mutex_lock(®ion.lock); 362 region.current = 0; 363 region.agg_size_full = 0; 364 365 for (i = 0; i < n_ctxs; i++) { 366 TCGContext *s = atomic_read(&tcg_ctxs[i]); 367 bool err = tcg_region_initial_alloc__locked(s); 368 369 g_assert(!err); 370 } 371 qemu_mutex_unlock(®ion.lock); 372 } 373 374 #ifdef CONFIG_USER_ONLY 375 static size_t tcg_n_regions(void) 376 { 377 return 1; 378 } 379 #else 380 /* 381 * It is likely that some vCPUs will translate more code than others, so we 382 * first try to set more regions than max_cpus, with those regions being of 383 * reasonable size. If that's not possible we make do by evenly dividing 384 * the code_gen_buffer among the vCPUs. 385 */ 386 static size_t tcg_n_regions(void) 387 { 388 size_t i; 389 390 /* Use a single region if all we have is one vCPU thread */ 391 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 392 return 1; 393 } 394 395 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 396 for (i = 8; i > 0; i--) { 397 size_t regions_per_thread = i; 398 size_t region_size; 399 400 region_size = tcg_init_ctx.code_gen_buffer_size; 401 region_size /= max_cpus * regions_per_thread; 402 403 if (region_size >= 2 * 1024u * 1024) { 404 return max_cpus * regions_per_thread; 405 } 406 } 407 /* If we can't, then just allocate one region per vCPU thread */ 408 return max_cpus; 409 } 410 #endif 411 412 /* 413 * Initializes region partitioning. 414 * 415 * Called at init time from the parent thread (i.e. the one calling 416 * tcg_context_init), after the target's TCG globals have been set. 417 * 418 * Region partitioning works by splitting code_gen_buffer into separate regions, 419 * and then assigning regions to TCG threads so that the threads can translate 420 * code in parallel without synchronization. 421 * 422 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 423 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 424 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 425 * must have been parsed before calling this function, since it calls 426 * qemu_tcg_mttcg_enabled(). 427 * 428 * In user-mode we use a single region. Having multiple regions in user-mode 429 * is not supported, because the number of vCPU threads (recall that each thread 430 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 431 * OS, and usually this number is huge (tens of thousands is not uncommon). 432 * Thus, given this large bound on the number of vCPU threads and the fact 433 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 434 * that the availability of at least one region per vCPU thread. 435 * 436 * However, this user-mode limitation is unlikely to be a significant problem 437 * in practice. Multi-threaded guests share most if not all of their translated 438 * code, which makes parallel code generation less appealing than in softmmu. 
439 */ 440 void tcg_region_init(void) 441 { 442 void *buf = tcg_init_ctx.code_gen_buffer; 443 void *aligned; 444 size_t size = tcg_init_ctx.code_gen_buffer_size; 445 size_t page_size = qemu_real_host_page_size; 446 size_t region_size; 447 size_t n_regions; 448 size_t i; 449 450 n_regions = tcg_n_regions(); 451 452 /* The first region will be 'aligned - buf' bytes larger than the others */ 453 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 454 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 455 /* 456 * Make region_size a multiple of page_size, using aligned as the start. 457 * As a result of this we might end up with a few extra pages at the end of 458 * the buffer; we will assign those to the last region. 459 */ 460 region_size = (size - (aligned - buf)) / n_regions; 461 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 462 463 /* A region must have at least 2 pages; one code, one guard */ 464 g_assert(region_size >= 2 * page_size); 465 466 /* init the region struct */ 467 qemu_mutex_init(®ion.lock); 468 region.n = n_regions; 469 region.size = region_size - page_size; 470 region.stride = region_size; 471 region.start = buf; 472 region.start_aligned = aligned; 473 /* page-align the end, since its last page will be a guard page */ 474 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 475 /* account for that last guard page */ 476 region.end -= page_size; 477 478 /* set guard pages */ 479 for (i = 0; i < region.n; i++) { 480 void *start, *end; 481 int rc; 482 483 tcg_region_bounds(i, &start, &end); 484 rc = qemu_mprotect_none(end, page_size); 485 g_assert(!rc); 486 } 487 488 /* In user-mode we support only one ctx, so do the initial allocation now */ 489 #ifdef CONFIG_USER_ONLY 490 { 491 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 492 493 g_assert(!err); 494 } 495 #endif 496 } 497 498 /* 499 * All TCG threads except the parent (i.e. the one that called tcg_context_init 500 * and registered the target's TCG globals) must register with this function 501 * before initiating translation. 502 * 503 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 504 * of tcg_region_init() for the reasoning behind this. 505 * 506 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 507 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 508 * is not used anymore for translation once this function is called. 509 * 510 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 511 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 512 */ 513 #ifdef CONFIG_USER_ONLY 514 void tcg_register_thread(void) 515 { 516 tcg_ctx = &tcg_init_ctx; 517 } 518 #else 519 void tcg_register_thread(void) 520 { 521 TCGContext *s = g_malloc(sizeof(*s)); 522 unsigned int i, n; 523 bool err; 524 525 *s = tcg_init_ctx; 526 527 /* Relink mem_base. 
*/ 528 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 529 if (tcg_init_ctx.temps[i].mem_base) { 530 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 531 tcg_debug_assert(b >= 0 && b < n); 532 s->temps[i].mem_base = &s->temps[b]; 533 } 534 } 535 536 /* Claim an entry in tcg_ctxs */ 537 n = atomic_fetch_inc(&n_tcg_ctxs); 538 g_assert(n < max_cpus); 539 atomic_set(&tcg_ctxs[n], s); 540 541 tcg_ctx = s; 542 qemu_mutex_lock(®ion.lock); 543 err = tcg_region_initial_alloc__locked(tcg_ctx); 544 g_assert(!err); 545 qemu_mutex_unlock(®ion.lock); 546 } 547 #endif /* !CONFIG_USER_ONLY */ 548 549 /* 550 * Returns the size (in bytes) of all translated code (i.e. from all regions) 551 * currently in the cache. 552 * See also: tcg_code_capacity() 553 * Do not confuse with tcg_current_code_size(); that one applies to a single 554 * TCG context. 555 */ 556 size_t tcg_code_size(void) 557 { 558 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 559 unsigned int i; 560 size_t total; 561 562 qemu_mutex_lock(®ion.lock); 563 total = region.agg_size_full; 564 for (i = 0; i < n_ctxs; i++) { 565 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 566 size_t size; 567 568 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 569 g_assert(size <= s->code_gen_buffer_size); 570 total += size; 571 } 572 qemu_mutex_unlock(®ion.lock); 573 return total; 574 } 575 576 /* 577 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 578 * regions. 579 * See also: tcg_code_size() 580 */ 581 size_t tcg_code_capacity(void) 582 { 583 size_t guard_size, capacity; 584 585 /* no need for synchronization; these variables are set at init time */ 586 guard_size = region.stride - region.size; 587 capacity = region.end + guard_size - region.start; 588 capacity -= region.n * (guard_size + TCG_HIGHWATER); 589 return capacity; 590 } 591 592 /* pool based memory allocation */ 593 void *tcg_malloc_internal(TCGContext *s, int size) 594 { 595 TCGPool *p; 596 int pool_size; 597 598 if (size > TCG_POOL_CHUNK_SIZE) { 599 /* big malloc: insert a new pool (XXX: could optimize) */ 600 p = g_malloc(sizeof(TCGPool) + size); 601 p->size = size; 602 p->next = s->pool_first_large; 603 s->pool_first_large = p; 604 return p->data; 605 } else { 606 p = s->pool_current; 607 if (!p) { 608 p = s->pool_first; 609 if (!p) 610 goto new_pool; 611 } else { 612 if (!p->next) { 613 new_pool: 614 pool_size = TCG_POOL_CHUNK_SIZE; 615 p = g_malloc(sizeof(TCGPool) + pool_size); 616 p->size = pool_size; 617 p->next = NULL; 618 if (s->pool_current) 619 s->pool_current->next = p; 620 else 621 s->pool_first = p; 622 } else { 623 p = p->next; 624 } 625 } 626 } 627 s->pool_current = p; 628 s->pool_cur = p->data + size; 629 s->pool_end = p->data + p->size; 630 return p->data; 631 } 632 633 void tcg_pool_reset(TCGContext *s) 634 { 635 TCGPool *p, *t; 636 for (p = s->pool_first_large; p; p = t) { 637 t = p->next; 638 g_free(p); 639 } 640 s->pool_first_large = NULL; 641 s->pool_cur = s->pool_end = NULL; 642 s->pool_current = NULL; 643 } 644 645 typedef struct TCGHelperInfo { 646 void *func; 647 const char *name; 648 unsigned flags; 649 unsigned sizemask; 650 } TCGHelperInfo; 651 652 #include "exec/helper-proto.h" 653 654 static const TCGHelperInfo all_helpers[] = { 655 #include "exec/helper-tcg.h" 656 }; 657 static GHashTable *helper_table; 658 659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 660 static void process_op_defs(TCGContext *s); 661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, 
TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->code_gen_prologue = buf0;

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.
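       From here on code_gen_buffer, code_buf and code_gen_ptr start right
       after the generated prologue, and code_gen_buffer_size shrinks by
       prologue_size, so translated code can never overwrite the prologue.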
*/ 784 prologue_size = tcg_current_code_size(s); 785 s->code_gen_ptr = buf1; 786 s->code_gen_buffer = buf1; 787 s->code_buf = buf1; 788 total_size = s->code_gen_buffer_size - prologue_size; 789 s->code_gen_buffer_size = total_size; 790 791 /* Compute a high-water mark, at which we voluntarily flush the buffer 792 and start over. The size here is arbitrary, significantly larger 793 than we expect the code generation for any one opcode to require. */ 794 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 795 796 tcg_register_jit(s->code_gen_buffer, total_size); 797 798 #ifdef DEBUG_DISAS 799 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 800 qemu_log_lock(); 801 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 802 log_disas(buf0, prologue_size); 803 qemu_log("\n"); 804 qemu_log_flush(); 805 qemu_log_unlock(); 806 } 807 #endif 808 809 /* Assert that goto_ptr is implemented completely. */ 810 if (TCG_TARGET_HAS_goto_ptr) { 811 tcg_debug_assert(s->code_gen_epilogue != NULL); 812 } 813 } 814 815 void tcg_func_start(TCGContext *s) 816 { 817 tcg_pool_reset(s); 818 s->nb_temps = s->nb_globals; 819 820 /* No temps have been previously allocated for size or locality. */ 821 memset(s->free_temps, 0, sizeof(s->free_temps)); 822 823 s->nb_labels = 0; 824 s->current_frame_offset = s->frame_start; 825 826 #ifdef CONFIG_DEBUG_TCG 827 s->goto_tb_issue_mask = 0; 828 #endif 829 830 s->gen_op_buf[0].next = 1; 831 s->gen_op_buf[0].prev = 0; 832 s->gen_next_op_idx = 1; 833 } 834 835 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 836 { 837 int n = s->nb_temps++; 838 tcg_debug_assert(n < TCG_MAX_TEMPS); 839 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 840 } 841 842 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 843 { 844 TCGTemp *ts; 845 846 tcg_debug_assert(s->nb_globals == s->nb_temps); 847 s->nb_globals++; 848 ts = tcg_temp_alloc(s); 849 ts->temp_global = 1; 850 851 return ts; 852 } 853 854 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 855 TCGReg reg, const char *name) 856 { 857 TCGTemp *ts; 858 859 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 860 tcg_abort(); 861 } 862 863 ts = tcg_global_alloc(s); 864 ts->base_type = type; 865 ts->type = type; 866 ts->fixed_reg = 1; 867 ts->reg = reg; 868 ts->name = name; 869 tcg_regset_set_reg(s->reserved_regs, reg); 870 871 return ts; 872 } 873 874 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 875 { 876 s->frame_start = start; 877 s->frame_end = start + size; 878 s->frame_temp 879 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 880 } 881 882 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 883 intptr_t offset, const char *name) 884 { 885 TCGContext *s = tcg_ctx; 886 TCGTemp *base_ts = tcgv_ptr_temp(base); 887 TCGTemp *ts = tcg_global_alloc(s); 888 int indirect_reg = 0, bigendian = 0; 889 #ifdef HOST_WORDS_BIGENDIAN 890 bigendian = 1; 891 #endif 892 893 if (!base_ts->fixed_reg) { 894 /* We do not support double-indirect registers. */ 895 tcg_debug_assert(!base_ts->indirect_reg); 896 base_ts->indirect_base = 1; 897 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 898 ? 
2 : 1); 899 indirect_reg = 1; 900 } 901 902 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 903 TCGTemp *ts2 = tcg_global_alloc(s); 904 char buf[64]; 905 906 ts->base_type = TCG_TYPE_I64; 907 ts->type = TCG_TYPE_I32; 908 ts->indirect_reg = indirect_reg; 909 ts->mem_allocated = 1; 910 ts->mem_base = base_ts; 911 ts->mem_offset = offset + bigendian * 4; 912 pstrcpy(buf, sizeof(buf), name); 913 pstrcat(buf, sizeof(buf), "_0"); 914 ts->name = strdup(buf); 915 916 tcg_debug_assert(ts2 == ts + 1); 917 ts2->base_type = TCG_TYPE_I64; 918 ts2->type = TCG_TYPE_I32; 919 ts2->indirect_reg = indirect_reg; 920 ts2->mem_allocated = 1; 921 ts2->mem_base = base_ts; 922 ts2->mem_offset = offset + (1 - bigendian) * 4; 923 pstrcpy(buf, sizeof(buf), name); 924 pstrcat(buf, sizeof(buf), "_1"); 925 ts2->name = strdup(buf); 926 } else { 927 ts->base_type = type; 928 ts->type = type; 929 ts->indirect_reg = indirect_reg; 930 ts->mem_allocated = 1; 931 ts->mem_base = base_ts; 932 ts->mem_offset = offset; 933 ts->name = name; 934 } 935 return ts; 936 } 937 938 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local) 939 { 940 TCGContext *s = tcg_ctx; 941 TCGTemp *ts; 942 int idx, k; 943 944 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 945 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 946 if (idx < TCG_MAX_TEMPS) { 947 /* There is already an available temp with the right type. */ 948 clear_bit(idx, s->free_temps[k].l); 949 950 ts = &s->temps[idx]; 951 ts->temp_allocated = 1; 952 tcg_debug_assert(ts->base_type == type); 953 tcg_debug_assert(ts->temp_local == temp_local); 954 } else { 955 ts = tcg_temp_alloc(s); 956 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 957 TCGTemp *ts2 = tcg_temp_alloc(s); 958 959 ts->base_type = type; 960 ts->type = TCG_TYPE_I32; 961 ts->temp_allocated = 1; 962 ts->temp_local = temp_local; 963 964 tcg_debug_assert(ts2 == ts + 1); 965 ts2->base_type = TCG_TYPE_I64; 966 ts2->type = TCG_TYPE_I32; 967 ts2->temp_allocated = 1; 968 ts2->temp_local = temp_local; 969 } else { 970 ts->base_type = type; 971 ts->type = type; 972 ts->temp_allocated = 1; 973 ts->temp_local = temp_local; 974 } 975 } 976 977 #if defined(CONFIG_DEBUG_TCG) 978 s->temps_in_use++; 979 #endif 980 return ts; 981 } 982 983 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 984 { 985 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 986 return temp_tcgv_i32(t); 987 } 988 989 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 990 { 991 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 992 return temp_tcgv_i64(t); 993 } 994 995 static void tcg_temp_free_internal(TCGTemp *ts) 996 { 997 TCGContext *s = tcg_ctx; 998 int k, idx; 999 1000 #if defined(CONFIG_DEBUG_TCG) 1001 s->temps_in_use--; 1002 if (s->temps_in_use < 0) { 1003 fprintf(stderr, "More temporaries freed than allocated!\n"); 1004 } 1005 #endif 1006 1007 tcg_debug_assert(ts->temp_global == 0); 1008 tcg_debug_assert(ts->temp_allocated != 0); 1009 ts->temp_allocated = 0; 1010 1011 idx = temp_idx(ts); 1012 k = ts->base_type + (ts->temp_local ? 
TCG_TYPE_COUNT : 0); 1013 set_bit(idx, s->free_temps[k].l); 1014 } 1015 1016 void tcg_temp_free_i32(TCGv_i32 arg) 1017 { 1018 tcg_temp_free_internal(tcgv_i32_temp(arg)); 1019 } 1020 1021 void tcg_temp_free_i64(TCGv_i64 arg) 1022 { 1023 tcg_temp_free_internal(tcgv_i64_temp(arg)); 1024 } 1025 1026 TCGv_i32 tcg_const_i32(int32_t val) 1027 { 1028 TCGv_i32 t0; 1029 t0 = tcg_temp_new_i32(); 1030 tcg_gen_movi_i32(t0, val); 1031 return t0; 1032 } 1033 1034 TCGv_i64 tcg_const_i64(int64_t val) 1035 { 1036 TCGv_i64 t0; 1037 t0 = tcg_temp_new_i64(); 1038 tcg_gen_movi_i64(t0, val); 1039 return t0; 1040 } 1041 1042 TCGv_i32 tcg_const_local_i32(int32_t val) 1043 { 1044 TCGv_i32 t0; 1045 t0 = tcg_temp_local_new_i32(); 1046 tcg_gen_movi_i32(t0, val); 1047 return t0; 1048 } 1049 1050 TCGv_i64 tcg_const_local_i64(int64_t val) 1051 { 1052 TCGv_i64 t0; 1053 t0 = tcg_temp_local_new_i64(); 1054 tcg_gen_movi_i64(t0, val); 1055 return t0; 1056 } 1057 1058 #if defined(CONFIG_DEBUG_TCG) 1059 void tcg_clear_temp_count(void) 1060 { 1061 TCGContext *s = tcg_ctx; 1062 s->temps_in_use = 0; 1063 } 1064 1065 int tcg_check_temp_count(void) 1066 { 1067 TCGContext *s = tcg_ctx; 1068 if (s->temps_in_use) { 1069 /* Clear the count so that we don't give another 1070 * warning immediately next time around. 1071 */ 1072 s->temps_in_use = 0; 1073 return 1; 1074 } 1075 return 0; 1076 } 1077 #endif 1078 1079 /* Return true if OP may appear in the opcode stream. 1080 Test the runtime variable that controls each opcode. */ 1081 bool tcg_op_supported(TCGOpcode op) 1082 { 1083 switch (op) { 1084 case INDEX_op_discard: 1085 case INDEX_op_set_label: 1086 case INDEX_op_call: 1087 case INDEX_op_br: 1088 case INDEX_op_mb: 1089 case INDEX_op_insn_start: 1090 case INDEX_op_exit_tb: 1091 case INDEX_op_goto_tb: 1092 case INDEX_op_qemu_ld_i32: 1093 case INDEX_op_qemu_st_i32: 1094 case INDEX_op_qemu_ld_i64: 1095 case INDEX_op_qemu_st_i64: 1096 return true; 1097 1098 case INDEX_op_goto_ptr: 1099 return TCG_TARGET_HAS_goto_ptr; 1100 1101 case INDEX_op_mov_i32: 1102 case INDEX_op_movi_i32: 1103 case INDEX_op_setcond_i32: 1104 case INDEX_op_brcond_i32: 1105 case INDEX_op_ld8u_i32: 1106 case INDEX_op_ld8s_i32: 1107 case INDEX_op_ld16u_i32: 1108 case INDEX_op_ld16s_i32: 1109 case INDEX_op_ld_i32: 1110 case INDEX_op_st8_i32: 1111 case INDEX_op_st16_i32: 1112 case INDEX_op_st_i32: 1113 case INDEX_op_add_i32: 1114 case INDEX_op_sub_i32: 1115 case INDEX_op_mul_i32: 1116 case INDEX_op_and_i32: 1117 case INDEX_op_or_i32: 1118 case INDEX_op_xor_i32: 1119 case INDEX_op_shl_i32: 1120 case INDEX_op_shr_i32: 1121 case INDEX_op_sar_i32: 1122 return true; 1123 1124 case INDEX_op_movcond_i32: 1125 return TCG_TARGET_HAS_movcond_i32; 1126 case INDEX_op_div_i32: 1127 case INDEX_op_divu_i32: 1128 return TCG_TARGET_HAS_div_i32; 1129 case INDEX_op_rem_i32: 1130 case INDEX_op_remu_i32: 1131 return TCG_TARGET_HAS_rem_i32; 1132 case INDEX_op_div2_i32: 1133 case INDEX_op_divu2_i32: 1134 return TCG_TARGET_HAS_div2_i32; 1135 case INDEX_op_rotl_i32: 1136 case INDEX_op_rotr_i32: 1137 return TCG_TARGET_HAS_rot_i32; 1138 case INDEX_op_deposit_i32: 1139 return TCG_TARGET_HAS_deposit_i32; 1140 case INDEX_op_extract_i32: 1141 return TCG_TARGET_HAS_extract_i32; 1142 case INDEX_op_sextract_i32: 1143 return TCG_TARGET_HAS_sextract_i32; 1144 case INDEX_op_add2_i32: 1145 return TCG_TARGET_HAS_add2_i32; 1146 case INDEX_op_sub2_i32: 1147 return TCG_TARGET_HAS_sub2_i32; 1148 case INDEX_op_mulu2_i32: 1149 return TCG_TARGET_HAS_mulu2_i32; 1150 case INDEX_op_muls2_i32: 1151 return 
TCG_TARGET_HAS_muls2_i32; 1152 case INDEX_op_muluh_i32: 1153 return TCG_TARGET_HAS_muluh_i32; 1154 case INDEX_op_mulsh_i32: 1155 return TCG_TARGET_HAS_mulsh_i32; 1156 case INDEX_op_ext8s_i32: 1157 return TCG_TARGET_HAS_ext8s_i32; 1158 case INDEX_op_ext16s_i32: 1159 return TCG_TARGET_HAS_ext16s_i32; 1160 case INDEX_op_ext8u_i32: 1161 return TCG_TARGET_HAS_ext8u_i32; 1162 case INDEX_op_ext16u_i32: 1163 return TCG_TARGET_HAS_ext16u_i32; 1164 case INDEX_op_bswap16_i32: 1165 return TCG_TARGET_HAS_bswap16_i32; 1166 case INDEX_op_bswap32_i32: 1167 return TCG_TARGET_HAS_bswap32_i32; 1168 case INDEX_op_not_i32: 1169 return TCG_TARGET_HAS_not_i32; 1170 case INDEX_op_neg_i32: 1171 return TCG_TARGET_HAS_neg_i32; 1172 case INDEX_op_andc_i32: 1173 return TCG_TARGET_HAS_andc_i32; 1174 case INDEX_op_orc_i32: 1175 return TCG_TARGET_HAS_orc_i32; 1176 case INDEX_op_eqv_i32: 1177 return TCG_TARGET_HAS_eqv_i32; 1178 case INDEX_op_nand_i32: 1179 return TCG_TARGET_HAS_nand_i32; 1180 case INDEX_op_nor_i32: 1181 return TCG_TARGET_HAS_nor_i32; 1182 case INDEX_op_clz_i32: 1183 return TCG_TARGET_HAS_clz_i32; 1184 case INDEX_op_ctz_i32: 1185 return TCG_TARGET_HAS_ctz_i32; 1186 case INDEX_op_ctpop_i32: 1187 return TCG_TARGET_HAS_ctpop_i32; 1188 1189 case INDEX_op_brcond2_i32: 1190 case INDEX_op_setcond2_i32: 1191 return TCG_TARGET_REG_BITS == 32; 1192 1193 case INDEX_op_mov_i64: 1194 case INDEX_op_movi_i64: 1195 case INDEX_op_setcond_i64: 1196 case INDEX_op_brcond_i64: 1197 case INDEX_op_ld8u_i64: 1198 case INDEX_op_ld8s_i64: 1199 case INDEX_op_ld16u_i64: 1200 case INDEX_op_ld16s_i64: 1201 case INDEX_op_ld32u_i64: 1202 case INDEX_op_ld32s_i64: 1203 case INDEX_op_ld_i64: 1204 case INDEX_op_st8_i64: 1205 case INDEX_op_st16_i64: 1206 case INDEX_op_st32_i64: 1207 case INDEX_op_st_i64: 1208 case INDEX_op_add_i64: 1209 case INDEX_op_sub_i64: 1210 case INDEX_op_mul_i64: 1211 case INDEX_op_and_i64: 1212 case INDEX_op_or_i64: 1213 case INDEX_op_xor_i64: 1214 case INDEX_op_shl_i64: 1215 case INDEX_op_shr_i64: 1216 case INDEX_op_sar_i64: 1217 case INDEX_op_ext_i32_i64: 1218 case INDEX_op_extu_i32_i64: 1219 return TCG_TARGET_REG_BITS == 64; 1220 1221 case INDEX_op_movcond_i64: 1222 return TCG_TARGET_HAS_movcond_i64; 1223 case INDEX_op_div_i64: 1224 case INDEX_op_divu_i64: 1225 return TCG_TARGET_HAS_div_i64; 1226 case INDEX_op_rem_i64: 1227 case INDEX_op_remu_i64: 1228 return TCG_TARGET_HAS_rem_i64; 1229 case INDEX_op_div2_i64: 1230 case INDEX_op_divu2_i64: 1231 return TCG_TARGET_HAS_div2_i64; 1232 case INDEX_op_rotl_i64: 1233 case INDEX_op_rotr_i64: 1234 return TCG_TARGET_HAS_rot_i64; 1235 case INDEX_op_deposit_i64: 1236 return TCG_TARGET_HAS_deposit_i64; 1237 case INDEX_op_extract_i64: 1238 return TCG_TARGET_HAS_extract_i64; 1239 case INDEX_op_sextract_i64: 1240 return TCG_TARGET_HAS_sextract_i64; 1241 case INDEX_op_extrl_i64_i32: 1242 return TCG_TARGET_HAS_extrl_i64_i32; 1243 case INDEX_op_extrh_i64_i32: 1244 return TCG_TARGET_HAS_extrh_i64_i32; 1245 case INDEX_op_ext8s_i64: 1246 return TCG_TARGET_HAS_ext8s_i64; 1247 case INDEX_op_ext16s_i64: 1248 return TCG_TARGET_HAS_ext16s_i64; 1249 case INDEX_op_ext32s_i64: 1250 return TCG_TARGET_HAS_ext32s_i64; 1251 case INDEX_op_ext8u_i64: 1252 return TCG_TARGET_HAS_ext8u_i64; 1253 case INDEX_op_ext16u_i64: 1254 return TCG_TARGET_HAS_ext16u_i64; 1255 case INDEX_op_ext32u_i64: 1256 return TCG_TARGET_HAS_ext32u_i64; 1257 case INDEX_op_bswap16_i64: 1258 return TCG_TARGET_HAS_bswap16_i64; 1259 case INDEX_op_bswap32_i64: 1260 return TCG_TARGET_HAS_bswap32_i64; 1261 case INDEX_op_bswap64_i64: 
1262 return TCG_TARGET_HAS_bswap64_i64; 1263 case INDEX_op_not_i64: 1264 return TCG_TARGET_HAS_not_i64; 1265 case INDEX_op_neg_i64: 1266 return TCG_TARGET_HAS_neg_i64; 1267 case INDEX_op_andc_i64: 1268 return TCG_TARGET_HAS_andc_i64; 1269 case INDEX_op_orc_i64: 1270 return TCG_TARGET_HAS_orc_i64; 1271 case INDEX_op_eqv_i64: 1272 return TCG_TARGET_HAS_eqv_i64; 1273 case INDEX_op_nand_i64: 1274 return TCG_TARGET_HAS_nand_i64; 1275 case INDEX_op_nor_i64: 1276 return TCG_TARGET_HAS_nor_i64; 1277 case INDEX_op_clz_i64: 1278 return TCG_TARGET_HAS_clz_i64; 1279 case INDEX_op_ctz_i64: 1280 return TCG_TARGET_HAS_ctz_i64; 1281 case INDEX_op_ctpop_i64: 1282 return TCG_TARGET_HAS_ctpop_i64; 1283 case INDEX_op_add2_i64: 1284 return TCG_TARGET_HAS_add2_i64; 1285 case INDEX_op_sub2_i64: 1286 return TCG_TARGET_HAS_sub2_i64; 1287 case INDEX_op_mulu2_i64: 1288 return TCG_TARGET_HAS_mulu2_i64; 1289 case INDEX_op_muls2_i64: 1290 return TCG_TARGET_HAS_muls2_i64; 1291 case INDEX_op_muluh_i64: 1292 return TCG_TARGET_HAS_muluh_i64; 1293 case INDEX_op_mulsh_i64: 1294 return TCG_TARGET_HAS_mulsh_i64; 1295 1296 case NB_OPS: 1297 break; 1298 } 1299 g_assert_not_reached(); 1300 } 1301 1302 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1303 and endian swap. Maybe it would be better to do the alignment 1304 and endian swap in tcg_reg_alloc_call(). */ 1305 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1306 { 1307 TCGContext *s = tcg_ctx; 1308 int i, real_args, nb_rets, pi; 1309 unsigned sizemask, flags; 1310 TCGHelperInfo *info; 1311 TCGOp *op; 1312 1313 info = g_hash_table_lookup(helper_table, (gpointer)func); 1314 flags = info->flags; 1315 sizemask = info->sizemask; 1316 1317 #if defined(__sparc__) && !defined(__arch64__) \ 1318 && !defined(CONFIG_TCG_INTERPRETER) 1319 /* We have 64-bit values in one register, but need to pass as two 1320 separate parameters. Split them. */ 1321 int orig_sizemask = sizemask; 1322 int orig_nargs = nargs; 1323 TCGv_i64 retl, reth; 1324 TCGTemp *split_args[MAX_OPC_PARAM]; 1325 1326 TCGV_UNUSED_I64(retl); 1327 TCGV_UNUSED_I64(reth); 1328 if (sizemask != 0) { 1329 for (i = real_args = 0; i < nargs; ++i) { 1330 int is_64bit = sizemask & (1 << (i+1)*2); 1331 if (is_64bit) { 1332 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1333 TCGv_i32 h = tcg_temp_new_i32(); 1334 TCGv_i32 l = tcg_temp_new_i32(); 1335 tcg_gen_extr_i64_i32(l, h, orig); 1336 split_args[real_args++] = tcgv_i32_temp(h); 1337 split_args[real_args++] = tcgv_i32_temp(l); 1338 } else { 1339 split_args[real_args++] = args[i]; 1340 } 1341 } 1342 nargs = real_args; 1343 args = split_args; 1344 sizemask = 0; 1345 } 1346 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1347 for (i = 0; i < nargs; ++i) { 1348 int is_64bit = sizemask & (1 << (i+1)*2); 1349 int is_signed = sizemask & (2 << (i+1)*2); 1350 if (!is_64bit) { 1351 TCGv_i64 temp = tcg_temp_new_i64(); 1352 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1353 if (is_signed) { 1354 tcg_gen_ext32s_i64(temp, orig); 1355 } else { 1356 tcg_gen_ext32u_i64(temp, orig); 1357 } 1358 args[i] = tcgv_i64_temp(temp); 1359 } 1360 } 1361 #endif /* TCG_TARGET_EXTEND_ARGS */ 1362 1363 i = s->gen_next_op_idx; 1364 tcg_debug_assert(i < OPC_BUF_SIZE); 1365 s->gen_op_buf[0].prev = i; 1366 s->gen_next_op_idx = i + 1; 1367 op = &s->gen_op_buf[i]; 1368 1369 /* Set links for sequential allocation during translation. 
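       op->prev/op->next are indices into gen_op_buf[]; together with the
       sentinel entry gen_op_buf[0] they form a doubly linked list of the
       ops emitted so far.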
*/ 1370 memset(op, 0, offsetof(TCGOp, args)); 1371 op->opc = INDEX_op_call; 1372 op->prev = i - 1; 1373 op->next = i + 1; 1374 1375 pi = 0; 1376 if (ret != NULL) { 1377 #if defined(__sparc__) && !defined(__arch64__) \ 1378 && !defined(CONFIG_TCG_INTERPRETER) 1379 if (orig_sizemask & 1) { 1380 /* The 32-bit ABI is going to return the 64-bit value in 1381 the %o0/%o1 register pair. Prepare for this by using 1382 two return temporaries, and reassemble below. */ 1383 retl = tcg_temp_new_i64(); 1384 reth = tcg_temp_new_i64(); 1385 op->args[pi++] = tcgv_i64_arg(reth); 1386 op->args[pi++] = tcgv_i64_arg(retl); 1387 nb_rets = 2; 1388 } else { 1389 op->args[pi++] = temp_arg(ret); 1390 nb_rets = 1; 1391 } 1392 #else 1393 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1394 #ifdef HOST_WORDS_BIGENDIAN 1395 op->args[pi++] = temp_arg(ret + 1); 1396 op->args[pi++] = temp_arg(ret); 1397 #else 1398 op->args[pi++] = temp_arg(ret); 1399 op->args[pi++] = temp_arg(ret + 1); 1400 #endif 1401 nb_rets = 2; 1402 } else { 1403 op->args[pi++] = temp_arg(ret); 1404 nb_rets = 1; 1405 } 1406 #endif 1407 } else { 1408 nb_rets = 0; 1409 } 1410 op->callo = nb_rets; 1411 1412 real_args = 0; 1413 for (i = 0; i < nargs; i++) { 1414 int is_64bit = sizemask & (1 << (i+1)*2); 1415 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1416 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1417 /* some targets want aligned 64 bit args */ 1418 if (real_args & 1) { 1419 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1420 real_args++; 1421 } 1422 #endif 1423 /* If stack grows up, then we will be placing successive 1424 arguments at lower addresses, which means we need to 1425 reverse the order compared to how we would normally 1426 treat either big or little-endian. For those arguments 1427 that will wind up in registers, this still works for 1428 HPPA (the only current STACK_GROWSUP target) since the 1429 argument registers are *also* allocated in decreasing 1430 order. If another such target is added, this logic may 1431 have to get more complicated to differentiate between 1432 stack arguments and register arguments. */ 1433 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1434 op->args[pi++] = temp_arg(args[i] + 1); 1435 op->args[pi++] = temp_arg(args[i]); 1436 #else 1437 op->args[pi++] = temp_arg(args[i]); 1438 op->args[pi++] = temp_arg(args[i] + 1); 1439 #endif 1440 real_args += 2; 1441 continue; 1442 } 1443 1444 op->args[pi++] = temp_arg(args[i]); 1445 real_args++; 1446 } 1447 op->args[pi++] = (uintptr_t)func; 1448 op->args[pi++] = flags; 1449 op->calli = real_args; 1450 1451 /* Make sure the fields didn't overflow. */ 1452 tcg_debug_assert(op->calli == real_args); 1453 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1454 1455 #if defined(__sparc__) && !defined(__arch64__) \ 1456 && !defined(CONFIG_TCG_INTERPRETER) 1457 /* Free all of the parts we allocated above. */ 1458 for (i = real_args = 0; i < orig_nargs; ++i) { 1459 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1460 if (is_64bit) { 1461 tcg_temp_free_internal(args[real_args++]); 1462 tcg_temp_free_internal(args[real_args++]); 1463 } else { 1464 real_args++; 1465 } 1466 } 1467 if (orig_sizemask & 1) { 1468 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1469 Note that describing these as TCGv_i64 eliminates an unnecessary 1470 zero-extension that tcg_gen_concat_i32_i64 would create. 
*/ 1471 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1472 tcg_temp_free_i64(retl); 1473 tcg_temp_free_i64(reth); 1474 } 1475 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1476 for (i = 0; i < nargs; ++i) { 1477 int is_64bit = sizemask & (1 << (i+1)*2); 1478 if (!is_64bit) { 1479 tcg_temp_free_internal(args[i]); 1480 } 1481 } 1482 #endif /* TCG_TARGET_EXTEND_ARGS */ 1483 } 1484 1485 static void tcg_reg_alloc_start(TCGContext *s) 1486 { 1487 int i, n; 1488 TCGTemp *ts; 1489 1490 for (i = 0, n = s->nb_globals; i < n; i++) { 1491 ts = &s->temps[i]; 1492 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM); 1493 } 1494 for (n = s->nb_temps; i < n; i++) { 1495 ts = &s->temps[i]; 1496 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1497 ts->mem_allocated = 0; 1498 ts->fixed_reg = 0; 1499 } 1500 1501 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 1502 } 1503 1504 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 1505 TCGTemp *ts) 1506 { 1507 int idx = temp_idx(ts); 1508 1509 if (ts->temp_global) { 1510 pstrcpy(buf, buf_size, ts->name); 1511 } else if (ts->temp_local) { 1512 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 1513 } else { 1514 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 1515 } 1516 return buf; 1517 } 1518 1519 static char *tcg_get_arg_str(TCGContext *s, char *buf, 1520 int buf_size, TCGArg arg) 1521 { 1522 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 1523 } 1524 1525 /* Find helper name. */ 1526 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 1527 { 1528 const char *ret = NULL; 1529 if (helper_table) { 1530 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 1531 if (info) { 1532 ret = info->name; 1533 } 1534 } 1535 return ret; 1536 } 1537 1538 static const char * const cond_name[] = 1539 { 1540 [TCG_COND_NEVER] = "never", 1541 [TCG_COND_ALWAYS] = "always", 1542 [TCG_COND_EQ] = "eq", 1543 [TCG_COND_NE] = "ne", 1544 [TCG_COND_LT] = "lt", 1545 [TCG_COND_GE] = "ge", 1546 [TCG_COND_LE] = "le", 1547 [TCG_COND_GT] = "gt", 1548 [TCG_COND_LTU] = "ltu", 1549 [TCG_COND_GEU] = "geu", 1550 [TCG_COND_LEU] = "leu", 1551 [TCG_COND_GTU] = "gtu" 1552 }; 1553 1554 static const char * const ldst_name[] = 1555 { 1556 [MO_UB] = "ub", 1557 [MO_SB] = "sb", 1558 [MO_LEUW] = "leuw", 1559 [MO_LESW] = "lesw", 1560 [MO_LEUL] = "leul", 1561 [MO_LESL] = "lesl", 1562 [MO_LEQ] = "leq", 1563 [MO_BEUW] = "beuw", 1564 [MO_BESW] = "besw", 1565 [MO_BEUL] = "beul", 1566 [MO_BESL] = "besl", 1567 [MO_BEQ] = "beq", 1568 }; 1569 1570 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1571 #ifdef ALIGNED_ONLY 1572 [MO_UNALN >> MO_ASHIFT] = "un+", 1573 [MO_ALIGN >> MO_ASHIFT] = "", 1574 #else 1575 [MO_UNALN >> MO_ASHIFT] = "", 1576 [MO_ALIGN >> MO_ASHIFT] = "al+", 1577 #endif 1578 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1579 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1580 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1581 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1582 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1583 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1584 }; 1585 1586 void tcg_dump_ops(TCGContext *s) 1587 { 1588 char buf[128]; 1589 TCGOp *op; 1590 int oi; 1591 1592 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1593 int i, k, nb_oargs, nb_iargs, nb_cargs; 1594 const TCGOpDef *def; 1595 TCGOpcode c; 1596 int col = 0; 1597 1598 op = &s->gen_op_buf[oi]; 1599 c = op->opc; 1600 def = &tcg_op_defs[c]; 1601 1602 if (c == INDEX_op_insn_start) { 1603 col += qemu_log("%s 
----", oi != s->gen_op_buf[0].next ? "\n" : ""); 1604 1605 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1606 target_ulong a; 1607 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1608 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 1609 #else 1610 a = op->args[i]; 1611 #endif 1612 col += qemu_log(" " TARGET_FMT_lx, a); 1613 } 1614 } else if (c == INDEX_op_call) { 1615 /* variable number of arguments */ 1616 nb_oargs = op->callo; 1617 nb_iargs = op->calli; 1618 nb_cargs = def->nb_cargs; 1619 1620 /* function name, flags, out args */ 1621 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1622 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 1623 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 1624 for (i = 0; i < nb_oargs; i++) { 1625 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 1626 op->args[i])); 1627 } 1628 for (i = 0; i < nb_iargs; i++) { 1629 TCGArg arg = op->args[nb_oargs + i]; 1630 const char *t = "<dummy>"; 1631 if (arg != TCG_CALL_DUMMY_ARG) { 1632 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 1633 } 1634 col += qemu_log(",%s", t); 1635 } 1636 } else { 1637 col += qemu_log(" %s ", def->name); 1638 1639 nb_oargs = def->nb_oargs; 1640 nb_iargs = def->nb_iargs; 1641 nb_cargs = def->nb_cargs; 1642 1643 k = 0; 1644 for (i = 0; i < nb_oargs; i++) { 1645 if (k != 0) { 1646 col += qemu_log(","); 1647 } 1648 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1649 op->args[k++])); 1650 } 1651 for (i = 0; i < nb_iargs; i++) { 1652 if (k != 0) { 1653 col += qemu_log(","); 1654 } 1655 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1656 op->args[k++])); 1657 } 1658 switch (c) { 1659 case INDEX_op_brcond_i32: 1660 case INDEX_op_setcond_i32: 1661 case INDEX_op_movcond_i32: 1662 case INDEX_op_brcond2_i32: 1663 case INDEX_op_setcond2_i32: 1664 case INDEX_op_brcond_i64: 1665 case INDEX_op_setcond_i64: 1666 case INDEX_op_movcond_i64: 1667 if (op->args[k] < ARRAY_SIZE(cond_name) 1668 && cond_name[op->args[k]]) { 1669 col += qemu_log(",%s", cond_name[op->args[k++]]); 1670 } else { 1671 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 1672 } 1673 i = 1; 1674 break; 1675 case INDEX_op_qemu_ld_i32: 1676 case INDEX_op_qemu_st_i32: 1677 case INDEX_op_qemu_ld_i64: 1678 case INDEX_op_qemu_st_i64: 1679 { 1680 TCGMemOpIdx oi = op->args[k++]; 1681 TCGMemOp op = get_memop(oi); 1682 unsigned ix = get_mmuidx(oi); 1683 1684 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1685 col += qemu_log(",$0x%x,%u", op, ix); 1686 } else { 1687 const char *s_al, *s_op; 1688 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1689 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1690 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1691 } 1692 i = 1; 1693 } 1694 break; 1695 default: 1696 i = 0; 1697 break; 1698 } 1699 switch (c) { 1700 case INDEX_op_set_label: 1701 case INDEX_op_br: 1702 case INDEX_op_brcond_i32: 1703 case INDEX_op_brcond_i64: 1704 case INDEX_op_brcond2_i32: 1705 col += qemu_log("%s$L%d", k ? "," : "", 1706 arg_label(op->args[k])->id); 1707 i++, k++; 1708 break; 1709 default: 1710 break; 1711 } 1712 for (; i < nb_cargs; i++, k++) { 1713 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 1714 } 1715 } 1716 if (op->life) { 1717 unsigned life = op->life; 1718 1719 for (; col < 48; ++col) { 1720 putc(' ', qemu_logfile); 1721 } 1722 1723 if (life & (SYNC_ARG * 3)) { 1724 qemu_log(" sync:"); 1725 for (i = 0; i < 2; ++i) { 1726 if (life & (SYNC_ARG << i)) { 1727 qemu_log(" %d", i); 1728 } 1729 } 1730 } 1731 life /= DEAD_ARG; 1732 if (life) { 1733 qemu_log(" dead:"); 1734 for (i = 0; life; ++i, life >>= 1) { 1735 if (life & 1) { 1736 qemu_log(" %d", i); 1737 } 1738 } 1739 } 1740 } 1741 qemu_log("\n"); 1742 } 1743 } 1744 1745 /* we give more priority to constraints with less registers */ 1746 static int get_constraint_priority(const TCGOpDef *def, int k) 1747 { 1748 const TCGArgConstraint *arg_ct; 1749 1750 int i, n; 1751 arg_ct = &def->args_ct[k]; 1752 if (arg_ct->ct & TCG_CT_ALIAS) { 1753 /* an alias is equivalent to a single register */ 1754 n = 1; 1755 } else { 1756 if (!(arg_ct->ct & TCG_CT_REG)) 1757 return 0; 1758 n = 0; 1759 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1760 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1761 n++; 1762 } 1763 } 1764 return TCG_TARGET_NB_REGS - n + 1; 1765 } 1766 1767 /* sort from highest priority to lowest */ 1768 static void sort_constraints(TCGOpDef *def, int start, int n) 1769 { 1770 int i, j, p1, p2, tmp; 1771 1772 for(i = 0; i < n; i++) 1773 def->sorted_args[start + i] = start + i; 1774 if (n <= 1) 1775 return; 1776 for(i = 0; i < n - 1; i++) { 1777 for(j = i + 1; j < n; j++) { 1778 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1779 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1780 if (p1 < p2) { 1781 tmp = def->sorted_args[start + i]; 1782 def->sorted_args[start + i] = def->sorted_args[start + j]; 1783 def->sorted_args[start + j] = tmp; 1784 } 1785 } 1786 } 1787 } 1788 1789 static void process_op_defs(TCGContext *s) 1790 { 1791 TCGOpcode op; 1792 1793 for (op = 0; op < NB_OPS; op++) { 1794 TCGOpDef *def = &tcg_op_defs[op]; 1795 const TCGTargetOpDef *tdefs; 1796 TCGType type; 1797 int i, nb_args; 1798 1799 if (def->flags & TCG_OPF_NOT_PRESENT) { 1800 continue; 1801 } 1802 1803 nb_args = def->nb_iargs + def->nb_oargs; 1804 if (nb_args == 0) { 1805 continue; 1806 } 1807 1808 tdefs = tcg_target_op_def(op); 1809 /* Missing TCGTargetOpDef entry. */ 1810 tcg_debug_assert(tdefs != NULL); 1811 1812 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1813 for (i = 0; i < nb_args; i++) { 1814 const char *ct_str = tdefs->args_ct_str[i]; 1815 /* Incomplete TCGTargetOpDef entry. */ 1816 tcg_debug_assert(ct_str != NULL); 1817 1818 def->args_ct[i].u.regs = 0; 1819 def->args_ct[i].ct = 0; 1820 while (*ct_str != '\0') { 1821 switch(*ct_str) { 1822 case '0' ... '9': 1823 { 1824 int oarg = *ct_str - '0'; 1825 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1826 tcg_debug_assert(oarg < def->nb_oargs); 1827 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1828 /* TCG_CT_ALIAS is for the output arguments. 1829 The input is tagged with TCG_CT_IALIAS. 
*/ 1830 def->args_ct[i] = def->args_ct[oarg]; 1831 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1832 def->args_ct[oarg].alias_index = i; 1833 def->args_ct[i].ct |= TCG_CT_IALIAS; 1834 def->args_ct[i].alias_index = oarg; 1835 } 1836 ct_str++; 1837 break; 1838 case '&': 1839 def->args_ct[i].ct |= TCG_CT_NEWREG; 1840 ct_str++; 1841 break; 1842 case 'i': 1843 def->args_ct[i].ct |= TCG_CT_CONST; 1844 ct_str++; 1845 break; 1846 default: 1847 ct_str = target_parse_constraint(&def->args_ct[i], 1848 ct_str, type); 1849 /* Typo in TCGTargetOpDef constraint. */ 1850 tcg_debug_assert(ct_str != NULL); 1851 } 1852 } 1853 } 1854 1855 /* TCGTargetOpDef entry with too much information? */ 1856 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1857 1858 /* sort the constraints (XXX: this is just an heuristic) */ 1859 sort_constraints(def, 0, def->nb_oargs); 1860 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1861 } 1862 } 1863 1864 void tcg_op_remove(TCGContext *s, TCGOp *op) 1865 { 1866 int next = op->next; 1867 int prev = op->prev; 1868 1869 /* We should never attempt to remove the list terminator. */ 1870 tcg_debug_assert(op != &s->gen_op_buf[0]); 1871 1872 s->gen_op_buf[next].prev = prev; 1873 s->gen_op_buf[prev].next = next; 1874 1875 memset(op, 0, sizeof(*op)); 1876 1877 #ifdef CONFIG_PROFILER 1878 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 1879 #endif 1880 } 1881 1882 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1883 TCGOpcode opc, int nargs) 1884 { 1885 int oi = s->gen_next_op_idx; 1886 int prev = old_op->prev; 1887 int next = old_op - s->gen_op_buf; 1888 TCGOp *new_op; 1889 1890 tcg_debug_assert(oi < OPC_BUF_SIZE); 1891 s->gen_next_op_idx = oi + 1; 1892 1893 new_op = &s->gen_op_buf[oi]; 1894 *new_op = (TCGOp){ 1895 .opc = opc, 1896 .prev = prev, 1897 .next = next 1898 }; 1899 s->gen_op_buf[prev].next = oi; 1900 old_op->prev = oi; 1901 1902 return new_op; 1903 } 1904 1905 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1906 TCGOpcode opc, int nargs) 1907 { 1908 int oi = s->gen_next_op_idx; 1909 int prev = old_op - s->gen_op_buf; 1910 int next = old_op->next; 1911 TCGOp *new_op; 1912 1913 tcg_debug_assert(oi < OPC_BUF_SIZE); 1914 s->gen_next_op_idx = oi + 1; 1915 1916 new_op = &s->gen_op_buf[oi]; 1917 *new_op = (TCGOp){ 1918 .opc = opc, 1919 .prev = prev, 1920 .next = next 1921 }; 1922 s->gen_op_buf[next].prev = oi; 1923 old_op->next = oi; 1924 1925 return new_op; 1926 } 1927 1928 #define TS_DEAD 1 1929 #define TS_MEM 2 1930 1931 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1932 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1933 1934 /* liveness analysis: end of function: all temps are dead, and globals 1935 should be in memory. */ 1936 static void tcg_la_func_end(TCGContext *s) 1937 { 1938 int ng = s->nb_globals; 1939 int nt = s->nb_temps; 1940 int i; 1941 1942 for (i = 0; i < ng; ++i) { 1943 s->temps[i].state = TS_DEAD | TS_MEM; 1944 } 1945 for (i = ng; i < nt; ++i) { 1946 s->temps[i].state = TS_DEAD; 1947 } 1948 } 1949 1950 /* liveness analysis: end of basic block: all temps are dead, globals 1951 and local temps should be in memory. */ 1952 static void tcg_la_bb_end(TCGContext *s) 1953 { 1954 int ng = s->nb_globals; 1955 int nt = s->nb_temps; 1956 int i; 1957 1958 for (i = 0; i < ng; ++i) { 1959 s->temps[i].state = TS_DEAD | TS_MEM; 1960 } 1961 for (i = ng; i < nt; ++i) { 1962 s->temps[i].state = (s->temps[i].temp_local 1963 ? 
TS_DEAD | TS_MEM 1964 : TS_DEAD); 1965 } 1966 } 1967 1968 /* Liveness analysis : update the opc_arg_life array to tell if a 1969 given input arguments is dead. Instructions updating dead 1970 temporaries are removed. */ 1971 static void liveness_pass_1(TCGContext *s) 1972 { 1973 int nb_globals = s->nb_globals; 1974 int oi, oi_prev; 1975 1976 tcg_la_func_end(s); 1977 1978 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 1979 int i, nb_iargs, nb_oargs; 1980 TCGOpcode opc_new, opc_new2; 1981 bool have_opc_new2; 1982 TCGLifeData arg_life = 0; 1983 TCGTemp *arg_ts; 1984 1985 TCGOp * const op = &s->gen_op_buf[oi]; 1986 TCGOpcode opc = op->opc; 1987 const TCGOpDef *def = &tcg_op_defs[opc]; 1988 1989 oi_prev = op->prev; 1990 1991 switch (opc) { 1992 case INDEX_op_call: 1993 { 1994 int call_flags; 1995 1996 nb_oargs = op->callo; 1997 nb_iargs = op->calli; 1998 call_flags = op->args[nb_oargs + nb_iargs + 1]; 1999 2000 /* pure functions can be removed if their result is unused */ 2001 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2002 for (i = 0; i < nb_oargs; i++) { 2003 arg_ts = arg_temp(op->args[i]); 2004 if (arg_ts->state != TS_DEAD) { 2005 goto do_not_remove_call; 2006 } 2007 } 2008 goto do_remove; 2009 } else { 2010 do_not_remove_call: 2011 2012 /* output args are dead */ 2013 for (i = 0; i < nb_oargs; i++) { 2014 arg_ts = arg_temp(op->args[i]); 2015 if (arg_ts->state & TS_DEAD) { 2016 arg_life |= DEAD_ARG << i; 2017 } 2018 if (arg_ts->state & TS_MEM) { 2019 arg_life |= SYNC_ARG << i; 2020 } 2021 arg_ts->state = TS_DEAD; 2022 } 2023 2024 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2025 TCG_CALL_NO_READ_GLOBALS))) { 2026 /* globals should go back to memory */ 2027 for (i = 0; i < nb_globals; i++) { 2028 s->temps[i].state = TS_DEAD | TS_MEM; 2029 } 2030 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2031 /* globals should be synced to memory */ 2032 for (i = 0; i < nb_globals; i++) { 2033 s->temps[i].state |= TS_MEM; 2034 } 2035 } 2036 2037 /* record arguments that die in this helper */ 2038 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2039 arg_ts = arg_temp(op->args[i]); 2040 if (arg_ts && arg_ts->state & TS_DEAD) { 2041 arg_life |= DEAD_ARG << i; 2042 } 2043 } 2044 /* input arguments are live for preceding opcodes */ 2045 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2046 arg_ts = arg_temp(op->args[i]); 2047 if (arg_ts) { 2048 arg_ts->state &= ~TS_DEAD; 2049 } 2050 } 2051 } 2052 } 2053 break; 2054 case INDEX_op_insn_start: 2055 break; 2056 case INDEX_op_discard: 2057 /* mark the temporary as dead */ 2058 arg_temp(op->args[0])->state = TS_DEAD; 2059 break; 2060 2061 case INDEX_op_add2_i32: 2062 opc_new = INDEX_op_add_i32; 2063 goto do_addsub2; 2064 case INDEX_op_sub2_i32: 2065 opc_new = INDEX_op_sub_i32; 2066 goto do_addsub2; 2067 case INDEX_op_add2_i64: 2068 opc_new = INDEX_op_add_i64; 2069 goto do_addsub2; 2070 case INDEX_op_sub2_i64: 2071 opc_new = INDEX_op_sub_i64; 2072 do_addsub2: 2073 nb_iargs = 4; 2074 nb_oargs = 2; 2075 /* Test if the high part of the operation is dead, but not 2076 the low part. The result can be optimized to a simple 2077 add or sub. This happens often for x86_64 guest when the 2078 cpu mode is set to 32 bit. */ 2079 if (arg_temp(op->args[1])->state == TS_DEAD) { 2080 if (arg_temp(op->args[0])->state == TS_DEAD) { 2081 goto do_remove; 2082 } 2083 /* Replace the opcode and adjust the args in place, 2084 leaving 3 unused args at the end. 
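               For example, an add2_i32 rl, rh, al, ah, bl, bh whose high
               output rh is dead (but rl is not) is rewritten here into
               add_i32 rl, al, bl.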
*/ 2085 op->opc = opc = opc_new; 2086 op->args[1] = op->args[2]; 2087 op->args[2] = op->args[4]; 2088 /* Fall through and mark the single-word operation live. */ 2089 nb_iargs = 2; 2090 nb_oargs = 1; 2091 } 2092 goto do_not_remove; 2093 2094 case INDEX_op_mulu2_i32: 2095 opc_new = INDEX_op_mul_i32; 2096 opc_new2 = INDEX_op_muluh_i32; 2097 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2098 goto do_mul2; 2099 case INDEX_op_muls2_i32: 2100 opc_new = INDEX_op_mul_i32; 2101 opc_new2 = INDEX_op_mulsh_i32; 2102 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2103 goto do_mul2; 2104 case INDEX_op_mulu2_i64: 2105 opc_new = INDEX_op_mul_i64; 2106 opc_new2 = INDEX_op_muluh_i64; 2107 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2108 goto do_mul2; 2109 case INDEX_op_muls2_i64: 2110 opc_new = INDEX_op_mul_i64; 2111 opc_new2 = INDEX_op_mulsh_i64; 2112 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2113 goto do_mul2; 2114 do_mul2: 2115 nb_iargs = 2; 2116 nb_oargs = 2; 2117 if (arg_temp(op->args[1])->state == TS_DEAD) { 2118 if (arg_temp(op->args[0])->state == TS_DEAD) { 2119 /* Both parts of the operation are dead. */ 2120 goto do_remove; 2121 } 2122 /* The high part of the operation is dead; generate the low. */ 2123 op->opc = opc = opc_new; 2124 op->args[1] = op->args[2]; 2125 op->args[2] = op->args[3]; 2126 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2127 /* The low part of the operation is dead; generate the high. */ 2128 op->opc = opc = opc_new2; 2129 op->args[0] = op->args[1]; 2130 op->args[1] = op->args[2]; 2131 op->args[2] = op->args[3]; 2132 } else { 2133 goto do_not_remove; 2134 } 2135 /* Mark the single-word operation live. */ 2136 nb_oargs = 1; 2137 goto do_not_remove; 2138 2139 default: 2140 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2141 nb_iargs = def->nb_iargs; 2142 nb_oargs = def->nb_oargs; 2143 2144 /* Test if the operation can be removed because all 2145 its outputs are dead. We assume that nb_oargs == 0 2146 implies side effects */ 2147 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2148 for (i = 0; i < nb_oargs; i++) { 2149 if (arg_temp(op->args[i])->state != TS_DEAD) { 2150 goto do_not_remove; 2151 } 2152 } 2153 do_remove: 2154 tcg_op_remove(s, op); 2155 } else { 2156 do_not_remove: 2157 /* output args are dead */ 2158 for (i = 0; i < nb_oargs; i++) { 2159 arg_ts = arg_temp(op->args[i]); 2160 if (arg_ts->state & TS_DEAD) { 2161 arg_life |= DEAD_ARG << i; 2162 } 2163 if (arg_ts->state & TS_MEM) { 2164 arg_life |= SYNC_ARG << i; 2165 } 2166 arg_ts->state = TS_DEAD; 2167 } 2168 2169 /* if end of basic block, update */ 2170 if (def->flags & TCG_OPF_BB_END) { 2171 tcg_la_bb_end(s); 2172 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2173 /* globals should be synced to memory */ 2174 for (i = 0; i < nb_globals; i++) { 2175 s->temps[i].state |= TS_MEM; 2176 } 2177 } 2178 2179 /* record arguments that die in this opcode */ 2180 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2181 arg_ts = arg_temp(op->args[i]); 2182 if (arg_ts->state & TS_DEAD) { 2183 arg_life |= DEAD_ARG << i; 2184 } 2185 } 2186 /* input arguments are live for preceding opcodes */ 2187 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2188 arg_temp(op->args[i])->state &= ~TS_DEAD; 2189 } 2190 } 2191 break; 2192 } 2193 op->life = arg_life; 2194 } 2195 } 2196 2197 /* Liveness analysis: Convert indirect regs to direct temporaries. 
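   Each indirect global is given a "direct" shadow temp.  Uses of the
   global are rewritten to the shadow temp: an explicit ld from
   mem_base/mem_offset is inserted before an input whose shadow has not
   been loaded yet, and an explicit st is inserted after an output's
   last write (NEED_SYNC_ARG), so that the register allocator below
   only ever sees direct temps.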
*/ 2198 static bool liveness_pass_2(TCGContext *s) 2199 { 2200 int nb_globals = s->nb_globals; 2201 int nb_temps, i, oi, oi_next; 2202 bool changes = false; 2203 2204 /* Create a temporary for each indirect global. */ 2205 for (i = 0; i < nb_globals; ++i) { 2206 TCGTemp *its = &s->temps[i]; 2207 if (its->indirect_reg) { 2208 TCGTemp *dts = tcg_temp_alloc(s); 2209 dts->type = its->type; 2210 dts->base_type = its->base_type; 2211 its->state_ptr = dts; 2212 } else { 2213 its->state_ptr = NULL; 2214 } 2215 /* All globals begin dead. */ 2216 its->state = TS_DEAD; 2217 } 2218 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2219 TCGTemp *its = &s->temps[i]; 2220 its->state_ptr = NULL; 2221 its->state = TS_DEAD; 2222 } 2223 2224 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2225 TCGOp *op = &s->gen_op_buf[oi]; 2226 TCGOpcode opc = op->opc; 2227 const TCGOpDef *def = &tcg_op_defs[opc]; 2228 TCGLifeData arg_life = op->life; 2229 int nb_iargs, nb_oargs, call_flags; 2230 TCGTemp *arg_ts, *dir_ts; 2231 2232 oi_next = op->next; 2233 2234 if (opc == INDEX_op_call) { 2235 nb_oargs = op->callo; 2236 nb_iargs = op->calli; 2237 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2238 } else { 2239 nb_iargs = def->nb_iargs; 2240 nb_oargs = def->nb_oargs; 2241 2242 /* Set flags similar to how calls require. */ 2243 if (def->flags & TCG_OPF_BB_END) { 2244 /* Like writing globals: save_globals */ 2245 call_flags = 0; 2246 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2247 /* Like reading globals: sync_globals */ 2248 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2249 } else { 2250 /* No effect on globals. */ 2251 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2252 TCG_CALL_NO_WRITE_GLOBALS); 2253 } 2254 } 2255 2256 /* Make sure that input arguments are available. */ 2257 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2258 arg_ts = arg_temp(op->args[i]); 2259 if (arg_ts) { 2260 dir_ts = arg_ts->state_ptr; 2261 if (dir_ts && arg_ts->state == TS_DEAD) { 2262 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2263 ? INDEX_op_ld_i32 2264 : INDEX_op_ld_i64); 2265 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 2266 2267 lop->args[0] = temp_arg(dir_ts); 2268 lop->args[1] = temp_arg(arg_ts->mem_base); 2269 lop->args[2] = arg_ts->mem_offset; 2270 2271 /* Loaded, but synced with memory. */ 2272 arg_ts->state = TS_MEM; 2273 } 2274 } 2275 } 2276 2277 /* Perform input replacement, and mark inputs that became dead. 2278 No action is required except keeping temp_state up to date 2279 so that we reload when needed. */ 2280 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2281 arg_ts = arg_temp(op->args[i]); 2282 if (arg_ts) { 2283 dir_ts = arg_ts->state_ptr; 2284 if (dir_ts) { 2285 op->args[i] = temp_arg(dir_ts); 2286 changes = true; 2287 if (IS_DEAD_ARG(i)) { 2288 arg_ts->state = TS_DEAD; 2289 } 2290 } 2291 } 2292 } 2293 2294 /* Liveness analysis should ensure that the following are 2295 all correct, for call sites and basic block end points. */ 2296 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2297 /* Nothing to do */ 2298 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2299 for (i = 0; i < nb_globals; ++i) { 2300 /* Liveness should see that globals are synced back, 2301 that is, either TS_DEAD or TS_MEM. */ 2302 arg_ts = &s->temps[i]; 2303 tcg_debug_assert(arg_ts->state_ptr == 0 2304 || arg_ts->state != 0); 2305 } 2306 } else { 2307 for (i = 0; i < nb_globals; ++i) { 2308 /* Liveness should see that globals are saved back, 2309 that is, TS_DEAD, waiting to be reloaded. 
*/ 2310 arg_ts = &s->temps[i]; 2311 tcg_debug_assert(arg_ts->state_ptr == 0 2312 || arg_ts->state == TS_DEAD); 2313 } 2314 } 2315 2316 /* Outputs become available. */ 2317 for (i = 0; i < nb_oargs; i++) { 2318 arg_ts = arg_temp(op->args[i]); 2319 dir_ts = arg_ts->state_ptr; 2320 if (!dir_ts) { 2321 continue; 2322 } 2323 op->args[i] = temp_arg(dir_ts); 2324 changes = true; 2325 2326 /* The output is now live and modified. */ 2327 arg_ts->state = 0; 2328 2329 /* Sync outputs upon their last write. */ 2330 if (NEED_SYNC_ARG(i)) { 2331 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2332 ? INDEX_op_st_i32 2333 : INDEX_op_st_i64); 2334 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 2335 2336 sop->args[0] = temp_arg(dir_ts); 2337 sop->args[1] = temp_arg(arg_ts->mem_base); 2338 sop->args[2] = arg_ts->mem_offset; 2339 2340 arg_ts->state = TS_MEM; 2341 } 2342 /* Drop outputs that are dead. */ 2343 if (IS_DEAD_ARG(i)) { 2344 arg_ts->state = TS_DEAD; 2345 } 2346 } 2347 } 2348 2349 return changes; 2350 } 2351 2352 #ifdef CONFIG_DEBUG_TCG 2353 static void dump_regs(TCGContext *s) 2354 { 2355 TCGTemp *ts; 2356 int i; 2357 char buf[64]; 2358 2359 for(i = 0; i < s->nb_temps; i++) { 2360 ts = &s->temps[i]; 2361 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2362 switch(ts->val_type) { 2363 case TEMP_VAL_REG: 2364 printf("%s", tcg_target_reg_names[ts->reg]); 2365 break; 2366 case TEMP_VAL_MEM: 2367 printf("%d(%s)", (int)ts->mem_offset, 2368 tcg_target_reg_names[ts->mem_base->reg]); 2369 break; 2370 case TEMP_VAL_CONST: 2371 printf("$0x%" TCG_PRIlx, ts->val); 2372 break; 2373 case TEMP_VAL_DEAD: 2374 printf("D"); 2375 break; 2376 default: 2377 printf("???"); 2378 break; 2379 } 2380 printf("\n"); 2381 } 2382 2383 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2384 if (s->reg_to_temp[i] != NULL) { 2385 printf("%s: %s\n", 2386 tcg_target_reg_names[i], 2387 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2388 } 2389 } 2390 } 2391 2392 static void check_regs(TCGContext *s) 2393 { 2394 int reg; 2395 int k; 2396 TCGTemp *ts; 2397 char buf[64]; 2398 2399 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2400 ts = s->reg_to_temp[reg]; 2401 if (ts != NULL) { 2402 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2403 printf("Inconsistency for register %s:\n", 2404 tcg_target_reg_names[reg]); 2405 goto fail; 2406 } 2407 } 2408 } 2409 for (k = 0; k < s->nb_temps; k++) { 2410 ts = &s->temps[k]; 2411 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 2412 && s->reg_to_temp[ts->reg] != ts) { 2413 printf("Inconsistency for temp %s:\n", 2414 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2415 fail: 2416 printf("reg state:\n"); 2417 dump_regs(s); 2418 tcg_abort(); 2419 } 2420 } 2421 } 2422 #endif 2423 2424 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 2425 { 2426 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 2427 /* Sparc64 stack is accessed with offset of 2047 */ 2428 s->current_frame_offset = (s->current_frame_offset + 2429 (tcg_target_long)sizeof(tcg_target_long) - 1) & 2430 ~(sizeof(tcg_target_long) - 1); 2431 #endif 2432 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 2433 s->frame_end) { 2434 tcg_abort(); 2435 } 2436 ts->mem_offset = s->current_frame_offset; 2437 ts->mem_base = s->frame_temp; 2438 ts->mem_allocated = 1; 2439 s->current_frame_offset += sizeof(tcg_target_long); 2440 } 2441 2442 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 2443 2444 /* Mark a temporary as free or dead. 
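   (Freeing keeps the in-memory copy valid, so the value drops back to
   TEMP_VAL_MEM; killing an ordinary temp discards it as TEMP_VAL_DEAD,
   while locals and globals keep their memory backing.)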
If 'free_or_dead' is negative,
2445 mark it free; otherwise mark it dead. */
2446 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2447 {
2448 if (ts->fixed_reg) {
2449 return;
2450 }
2451 if (ts->val_type == TEMP_VAL_REG) {
2452 s->reg_to_temp[ts->reg] = NULL;
2453 }
2454 ts->val_type = (free_or_dead < 0
2455 || ts->temp_local
2456 || ts->temp_global
2457 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2458 }
2459
2460 /* Mark a temporary as dead. */
2461 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2462 {
2463 temp_free_or_dead(s, ts, 1);
2464 }
2465
2466 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2467 register needs to be allocated to store a constant. If 'free_or_dead'
2468 is non-zero, subsequently release the temporary; if it is positive, the
2469 temp is dead; if it is negative, the temp is free. */
2470 static void temp_sync(TCGContext *s, TCGTemp *ts,
2471 TCGRegSet allocated_regs, int free_or_dead)
2472 {
2473 if (ts->fixed_reg) {
2474 return;
2475 }
2476 if (!ts->mem_coherent) {
2477 if (!ts->mem_allocated) {
2478 temp_allocate_frame(s, ts);
2479 }
2480 switch (ts->val_type) {
2481 case TEMP_VAL_CONST:
2482 /* If we're going to free the temp immediately, then we won't
2483 require it later in a register, so attempt to store the
2484 constant to memory directly. */
2485 if (free_or_dead
2486 && tcg_out_sti(s, ts->type, ts->val,
2487 ts->mem_base->reg, ts->mem_offset)) {
2488 break;
2489 }
2490 temp_load(s, ts, tcg_target_available_regs[ts->type],
2491 allocated_regs);
2492 /* fallthrough */
2493
2494 case TEMP_VAL_REG:
2495 tcg_out_st(s, ts->type, ts->reg,
2496 ts->mem_base->reg, ts->mem_offset);
2497 break;
2498
2499 case TEMP_VAL_MEM:
2500 break;
2501
2502 case TEMP_VAL_DEAD:
2503 default:
2504 tcg_abort();
2505 }
2506 ts->mem_coherent = 1;
2507 }
2508 if (free_or_dead) {
2509 temp_free_or_dead(s, ts, free_or_dead);
2510 }
2511 }
2512
2513 /* free register 'reg' by spilling the corresponding temporary if necessary */
2514 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2515 {
2516 TCGTemp *ts = s->reg_to_temp[reg];
2517 if (ts != NULL) {
2518 temp_sync(s, ts, allocated_regs, -1);
2519 }
2520 }
2521
2522 /* Allocate a register belonging to DESIRED_REGS & ~ALLOCATED_REGS */
2523 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2524 TCGRegSet allocated_regs, bool rev)
2525 {
2526 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2527 const int *order;
2528 TCGReg reg;
2529 TCGRegSet reg_ct;
2530
2531 reg_ct = desired_regs & ~allocated_regs;
2532 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2533
2534 /* first try free registers */
2535 for(i = 0; i < n; i++) {
2536 reg = order[i];
2537 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2538 return reg;
2539 }
2540
2541 /* XXX: do better spill choice */
2542 for(i = 0; i < n; i++) {
2543 reg = order[i];
2544 if (tcg_regset_test_reg(reg_ct, reg)) {
2545 tcg_reg_free(s, reg, allocated_regs);
2546 return reg;
2547 }
2548 }
2549
2550 tcg_abort();
2551 }
2552
2553 /* Make sure the temporary is in a register. If needed, allocate the register
2554 from DESIRED while avoiding ALLOCATED.
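   A constant is materialized with tcg_out_movi and a spilled value is
   reloaded with tcg_out_ld; either way the chosen register is recorded
   in reg_to_temp so that later uses find it.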
*/
2555 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2556 TCGRegSet allocated_regs)
2557 {
2558 TCGReg reg;
2559
2560 switch (ts->val_type) {
2561 case TEMP_VAL_REG:
2562 return;
2563 case TEMP_VAL_CONST:
2564 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2565 tcg_out_movi(s, ts->type, reg, ts->val);
2566 ts->mem_coherent = 0;
2567 break;
2568 case TEMP_VAL_MEM:
2569 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2570 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2571 ts->mem_coherent = 1;
2572 break;
2573 case TEMP_VAL_DEAD:
2574 default:
2575 tcg_abort();
2576 }
2577 ts->reg = reg;
2578 ts->val_type = TEMP_VAL_REG;
2579 s->reg_to_temp[reg] = ts;
2580 }
2581
2582 /* Save a temporary to memory. 'allocated_regs' is used in case a
2583 temporary register needs to be allocated to store a constant. */
2584 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2585 {
2586 /* The liveness analysis already ensures that globals are back
2587 in memory. Keep a tcg_debug_assert for safety. */
2588 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2589 }
2590
2591 /* save globals to their canonical location and assume they can be
2592 modified by the following code. 'allocated_regs' is used in case a
2593 temporary register needs to be allocated to store a constant. */
2594 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2595 {
2596 int i, n;
2597
2598 for (i = 0, n = s->nb_globals; i < n; i++) {
2599 temp_save(s, &s->temps[i], allocated_regs);
2600 }
2601 }
2602
2603 /* sync globals to their canonical location and assume they can be
2604 read by the following code. 'allocated_regs' is used in case a
2605 temporary register needs to be allocated to store a constant. */
2606 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2607 {
2608 int i, n;
2609
2610 for (i = 0, n = s->nb_globals; i < n; i++) {
2611 TCGTemp *ts = &s->temps[i];
2612 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2613 || ts->fixed_reg
2614 || ts->mem_coherent);
2615 }
2616 }
2617
2618 /* at the end of a basic block, we assume all temporaries are dead and
2619 all globals are stored at their canonical location. */
2620 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2621 {
2622 int i;
2623
2624 for (i = s->nb_globals; i < s->nb_temps; i++) {
2625 TCGTemp *ts = &s->temps[i];
2626 if (ts->temp_local) {
2627 temp_save(s, ts, allocated_regs);
2628 } else {
2629 /* The liveness analysis already ensures that temps are dead.
2630 Keep a tcg_debug_assert for safety. */
2631 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2632 }
2633 }
2634
2635 save_globals(s, allocated_regs);
2636 }
2637
2638 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2639 tcg_target_ulong val, TCGLifeData arg_life)
2640 {
2641 if (ots->fixed_reg) {
2642 /* For fixed registers, we do not do any constant propagation. */
2643 tcg_out_movi(s, ots->type, ots->reg, val);
2644 return;
2645 }
2646
2647 /* The movi is not explicitly generated here.
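   The constant is only recorded in the temp (TEMP_VAL_CONST); it is
   materialized later, by temp_load when a register is needed or by
   temp_sync when the memory copy has to be written back.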
*/ 2648 if (ots->val_type == TEMP_VAL_REG) { 2649 s->reg_to_temp[ots->reg] = NULL; 2650 } 2651 ots->val_type = TEMP_VAL_CONST; 2652 ots->val = val; 2653 ots->mem_coherent = 0; 2654 if (NEED_SYNC_ARG(0)) { 2655 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2656 } else if (IS_DEAD_ARG(0)) { 2657 temp_dead(s, ots); 2658 } 2659 } 2660 2661 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 2662 { 2663 TCGTemp *ots = arg_temp(op->args[0]); 2664 tcg_target_ulong val = op->args[1]; 2665 2666 tcg_reg_alloc_do_movi(s, ots, val, op->life); 2667 } 2668 2669 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 2670 { 2671 const TCGLifeData arg_life = op->life; 2672 TCGRegSet allocated_regs; 2673 TCGTemp *ts, *ots; 2674 TCGType otype, itype; 2675 2676 allocated_regs = s->reserved_regs; 2677 ots = arg_temp(op->args[0]); 2678 ts = arg_temp(op->args[1]); 2679 2680 /* Note that otype != itype for no-op truncation. */ 2681 otype = ots->type; 2682 itype = ts->type; 2683 2684 if (ts->val_type == TEMP_VAL_CONST) { 2685 /* propagate constant or generate sti */ 2686 tcg_target_ulong val = ts->val; 2687 if (IS_DEAD_ARG(1)) { 2688 temp_dead(s, ts); 2689 } 2690 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2691 return; 2692 } 2693 2694 /* If the source value is in memory we're going to be forced 2695 to have it in a register in order to perform the copy. Copy 2696 the SOURCE value into its own register first, that way we 2697 don't have to reload SOURCE the next time it is used. */ 2698 if (ts->val_type == TEMP_VAL_MEM) { 2699 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2700 } 2701 2702 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2703 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2704 /* mov to a non-saved dead register makes no sense (even with 2705 liveness analysis disabled). */ 2706 tcg_debug_assert(NEED_SYNC_ARG(0)); 2707 if (!ots->mem_allocated) { 2708 temp_allocate_frame(s, ots); 2709 } 2710 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2711 if (IS_DEAD_ARG(1)) { 2712 temp_dead(s, ts); 2713 } 2714 temp_dead(s, ots); 2715 } else { 2716 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2717 /* the mov can be suppressed */ 2718 if (ots->val_type == TEMP_VAL_REG) { 2719 s->reg_to_temp[ots->reg] = NULL; 2720 } 2721 ots->reg = ts->reg; 2722 temp_dead(s, ts); 2723 } else { 2724 if (ots->val_type != TEMP_VAL_REG) { 2725 /* When allocating a new register, make sure to not spill the 2726 input one. 
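   This is done by marking the input's register as allocated before
   calling tcg_reg_alloc below.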
*/ 2727 tcg_regset_set_reg(allocated_regs, ts->reg); 2728 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2729 allocated_regs, ots->indirect_base); 2730 } 2731 tcg_out_mov(s, otype, ots->reg, ts->reg); 2732 } 2733 ots->val_type = TEMP_VAL_REG; 2734 ots->mem_coherent = 0; 2735 s->reg_to_temp[ots->reg] = ots; 2736 if (NEED_SYNC_ARG(0)) { 2737 temp_sync(s, ots, allocated_regs, 0); 2738 } 2739 } 2740 } 2741 2742 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 2743 { 2744 const TCGLifeData arg_life = op->life; 2745 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 2746 TCGRegSet i_allocated_regs; 2747 TCGRegSet o_allocated_regs; 2748 int i, k, nb_iargs, nb_oargs; 2749 TCGReg reg; 2750 TCGArg arg; 2751 const TCGArgConstraint *arg_ct; 2752 TCGTemp *ts; 2753 TCGArg new_args[TCG_MAX_OP_ARGS]; 2754 int const_args[TCG_MAX_OP_ARGS]; 2755 2756 nb_oargs = def->nb_oargs; 2757 nb_iargs = def->nb_iargs; 2758 2759 /* copy constants */ 2760 memcpy(new_args + nb_oargs + nb_iargs, 2761 op->args + nb_oargs + nb_iargs, 2762 sizeof(TCGArg) * def->nb_cargs); 2763 2764 i_allocated_regs = s->reserved_regs; 2765 o_allocated_regs = s->reserved_regs; 2766 2767 /* satisfy input constraints */ 2768 for (k = 0; k < nb_iargs; k++) { 2769 i = def->sorted_args[nb_oargs + k]; 2770 arg = op->args[i]; 2771 arg_ct = &def->args_ct[i]; 2772 ts = arg_temp(arg); 2773 2774 if (ts->val_type == TEMP_VAL_CONST 2775 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2776 /* constant is OK for instruction */ 2777 const_args[i] = 1; 2778 new_args[i] = ts->val; 2779 goto iarg_end; 2780 } 2781 2782 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2783 2784 if (arg_ct->ct & TCG_CT_IALIAS) { 2785 if (ts->fixed_reg) { 2786 /* if fixed register, we must allocate a new register 2787 if the alias is not the same register */ 2788 if (arg != op->args[arg_ct->alias_index]) 2789 goto allocate_in_reg; 2790 } else { 2791 /* if the input is aliased to an output and if it is 2792 not dead after the instruction, we must allocate 2793 a new register and move it */ 2794 if (!IS_DEAD_ARG(i)) { 2795 goto allocate_in_reg; 2796 } 2797 /* check if the current register has already been allocated 2798 for another input aliased to an output */ 2799 int k2, i2; 2800 for (k2 = 0 ; k2 < k ; k2++) { 2801 i2 = def->sorted_args[nb_oargs + k2]; 2802 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2803 (new_args[i2] == ts->reg)) { 2804 goto allocate_in_reg; 2805 } 2806 } 2807 } 2808 } 2809 reg = ts->reg; 2810 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2811 /* nothing to do : the constraint is satisfied */ 2812 } else { 2813 allocate_in_reg: 2814 /* allocate a new register matching the constraint 2815 and move the temporary register into it */ 2816 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2817 ts->indirect_base); 2818 tcg_out_mov(s, ts->type, reg, ts->reg); 2819 } 2820 new_args[i] = reg; 2821 const_args[i] = 0; 2822 tcg_regset_set_reg(i_allocated_regs, reg); 2823 iarg_end: ; 2824 } 2825 2826 /* mark dead temporaries and free the associated registers */ 2827 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2828 if (IS_DEAD_ARG(i)) { 2829 temp_dead(s, arg_temp(op->args[i])); 2830 } 2831 } 2832 2833 if (def->flags & TCG_OPF_BB_END) { 2834 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2835 } else { 2836 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2837 /* XXX: permit generic clobber register list ? 
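   For now every call-clobbered register is simply spilled with
   tcg_reg_free before emitting an op flagged TCG_OPF_CALL_CLOBBER.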
*/ 2838 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2839 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2840 tcg_reg_free(s, i, i_allocated_regs); 2841 } 2842 } 2843 } 2844 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2845 /* sync globals if the op has side effects and might trigger 2846 an exception. */ 2847 sync_globals(s, i_allocated_regs); 2848 } 2849 2850 /* satisfy the output constraints */ 2851 for(k = 0; k < nb_oargs; k++) { 2852 i = def->sorted_args[k]; 2853 arg = op->args[i]; 2854 arg_ct = &def->args_ct[i]; 2855 ts = arg_temp(arg); 2856 if ((arg_ct->ct & TCG_CT_ALIAS) 2857 && !const_args[arg_ct->alias_index]) { 2858 reg = new_args[arg_ct->alias_index]; 2859 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2860 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2861 i_allocated_regs | o_allocated_regs, 2862 ts->indirect_base); 2863 } else { 2864 /* if fixed register, we try to use it */ 2865 reg = ts->reg; 2866 if (ts->fixed_reg && 2867 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2868 goto oarg_end; 2869 } 2870 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2871 ts->indirect_base); 2872 } 2873 tcg_regset_set_reg(o_allocated_regs, reg); 2874 /* if a fixed register is used, then a move will be done afterwards */ 2875 if (!ts->fixed_reg) { 2876 if (ts->val_type == TEMP_VAL_REG) { 2877 s->reg_to_temp[ts->reg] = NULL; 2878 } 2879 ts->val_type = TEMP_VAL_REG; 2880 ts->reg = reg; 2881 /* temp value is modified, so the value kept in memory is 2882 potentially not the same */ 2883 ts->mem_coherent = 0; 2884 s->reg_to_temp[reg] = ts; 2885 } 2886 oarg_end: 2887 new_args[i] = reg; 2888 } 2889 } 2890 2891 /* emit instruction */ 2892 tcg_out_op(s, op->opc, new_args, const_args); 2893 2894 /* move the outputs in the correct register if needed */ 2895 for(i = 0; i < nb_oargs; i++) { 2896 ts = arg_temp(op->args[i]); 2897 reg = new_args[i]; 2898 if (ts->fixed_reg && ts->reg != reg) { 2899 tcg_out_mov(s, ts->type, ts->reg, reg); 2900 } 2901 if (NEED_SYNC_ARG(i)) { 2902 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2903 } else if (IS_DEAD_ARG(i)) { 2904 temp_dead(s, ts); 2905 } 2906 } 2907 } 2908 2909 #ifdef TCG_TARGET_STACK_GROWSUP 2910 #define STACK_DIR(x) (-(x)) 2911 #else 2912 #define STACK_DIR(x) (x) 2913 #endif 2914 2915 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 2916 { 2917 const int nb_oargs = op->callo; 2918 const int nb_iargs = op->calli; 2919 const TCGLifeData arg_life = op->life; 2920 int flags, nb_regs, i; 2921 TCGReg reg; 2922 TCGArg arg; 2923 TCGTemp *ts; 2924 intptr_t stack_offset; 2925 size_t call_stack_size; 2926 tcg_insn_unit *func_addr; 2927 int allocate_args; 2928 TCGRegSet allocated_regs; 2929 2930 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 2931 flags = op->args[nb_oargs + nb_iargs + 1]; 2932 2933 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2934 if (nb_regs > nb_iargs) { 2935 nb_regs = nb_iargs; 2936 } 2937 2938 /* assign stack slots first */ 2939 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2940 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2941 ~(TCG_TARGET_STACK_ALIGN - 1); 2942 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2943 if (allocate_args) { 2944 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2945 preallocate call stack */ 2946 tcg_abort(); 2947 } 2948 2949 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2950 for (i = nb_regs; i < nb_iargs; i++) { 2951 arg = op->args[nb_oargs + i]; 2952 #ifdef TCG_TARGET_STACK_GROWSUP 2953 stack_offset -= 
sizeof(tcg_target_long); 2954 #endif 2955 if (arg != TCG_CALL_DUMMY_ARG) { 2956 ts = arg_temp(arg); 2957 temp_load(s, ts, tcg_target_available_regs[ts->type], 2958 s->reserved_regs); 2959 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2960 } 2961 #ifndef TCG_TARGET_STACK_GROWSUP 2962 stack_offset += sizeof(tcg_target_long); 2963 #endif 2964 } 2965 2966 /* assign input registers */ 2967 allocated_regs = s->reserved_regs; 2968 for (i = 0; i < nb_regs; i++) { 2969 arg = op->args[nb_oargs + i]; 2970 if (arg != TCG_CALL_DUMMY_ARG) { 2971 ts = arg_temp(arg); 2972 reg = tcg_target_call_iarg_regs[i]; 2973 tcg_reg_free(s, reg, allocated_regs); 2974 2975 if (ts->val_type == TEMP_VAL_REG) { 2976 if (ts->reg != reg) { 2977 tcg_out_mov(s, ts->type, reg, ts->reg); 2978 } 2979 } else { 2980 TCGRegSet arg_set = 0; 2981 2982 tcg_regset_set_reg(arg_set, reg); 2983 temp_load(s, ts, arg_set, allocated_regs); 2984 } 2985 2986 tcg_regset_set_reg(allocated_regs, reg); 2987 } 2988 } 2989 2990 /* mark dead temporaries and free the associated registers */ 2991 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2992 if (IS_DEAD_ARG(i)) { 2993 temp_dead(s, arg_temp(op->args[i])); 2994 } 2995 } 2996 2997 /* clobber call registers */ 2998 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2999 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3000 tcg_reg_free(s, i, allocated_regs); 3001 } 3002 } 3003 3004 /* Save globals if they might be written by the helper, sync them if 3005 they might be read. */ 3006 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3007 /* Nothing to do */ 3008 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3009 sync_globals(s, allocated_regs); 3010 } else { 3011 save_globals(s, allocated_regs); 3012 } 3013 3014 tcg_out_call(s, func_addr); 3015 3016 /* assign output registers and emit moves if needed */ 3017 for(i = 0; i < nb_oargs; i++) { 3018 arg = op->args[i]; 3019 ts = arg_temp(arg); 3020 reg = tcg_target_call_oarg_regs[i]; 3021 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3022 3023 if (ts->fixed_reg) { 3024 if (ts->reg != reg) { 3025 tcg_out_mov(s, ts->type, ts->reg, reg); 3026 } 3027 } else { 3028 if (ts->val_type == TEMP_VAL_REG) { 3029 s->reg_to_temp[ts->reg] = NULL; 3030 } 3031 ts->val_type = TEMP_VAL_REG; 3032 ts->reg = reg; 3033 ts->mem_coherent = 0; 3034 s->reg_to_temp[reg] = ts; 3035 if (NEED_SYNC_ARG(i)) { 3036 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 3037 } else if (IS_DEAD_ARG(i)) { 3038 temp_dead(s, ts); 3039 } 3040 } 3041 } 3042 } 3043 3044 #ifdef CONFIG_PROFILER 3045 3046 /* avoid copy/paste errors */ 3047 #define PROF_ADD(to, from, field) \ 3048 do { \ 3049 (to)->field += atomic_read(&((from)->field)); \ 3050 } while (0) 3051 3052 #define PROF_MAX(to, from, field) \ 3053 do { \ 3054 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3055 if (val__ > (to)->field) { \ 3056 (to)->field = val__; \ 3057 } \ 3058 } while (0) 3059 3060 /* Pass in a zero'ed @prof */ 3061 static inline 3062 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3063 { 3064 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3065 unsigned int i; 3066 3067 for (i = 0; i < n_ctxs; i++) { 3068 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3069 const TCGProfile *orig = &s->prof; 3070 3071 if (counters) { 3072 PROF_ADD(prof, orig, tb_count1); 3073 PROF_ADD(prof, orig, tb_count); 3074 PROF_ADD(prof, orig, op_count); 3075 PROF_MAX(prof, orig, op_count_max); 3076 PROF_ADD(prof, orig, temp_count); 3077 PROF_MAX(prof, orig, temp_count_max); 3078 PROF_ADD(prof, orig, 
del_op_count); 3079 PROF_ADD(prof, orig, code_in_len); 3080 PROF_ADD(prof, orig, code_out_len); 3081 PROF_ADD(prof, orig, search_out_len); 3082 PROF_ADD(prof, orig, interm_time); 3083 PROF_ADD(prof, orig, code_time); 3084 PROF_ADD(prof, orig, la_time); 3085 PROF_ADD(prof, orig, opt_time); 3086 PROF_ADD(prof, orig, restore_count); 3087 PROF_ADD(prof, orig, restore_time); 3088 } 3089 if (table) { 3090 int i; 3091 3092 for (i = 0; i < NB_OPS; i++) { 3093 PROF_ADD(prof, orig, table_op_count[i]); 3094 } 3095 } 3096 } 3097 } 3098 3099 #undef PROF_ADD 3100 #undef PROF_MAX 3101 3102 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3103 { 3104 tcg_profile_snapshot(prof, true, false); 3105 } 3106 3107 static void tcg_profile_snapshot_table(TCGProfile *prof) 3108 { 3109 tcg_profile_snapshot(prof, false, true); 3110 } 3111 3112 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3113 { 3114 TCGProfile prof = {}; 3115 int i; 3116 3117 tcg_profile_snapshot_table(&prof); 3118 for (i = 0; i < NB_OPS; i++) { 3119 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 3120 prof.table_op_count[i]); 3121 } 3122 } 3123 #else 3124 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3125 { 3126 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3127 } 3128 #endif 3129 3130 3131 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3132 { 3133 #ifdef CONFIG_PROFILER 3134 TCGProfile *prof = &s->prof; 3135 #endif 3136 int i, oi, oi_next, num_insns; 3137 3138 #ifdef CONFIG_PROFILER 3139 { 3140 int n; 3141 3142 n = s->gen_op_buf[0].prev + 1; 3143 atomic_set(&prof->op_count, prof->op_count + n); 3144 if (n > prof->op_count_max) { 3145 atomic_set(&prof->op_count_max, n); 3146 } 3147 3148 n = s->nb_temps; 3149 atomic_set(&prof->temp_count, prof->temp_count + n); 3150 if (n > prof->temp_count_max) { 3151 atomic_set(&prof->temp_count_max, n); 3152 } 3153 } 3154 #endif 3155 3156 #ifdef DEBUG_DISAS 3157 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3158 && qemu_log_in_addr_range(tb->pc))) { 3159 qemu_log_lock(); 3160 qemu_log("OP:\n"); 3161 tcg_dump_ops(s); 3162 qemu_log("\n"); 3163 qemu_log_unlock(); 3164 } 3165 #endif 3166 3167 #ifdef CONFIG_PROFILER 3168 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3169 #endif 3170 3171 #ifdef USE_TCG_OPTIMIZATIONS 3172 tcg_optimize(s); 3173 #endif 3174 3175 #ifdef CONFIG_PROFILER 3176 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3177 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3178 #endif 3179 3180 liveness_pass_1(s); 3181 3182 if (s->nb_indirects > 0) { 3183 #ifdef DEBUG_DISAS 3184 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3185 && qemu_log_in_addr_range(tb->pc))) { 3186 qemu_log_lock(); 3187 qemu_log("OP before indirect lowering:\n"); 3188 tcg_dump_ops(s); 3189 qemu_log("\n"); 3190 qemu_log_unlock(); 3191 } 3192 #endif 3193 /* Replace indirect temps with direct temps. */ 3194 if (liveness_pass_2(s)) { 3195 /* If changes were made, re-run liveness. 
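   The inserted loads and stores and the newly created direct temps
   have not been through pass 1, so their life data must be recomputed.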
*/ 3196 liveness_pass_1(s); 3197 } 3198 } 3199 3200 #ifdef CONFIG_PROFILER 3201 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3202 #endif 3203 3204 #ifdef DEBUG_DISAS 3205 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3206 && qemu_log_in_addr_range(tb->pc))) { 3207 qemu_log_lock(); 3208 qemu_log("OP after optimization and liveness analysis:\n"); 3209 tcg_dump_ops(s); 3210 qemu_log("\n"); 3211 qemu_log_unlock(); 3212 } 3213 #endif 3214 3215 tcg_reg_alloc_start(s); 3216 3217 s->code_buf = tb->tc.ptr; 3218 s->code_ptr = tb->tc.ptr; 3219 3220 #ifdef TCG_TARGET_NEED_LDST_LABELS 3221 s->ldst_labels = NULL; 3222 #endif 3223 #ifdef TCG_TARGET_NEED_POOL_LABELS 3224 s->pool_labels = NULL; 3225 #endif 3226 3227 num_insns = -1; 3228 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 3229 TCGOp * const op = &s->gen_op_buf[oi]; 3230 TCGOpcode opc = op->opc; 3231 3232 oi_next = op->next; 3233 #ifdef CONFIG_PROFILER 3234 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3235 #endif 3236 3237 switch (opc) { 3238 case INDEX_op_mov_i32: 3239 case INDEX_op_mov_i64: 3240 tcg_reg_alloc_mov(s, op); 3241 break; 3242 case INDEX_op_movi_i32: 3243 case INDEX_op_movi_i64: 3244 tcg_reg_alloc_movi(s, op); 3245 break; 3246 case INDEX_op_insn_start: 3247 if (num_insns >= 0) { 3248 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3249 } 3250 num_insns++; 3251 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3252 target_ulong a; 3253 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3254 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3255 #else 3256 a = op->args[i]; 3257 #endif 3258 s->gen_insn_data[num_insns][i] = a; 3259 } 3260 break; 3261 case INDEX_op_discard: 3262 temp_dead(s, arg_temp(op->args[0])); 3263 break; 3264 case INDEX_op_set_label: 3265 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3266 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3267 break; 3268 case INDEX_op_call: 3269 tcg_reg_alloc_call(s, op); 3270 break; 3271 default: 3272 /* Sanity check that we've not introduced any unhandled opcodes. */ 3273 tcg_debug_assert(tcg_op_supported(opc)); 3274 /* Note: in order to speed up the code, it would be much 3275 faster to have specialized register allocator functions for 3276 some common argument patterns */ 3277 tcg_reg_alloc_op(s, op); 3278 break; 3279 } 3280 #ifdef CONFIG_DEBUG_TCG 3281 check_regs(s); 3282 #endif 3283 /* Test for (pending) buffer overflow. The assumption is that any 3284 one operation beginning below the high water mark cannot overrun 3285 the buffer completely. Thus we can test for overflow after 3286 generating code without having to check during generation. 
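   The mark is placed TCG_HIGHWATER bytes before the end of the
   region, which is assumed to bound that worst-case overrun.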
*/ 3287 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 3288 return -1; 3289 } 3290 } 3291 tcg_debug_assert(num_insns >= 0); 3292 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3293 3294 /* Generate TB finalization at the end of block */ 3295 #ifdef TCG_TARGET_NEED_LDST_LABELS 3296 if (!tcg_out_ldst_finalize(s)) { 3297 return -1; 3298 } 3299 #endif 3300 #ifdef TCG_TARGET_NEED_POOL_LABELS 3301 if (!tcg_out_pool_finalize(s)) { 3302 return -1; 3303 } 3304 #endif 3305 3306 /* flush instruction cache */ 3307 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 3308 3309 return tcg_current_code_size(s); 3310 } 3311 3312 #ifdef CONFIG_PROFILER 3313 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3314 { 3315 TCGProfile prof = {}; 3316 const TCGProfile *s; 3317 int64_t tb_count; 3318 int64_t tb_div_count; 3319 int64_t tot; 3320 3321 tcg_profile_snapshot_counters(&prof); 3322 s = &prof; 3323 tb_count = s->tb_count; 3324 tb_div_count = tb_count ? tb_count : 1; 3325 tot = s->interm_time + s->code_time; 3326 3327 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 3328 tot, tot / 2.4e9); 3329 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 3330 tb_count, s->tb_count1 - tb_count, 3331 (double)(s->tb_count1 - s->tb_count) 3332 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 3333 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 3334 (double)s->op_count / tb_div_count, s->op_count_max); 3335 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 3336 (double)s->del_op_count / tb_div_count); 3337 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 3338 (double)s->temp_count / tb_div_count, s->temp_count_max); 3339 cpu_fprintf(f, "avg host code/TB %0.1f\n", 3340 (double)s->code_out_len / tb_div_count); 3341 cpu_fprintf(f, "avg search data/TB %0.1f\n", 3342 (double)s->search_out_len / tb_div_count); 3343 3344 cpu_fprintf(f, "cycles/op %0.1f\n", 3345 s->op_count ? (double)tot / s->op_count : 0); 3346 cpu_fprintf(f, "cycles/in byte %0.1f\n", 3347 s->code_in_len ? (double)tot / s->code_in_len : 0); 3348 cpu_fprintf(f, "cycles/out byte %0.1f\n", 3349 s->code_out_len ? (double)tot / s->code_out_len : 0); 3350 cpu_fprintf(f, "cycles/search byte %0.1f\n", 3351 s->search_out_len ? (double)tot / s->search_out_len : 0); 3352 if (tot == 0) { 3353 tot = 1; 3354 } 3355 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 3356 (double)s->interm_time / tot * 100.0); 3357 cpu_fprintf(f, " gen_code time %0.1f%%\n", 3358 (double)s->code_time / tot * 100.0); 3359 cpu_fprintf(f, "optim./code time %0.1f%%\n", 3360 (double)s->opt_time / (s->code_time ? s->code_time : 1) 3361 * 100.0); 3362 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 3363 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 3364 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 3365 s->restore_count); 3366 cpu_fprintf(f, " avg cycles %0.1f\n", 3367 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 3368 } 3369 #else 3370 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3371 { 3372 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3373 } 3374 #endif 3375 3376 #ifdef ELF_HOST_MACHINE 3377 /* In order to use this feature, the backend needs to do three things: 3378 3379 (1) Define ELF_HOST_MACHINE to indicate both what value to 3380 put into the ELF image and to indicate support for the feature. 3381 3382 (2) Define tcg_register_jit. 
This should create a buffer containing 3383 the contents of a .debug_frame section that describes the post- 3384 prologue unwind info for the tcg machine. 3385 3386 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 3387 */ 3388 3389 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 3390 typedef enum { 3391 JIT_NOACTION = 0, 3392 JIT_REGISTER_FN, 3393 JIT_UNREGISTER_FN 3394 } jit_actions_t; 3395 3396 struct jit_code_entry { 3397 struct jit_code_entry *next_entry; 3398 struct jit_code_entry *prev_entry; 3399 const void *symfile_addr; 3400 uint64_t symfile_size; 3401 }; 3402 3403 struct jit_descriptor { 3404 uint32_t version; 3405 uint32_t action_flag; 3406 struct jit_code_entry *relevant_entry; 3407 struct jit_code_entry *first_entry; 3408 }; 3409 3410 void __jit_debug_register_code(void) __attribute__((noinline)); 3411 void __jit_debug_register_code(void) 3412 { 3413 asm(""); 3414 } 3415 3416 /* Must statically initialize the version, because GDB may check 3417 the version before we can set it. */ 3418 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 3419 3420 /* End GDB interface. */ 3421 3422 static int find_string(const char *strtab, const char *str) 3423 { 3424 const char *p = strtab + 1; 3425 3426 while (1) { 3427 if (strcmp(p, str) == 0) { 3428 return p - strtab; 3429 } 3430 p += strlen(p) + 1; 3431 } 3432 } 3433 3434 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 3435 const void *debug_frame, 3436 size_t debug_frame_size) 3437 { 3438 struct __attribute__((packed)) DebugInfo { 3439 uint32_t len; 3440 uint16_t version; 3441 uint32_t abbrev; 3442 uint8_t ptr_size; 3443 uint8_t cu_die; 3444 uint16_t cu_lang; 3445 uintptr_t cu_low_pc; 3446 uintptr_t cu_high_pc; 3447 uint8_t fn_die; 3448 char fn_name[16]; 3449 uintptr_t fn_low_pc; 3450 uintptr_t fn_high_pc; 3451 uint8_t cu_eoc; 3452 }; 3453 3454 struct ElfImage { 3455 ElfW(Ehdr) ehdr; 3456 ElfW(Phdr) phdr; 3457 ElfW(Shdr) shdr[7]; 3458 ElfW(Sym) sym[2]; 3459 struct DebugInfo di; 3460 uint8_t da[24]; 3461 char str[80]; 3462 }; 3463 3464 struct ElfImage *img; 3465 3466 static const struct ElfImage img_template = { 3467 .ehdr = { 3468 .e_ident[EI_MAG0] = ELFMAG0, 3469 .e_ident[EI_MAG1] = ELFMAG1, 3470 .e_ident[EI_MAG2] = ELFMAG2, 3471 .e_ident[EI_MAG3] = ELFMAG3, 3472 .e_ident[EI_CLASS] = ELF_CLASS, 3473 .e_ident[EI_DATA] = ELF_DATA, 3474 .e_ident[EI_VERSION] = EV_CURRENT, 3475 .e_type = ET_EXEC, 3476 .e_machine = ELF_HOST_MACHINE, 3477 .e_version = EV_CURRENT, 3478 .e_phoff = offsetof(struct ElfImage, phdr), 3479 .e_shoff = offsetof(struct ElfImage, shdr), 3480 .e_ehsize = sizeof(ElfW(Shdr)), 3481 .e_phentsize = sizeof(ElfW(Phdr)), 3482 .e_phnum = 1, 3483 .e_shentsize = sizeof(ElfW(Shdr)), 3484 .e_shnum = ARRAY_SIZE(img->shdr), 3485 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 3486 #ifdef ELF_HOST_FLAGS 3487 .e_flags = ELF_HOST_FLAGS, 3488 #endif 3489 #ifdef ELF_OSABI 3490 .e_ident[EI_OSABI] = ELF_OSABI, 3491 #endif 3492 }, 3493 .phdr = { 3494 .p_type = PT_LOAD, 3495 .p_flags = PF_X, 3496 }, 3497 .shdr = { 3498 [0] = { .sh_type = SHT_NULL }, 3499 /* Trick: The contents of code_gen_buffer are not present in 3500 this fake ELF file; that got allocated elsewhere. Therefore 3501 we mark .text as SHT_NOBITS (similar to .bss) so that readers 3502 will not look for contents. We can record any address. 
*/ 3503 [1] = { /* .text */ 3504 .sh_type = SHT_NOBITS, 3505 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 3506 }, 3507 [2] = { /* .debug_info */ 3508 .sh_type = SHT_PROGBITS, 3509 .sh_offset = offsetof(struct ElfImage, di), 3510 .sh_size = sizeof(struct DebugInfo), 3511 }, 3512 [3] = { /* .debug_abbrev */ 3513 .sh_type = SHT_PROGBITS, 3514 .sh_offset = offsetof(struct ElfImage, da), 3515 .sh_size = sizeof(img->da), 3516 }, 3517 [4] = { /* .debug_frame */ 3518 .sh_type = SHT_PROGBITS, 3519 .sh_offset = sizeof(struct ElfImage), 3520 }, 3521 [5] = { /* .symtab */ 3522 .sh_type = SHT_SYMTAB, 3523 .sh_offset = offsetof(struct ElfImage, sym), 3524 .sh_size = sizeof(img->sym), 3525 .sh_info = 1, 3526 .sh_link = ARRAY_SIZE(img->shdr) - 1, 3527 .sh_entsize = sizeof(ElfW(Sym)), 3528 }, 3529 [6] = { /* .strtab */ 3530 .sh_type = SHT_STRTAB, 3531 .sh_offset = offsetof(struct ElfImage, str), 3532 .sh_size = sizeof(img->str), 3533 } 3534 }, 3535 .sym = { 3536 [1] = { /* code_gen_buffer */ 3537 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 3538 .st_shndx = 1, 3539 } 3540 }, 3541 .di = { 3542 .len = sizeof(struct DebugInfo) - 4, 3543 .version = 2, 3544 .ptr_size = sizeof(void *), 3545 .cu_die = 1, 3546 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 3547 .fn_die = 2, 3548 .fn_name = "code_gen_buffer" 3549 }, 3550 .da = { 3551 1, /* abbrev number (the cu) */ 3552 0x11, 1, /* DW_TAG_compile_unit, has children */ 3553 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 3554 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3555 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3556 0, 0, /* end of abbrev */ 3557 2, /* abbrev number (the fn) */ 3558 0x2e, 0, /* DW_TAG_subprogram, no children */ 3559 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 3560 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3561 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3562 0, 0, /* end of abbrev */ 3563 0 /* no more abbrev */ 3564 }, 3565 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 3566 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 3567 }; 3568 3569 /* We only need a single jit entry; statically allocate it. */ 3570 static struct jit_code_entry one_entry; 3571 3572 uintptr_t buf = (uintptr_t)buf_ptr; 3573 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 3574 DebugFrameHeader *dfh; 3575 3576 img = g_malloc(img_size); 3577 *img = img_template; 3578 3579 img->phdr.p_vaddr = buf; 3580 img->phdr.p_paddr = buf; 3581 img->phdr.p_memsz = buf_size; 3582 3583 img->shdr[1].sh_name = find_string(img->str, ".text"); 3584 img->shdr[1].sh_addr = buf; 3585 img->shdr[1].sh_size = buf_size; 3586 3587 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 3588 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 3589 3590 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 3591 img->shdr[4].sh_size = debug_frame_size; 3592 3593 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 3594 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 3595 3596 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 3597 img->sym[1].st_value = buf; 3598 img->sym[1].st_size = buf_size; 3599 3600 img->di.cu_low_pc = buf; 3601 img->di.cu_high_pc = buf + buf_size; 3602 img->di.fn_low_pc = buf; 3603 img->di.fn_high_pc = buf + buf_size; 3604 3605 dfh = (DebugFrameHeader *)(img + 1); 3606 memcpy(dfh, debug_frame, debug_frame_size); 3607 dfh->fde.func_start = buf; 3608 dfh->fde.func_len = buf_size; 3609 3610 #ifdef DEBUG_JIT 3611 /* Enable this block to be able to debug the ELF image file creation. 
3612 One can use readelf, objdump, or other inspection utilities. */
3613 {
3614 FILE *f = fopen("/tmp/qemu.jit", "w+b");
3615 if (f) {
3616 if (fwrite(img, img_size, 1, f) != 1) {
3617 /* Avoid stupid unused return value warning for fwrite. */
3618 }
3619 fclose(f);
3620 }
3621 }
3622 #endif
3623
3624 one_entry.symfile_addr = img;
3625 one_entry.symfile_size = img_size;
3626
3627 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3628 __jit_debug_descriptor.relevant_entry = &one_entry;
3629 __jit_debug_descriptor.first_entry = &one_entry;
3630 __jit_debug_register_code();
3631 }
3632 #else
3633 /* No support for the feature. Provide the entry point expected by exec.c,
3634 and implement the internal function we declared earlier. */
3635
3636 static void tcg_register_jit_int(void *buf, size_t size,
3637 const void *debug_frame,
3638 size_t debug_frame_size)
3639 {
3640 }
3641
3642 void tcg_register_jit(void *buf, size_t buf_size)
3643 {
3644 }
3645 #endif /* ELF_HOST_MACHINE */
3646