/*
 * Host code generation
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "exec/tb-flush.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"
#include "perf.h"

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

TBContext tb_ctx;

/*
 * Encode VAL as a signed leb128 sequence at P.
 * Return P incremented past the encoded value.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(int64_t)1 << shift;
    }

    *pp = p;
    return val;
}
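/*
 * Worked example (illustration only): encode_sleb128(p, -123) emits
 * the two bytes { 0x85, 0x7f }.  Pass 1: the low seven bits of -123
 * are 0x05, the shifted value is -1 and bit 6 of the byte is clear,
 * so more bytes are needed and 0x85 is stored.  Pass 2: the low seven
 * bits of -1 are 0x7f with bit 6 set, which terminates the sequence.
 * Decoding accumulates 0x05 | (0x7f << 7) = 0x3f85 and then
 * sign-extends from bit 14 (the final byte had bit 6 set), giving
 * -123 back.
 */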
/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        uint64_t prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!(tb_cflags(tb) & CF_PCREL)) {
        data[0] = tb->pc;
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}
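/*
 * Worked example for the search table above (hypothetical numbers,
 * assuming TARGET_INSN_START_WORDS == 1 and !CF_PCREL): a TB at guest
 * pc 0x1000 with two insns whose host code ends at offsets 0x40 and
 * 0x6c from tc.ptr has the logical rows { 0x1000, 0x40 } and
 * { 0x1004, 0x6c }.  Against the seed { tb->pc, 0 } these encode as
 * the sleb128 deltas { 0, 0x40 } and { +4, +0x2c }.
 * cpu_unwind_data_from_tb() replays the deltas until the accumulated
 * host pc first exceeds the faulting host_pc, recovering the guest pc
 * of the insn that was executing and the count of insns not yet
 * completed.
 */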
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}

void page_init(void)
{
    page_size_init();
    page_table_config_init();
}

/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}
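/*
 * Control-flow sketch for the sigsetjmp() above (descriptive; the
 * authoritative bail-out sites live in the TCG core): when translation
 * or code generation overflows an internal limit, the core can unwind
 * with, e.g.,
 *
 *     siglongjmp(tcg_ctx->jmp_trans, -2);
 *
 * which makes setjmp_gen_code() return -2 as if from sigsetjmp().
 * tb_gen_code() below maps -1 to "code_gen_buffer overflow, flush and
 * retry" and -2 to "TB too large, retry with half as many insns".
 */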
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible. */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    if (!(cflags & CF_PCREL)) {
        tb->pc = pc;
    }
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->gen_tb = tb;
    tcg_ctx->addr_type = TCG_TYPE_TL;
#ifdef CONFIG_SOFTMMU
    tcg_ctx->page_bits = TARGET_PAGE_BITS;
    tcg_ctx->page_mask = TARGET_PAGE_MASK;
    tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif

 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;
    /*
     * For CF_PCREL, attribute all executions of the generated code
     * to its first mapping.
     */
    perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction.  The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }
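    /*
     * Descriptive note (not in the original source): tb_reset_jump()
     * points a direct jump back at the TB's own fall-through path, so a
     * freshly generated block exits to the main loop until tb_add_jump()
     * later patches it to chain straight to a successor TB.
     */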
    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do.  Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise rewinding that happens in the TB might fail
     * to look itself up using the host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}

/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC. */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it.  Fetch the PC from there.  */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr);
        }
    }
}
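/*
 * Illustrative call site for tb_gen_code() (simplified from the
 * cpu-exec fast path, not part of this file): when the TB lookup
 * misses, the execution loop translates on demand:
 *
 *     mmap_lock();
 *     tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 *     mmap_unlock();
 *
 * tb_gen_code() returns either the freshly linked TB or, if another
 * thread won the race in tb_link_page(), the already-published copy.
 */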
#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}
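/*
 * For illustration only, the statistics above render along these lines
 * (values invented; the histogram strings come from qdist_pr()):
 *
 *   TB hash buckets 8192/16384 (50.00% head buckets used)
 *   TB hash occupancy 28.12% avg chain occ. Histogram: ...
 *   TB hash avg chain 1.017 buckets. Histogram: ...
 */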
struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    /* During early initialization, the cache may not yet be allocated. */
    if (unlikely(jc == NULL)) {
        return;
    }

    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
        qatomic_set(&jc->array[i].tb, NULL);
    }
}

/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}