1 /* 2 * Host code generation 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #include "trace.h" 23 #include "disas/disas.h" 24 #include "exec/exec-all.h" 25 #include "tcg/tcg.h" 26 #if defined(CONFIG_USER_ONLY) 27 #include "qemu.h" 28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 29 #include <sys/param.h> 30 #if __FreeBSD_version >= 700104 31 #define HAVE_KINFO_GETVMMAP 32 #define sigqueue sigqueue_freebsd /* avoid redefinition */ 33 #include <sys/proc.h> 34 #include <machine/profile.h> 35 #define _KERNEL 36 #include <sys/user.h> 37 #undef _KERNEL 38 #undef sigqueue 39 #include <libutil.h> 40 #endif 41 #endif 42 #else 43 #include "exec/ram_addr.h" 44 #endif 45 46 #include "exec/cputlb.h" 47 #include "exec/translate-all.h" 48 #include "exec/translator.h" 49 #include "exec/tb-flush.h" 50 #include "qemu/bitmap.h" 51 #include "qemu/qemu-print.h" 52 #include "qemu/main-loop.h" 53 #include "qemu/cacheinfo.h" 54 #include "qemu/timer.h" 55 #include "exec/log.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/cpu-timers.h" 58 #include "sysemu/tcg.h" 59 #include "qapi/error.h" 60 #include "hw/core/tcg-cpu-ops.h" 61 #include "tb-jmp-cache.h" 62 #include "tb-hash.h" 63 #include "tb-context.h" 64 #include "internal.h" 65 #include "perf.h" 66 #include "tcg/insn-start-words.h" 67 68 TBContext tb_ctx; 69 70 /* 71 * Encode VAL as a signed leb128 sequence at P. 72 * Return P incremented past the encoded value. 73 */ 74 static uint8_t *encode_sleb128(uint8_t *p, int64_t val) 75 { 76 int more, byte; 77 78 do { 79 byte = val & 0x7f; 80 val >>= 7; 81 more = !((val == 0 && (byte & 0x40) == 0) 82 || (val == -1 && (byte & 0x40) != 0)); 83 if (more) { 84 byte |= 0x80; 85 } 86 *p++ = byte; 87 } while (more); 88 89 return p; 90 } 91 92 /* 93 * Decode a signed leb128 sequence at *PP; increment *PP past the 94 * decoded value. Return the decoded value. 95 */ 96 static int64_t decode_sleb128(const uint8_t **pp) 97 { 98 const uint8_t *p = *pp; 99 int64_t val = 0; 100 int byte, shift = 0; 101 102 do { 103 byte = *p++; 104 val |= (int64_t)(byte & 0x7f) << shift; 105 shift += 7; 106 } while (byte & 0x80); 107 if (shift < TARGET_LONG_BITS && (byte & 0x40)) { 108 val |= -(int64_t)1 << shift; 109 } 110 111 *pp = p; 112 return val; 113 } 114 115 /* Encode the data collected about the instructions while compiling TB. 116 Place the data at BLOCK, and return the number of bytes consumed. 117 118 The logical table consists of TARGET_INSN_START_WORDS target_ulong's, 119 which come from the target's insn_start data, followed by a uintptr_t 120 which comes from the host pc of the end of the code implementing the insn. 121 122 Each line of the table is encoded as sleb128 deltas from the previous 123 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. 124 That is, the first column is seeded with the guest pc, the last column 125 with the host pc, and the middle columns with zeros. */ 126 127 static int encode_search(TranslationBlock *tb, uint8_t *block) 128 { 129 uint8_t *highwater = tcg_ctx->code_gen_highwater; 130 uint64_t *insn_data = tcg_ctx->gen_insn_data; 131 uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off; 132 uint8_t *p = block; 133 int i, j, n; 134 135 for (i = 0, n = tb->icount; i < n; ++i) { 136 uint64_t prev, curr; 137 138 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 139 if (i == 0) { 140 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0); 141 } else { 142 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j]; 143 } 144 curr = insn_data[i * TARGET_INSN_START_WORDS + j]; 145 p = encode_sleb128(p, curr - prev); 146 } 147 prev = (i == 0 ? 0 : insn_end_off[i - 1]); 148 curr = insn_end_off[i]; 149 p = encode_sleb128(p, curr - prev); 150 151 /* Test for (pending) buffer overflow. The assumption is that any 152 one row beginning below the high water mark cannot overrun 153 the buffer completely. Thus we can test for overflow after 154 encoding a row without having to check during encoding. */ 155 if (unlikely(p > highwater)) { 156 return -1; 157 } 158 } 159 160 return p - block; 161 } 162 163 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, 164 uint64_t *data) 165 { 166 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; 167 const uint8_t *p = tb->tc.ptr + tb->tc.size; 168 int i, j, num_insns = tb->icount; 169 170 host_pc -= GETPC_ADJ; 171 172 if (host_pc < iter_pc) { 173 return -1; 174 } 175 176 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); 177 if (!(tb_cflags(tb) & CF_PCREL)) { 178 data[0] = tb->pc; 179 } 180 181 /* 182 * Reconstruct the stored insn data while looking for the point 183 * at which the end of the insn exceeds host_pc. 184 */ 185 for (i = 0; i < num_insns; ++i) { 186 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 187 data[j] += decode_sleb128(&p); 188 } 189 iter_pc += decode_sleb128(&p); 190 if (iter_pc > host_pc) { 191 return num_insns - i; 192 } 193 } 194 return -1; 195 } 196 197 /* 198 * The cpu state corresponding to 'host_pc' is restored in 199 * preparation for exiting the TB. 200 */ 201 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, 202 uintptr_t host_pc) 203 { 204 uint64_t data[TARGET_INSN_START_WORDS]; 205 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); 206 207 if (insns_left < 0) { 208 return; 209 } 210 211 if (tb_cflags(tb) & CF_USE_ICOUNT) { 212 assert(icount_enabled()); 213 /* 214 * Reset the cycle counter to the start of the block and 215 * shift if to the number of actually executed instructions. 216 */ 217 cpu_neg(cpu)->icount_decr.u16.low += insns_left; 218 } 219 220 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); 221 } 222 223 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) 224 { 225 /* 226 * The host_pc has to be in the rx region of the code buffer. 227 * If it is not we will not be able to resolve it here. 228 * The two cases where host_pc will not be correct are: 229 * 230 * - fault during translation (instruction fetch) 231 * - fault from helper (not using GETPC() macro) 232 * 233 * Either way we need return early as we can't resolve it here. 234 */ 235 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 236 TranslationBlock *tb = tcg_tb_lookup(host_pc); 237 if (tb) { 238 cpu_restore_state_from_tb(cpu, tb, host_pc); 239 return true; 240 } 241 } 242 return false; 243 } 244 245 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) 246 { 247 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 248 TranslationBlock *tb = tcg_tb_lookup(host_pc); 249 if (tb) { 250 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; 251 } 252 } 253 return false; 254 } 255 256 void page_init(void) 257 { 258 page_size_init(); 259 page_table_config_init(); 260 } 261 262 /* 263 * Isolate the portion of code gen which can setjmp/longjmp. 264 * Return the size of the generated code, or negative on error. 265 */ 266 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, 267 vaddr pc, void *host_pc, 268 int *max_insns, int64_t *ti) 269 { 270 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); 271 if (unlikely(ret != 0)) { 272 return ret; 273 } 274 275 tcg_func_start(tcg_ctx); 276 277 tcg_ctx->cpu = env_cpu(env); 278 gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc); 279 assert(tb->size != 0); 280 tcg_ctx->cpu = NULL; 281 *max_insns = tb->icount; 282 283 return tcg_gen_code(tcg_ctx, tb, pc); 284 } 285 286 /* Called with mmap_lock held for user mode emulation. */ 287 TranslationBlock *tb_gen_code(CPUState *cpu, 288 vaddr pc, uint64_t cs_base, 289 uint32_t flags, int cflags) 290 { 291 CPUArchState *env = cpu->env_ptr; 292 TranslationBlock *tb, *existing_tb; 293 tb_page_addr_t phys_pc; 294 tcg_insn_unit *gen_code_buf; 295 int gen_code_size, search_size, max_insns; 296 int64_t ti; 297 void *host_pc; 298 299 assert_memory_lock(); 300 qemu_thread_jit_write(); 301 302 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); 303 304 if (phys_pc == -1) { 305 /* Generate a one-shot TB with 1 insn in it */ 306 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; 307 } 308 309 max_insns = cflags & CF_COUNT_MASK; 310 if (max_insns == 0) { 311 max_insns = TCG_MAX_INSNS; 312 } 313 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); 314 315 buffer_overflow: 316 tb = tcg_tb_alloc(tcg_ctx); 317 if (unlikely(!tb)) { 318 /* flush must be done */ 319 tb_flush(cpu); 320 mmap_unlock(); 321 /* Make the execution loop process the flush as soon as possible. */ 322 cpu->exception_index = EXCP_INTERRUPT; 323 cpu_loop_exit(cpu); 324 } 325 326 gen_code_buf = tcg_ctx->code_gen_ptr; 327 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); 328 if (!(cflags & CF_PCREL)) { 329 tb->pc = pc; 330 } 331 tb->cs_base = cs_base; 332 tb->flags = flags; 333 tb->cflags = cflags; 334 tb_set_page_addr0(tb, phys_pc); 335 tb_set_page_addr1(tb, -1); 336 tcg_ctx->gen_tb = tb; 337 tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; 338 #ifdef CONFIG_SOFTMMU 339 tcg_ctx->page_bits = TARGET_PAGE_BITS; 340 tcg_ctx->page_mask = TARGET_PAGE_MASK; 341 tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS; 342 tcg_ctx->tlb_fast_offset = 343 (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env); 344 #endif 345 tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS; 346 #ifdef TCG_GUEST_DEFAULT_MO 347 tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO; 348 #else 349 tcg_ctx->guest_mo = TCG_MO_ALL; 350 #endif 351 352 tb_overflow: 353 354 trace_translate_block(tb, pc, tb->tc.ptr); 355 356 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); 357 if (unlikely(gen_code_size < 0)) { 358 switch (gen_code_size) { 359 case -1: 360 /* 361 * Overflow of code_gen_buffer, or the current slice of it. 362 * 363 * TODO: We don't need to re-do gen_intermediate_code, nor 364 * should we re-do the tcg optimization currently hidden 365 * inside tcg_gen_code. All that should be required is to 366 * flush the TBs, allocate a new TB, re-initialize it per 367 * above, and re-do the actual code generation. 368 */ 369 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 370 "Restarting code generation for " 371 "code_gen_buffer overflow\n"); 372 goto buffer_overflow; 373 374 case -2: 375 /* 376 * The code generated for the TranslationBlock is too large. 377 * The maximum size allowed by the unwind info is 64k. 378 * There may be stricter constraints from relocations 379 * in the tcg backend. 380 * 381 * Try again with half as many insns as we attempted this time. 382 * If a single insn overflows, there's a bug somewhere... 383 */ 384 assert(max_insns > 1); 385 max_insns /= 2; 386 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 387 "Restarting code generation with " 388 "smaller translation block (max %d insns)\n", 389 max_insns); 390 goto tb_overflow; 391 392 default: 393 g_assert_not_reached(); 394 } 395 } 396 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 397 if (unlikely(search_size < 0)) { 398 goto buffer_overflow; 399 } 400 tb->tc.size = gen_code_size; 401 402 /* 403 * For CF_PCREL, attribute all executions of the generated code 404 * to its first mapping. 405 */ 406 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); 407 408 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && 409 qemu_log_in_addr_range(pc)) { 410 FILE *logfile = qemu_log_trylock(); 411 if (logfile) { 412 int code_size, data_size; 413 const tcg_target_ulong *rx_data_gen_ptr; 414 size_t chunk_start; 415 int insn = 0; 416 417 if (tcg_ctx->data_gen_ptr) { 418 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); 419 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; 420 data_size = gen_code_size - code_size; 421 } else { 422 rx_data_gen_ptr = 0; 423 code_size = gen_code_size; 424 data_size = 0; 425 } 426 427 /* Dump header and the first instruction */ 428 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); 429 fprintf(logfile, 430 " -- guest addr 0x%016" PRIx64 " + tb prologue\n", 431 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]); 432 chunk_start = tcg_ctx->gen_insn_end_off[insn]; 433 disas(logfile, tb->tc.ptr, chunk_start); 434 435 /* 436 * Dump each instruction chunk, wrapping up empty chunks into 437 * the next instruction. The whole array is offset so the 438 * first entry is the beginning of the 2nd instruction. 439 */ 440 while (insn < tb->icount) { 441 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; 442 if (chunk_end > chunk_start) { 443 fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n", 444 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]); 445 disas(logfile, tb->tc.ptr + chunk_start, 446 chunk_end - chunk_start); 447 chunk_start = chunk_end; 448 } 449 insn++; 450 } 451 452 if (chunk_start < code_size) { 453 fprintf(logfile, " -- tb slow paths + alignment\n"); 454 disas(logfile, tb->tc.ptr + chunk_start, 455 code_size - chunk_start); 456 } 457 458 /* Finally dump any data we may have after the block */ 459 if (data_size) { 460 int i; 461 fprintf(logfile, " data: [size=%d]\n", data_size); 462 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { 463 if (sizeof(tcg_target_ulong) == 8) { 464 fprintf(logfile, 465 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", 466 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 467 } else if (sizeof(tcg_target_ulong) == 4) { 468 fprintf(logfile, 469 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", 470 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 471 } else { 472 qemu_build_not_reached(); 473 } 474 } 475 } 476 fprintf(logfile, "\n"); 477 qemu_log_unlock(logfile); 478 } 479 } 480 481 qatomic_set(&tcg_ctx->code_gen_ptr, (void *) 482 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, 483 CODE_GEN_ALIGN)); 484 485 /* init jump list */ 486 qemu_spin_init(&tb->jmp_lock); 487 tb->jmp_list_head = (uintptr_t)NULL; 488 tb->jmp_list_next[0] = (uintptr_t)NULL; 489 tb->jmp_list_next[1] = (uintptr_t)NULL; 490 tb->jmp_dest[0] = (uintptr_t)NULL; 491 tb->jmp_dest[1] = (uintptr_t)NULL; 492 493 /* init original jump addresses which have been set during tcg_gen_code() */ 494 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 495 tb_reset_jump(tb, 0); 496 } 497 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 498 tb_reset_jump(tb, 1); 499 } 500 501 /* 502 * If the TB is not associated with a physical RAM page then it must be 503 * a temporary one-insn TB, and we have nothing left to do. Return early 504 * before attempting to link to other TBs or add to the lookup table. 505 */ 506 if (tb_page_addr0(tb) == -1) { 507 return tb; 508 } 509 510 /* 511 * Insert TB into the corresponding region tree before publishing it 512 * through QHT. Otherwise rewinding happened in the TB might fail to 513 * lookup itself using host PC. 514 */ 515 tcg_tb_insert(tb); 516 517 /* 518 * No explicit memory barrier is required -- tb_link_page() makes the 519 * TB visible in a consistent state. 520 */ 521 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); 522 /* if the TB already exists, discard what we just translated */ 523 if (unlikely(existing_tb != tb)) { 524 uintptr_t orig_aligned = (uintptr_t)gen_code_buf; 525 526 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); 527 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); 528 tcg_tb_remove(tb); 529 return existing_tb; 530 } 531 return tb; 532 } 533 534 /* user-mode: call with mmap_lock held */ 535 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) 536 { 537 TranslationBlock *tb; 538 539 assert_memory_lock(); 540 541 tb = tcg_tb_lookup(retaddr); 542 if (tb) { 543 /* We can use retranslation to find the PC. */ 544 cpu_restore_state_from_tb(cpu, tb, retaddr); 545 tb_phys_invalidate(tb, -1); 546 } else { 547 /* The exception probably happened in a helper. The CPU state should 548 have been saved before calling it. Fetch the PC from there. */ 549 CPUArchState *env = cpu->env_ptr; 550 vaddr pc; 551 uint64_t cs_base; 552 tb_page_addr_t addr; 553 uint32_t flags; 554 555 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); 556 addr = get_page_addr_code(env, pc); 557 if (addr != -1) { 558 tb_invalidate_phys_range(addr, addr); 559 } 560 } 561 } 562 563 #ifndef CONFIG_USER_ONLY 564 /* 565 * In deterministic execution mode, instructions doing device I/Os 566 * must be at the end of the TB. 567 * 568 * Called by softmmu_template.h, with iothread mutex not held. 569 */ 570 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) 571 { 572 TranslationBlock *tb; 573 CPUClass *cc; 574 uint32_t n; 575 576 tb = tcg_tb_lookup(retaddr); 577 if (!tb) { 578 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", 579 (void *)retaddr); 580 } 581 cpu_restore_state_from_tb(cpu, tb, retaddr); 582 583 /* 584 * Some guests must re-execute the branch when re-executing a delay 585 * slot instruction. When this is the case, adjust icount and N 586 * to account for the re-execution of the branch. 587 */ 588 n = 1; 589 cc = CPU_GET_CLASS(cpu); 590 if (cc->tcg_ops->io_recompile_replay_branch && 591 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { 592 cpu_neg(cpu)->icount_decr.u16.low++; 593 n = 2; 594 } 595 596 /* 597 * Exit the loop and potentially generate a new TB executing the 598 * just the I/O insns. We also limit instrumentation to memory 599 * operations only (which execute after completion) so we don't 600 * double instrument the instruction. 601 */ 602 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; 603 604 if (qemu_loglevel_mask(CPU_LOG_EXEC)) { 605 vaddr pc = log_pc(cpu, tb); 606 if (qemu_log_in_addr_range(pc)) { 607 qemu_log("cpu_io_recompile: rewound execution of TB to %" 608 VADDR_PRIx "\n", pc); 609 } 610 } 611 612 cpu_loop_exit_noexc(cpu); 613 } 614 615 static void print_qht_statistics(struct qht_stats hst, GString *buf) 616 { 617 uint32_t hgram_opts; 618 size_t hgram_bins; 619 char *hgram; 620 621 if (!hst.head_buckets) { 622 return; 623 } 624 g_string_append_printf(buf, "TB hash buckets %zu/%zu " 625 "(%0.2f%% head buckets used)\n", 626 hst.used_head_buckets, hst.head_buckets, 627 (double)hst.used_head_buckets / 628 hst.head_buckets * 100); 629 630 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 631 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; 632 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { 633 hgram_opts |= QDIST_PR_NODECIMAL; 634 } 635 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); 636 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " 637 "Histogram: %s\n", 638 qdist_avg(&hst.occupancy) * 100, hgram); 639 g_free(hgram); 640 641 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 642 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); 643 if (hgram_bins > 10) { 644 hgram_bins = 10; 645 } else { 646 hgram_bins = 0; 647 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; 648 } 649 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); 650 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " 651 "Histogram: %s\n", 652 qdist_avg(&hst.chain), hgram); 653 g_free(hgram); 654 } 655 656 struct tb_tree_stats { 657 size_t nb_tbs; 658 size_t host_size; 659 size_t target_size; 660 size_t max_target_size; 661 size_t direct_jmp_count; 662 size_t direct_jmp2_count; 663 size_t cross_page; 664 }; 665 666 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) 667 { 668 const TranslationBlock *tb = value; 669 struct tb_tree_stats *tst = data; 670 671 tst->nb_tbs++; 672 tst->host_size += tb->tc.size; 673 tst->target_size += tb->size; 674 if (tb->size > tst->max_target_size) { 675 tst->max_target_size = tb->size; 676 } 677 if (tb_page_addr1(tb) != -1) { 678 tst->cross_page++; 679 } 680 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 681 tst->direct_jmp_count++; 682 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 683 tst->direct_jmp2_count++; 684 } 685 } 686 return false; 687 } 688 689 void dump_exec_info(GString *buf) 690 { 691 struct tb_tree_stats tst = {}; 692 struct qht_stats hst; 693 size_t nb_tbs, flush_full, flush_part, flush_elide; 694 695 tcg_tb_foreach(tb_tree_stats_iter, &tst); 696 nb_tbs = tst.nb_tbs; 697 /* XXX: avoid using doubles ? */ 698 g_string_append_printf(buf, "Translation buffer state:\n"); 699 /* 700 * Report total code size including the padding and TB structs; 701 * otherwise users might think "-accel tcg,tb-size" is not honoured. 702 * For avg host size we use the precise numbers from tb_tree_stats though. 703 */ 704 g_string_append_printf(buf, "gen code size %zu/%zu\n", 705 tcg_code_size(), tcg_code_capacity()); 706 g_string_append_printf(buf, "TB count %zu\n", nb_tbs); 707 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", 708 nb_tbs ? tst.target_size / nb_tbs : 0, 709 tst.max_target_size); 710 g_string_append_printf(buf, "TB avg host size %zu bytes " 711 "(expansion ratio: %0.1f)\n", 712 nb_tbs ? tst.host_size / nb_tbs : 0, 713 tst.target_size ? 714 (double)tst.host_size / tst.target_size : 0); 715 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", 716 tst.cross_page, 717 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); 718 g_string_append_printf(buf, "direct jump count %zu (%zu%%) " 719 "(2 jumps=%zu %zu%%)\n", 720 tst.direct_jmp_count, 721 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, 722 tst.direct_jmp2_count, 723 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); 724 725 qht_statistics_init(&tb_ctx.htable, &hst); 726 print_qht_statistics(hst, buf); 727 qht_statistics_destroy(&hst); 728 729 g_string_append_printf(buf, "\nStatistics:\n"); 730 g_string_append_printf(buf, "TB flush count %u\n", 731 qatomic_read(&tb_ctx.tb_flush_count)); 732 g_string_append_printf(buf, "TB invalidate count %u\n", 733 qatomic_read(&tb_ctx.tb_phys_invalidate_count)); 734 735 tlb_flush_counts(&flush_full, &flush_part, &flush_elide); 736 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); 737 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); 738 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); 739 tcg_dump_info(buf); 740 } 741 742 #else /* CONFIG_USER_ONLY */ 743 744 void cpu_interrupt(CPUState *cpu, int mask) 745 { 746 g_assert(qemu_mutex_iothread_locked()); 747 cpu->interrupt_request |= mask; 748 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); 749 } 750 751 #endif /* CONFIG_USER_ONLY */ 752 753 /* 754 * Called by generic code at e.g. cpu reset after cpu creation, 755 * therefore we must be prepared to allocate the jump cache. 756 */ 757 void tcg_flush_jmp_cache(CPUState *cpu) 758 { 759 CPUJumpCache *jc = cpu->tb_jmp_cache; 760 761 /* During early initialization, the cache may not yet be allocated. */ 762 if (unlikely(jc == NULL)) { 763 return; 764 } 765 766 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 767 qatomic_set(&jc->array[i].tb, NULL); 768 } 769 } 770 771 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 772 void tcg_flush_softmmu_tlb(CPUState *cs) 773 { 774 #ifdef CONFIG_SOFTMMU 775 tlb_flush(cs); 776 #endif 777 } 778