/*
 * Host code generation
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "exec/tb-flush.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"
#include "perf.h"

TBContext tb_ctx;

/*
 * Encode VAL as a signed leb128 sequence at P.
 * Return P incremented past the encoded value.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(int64_t)1 << shift;
    }

    *pp = p;
    return val;
}
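/*
 * Worked example (illustrative only; these bytes appear nowhere in the
 * code): 300 (0b1_0010_1100) encodes as { 0xac, 0x02 } -- the low seven
 * bits 0x2c with the continuation bit set, then 0x02 with bit 6 clear,
 * so decode_sleb128() applies no sign extension.  Similarly, -123
 * encodes as { 0x85, 0x7f }; the final byte has bit 6 set, so the 14
 * accumulated bits are sign-extended back to -123.
 */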
/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        uint64_t prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}
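/*
 * Illustrative example (all values invented): assuming
 * TARGET_INSN_START_WORDS == 2 and a non-CF_PCREL TB at guest pc 0x1000
 * whose two insns end at host code offsets 0x20 and 0x38, the logical
 * table is
 *
 *     { 0x1000, 0, tb->tc.ptr + 0x20 }
 *     { 0x1004, 0, tb->tc.ptr + 0x38 }
 *
 * and what is actually stored is the row of sleb128 deltas { 0, 0, 0x20 }
 * against the seed, followed by { 4, 0, 0x18 } against the first row.
 * cpu_unwind_data_from_tb() below reverses this encoding.
 */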
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!(tb_cflags(tb) & CF_PCREL)) {
        data[0] = tb->pc;
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}
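/*
 * Typical use (sketch only; helper_example() is a made-up name): a
 * runtime helper that needs a synchronized guest state before raising
 * an exception passes its host return address so the unwind data can
 * be recovered:
 *
 *     void helper_example(CPUArchState *env)
 *     {
 *         if (fault_condition) {
 *             cpu_restore_state(env_cpu(env), GETPC());
 *             ... raise the guest exception ...
 *         }
 *     }
 *
 * GETPC() must be taken in the outermost helper called directly from
 * generated code, otherwise the lookup above will not find the TB.
 */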
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}

/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}
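/*
 * Note on the error protocol (summarizing behaviour that lives in the
 * TCG core, tcg/tcg.c): a negative result either comes straight back
 * from tcg_gen_code() or is delivered by a siglongjmp() to
 * tcg_ctx->jmp_trans taken while translating.  tb_gen_code() below maps
 * -1 to "restart in a fresh slice of code_gen_buffer" and -2 to "retry
 * with fewer guest instructions".
 */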
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible. */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    if (!(cflags & CF_PCREL)) {
        tb->pc = pc;
    }
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->gen_tb = tb;
    tcg_ctx->addr_type = TCG_TYPE_TL;
#ifdef CONFIG_SOFTMMU
    tcg_ctx->page_bits = TARGET_PAGE_BITS;
    tcg_ctx->page_mask = TARGET_PAGE_MASK;
    tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif

 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

    /*
     * For CF_PCREL, attribute all executions of the generated code
     * to its first mapping.
     */
    perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction.  The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do.  Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise an unwind that happens inside the TB might
     * fail to look it up by host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}
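/*
 * Sketch of the caller side (simplified; the real code lives in the
 * cpu_exec() path): when the TB lookup misses, the execution loop
 * translates a new block roughly as
 *
 *     mmap_lock();
 *     tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 *     mmap_unlock();
 *
 * where mmap_lock()/mmap_unlock() are no-ops for system emulation.
 */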
/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC. */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it.  Fetch the PC from there. */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}
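/*
 * Note (behaviour assumed from the execution loop): cflags_next_tb set
 * above is consumed when the next TB is looked up or generated, so the
 * rewound instruction is re-translated into a one- or two-insn block
 * that ends with the I/O access, after which normal translation resumes.
 */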
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}
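/*
 * The summary above backs the "info jit" monitor command.  Output shape
 * (values invented, for illustration of the format only):
 *
 *     Translation buffer state:
 *     gen code size 13631488/134213632
 *     TB count 30000
 *     TB avg target size 20 max=1024 bytes
 *     TB avg host size 300 bytes (expansion ratio: 15.0)
 *     ...
 */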
#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    /* During early initialization, the cache may not yet be allocated. */
    if (unlikely(jc == NULL)) {
        return;
    }

    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
        qatomic_set(&jc->array[i].tb, NULL);
    }
}

/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}