/*
 * Host code generation
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "exec/tb-flush.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"
#include "perf.h"

TBContext tb_ctx;

/*
 * Encode VAL as a signed leb128 sequence at P.
 * Return P incremented past the encoded value.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(int64_t)1 << shift;
    }

    *pp = p;
    return val;
}

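/*
 * Worked example of the sleb128 encoding above: a delta of 300 becomes
 * the two bytes 0xac 0x02 (low seven bits 0x2c with the continuation bit
 * set, then 0x02), while a delta of -3 fits in the single byte 0x7d
 * because bit 6 already carries the sign.  decode_sleb128() reverses
 * this byte-for-byte.
 */
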
/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        uint64_t prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

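/*
 * Illustrative sketch of the stream produced above, assuming a
 * non-CF_PCREL TB at guest pc 0x1000 holding two 4-byte insns
 * (TARGET_INSN_START_WORDS == 1) whose host code ends at offsets 0x20
 * and 0x38 from tb->tc.ptr:
 *
 *   row 0: sleb128(0x1000 - tb->pc) = 0,  sleb128(0x20 - 0)    = 0x20
 *   row 1: sleb128(0x1004 - 0x1000) = 4,  sleb128(0x38 - 0x20) = 0x18
 *
 * cpu_unwind_data_from_tb() below simply accumulates these deltas row by
 * row until the reconstructed host pc passes the one being looked up.
 */
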
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!(tb_cflags(tb) & CF_PCREL)) {
        data[0] = tb->pc;
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not, we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early, as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}

void page_init(void)
{
    page_size_init();
    page_table_config_init();
}

/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}

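/*
 * A negative value from setjmp_gen_code() arrives either directly from
 * tcg_gen_code() or via a siglongjmp() out of code generation onto
 * tcg_ctx->jmp_trans; tb_gen_code() below turns it into a retry
 * (-1: overflow of code_gen_buffer, -2: generated code too large).
 */
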
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible. */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    if (!(cflags & CF_PCREL)) {
        tb->pc = pc;
    }
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->gen_tb = tb;
    tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
#ifdef CONFIG_SOFTMMU
    tcg_ctx->page_bits = TARGET_PAGE_BITS;
    tcg_ctx->page_mask = TARGET_PAGE_MASK;
    tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
    tcg_ctx->tlb_fast_offset =
        (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
#endif

 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

    /*
     * For CF_PCREL, attribute all executions of the generated code
     * to its first mapping.
     */
    perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction.  The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do.  Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise, a rewind happening inside the TB might fail
     * to look the TB up using the host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}

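/*
 * Rough sketch of what a successful tb_gen_code() leaves behind in the
 * code generation buffer, as implied by the allocation and rollback
 * arithmetic above:
 *
 *   [TranslationBlock, padded to qemu_icache_linesize]
 *   [host code: tb->tc.size bytes]
 *   [sleb128 search data written by encode_search()]
 *   ... next code_gen_ptr, rounded up to CODE_GEN_ALIGN ...
 */
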
/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC.  */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it.  Fetch the PC from there.  */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    /* During early initialization, the cache may not yet be allocated. */
    if (unlikely(jc == NULL)) {
        return;
    }

    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
        qatomic_set(&jc->array[i].tb, NULL);
    }
}

/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}