/*
 *  Host code generation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "exec/tb-flush.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"
#include "perf.h"

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

TBContext tb_ctx;

/*
 * Encode VAL as a signed leb128 sequence at P.
 *
 * Seven payload bits are emitted per byte, least-significant group
 * first; bit 7 is set on every byte except the last.  Emission stops
 * once the remaining value is nothing but the sign extension of bit 6
 * of the byte just produced (VAL is 0 with bit 6 clear, or VAL is -1
 * with bit 6 set).  The "VAL == -1" test relies on ">>" of a negative
 * value propagating the sign bit.
 *
 * No bounds checking is done here; the caller must guarantee room at P.
 *
 * Return P incremented past the encoded value.
 */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * Bytes are consumed until one with bit 7 clear is seen.  If that last
 * byte has bit 6 set and fewer than TARGET_LONG_BITS bits have been
 * accumulated, the result is sign-extended from the current shift
 * position upwards.
 */
static target_long decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        /* Accumulate in the unsigned type so the left shift is well defined. */
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.
*/ 127 128 static int encode_search(TranslationBlock *tb, uint8_t *block) 129 { 130 uint8_t *highwater = tcg_ctx->code_gen_highwater; 131 uint8_t *p = block; 132 int i, j, n; 133 134 for (i = 0, n = tb->icount; i < n; ++i) { 135 target_ulong prev; 136 137 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 138 if (i == 0) { 139 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0); 140 } else { 141 prev = tcg_ctx->gen_insn_data[i - 1][j]; 142 } 143 p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); 144 } 145 prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); 146 p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); 147 148 /* Test for (pending) buffer overflow. The assumption is that any 149 one row beginning below the high water mark cannot overrun 150 the buffer completely. Thus we can test for overflow after 151 encoding a row without having to check during encoding. */ 152 if (unlikely(p > highwater)) { 153 return -1; 154 } 155 } 156 157 return p - block; 158 } 159 160 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, 161 uint64_t *data) 162 { 163 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; 164 const uint8_t *p = tb->tc.ptr + tb->tc.size; 165 int i, j, num_insns = tb->icount; 166 167 host_pc -= GETPC_ADJ; 168 169 if (host_pc < iter_pc) { 170 return -1; 171 } 172 173 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); 174 if (!(tb_cflags(tb) & CF_PCREL)) { 175 data[0] = tb->pc; 176 } 177 178 /* 179 * Reconstruct the stored insn data while looking for the point 180 * at which the end of the insn exceeds host_pc. 181 */ 182 for (i = 0; i < num_insns; ++i) { 183 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 184 data[j] += decode_sleb128(&p); 185 } 186 iter_pc += decode_sleb128(&p); 187 if (iter_pc > host_pc) { 188 return num_insns - i; 189 } 190 } 191 return -1; 192 } 193 194 /* 195 * The cpu state corresponding to 'host_pc' is restored in 196 * preparation for exiting the TB. 
197 */ 198 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, 199 uintptr_t host_pc) 200 { 201 uint64_t data[TARGET_INSN_START_WORDS]; 202 #ifdef CONFIG_PROFILER 203 TCGProfile *prof = &tcg_ctx->prof; 204 int64_t ti = profile_getclock(); 205 #endif 206 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); 207 208 if (insns_left < 0) { 209 return; 210 } 211 212 if (tb_cflags(tb) & CF_USE_ICOUNT) { 213 assert(icount_enabled()); 214 /* 215 * Reset the cycle counter to the start of the block and 216 * shift if to the number of actually executed instructions. 217 */ 218 cpu_neg(cpu)->icount_decr.u16.low += insns_left; 219 } 220 221 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); 222 223 #ifdef CONFIG_PROFILER 224 qatomic_set(&prof->restore_time, 225 prof->restore_time + profile_getclock() - ti); 226 qatomic_set(&prof->restore_count, prof->restore_count + 1); 227 #endif 228 } 229 230 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) 231 { 232 /* 233 * The host_pc has to be in the rx region of the code buffer. 234 * If it is not we will not be able to resolve it here. 235 * The two cases where host_pc will not be correct are: 236 * 237 * - fault during translation (instruction fetch) 238 * - fault from helper (not using GETPC() macro) 239 * 240 * Either way we need return early as we can't resolve it here. 
241 */ 242 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 243 TranslationBlock *tb = tcg_tb_lookup(host_pc); 244 if (tb) { 245 cpu_restore_state_from_tb(cpu, tb, host_pc); 246 return true; 247 } 248 } 249 return false; 250 } 251 252 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) 253 { 254 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 255 TranslationBlock *tb = tcg_tb_lookup(host_pc); 256 if (tb) { 257 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; 258 } 259 } 260 return false; 261 } 262 263 void page_init(void) 264 { 265 page_size_init(); 266 page_table_config_init(); 267 } 268 269 /* 270 * Isolate the portion of code gen which can setjmp/longjmp. 271 * Return the size of the generated code, or negative on error. 272 */ 273 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, 274 target_ulong pc, void *host_pc, 275 int *max_insns, int64_t *ti) 276 { 277 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); 278 if (unlikely(ret != 0)) { 279 return ret; 280 } 281 282 tcg_func_start(tcg_ctx); 283 284 tcg_ctx->cpu = env_cpu(env); 285 gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc); 286 assert(tb->size != 0); 287 tcg_ctx->cpu = NULL; 288 *max_insns = tb->icount; 289 290 #ifdef CONFIG_PROFILER 291 qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); 292 qatomic_set(&tcg_ctx->prof.interm_time, 293 tcg_ctx->prof.interm_time + profile_getclock() - *ti); 294 *ti = profile_getclock(); 295 #endif 296 297 return tcg_gen_code(tcg_ctx, tb, pc); 298 } 299 300 /* Called with mmap_lock held for user mode emulation. 
*/ 301 TranslationBlock *tb_gen_code(CPUState *cpu, 302 target_ulong pc, target_ulong cs_base, 303 uint32_t flags, int cflags) 304 { 305 CPUArchState *env = cpu->env_ptr; 306 TranslationBlock *tb, *existing_tb; 307 tb_page_addr_t phys_pc; 308 tcg_insn_unit *gen_code_buf; 309 int gen_code_size, search_size, max_insns; 310 #ifdef CONFIG_PROFILER 311 TCGProfile *prof = &tcg_ctx->prof; 312 #endif 313 int64_t ti; 314 void *host_pc; 315 316 assert_memory_lock(); 317 qemu_thread_jit_write(); 318 319 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); 320 321 if (phys_pc == -1) { 322 /* Generate a one-shot TB with 1 insn in it */ 323 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; 324 } 325 326 max_insns = cflags & CF_COUNT_MASK; 327 if (max_insns == 0) { 328 max_insns = TCG_MAX_INSNS; 329 } 330 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); 331 332 buffer_overflow: 333 tb = tcg_tb_alloc(tcg_ctx); 334 if (unlikely(!tb)) { 335 /* flush must be done */ 336 tb_flush(cpu); 337 mmap_unlock(); 338 /* Make the execution loop process the flush as soon as possible. 
*/ 339 cpu->exception_index = EXCP_INTERRUPT; 340 cpu_loop_exit(cpu); 341 } 342 343 gen_code_buf = tcg_ctx->code_gen_ptr; 344 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); 345 if (!(cflags & CF_PCREL)) { 346 tb->pc = pc; 347 } 348 tb->cs_base = cs_base; 349 tb->flags = flags; 350 tb->cflags = cflags; 351 tb->trace_vcpu_dstate = *cpu->trace_dstate; 352 tb_set_page_addr0(tb, phys_pc); 353 tb_set_page_addr1(tb, -1); 354 tcg_ctx->gen_tb = tb; 355 tb_overflow: 356 357 #ifdef CONFIG_PROFILER 358 /* includes aborted translations because of exceptions */ 359 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); 360 ti = profile_getclock(); 361 #endif 362 363 trace_translate_block(tb, pc, tb->tc.ptr); 364 365 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); 366 if (unlikely(gen_code_size < 0)) { 367 switch (gen_code_size) { 368 case -1: 369 /* 370 * Overflow of code_gen_buffer, or the current slice of it. 371 * 372 * TODO: We don't need to re-do gen_intermediate_code, nor 373 * should we re-do the tcg optimization currently hidden 374 * inside tcg_gen_code. All that should be required is to 375 * flush the TBs, allocate a new TB, re-initialize it per 376 * above, and re-do the actual code generation. 377 */ 378 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 379 "Restarting code generation for " 380 "code_gen_buffer overflow\n"); 381 goto buffer_overflow; 382 383 case -2: 384 /* 385 * The code generated for the TranslationBlock is too large. 386 * The maximum size allowed by the unwind info is 64k. 387 * There may be stricter constraints from relocations 388 * in the tcg backend. 389 * 390 * Try again with half as many insns as we attempted this time. 391 * If a single insn overflows, there's a bug somewhere... 
392 */ 393 assert(max_insns > 1); 394 max_insns /= 2; 395 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 396 "Restarting code generation with " 397 "smaller translation block (max %d insns)\n", 398 max_insns); 399 goto tb_overflow; 400 401 default: 402 g_assert_not_reached(); 403 } 404 } 405 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 406 if (unlikely(search_size < 0)) { 407 goto buffer_overflow; 408 } 409 tb->tc.size = gen_code_size; 410 411 /* 412 * For CF_PCREL, attribute all executions of the generated code 413 * to its first mapping. 414 */ 415 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); 416 417 #ifdef CONFIG_PROFILER 418 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); 419 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); 420 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); 421 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); 422 #endif 423 424 #ifdef DEBUG_DISAS 425 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && 426 qemu_log_in_addr_range(pc)) { 427 FILE *logfile = qemu_log_trylock(); 428 if (logfile) { 429 int code_size, data_size; 430 const tcg_target_ulong *rx_data_gen_ptr; 431 size_t chunk_start; 432 int insn = 0; 433 434 if (tcg_ctx->data_gen_ptr) { 435 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); 436 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; 437 data_size = gen_code_size - code_size; 438 } else { 439 rx_data_gen_ptr = 0; 440 code_size = gen_code_size; 441 data_size = 0; 442 } 443 444 /* Dump header and the first instruction */ 445 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); 446 fprintf(logfile, 447 " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n", 448 tcg_ctx->gen_insn_data[insn][0]); 449 chunk_start = tcg_ctx->gen_insn_end_off[insn]; 450 disas(logfile, tb->tc.ptr, chunk_start); 451 452 /* 453 * Dump each instruction chunk, wrapping up empty chunks into 454 * the next instruction. 
The whole array is offset so the 455 * first entry is the beginning of the 2nd instruction. 456 */ 457 while (insn < tb->icount) { 458 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; 459 if (chunk_end > chunk_start) { 460 fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n", 461 tcg_ctx->gen_insn_data[insn][0]); 462 disas(logfile, tb->tc.ptr + chunk_start, 463 chunk_end - chunk_start); 464 chunk_start = chunk_end; 465 } 466 insn++; 467 } 468 469 if (chunk_start < code_size) { 470 fprintf(logfile, " -- tb slow paths + alignment\n"); 471 disas(logfile, tb->tc.ptr + chunk_start, 472 code_size - chunk_start); 473 } 474 475 /* Finally dump any data we may have after the block */ 476 if (data_size) { 477 int i; 478 fprintf(logfile, " data: [size=%d]\n", data_size); 479 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { 480 if (sizeof(tcg_target_ulong) == 8) { 481 fprintf(logfile, 482 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", 483 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 484 } else if (sizeof(tcg_target_ulong) == 4) { 485 fprintf(logfile, 486 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", 487 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 488 } else { 489 qemu_build_not_reached(); 490 } 491 } 492 } 493 fprintf(logfile, "\n"); 494 qemu_log_unlock(logfile); 495 } 496 } 497 #endif 498 499 qatomic_set(&tcg_ctx->code_gen_ptr, (void *) 500 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, 501 CODE_GEN_ALIGN)); 502 503 /* init jump list */ 504 qemu_spin_init(&tb->jmp_lock); 505 tb->jmp_list_head = (uintptr_t)NULL; 506 tb->jmp_list_next[0] = (uintptr_t)NULL; 507 tb->jmp_list_next[1] = (uintptr_t)NULL; 508 tb->jmp_dest[0] = (uintptr_t)NULL; 509 tb->jmp_dest[1] = (uintptr_t)NULL; 510 511 /* init original jump addresses which have been set during tcg_gen_code() */ 512 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 513 tb_reset_jump(tb, 0); 514 } 515 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 516 
tb_reset_jump(tb, 1); 517 } 518 519 /* 520 * If the TB is not associated with a physical RAM page then it must be 521 * a temporary one-insn TB, and we have nothing left to do. Return early 522 * before attempting to link to other TBs or add to the lookup table. 523 */ 524 if (tb_page_addr0(tb) == -1) { 525 return tb; 526 } 527 528 /* 529 * Insert TB into the corresponding region tree before publishing it 530 * through QHT. Otherwise rewinding happened in the TB might fail to 531 * lookup itself using host PC. 532 */ 533 tcg_tb_insert(tb); 534 535 /* 536 * No explicit memory barrier is required -- tb_link_page() makes the 537 * TB visible in a consistent state. 538 */ 539 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); 540 /* if the TB already exists, discard what we just translated */ 541 if (unlikely(existing_tb != tb)) { 542 uintptr_t orig_aligned = (uintptr_t)gen_code_buf; 543 544 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); 545 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); 546 tcg_tb_remove(tb); 547 return existing_tb; 548 } 549 return tb; 550 } 551 552 /* user-mode: call with mmap_lock held */ 553 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) 554 { 555 TranslationBlock *tb; 556 557 assert_memory_lock(); 558 559 tb = tcg_tb_lookup(retaddr); 560 if (tb) { 561 /* We can use retranslation to find the PC. */ 562 cpu_restore_state_from_tb(cpu, tb, retaddr); 563 tb_phys_invalidate(tb, -1); 564 } else { 565 /* The exception probably happened in a helper. The CPU state should 566 have been saved before calling it. Fetch the PC from there. 
*/ 567 CPUArchState *env = cpu->env_ptr; 568 target_ulong pc, cs_base; 569 tb_page_addr_t addr; 570 uint32_t flags; 571 572 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); 573 addr = get_page_addr_code(env, pc); 574 if (addr != -1) { 575 tb_invalidate_phys_range(addr, addr); 576 } 577 } 578 } 579 580 #ifndef CONFIG_USER_ONLY 581 /* 582 * In deterministic execution mode, instructions doing device I/Os 583 * must be at the end of the TB. 584 * 585 * Called by softmmu_template.h, with iothread mutex not held. 586 */ 587 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) 588 { 589 TranslationBlock *tb; 590 CPUClass *cc; 591 uint32_t n; 592 593 tb = tcg_tb_lookup(retaddr); 594 if (!tb) { 595 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", 596 (void *)retaddr); 597 } 598 cpu_restore_state_from_tb(cpu, tb, retaddr); 599 600 /* 601 * Some guests must re-execute the branch when re-executing a delay 602 * slot instruction. When this is the case, adjust icount and N 603 * to account for the re-execution of the branch. 604 */ 605 n = 1; 606 cc = CPU_GET_CLASS(cpu); 607 if (cc->tcg_ops->io_recompile_replay_branch && 608 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { 609 cpu_neg(cpu)->icount_decr.u16.low++; 610 n = 2; 611 } 612 613 /* 614 * Exit the loop and potentially generate a new TB executing the 615 * just the I/O insns. We also limit instrumentation to memory 616 * operations only (which execute after completion) so we don't 617 * double instrument the instruction. 
618 */ 619 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; 620 621 if (qemu_loglevel_mask(CPU_LOG_EXEC)) { 622 target_ulong pc = log_pc(cpu, tb); 623 if (qemu_log_in_addr_range(pc)) { 624 qemu_log("cpu_io_recompile: rewound execution of TB to " 625 TARGET_FMT_lx "\n", pc); 626 } 627 } 628 629 cpu_loop_exit_noexc(cpu); 630 } 631 632 static void print_qht_statistics(struct qht_stats hst, GString *buf) 633 { 634 uint32_t hgram_opts; 635 size_t hgram_bins; 636 char *hgram; 637 638 if (!hst.head_buckets) { 639 return; 640 } 641 g_string_append_printf(buf, "TB hash buckets %zu/%zu " 642 "(%0.2f%% head buckets used)\n", 643 hst.used_head_buckets, hst.head_buckets, 644 (double)hst.used_head_buckets / 645 hst.head_buckets * 100); 646 647 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 648 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; 649 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { 650 hgram_opts |= QDIST_PR_NODECIMAL; 651 } 652 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); 653 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " 654 "Histogram: %s\n", 655 qdist_avg(&hst.occupancy) * 100, hgram); 656 g_free(hgram); 657 658 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 659 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); 660 if (hgram_bins > 10) { 661 hgram_bins = 10; 662 } else { 663 hgram_bins = 0; 664 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; 665 } 666 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); 667 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. 
" 668 "Histogram: %s\n", 669 qdist_avg(&hst.chain), hgram); 670 g_free(hgram); 671 } 672 673 struct tb_tree_stats { 674 size_t nb_tbs; 675 size_t host_size; 676 size_t target_size; 677 size_t max_target_size; 678 size_t direct_jmp_count; 679 size_t direct_jmp2_count; 680 size_t cross_page; 681 }; 682 683 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) 684 { 685 const TranslationBlock *tb = value; 686 struct tb_tree_stats *tst = data; 687 688 tst->nb_tbs++; 689 tst->host_size += tb->tc.size; 690 tst->target_size += tb->size; 691 if (tb->size > tst->max_target_size) { 692 tst->max_target_size = tb->size; 693 } 694 if (tb_page_addr1(tb) != -1) { 695 tst->cross_page++; 696 } 697 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 698 tst->direct_jmp_count++; 699 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 700 tst->direct_jmp2_count++; 701 } 702 } 703 return false; 704 } 705 706 void dump_exec_info(GString *buf) 707 { 708 struct tb_tree_stats tst = {}; 709 struct qht_stats hst; 710 size_t nb_tbs, flush_full, flush_part, flush_elide; 711 712 tcg_tb_foreach(tb_tree_stats_iter, &tst); 713 nb_tbs = tst.nb_tbs; 714 /* XXX: avoid using doubles ? */ 715 g_string_append_printf(buf, "Translation buffer state:\n"); 716 /* 717 * Report total code size including the padding and TB structs; 718 * otherwise users might think "-accel tcg,tb-size" is not honoured. 719 * For avg host size we use the precise numbers from tb_tree_stats though. 720 */ 721 g_string_append_printf(buf, "gen code size %zu/%zu\n", 722 tcg_code_size(), tcg_code_capacity()); 723 g_string_append_printf(buf, "TB count %zu\n", nb_tbs); 724 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", 725 nb_tbs ? tst.target_size / nb_tbs : 0, 726 tst.max_target_size); 727 g_string_append_printf(buf, "TB avg host size %zu bytes " 728 "(expansion ratio: %0.1f)\n", 729 nb_tbs ? tst.host_size / nb_tbs : 0, 730 tst.target_size ? 
731 (double)tst.host_size / tst.target_size : 0); 732 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", 733 tst.cross_page, 734 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); 735 g_string_append_printf(buf, "direct jump count %zu (%zu%%) " 736 "(2 jumps=%zu %zu%%)\n", 737 tst.direct_jmp_count, 738 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, 739 tst.direct_jmp2_count, 740 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); 741 742 qht_statistics_init(&tb_ctx.htable, &hst); 743 print_qht_statistics(hst, buf); 744 qht_statistics_destroy(&hst); 745 746 g_string_append_printf(buf, "\nStatistics:\n"); 747 g_string_append_printf(buf, "TB flush count %u\n", 748 qatomic_read(&tb_ctx.tb_flush_count)); 749 g_string_append_printf(buf, "TB invalidate count %u\n", 750 qatomic_read(&tb_ctx.tb_phys_invalidate_count)); 751 752 tlb_flush_counts(&flush_full, &flush_part, &flush_elide); 753 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); 754 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); 755 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); 756 tcg_dump_info(buf); 757 } 758 759 #else /* CONFIG_USER_ONLY */ 760 761 void cpu_interrupt(CPUState *cpu, int mask) 762 { 763 g_assert(qemu_mutex_iothread_locked()); 764 cpu->interrupt_request |= mask; 765 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); 766 } 767 768 #endif /* CONFIG_USER_ONLY */ 769 770 /* 771 * Called by generic code at e.g. cpu reset after cpu creation, 772 * therefore we must be prepared to allocate the jump cache. 773 */ 774 void tcg_flush_jmp_cache(CPUState *cpu) 775 { 776 CPUJumpCache *jc = cpu->tb_jmp_cache; 777 778 /* During early initialization, the cache may not yet be allocated. 
*/ 779 if (unlikely(jc == NULL)) { 780 return; 781 } 782 783 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 784 qatomic_set(&jc->array[i].tb, NULL); 785 } 786 } 787 788 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 789 void tcg_flush_softmmu_tlb(CPUState *cs) 790 { 791 #ifdef CONFIG_SOFTMMU 792 tlb_flush(cs); 793 #endif 794 } 795