1 /* 2 * Host code generation 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #define NO_CPU_IO_DEFS 23 #include "trace.h" 24 #include "disas/disas.h" 25 #include "exec/exec-all.h" 26 #include "tcg/tcg.h" 27 #if defined(CONFIG_USER_ONLY) 28 #include "qemu.h" 29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 30 #include <sys/param.h> 31 #if __FreeBSD_version >= 700104 32 #define HAVE_KINFO_GETVMMAP 33 #define sigqueue sigqueue_freebsd /* avoid redefinition */ 34 #include <sys/proc.h> 35 #include <machine/profile.h> 36 #define _KERNEL 37 #include <sys/user.h> 38 #undef _KERNEL 39 #undef sigqueue 40 #include <libutil.h> 41 #endif 42 #endif 43 #else 44 #include "exec/ram_addr.h" 45 #endif 46 47 #include "exec/cputlb.h" 48 #include "exec/translate-all.h" 49 #include "exec/translator.h" 50 #include "qemu/bitmap.h" 51 #include "qemu/qemu-print.h" 52 #include "qemu/timer.h" 53 #include "qemu/main-loop.h" 54 #include "qemu/cacheinfo.h" 55 #include "exec/log.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/cpu-timers.h" 58 #include "sysemu/tcg.h" 59 #include "qapi/error.h" 60 #include "hw/core/tcg-cpu-ops.h" 61 #include "tb-jmp-cache.h" 62 #include "tb-hash.h" 63 #include "tb-context.h" 64 #include "internal.h" 65 #include "perf.h" 66 67 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ 68 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > 69 sizeof_field(TranslationBlock, trace_vcpu_dstate) 70 * BITS_PER_BYTE); 71 72 TBContext tb_ctx; 73 74 /* Encode VAL as a signed leb128 sequence at P. 75 Return P incremented past the encoded value. */ 76 static uint8_t *encode_sleb128(uint8_t *p, target_long val) 77 { 78 int more, byte; 79 80 do { 81 byte = val & 0x7f; 82 val >>= 7; 83 more = !((val == 0 && (byte & 0x40) == 0) 84 || (val == -1 && (byte & 0x40) != 0)); 85 if (more) { 86 byte |= 0x80; 87 } 88 *p++ = byte; 89 } while (more); 90 91 return p; 92 } 93 94 /* Decode a signed leb128 sequence at *PP; increment *PP past the 95 decoded value. Return the decoded value. */ 96 static target_long decode_sleb128(const uint8_t **pp) 97 { 98 const uint8_t *p = *pp; 99 target_long val = 0; 100 int byte, shift = 0; 101 102 do { 103 byte = *p++; 104 val |= (target_ulong)(byte & 0x7f) << shift; 105 shift += 7; 106 } while (byte & 0x80); 107 if (shift < TARGET_LONG_BITS && (byte & 0x40)) { 108 val |= -(target_ulong)1 << shift; 109 } 110 111 *pp = p; 112 return val; 113 } 114 115 /* Encode the data collected about the instructions while compiling TB. 116 Place the data at BLOCK, and return the number of bytes consumed. 117 118 The logical table consists of TARGET_INSN_START_WORDS target_ulong's, 119 which come from the target's insn_start data, followed by a uintptr_t 120 which comes from the host pc of the end of the code implementing the insn. 121 122 Each line of the table is encoded as sleb128 deltas from the previous 123 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. 124 That is, the first column is seeded with the guest pc, the last column 125 with the host pc, and the middle columns with zeros. */ 126 127 static int encode_search(TranslationBlock *tb, uint8_t *block) 128 { 129 uint8_t *highwater = tcg_ctx->code_gen_highwater; 130 uint8_t *p = block; 131 int i, j, n; 132 133 for (i = 0, n = tb->icount; i < n; ++i) { 134 target_ulong prev; 135 136 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 137 if (i == 0) { 138 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); 139 } else { 140 prev = tcg_ctx->gen_insn_data[i - 1][j]; 141 } 142 p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); 143 } 144 prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); 145 p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); 146 147 /* Test for (pending) buffer overflow. The assumption is that any 148 one row beginning below the high water mark cannot overrun 149 the buffer completely. Thus we can test for overflow after 150 encoding a row without having to check during encoding. */ 151 if (unlikely(p > highwater)) { 152 return -1; 153 } 154 } 155 156 return p - block; 157 } 158 159 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, 160 uint64_t *data) 161 { 162 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; 163 const uint8_t *p = tb->tc.ptr + tb->tc.size; 164 int i, j, num_insns = tb->icount; 165 166 host_pc -= GETPC_ADJ; 167 168 if (host_pc < iter_pc) { 169 return -1; 170 } 171 172 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); 173 if (!TARGET_TB_PCREL) { 174 data[0] = tb_pc(tb); 175 } 176 177 /* 178 * Reconstruct the stored insn data while looking for the point 179 * at which the end of the insn exceeds host_pc. 180 */ 181 for (i = 0; i < num_insns; ++i) { 182 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 183 data[j] += decode_sleb128(&p); 184 } 185 iter_pc += decode_sleb128(&p); 186 if (iter_pc > host_pc) { 187 return num_insns - i; 188 } 189 } 190 return -1; 191 } 192 193 /* 194 * The cpu state corresponding to 'host_pc' is restored in 195 * preparation for exiting the TB. 196 */ 197 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, 198 uintptr_t host_pc) 199 { 200 uint64_t data[TARGET_INSN_START_WORDS]; 201 #ifdef CONFIG_PROFILER 202 TCGProfile *prof = &tcg_ctx->prof; 203 int64_t ti = profile_getclock(); 204 #endif 205 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); 206 207 if (insns_left < 0) { 208 return; 209 } 210 211 if (tb_cflags(tb) & CF_USE_ICOUNT) { 212 assert(icount_enabled()); 213 /* 214 * Reset the cycle counter to the start of the block and 215 * shift if to the number of actually executed instructions. 216 */ 217 cpu_neg(cpu)->icount_decr.u16.low += insns_left; 218 } 219 220 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); 221 222 #ifdef CONFIG_PROFILER 223 qatomic_set(&prof->restore_time, 224 prof->restore_time + profile_getclock() - ti); 225 qatomic_set(&prof->restore_count, prof->restore_count + 1); 226 #endif 227 } 228 229 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) 230 { 231 /* 232 * The host_pc has to be in the rx region of the code buffer. 233 * If it is not we will not be able to resolve it here. 234 * The two cases where host_pc will not be correct are: 235 * 236 * - fault during translation (instruction fetch) 237 * - fault from helper (not using GETPC() macro) 238 * 239 * Either way we need return early as we can't resolve it here. 240 */ 241 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 242 TranslationBlock *tb = tcg_tb_lookup(host_pc); 243 if (tb) { 244 cpu_restore_state_from_tb(cpu, tb, host_pc); 245 return true; 246 } 247 } 248 return false; 249 } 250 251 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) 252 { 253 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 254 TranslationBlock *tb = tcg_tb_lookup(host_pc); 255 if (tb) { 256 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; 257 } 258 } 259 return false; 260 } 261 262 void page_init(void) 263 { 264 page_size_init(); 265 page_table_config_init(); 266 } 267 268 /* 269 * Isolate the portion of code gen which can setjmp/longjmp. 270 * Return the size of the generated code, or negative on error. 271 */ 272 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, 273 target_ulong pc, void *host_pc, 274 int *max_insns, int64_t *ti) 275 { 276 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); 277 if (unlikely(ret != 0)) { 278 return ret; 279 } 280 281 tcg_func_start(tcg_ctx); 282 283 tcg_ctx->cpu = env_cpu(env); 284 gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc); 285 assert(tb->size != 0); 286 tcg_ctx->cpu = NULL; 287 *max_insns = tb->icount; 288 289 #ifdef CONFIG_PROFILER 290 qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); 291 qatomic_set(&tcg_ctx->prof.interm_time, 292 tcg_ctx->prof.interm_time + profile_getclock() - *ti); 293 *ti = profile_getclock(); 294 #endif 295 296 return tcg_gen_code(tcg_ctx, tb, pc); 297 } 298 299 /* Called with mmap_lock held for user mode emulation. */ 300 TranslationBlock *tb_gen_code(CPUState *cpu, 301 target_ulong pc, target_ulong cs_base, 302 uint32_t flags, int cflags) 303 { 304 CPUArchState *env = cpu->env_ptr; 305 TranslationBlock *tb, *existing_tb; 306 tb_page_addr_t phys_pc; 307 tcg_insn_unit *gen_code_buf; 308 int gen_code_size, search_size, max_insns; 309 #ifdef CONFIG_PROFILER 310 TCGProfile *prof = &tcg_ctx->prof; 311 #endif 312 int64_t ti; 313 void *host_pc; 314 315 assert_memory_lock(); 316 qemu_thread_jit_write(); 317 318 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); 319 320 if (phys_pc == -1) { 321 /* Generate a one-shot TB with 1 insn in it */ 322 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; 323 } 324 325 max_insns = cflags & CF_COUNT_MASK; 326 if (max_insns == 0) { 327 max_insns = TCG_MAX_INSNS; 328 } 329 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); 330 331 buffer_overflow: 332 tb = tcg_tb_alloc(tcg_ctx); 333 if (unlikely(!tb)) { 334 /* flush must be done */ 335 tb_flush(cpu); 336 mmap_unlock(); 337 /* Make the execution loop process the flush as soon as possible. */ 338 cpu->exception_index = EXCP_INTERRUPT; 339 cpu_loop_exit(cpu); 340 } 341 342 gen_code_buf = tcg_ctx->code_gen_ptr; 343 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); 344 #if !TARGET_TB_PCREL 345 tb->pc = pc; 346 #endif 347 tb->cs_base = cs_base; 348 tb->flags = flags; 349 tb->cflags = cflags; 350 tb->trace_vcpu_dstate = *cpu->trace_dstate; 351 tb_set_page_addr0(tb, phys_pc); 352 tb_set_page_addr1(tb, -1); 353 tcg_ctx->gen_tb = tb; 354 tb_overflow: 355 356 #ifdef CONFIG_PROFILER 357 /* includes aborted translations because of exceptions */ 358 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); 359 ti = profile_getclock(); 360 #endif 361 362 trace_translate_block(tb, pc, tb->tc.ptr); 363 364 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); 365 if (unlikely(gen_code_size < 0)) { 366 switch (gen_code_size) { 367 case -1: 368 /* 369 * Overflow of code_gen_buffer, or the current slice of it. 370 * 371 * TODO: We don't need to re-do gen_intermediate_code, nor 372 * should we re-do the tcg optimization currently hidden 373 * inside tcg_gen_code. All that should be required is to 374 * flush the TBs, allocate a new TB, re-initialize it per 375 * above, and re-do the actual code generation. 376 */ 377 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 378 "Restarting code generation for " 379 "code_gen_buffer overflow\n"); 380 goto buffer_overflow; 381 382 case -2: 383 /* 384 * The code generated for the TranslationBlock is too large. 385 * The maximum size allowed by the unwind info is 64k. 386 * There may be stricter constraints from relocations 387 * in the tcg backend. 388 * 389 * Try again with half as many insns as we attempted this time. 390 * If a single insn overflows, there's a bug somewhere... 391 */ 392 assert(max_insns > 1); 393 max_insns /= 2; 394 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 395 "Restarting code generation with " 396 "smaller translation block (max %d insns)\n", 397 max_insns); 398 goto tb_overflow; 399 400 default: 401 g_assert_not_reached(); 402 } 403 } 404 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 405 if (unlikely(search_size < 0)) { 406 goto buffer_overflow; 407 } 408 tb->tc.size = gen_code_size; 409 410 /* 411 * For TARGET_TB_PCREL, attribute all executions of the generated 412 * code to its first mapping. 413 */ 414 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); 415 416 #ifdef CONFIG_PROFILER 417 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); 418 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); 419 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); 420 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); 421 #endif 422 423 #ifdef DEBUG_DISAS 424 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && 425 qemu_log_in_addr_range(pc)) { 426 FILE *logfile = qemu_log_trylock(); 427 if (logfile) { 428 int code_size, data_size; 429 const tcg_target_ulong *rx_data_gen_ptr; 430 size_t chunk_start; 431 int insn = 0; 432 433 if (tcg_ctx->data_gen_ptr) { 434 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); 435 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; 436 data_size = gen_code_size - code_size; 437 } else { 438 rx_data_gen_ptr = 0; 439 code_size = gen_code_size; 440 data_size = 0; 441 } 442 443 /* Dump header and the first instruction */ 444 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); 445 fprintf(logfile, 446 " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n", 447 tcg_ctx->gen_insn_data[insn][0]); 448 chunk_start = tcg_ctx->gen_insn_end_off[insn]; 449 disas(logfile, tb->tc.ptr, chunk_start); 450 451 /* 452 * Dump each instruction chunk, wrapping up empty chunks into 453 * the next instruction. The whole array is offset so the 454 * first entry is the beginning of the 2nd instruction. 455 */ 456 while (insn < tb->icount) { 457 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; 458 if (chunk_end > chunk_start) { 459 fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n", 460 tcg_ctx->gen_insn_data[insn][0]); 461 disas(logfile, tb->tc.ptr + chunk_start, 462 chunk_end - chunk_start); 463 chunk_start = chunk_end; 464 } 465 insn++; 466 } 467 468 if (chunk_start < code_size) { 469 fprintf(logfile, " -- tb slow paths + alignment\n"); 470 disas(logfile, tb->tc.ptr + chunk_start, 471 code_size - chunk_start); 472 } 473 474 /* Finally dump any data we may have after the block */ 475 if (data_size) { 476 int i; 477 fprintf(logfile, " data: [size=%d]\n", data_size); 478 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { 479 if (sizeof(tcg_target_ulong) == 8) { 480 fprintf(logfile, 481 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", 482 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 483 } else if (sizeof(tcg_target_ulong) == 4) { 484 fprintf(logfile, 485 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", 486 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 487 } else { 488 qemu_build_not_reached(); 489 } 490 } 491 } 492 fprintf(logfile, "\n"); 493 qemu_log_unlock(logfile); 494 } 495 } 496 #endif 497 498 qatomic_set(&tcg_ctx->code_gen_ptr, (void *) 499 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, 500 CODE_GEN_ALIGN)); 501 502 /* init jump list */ 503 qemu_spin_init(&tb->jmp_lock); 504 tb->jmp_list_head = (uintptr_t)NULL; 505 tb->jmp_list_next[0] = (uintptr_t)NULL; 506 tb->jmp_list_next[1] = (uintptr_t)NULL; 507 tb->jmp_dest[0] = (uintptr_t)NULL; 508 tb->jmp_dest[1] = (uintptr_t)NULL; 509 510 /* init original jump addresses which have been set during tcg_gen_code() */ 511 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 512 tb_reset_jump(tb, 0); 513 } 514 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 515 tb_reset_jump(tb, 1); 516 } 517 518 /* 519 * If the TB is not associated with a physical RAM page then it must be 520 * a temporary one-insn TB, and we have nothing left to do. Return early 521 * before attempting to link to other TBs or add to the lookup table. 522 */ 523 if (tb_page_addr0(tb) == -1) { 524 return tb; 525 } 526 527 /* 528 * Insert TB into the corresponding region tree before publishing it 529 * through QHT. Otherwise rewinding happened in the TB might fail to 530 * lookup itself using host PC. 531 */ 532 tcg_tb_insert(tb); 533 534 /* 535 * No explicit memory barrier is required -- tb_link_page() makes the 536 * TB visible in a consistent state. 537 */ 538 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); 539 /* if the TB already exists, discard what we just translated */ 540 if (unlikely(existing_tb != tb)) { 541 uintptr_t orig_aligned = (uintptr_t)gen_code_buf; 542 543 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); 544 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); 545 tcg_tb_remove(tb); 546 return existing_tb; 547 } 548 return tb; 549 } 550 551 /* user-mode: call with mmap_lock held */ 552 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) 553 { 554 TranslationBlock *tb; 555 556 assert_memory_lock(); 557 558 tb = tcg_tb_lookup(retaddr); 559 if (tb) { 560 /* We can use retranslation to find the PC. */ 561 cpu_restore_state_from_tb(cpu, tb, retaddr); 562 tb_phys_invalidate(tb, -1); 563 } else { 564 /* The exception probably happened in a helper. The CPU state should 565 have been saved before calling it. Fetch the PC from there. */ 566 CPUArchState *env = cpu->env_ptr; 567 target_ulong pc, cs_base; 568 tb_page_addr_t addr; 569 uint32_t flags; 570 571 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); 572 addr = get_page_addr_code(env, pc); 573 if (addr != -1) { 574 tb_invalidate_phys_range(addr, addr + 1); 575 } 576 } 577 } 578 579 #ifndef CONFIG_USER_ONLY 580 /* 581 * In deterministic execution mode, instructions doing device I/Os 582 * must be at the end of the TB. 583 * 584 * Called by softmmu_template.h, with iothread mutex not held. 585 */ 586 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) 587 { 588 TranslationBlock *tb; 589 CPUClass *cc; 590 uint32_t n; 591 592 tb = tcg_tb_lookup(retaddr); 593 if (!tb) { 594 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", 595 (void *)retaddr); 596 } 597 cpu_restore_state_from_tb(cpu, tb, retaddr); 598 599 /* 600 * Some guests must re-execute the branch when re-executing a delay 601 * slot instruction. When this is the case, adjust icount and N 602 * to account for the re-execution of the branch. 603 */ 604 n = 1; 605 cc = CPU_GET_CLASS(cpu); 606 if (cc->tcg_ops->io_recompile_replay_branch && 607 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { 608 cpu_neg(cpu)->icount_decr.u16.low++; 609 n = 2; 610 } 611 612 /* 613 * Exit the loop and potentially generate a new TB executing the 614 * just the I/O insns. We also limit instrumentation to memory 615 * operations only (which execute after completion) so we don't 616 * double instrument the instruction. 617 */ 618 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; 619 620 if (qemu_loglevel_mask(CPU_LOG_EXEC)) { 621 target_ulong pc = log_pc(cpu, tb); 622 if (qemu_log_in_addr_range(pc)) { 623 qemu_log("cpu_io_recompile: rewound execution of TB to " 624 TARGET_FMT_lx "\n", pc); 625 } 626 } 627 628 cpu_loop_exit_noexc(cpu); 629 } 630 631 static void print_qht_statistics(struct qht_stats hst, GString *buf) 632 { 633 uint32_t hgram_opts; 634 size_t hgram_bins; 635 char *hgram; 636 637 if (!hst.head_buckets) { 638 return; 639 } 640 g_string_append_printf(buf, "TB hash buckets %zu/%zu " 641 "(%0.2f%% head buckets used)\n", 642 hst.used_head_buckets, hst.head_buckets, 643 (double)hst.used_head_buckets / 644 hst.head_buckets * 100); 645 646 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 647 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; 648 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { 649 hgram_opts |= QDIST_PR_NODECIMAL; 650 } 651 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); 652 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " 653 "Histogram: %s\n", 654 qdist_avg(&hst.occupancy) * 100, hgram); 655 g_free(hgram); 656 657 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 658 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); 659 if (hgram_bins > 10) { 660 hgram_bins = 10; 661 } else { 662 hgram_bins = 0; 663 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; 664 } 665 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); 666 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " 667 "Histogram: %s\n", 668 qdist_avg(&hst.chain), hgram); 669 g_free(hgram); 670 } 671 672 struct tb_tree_stats { 673 size_t nb_tbs; 674 size_t host_size; 675 size_t target_size; 676 size_t max_target_size; 677 size_t direct_jmp_count; 678 size_t direct_jmp2_count; 679 size_t cross_page; 680 }; 681 682 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) 683 { 684 const TranslationBlock *tb = value; 685 struct tb_tree_stats *tst = data; 686 687 tst->nb_tbs++; 688 tst->host_size += tb->tc.size; 689 tst->target_size += tb->size; 690 if (tb->size > tst->max_target_size) { 691 tst->max_target_size = tb->size; 692 } 693 if (tb_page_addr1(tb) != -1) { 694 tst->cross_page++; 695 } 696 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { 697 tst->direct_jmp_count++; 698 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { 699 tst->direct_jmp2_count++; 700 } 701 } 702 return false; 703 } 704 705 void dump_exec_info(GString *buf) 706 { 707 struct tb_tree_stats tst = {}; 708 struct qht_stats hst; 709 size_t nb_tbs, flush_full, flush_part, flush_elide; 710 711 tcg_tb_foreach(tb_tree_stats_iter, &tst); 712 nb_tbs = tst.nb_tbs; 713 /* XXX: avoid using doubles ? */ 714 g_string_append_printf(buf, "Translation buffer state:\n"); 715 /* 716 * Report total code size including the padding and TB structs; 717 * otherwise users might think "-accel tcg,tb-size" is not honoured. 718 * For avg host size we use the precise numbers from tb_tree_stats though. 719 */ 720 g_string_append_printf(buf, "gen code size %zu/%zu\n", 721 tcg_code_size(), tcg_code_capacity()); 722 g_string_append_printf(buf, "TB count %zu\n", nb_tbs); 723 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", 724 nb_tbs ? tst.target_size / nb_tbs : 0, 725 tst.max_target_size); 726 g_string_append_printf(buf, "TB avg host size %zu bytes " 727 "(expansion ratio: %0.1f)\n", 728 nb_tbs ? tst.host_size / nb_tbs : 0, 729 tst.target_size ? 730 (double)tst.host_size / tst.target_size : 0); 731 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", 732 tst.cross_page, 733 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); 734 g_string_append_printf(buf, "direct jump count %zu (%zu%%) " 735 "(2 jumps=%zu %zu%%)\n", 736 tst.direct_jmp_count, 737 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, 738 tst.direct_jmp2_count, 739 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); 740 741 qht_statistics_init(&tb_ctx.htable, &hst); 742 print_qht_statistics(hst, buf); 743 qht_statistics_destroy(&hst); 744 745 g_string_append_printf(buf, "\nStatistics:\n"); 746 g_string_append_printf(buf, "TB flush count %u\n", 747 qatomic_read(&tb_ctx.tb_flush_count)); 748 g_string_append_printf(buf, "TB invalidate count %u\n", 749 qatomic_read(&tb_ctx.tb_phys_invalidate_count)); 750 751 tlb_flush_counts(&flush_full, &flush_part, &flush_elide); 752 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); 753 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); 754 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); 755 tcg_dump_info(buf); 756 } 757 758 #else /* CONFIG_USER_ONLY */ 759 760 void cpu_interrupt(CPUState *cpu, int mask) 761 { 762 g_assert(qemu_mutex_iothread_locked()); 763 cpu->interrupt_request |= mask; 764 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); 765 } 766 767 #endif /* CONFIG_USER_ONLY */ 768 769 /* 770 * Called by generic code at e.g. cpu reset after cpu creation, 771 * therefore we must be prepared to allocate the jump cache. 772 */ 773 void tcg_flush_jmp_cache(CPUState *cpu) 774 { 775 CPUJumpCache *jc = cpu->tb_jmp_cache; 776 777 /* During early initialization, the cache may not yet be allocated. */ 778 if (unlikely(jc == NULL)) { 779 return; 780 } 781 782 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 783 qatomic_set(&jc->array[i].tb, NULL); 784 } 785 } 786 787 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 788 void tcg_flush_softmmu_tlb(CPUState *cs) 789 { 790 #ifdef CONFIG_SOFTMMU 791 tlb_flush(cs); 792 #endif 793 } 794