1 /* 2 * Host code generation 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #define NO_CPU_IO_DEFS 23 #include "trace.h" 24 #include "disas/disas.h" 25 #include "exec/exec-all.h" 26 #include "tcg/tcg.h" 27 #if defined(CONFIG_USER_ONLY) 28 #include "qemu.h" 29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 30 #include <sys/param.h> 31 #if __FreeBSD_version >= 700104 32 #define HAVE_KINFO_GETVMMAP 33 #define sigqueue sigqueue_freebsd /* avoid redefinition */ 34 #include <sys/proc.h> 35 #include <machine/profile.h> 36 #define _KERNEL 37 #include <sys/user.h> 38 #undef _KERNEL 39 #undef sigqueue 40 #include <libutil.h> 41 #endif 42 #endif 43 #else 44 #include "exec/ram_addr.h" 45 #endif 46 47 #include "exec/cputlb.h" 48 #include "exec/translate-all.h" 49 #include "exec/translator.h" 50 #include "qemu/bitmap.h" 51 #include "qemu/qemu-print.h" 52 #include "qemu/timer.h" 53 #include "qemu/main-loop.h" 54 #include "qemu/cacheinfo.h" 55 #include "exec/log.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/cpu-timers.h" 58 #include "sysemu/tcg.h" 59 #include "qapi/error.h" 60 #include "hw/core/tcg-cpu-ops.h" 61 #include "tb-jmp-cache.h" 62 #include "tb-hash.h" 63 #include "tb-context.h" 64 #include "internal.h" 65 66 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ 67 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > 68 sizeof_field(TranslationBlock, trace_vcpu_dstate) 69 * BITS_PER_BYTE); 70 71 TBContext tb_ctx; 72 73 /* Encode VAL as a signed leb128 sequence at P. 74 Return P incremented past the encoded value. */ 75 static uint8_t *encode_sleb128(uint8_t *p, target_long val) 76 { 77 int more, byte; 78 79 do { 80 byte = val & 0x7f; 81 val >>= 7; 82 more = !((val == 0 && (byte & 0x40) == 0) 83 || (val == -1 && (byte & 0x40) != 0)); 84 if (more) { 85 byte |= 0x80; 86 } 87 *p++ = byte; 88 } while (more); 89 90 return p; 91 } 92 93 /* Decode a signed leb128 sequence at *PP; increment *PP past the 94 decoded value. Return the decoded value. */ 95 static target_long decode_sleb128(const uint8_t **pp) 96 { 97 const uint8_t *p = *pp; 98 target_long val = 0; 99 int byte, shift = 0; 100 101 do { 102 byte = *p++; 103 val |= (target_ulong)(byte & 0x7f) << shift; 104 shift += 7; 105 } while (byte & 0x80); 106 if (shift < TARGET_LONG_BITS && (byte & 0x40)) { 107 val |= -(target_ulong)1 << shift; 108 } 109 110 *pp = p; 111 return val; 112 } 113 114 /* Encode the data collected about the instructions while compiling TB. 115 Place the data at BLOCK, and return the number of bytes consumed. 116 117 The logical table consists of TARGET_INSN_START_WORDS target_ulong's, 118 which come from the target's insn_start data, followed by a uintptr_t 119 which comes from the host pc of the end of the code implementing the insn. 120 121 Each line of the table is encoded as sleb128 deltas from the previous 122 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. 123 That is, the first column is seeded with the guest pc, the last column 124 with the host pc, and the middle columns with zeros. */ 125 126 static int encode_search(TranslationBlock *tb, uint8_t *block) 127 { 128 uint8_t *highwater = tcg_ctx->code_gen_highwater; 129 uint8_t *p = block; 130 int i, j, n; 131 132 for (i = 0, n = tb->icount; i < n; ++i) { 133 target_ulong prev; 134 135 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 136 if (i == 0) { 137 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); 138 } else { 139 prev = tcg_ctx->gen_insn_data[i - 1][j]; 140 } 141 p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); 142 } 143 prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); 144 p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); 145 146 /* Test for (pending) buffer overflow. The assumption is that any 147 one row beginning below the high water mark cannot overrun 148 the buffer completely. Thus we can test for overflow after 149 encoding a row without having to check during encoding. */ 150 if (unlikely(p > highwater)) { 151 return -1; 152 } 153 } 154 155 return p - block; 156 } 157 158 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, 159 uint64_t *data) 160 { 161 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; 162 const uint8_t *p = tb->tc.ptr + tb->tc.size; 163 int i, j, num_insns = tb->icount; 164 165 host_pc -= GETPC_ADJ; 166 167 if (host_pc < iter_pc) { 168 return -1; 169 } 170 171 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); 172 if (!TARGET_TB_PCREL) { 173 data[0] = tb_pc(tb); 174 } 175 176 /* 177 * Reconstruct the stored insn data while looking for the point 178 * at which the end of the insn exceeds host_pc. 179 */ 180 for (i = 0; i < num_insns; ++i) { 181 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 182 data[j] += decode_sleb128(&p); 183 } 184 iter_pc += decode_sleb128(&p); 185 if (iter_pc > host_pc) { 186 return num_insns - i; 187 } 188 } 189 return -1; 190 } 191 192 /* 193 * The cpu state corresponding to 'host_pc' is restored in 194 * preparation for exiting the TB. 195 */ 196 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, 197 uintptr_t host_pc) 198 { 199 uint64_t data[TARGET_INSN_START_WORDS]; 200 #ifdef CONFIG_PROFILER 201 TCGProfile *prof = &tcg_ctx->prof; 202 int64_t ti = profile_getclock(); 203 #endif 204 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); 205 206 if (insns_left < 0) { 207 return; 208 } 209 210 if (tb_cflags(tb) & CF_USE_ICOUNT) { 211 assert(icount_enabled()); 212 /* 213 * Reset the cycle counter to the start of the block and 214 * shift if to the number of actually executed instructions. 215 */ 216 cpu_neg(cpu)->icount_decr.u16.low += insns_left; 217 } 218 219 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); 220 221 #ifdef CONFIG_PROFILER 222 qatomic_set(&prof->restore_time, 223 prof->restore_time + profile_getclock() - ti); 224 qatomic_set(&prof->restore_count, prof->restore_count + 1); 225 #endif 226 } 227 228 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) 229 { 230 /* 231 * The host_pc has to be in the rx region of the code buffer. 232 * If it is not we will not be able to resolve it here. 233 * The two cases where host_pc will not be correct are: 234 * 235 * - fault during translation (instruction fetch) 236 * - fault from helper (not using GETPC() macro) 237 * 238 * Either way we need return early as we can't resolve it here. 239 */ 240 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 241 TranslationBlock *tb = tcg_tb_lookup(host_pc); 242 if (tb) { 243 cpu_restore_state_from_tb(cpu, tb, host_pc); 244 return true; 245 } 246 } 247 return false; 248 } 249 250 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) 251 { 252 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 253 TranslationBlock *tb = tcg_tb_lookup(host_pc); 254 if (tb) { 255 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; 256 } 257 } 258 return false; 259 } 260 261 void page_init(void) 262 { 263 page_size_init(); 264 page_table_config_init(); 265 } 266 267 /* 268 * Isolate the portion of code gen which can setjmp/longjmp. 269 * Return the size of the generated code, or negative on error. 270 */ 271 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, 272 target_ulong pc, void *host_pc, 273 int *max_insns, int64_t *ti) 274 { 275 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); 276 if (unlikely(ret != 0)) { 277 return ret; 278 } 279 280 tcg_func_start(tcg_ctx); 281 282 tcg_ctx->cpu = env_cpu(env); 283 gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc); 284 assert(tb->size != 0); 285 tcg_ctx->cpu = NULL; 286 *max_insns = tb->icount; 287 288 #ifdef CONFIG_PROFILER 289 qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); 290 qatomic_set(&tcg_ctx->prof.interm_time, 291 tcg_ctx->prof.interm_time + profile_getclock() - *ti); 292 *ti = profile_getclock(); 293 #endif 294 295 return tcg_gen_code(tcg_ctx, tb, pc); 296 } 297 298 /* Called with mmap_lock held for user mode emulation. */ 299 TranslationBlock *tb_gen_code(CPUState *cpu, 300 target_ulong pc, target_ulong cs_base, 301 uint32_t flags, int cflags) 302 { 303 CPUArchState *env = cpu->env_ptr; 304 TranslationBlock *tb, *existing_tb; 305 tb_page_addr_t phys_pc; 306 tcg_insn_unit *gen_code_buf; 307 int gen_code_size, search_size, max_insns; 308 #ifdef CONFIG_PROFILER 309 TCGProfile *prof = &tcg_ctx->prof; 310 #endif 311 int64_t ti; 312 void *host_pc; 313 314 assert_memory_lock(); 315 qemu_thread_jit_write(); 316 317 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); 318 319 if (phys_pc == -1) { 320 /* Generate a one-shot TB with 1 insn in it */ 321 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; 322 } 323 324 max_insns = cflags & CF_COUNT_MASK; 325 if (max_insns == 0) { 326 max_insns = TCG_MAX_INSNS; 327 } 328 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); 329 330 buffer_overflow: 331 tb = tcg_tb_alloc(tcg_ctx); 332 if (unlikely(!tb)) { 333 /* flush must be done */ 334 tb_flush(cpu); 335 mmap_unlock(); 336 /* Make the execution loop process the flush as soon as possible. */ 337 cpu->exception_index = EXCP_INTERRUPT; 338 cpu_loop_exit(cpu); 339 } 340 341 gen_code_buf = tcg_ctx->code_gen_ptr; 342 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); 343 #if !TARGET_TB_PCREL 344 tb->pc = pc; 345 #endif 346 tb->cs_base = cs_base; 347 tb->flags = flags; 348 tb->cflags = cflags; 349 tb->trace_vcpu_dstate = *cpu->trace_dstate; 350 tb_set_page_addr0(tb, phys_pc); 351 tb_set_page_addr1(tb, -1); 352 tcg_ctx->tb_cflags = cflags; 353 tb_overflow: 354 355 #ifdef CONFIG_PROFILER 356 /* includes aborted translations because of exceptions */ 357 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); 358 ti = profile_getclock(); 359 #endif 360 361 trace_translate_block(tb, pc, tb->tc.ptr); 362 363 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); 364 if (unlikely(gen_code_size < 0)) { 365 switch (gen_code_size) { 366 case -1: 367 /* 368 * Overflow of code_gen_buffer, or the current slice of it. 369 * 370 * TODO: We don't need to re-do gen_intermediate_code, nor 371 * should we re-do the tcg optimization currently hidden 372 * inside tcg_gen_code. All that should be required is to 373 * flush the TBs, allocate a new TB, re-initialize it per 374 * above, and re-do the actual code generation. 375 */ 376 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 377 "Restarting code generation for " 378 "code_gen_buffer overflow\n"); 379 goto buffer_overflow; 380 381 case -2: 382 /* 383 * The code generated for the TranslationBlock is too large. 384 * The maximum size allowed by the unwind info is 64k. 385 * There may be stricter constraints from relocations 386 * in the tcg backend. 387 * 388 * Try again with half as many insns as we attempted this time. 389 * If a single insn overflows, there's a bug somewhere... 390 */ 391 assert(max_insns > 1); 392 max_insns /= 2; 393 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 394 "Restarting code generation with " 395 "smaller translation block (max %d insns)\n", 396 max_insns); 397 goto tb_overflow; 398 399 default: 400 g_assert_not_reached(); 401 } 402 } 403 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 404 if (unlikely(search_size < 0)) { 405 goto buffer_overflow; 406 } 407 tb->tc.size = gen_code_size; 408 409 #ifdef CONFIG_PROFILER 410 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); 411 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); 412 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); 413 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); 414 #endif 415 416 #ifdef DEBUG_DISAS 417 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && 418 qemu_log_in_addr_range(pc)) { 419 FILE *logfile = qemu_log_trylock(); 420 if (logfile) { 421 int code_size, data_size; 422 const tcg_target_ulong *rx_data_gen_ptr; 423 size_t chunk_start; 424 int insn = 0; 425 426 if (tcg_ctx->data_gen_ptr) { 427 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); 428 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; 429 data_size = gen_code_size - code_size; 430 } else { 431 rx_data_gen_ptr = 0; 432 code_size = gen_code_size; 433 data_size = 0; 434 } 435 436 /* Dump header and the first instruction */ 437 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); 438 fprintf(logfile, 439 " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n", 440 tcg_ctx->gen_insn_data[insn][0]); 441 chunk_start = tcg_ctx->gen_insn_end_off[insn]; 442 disas(logfile, tb->tc.ptr, chunk_start); 443 444 /* 445 * Dump each instruction chunk, wrapping up empty chunks into 446 * the next instruction. The whole array is offset so the 447 * first entry is the beginning of the 2nd instruction. 448 */ 449 while (insn < tb->icount) { 450 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; 451 if (chunk_end > chunk_start) { 452 fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n", 453 tcg_ctx->gen_insn_data[insn][0]); 454 disas(logfile, tb->tc.ptr + chunk_start, 455 chunk_end - chunk_start); 456 chunk_start = chunk_end; 457 } 458 insn++; 459 } 460 461 if (chunk_start < code_size) { 462 fprintf(logfile, " -- tb slow paths + alignment\n"); 463 disas(logfile, tb->tc.ptr + chunk_start, 464 code_size - chunk_start); 465 } 466 467 /* Finally dump any data we may have after the block */ 468 if (data_size) { 469 int i; 470 fprintf(logfile, " data: [size=%d]\n", data_size); 471 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { 472 if (sizeof(tcg_target_ulong) == 8) { 473 fprintf(logfile, 474 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", 475 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 476 } else if (sizeof(tcg_target_ulong) == 4) { 477 fprintf(logfile, 478 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", 479 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 480 } else { 481 qemu_build_not_reached(); 482 } 483 } 484 } 485 fprintf(logfile, "\n"); 486 qemu_log_unlock(logfile); 487 } 488 } 489 #endif 490 491 qatomic_set(&tcg_ctx->code_gen_ptr, (void *) 492 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, 493 CODE_GEN_ALIGN)); 494 495 /* init jump list */ 496 qemu_spin_init(&tb->jmp_lock); 497 tb->jmp_list_head = (uintptr_t)NULL; 498 tb->jmp_list_next[0] = (uintptr_t)NULL; 499 tb->jmp_list_next[1] = (uintptr_t)NULL; 500 tb->jmp_dest[0] = (uintptr_t)NULL; 501 tb->jmp_dest[1] = (uintptr_t)NULL; 502 503 /* init original jump addresses which have been set during tcg_gen_code() */ 504 if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { 505 tb_reset_jump(tb, 0); 506 } 507 if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { 508 tb_reset_jump(tb, 1); 509 } 510 511 /* 512 * If the TB is not associated with a physical RAM page then it must be 513 * a temporary one-insn TB, and we have nothing left to do. Return early 514 * before attempting to link to other TBs or add to the lookup table. 515 */ 516 if (tb_page_addr0(tb) == -1) { 517 return tb; 518 } 519 520 /* 521 * Insert TB into the corresponding region tree before publishing it 522 * through QHT. Otherwise rewinding happened in the TB might fail to 523 * lookup itself using host PC. 524 */ 525 tcg_tb_insert(tb); 526 527 /* 528 * No explicit memory barrier is required -- tb_link_page() makes the 529 * TB visible in a consistent state. 530 */ 531 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); 532 /* if the TB already exists, discard what we just translated */ 533 if (unlikely(existing_tb != tb)) { 534 uintptr_t orig_aligned = (uintptr_t)gen_code_buf; 535 536 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); 537 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); 538 tcg_tb_remove(tb); 539 return existing_tb; 540 } 541 return tb; 542 } 543 544 /* user-mode: call with mmap_lock held */ 545 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) 546 { 547 TranslationBlock *tb; 548 549 assert_memory_lock(); 550 551 tb = tcg_tb_lookup(retaddr); 552 if (tb) { 553 /* We can use retranslation to find the PC. */ 554 cpu_restore_state_from_tb(cpu, tb, retaddr); 555 tb_phys_invalidate(tb, -1); 556 } else { 557 /* The exception probably happened in a helper. The CPU state should 558 have been saved before calling it. Fetch the PC from there. */ 559 CPUArchState *env = cpu->env_ptr; 560 target_ulong pc, cs_base; 561 tb_page_addr_t addr; 562 uint32_t flags; 563 564 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); 565 addr = get_page_addr_code(env, pc); 566 if (addr != -1) { 567 tb_invalidate_phys_range(addr, addr + 1); 568 } 569 } 570 } 571 572 #ifndef CONFIG_USER_ONLY 573 /* 574 * In deterministic execution mode, instructions doing device I/Os 575 * must be at the end of the TB. 576 * 577 * Called by softmmu_template.h, with iothread mutex not held. 578 */ 579 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) 580 { 581 TranslationBlock *tb; 582 CPUClass *cc; 583 uint32_t n; 584 585 tb = tcg_tb_lookup(retaddr); 586 if (!tb) { 587 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", 588 (void *)retaddr); 589 } 590 cpu_restore_state_from_tb(cpu, tb, retaddr); 591 592 /* 593 * Some guests must re-execute the branch when re-executing a delay 594 * slot instruction. When this is the case, adjust icount and N 595 * to account for the re-execution of the branch. 596 */ 597 n = 1; 598 cc = CPU_GET_CLASS(cpu); 599 if (cc->tcg_ops->io_recompile_replay_branch && 600 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { 601 cpu_neg(cpu)->icount_decr.u16.low++; 602 n = 2; 603 } 604 605 /* 606 * Exit the loop and potentially generate a new TB executing the 607 * just the I/O insns. We also limit instrumentation to memory 608 * operations only (which execute after completion) so we don't 609 * double instrument the instruction. 610 */ 611 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; 612 613 if (qemu_loglevel_mask(CPU_LOG_EXEC)) { 614 target_ulong pc = log_pc(cpu, tb); 615 if (qemu_log_in_addr_range(pc)) { 616 qemu_log("cpu_io_recompile: rewound execution of TB to " 617 TARGET_FMT_lx "\n", pc); 618 } 619 } 620 621 cpu_loop_exit_noexc(cpu); 622 } 623 624 static void print_qht_statistics(struct qht_stats hst, GString *buf) 625 { 626 uint32_t hgram_opts; 627 size_t hgram_bins; 628 char *hgram; 629 630 if (!hst.head_buckets) { 631 return; 632 } 633 g_string_append_printf(buf, "TB hash buckets %zu/%zu " 634 "(%0.2f%% head buckets used)\n", 635 hst.used_head_buckets, hst.head_buckets, 636 (double)hst.used_head_buckets / 637 hst.head_buckets * 100); 638 639 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 640 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; 641 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { 642 hgram_opts |= QDIST_PR_NODECIMAL; 643 } 644 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); 645 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " 646 "Histogram: %s\n", 647 qdist_avg(&hst.occupancy) * 100, hgram); 648 g_free(hgram); 649 650 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 651 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); 652 if (hgram_bins > 10) { 653 hgram_bins = 10; 654 } else { 655 hgram_bins = 0; 656 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; 657 } 658 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); 659 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " 660 "Histogram: %s\n", 661 qdist_avg(&hst.chain), hgram); 662 g_free(hgram); 663 } 664 665 struct tb_tree_stats { 666 size_t nb_tbs; 667 size_t host_size; 668 size_t target_size; 669 size_t max_target_size; 670 size_t direct_jmp_count; 671 size_t direct_jmp2_count; 672 size_t cross_page; 673 }; 674 675 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) 676 { 677 const TranslationBlock *tb = value; 678 struct tb_tree_stats *tst = data; 679 680 tst->nb_tbs++; 681 tst->host_size += tb->tc.size; 682 tst->target_size += tb->size; 683 if (tb->size > tst->max_target_size) { 684 tst->max_target_size = tb->size; 685 } 686 if (tb_page_addr1(tb) != -1) { 687 tst->cross_page++; 688 } 689 if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { 690 tst->direct_jmp_count++; 691 if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { 692 tst->direct_jmp2_count++; 693 } 694 } 695 return false; 696 } 697 698 void dump_exec_info(GString *buf) 699 { 700 struct tb_tree_stats tst = {}; 701 struct qht_stats hst; 702 size_t nb_tbs, flush_full, flush_part, flush_elide; 703 704 tcg_tb_foreach(tb_tree_stats_iter, &tst); 705 nb_tbs = tst.nb_tbs; 706 /* XXX: avoid using doubles ? */ 707 g_string_append_printf(buf, "Translation buffer state:\n"); 708 /* 709 * Report total code size including the padding and TB structs; 710 * otherwise users might think "-accel tcg,tb-size" is not honoured. 711 * For avg host size we use the precise numbers from tb_tree_stats though. 712 */ 713 g_string_append_printf(buf, "gen code size %zu/%zu\n", 714 tcg_code_size(), tcg_code_capacity()); 715 g_string_append_printf(buf, "TB count %zu\n", nb_tbs); 716 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", 717 nb_tbs ? tst.target_size / nb_tbs : 0, 718 tst.max_target_size); 719 g_string_append_printf(buf, "TB avg host size %zu bytes " 720 "(expansion ratio: %0.1f)\n", 721 nb_tbs ? tst.host_size / nb_tbs : 0, 722 tst.target_size ? 723 (double)tst.host_size / tst.target_size : 0); 724 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", 725 tst.cross_page, 726 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); 727 g_string_append_printf(buf, "direct jump count %zu (%zu%%) " 728 "(2 jumps=%zu %zu%%)\n", 729 tst.direct_jmp_count, 730 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, 731 tst.direct_jmp2_count, 732 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); 733 734 qht_statistics_init(&tb_ctx.htable, &hst); 735 print_qht_statistics(hst, buf); 736 qht_statistics_destroy(&hst); 737 738 g_string_append_printf(buf, "\nStatistics:\n"); 739 g_string_append_printf(buf, "TB flush count %u\n", 740 qatomic_read(&tb_ctx.tb_flush_count)); 741 g_string_append_printf(buf, "TB invalidate count %u\n", 742 qatomic_read(&tb_ctx.tb_phys_invalidate_count)); 743 744 tlb_flush_counts(&flush_full, &flush_part, &flush_elide); 745 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); 746 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); 747 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); 748 tcg_dump_info(buf); 749 } 750 751 #else /* CONFIG_USER_ONLY */ 752 753 void cpu_interrupt(CPUState *cpu, int mask) 754 { 755 g_assert(qemu_mutex_iothread_locked()); 756 cpu->interrupt_request |= mask; 757 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); 758 } 759 760 #endif /* CONFIG_USER_ONLY */ 761 762 /* 763 * Called by generic code at e.g. cpu reset after cpu creation, 764 * therefore we must be prepared to allocate the jump cache. 765 */ 766 void tcg_flush_jmp_cache(CPUState *cpu) 767 { 768 CPUJumpCache *jc = cpu->tb_jmp_cache; 769 770 /* During early initialization, the cache may not yet be allocated. */ 771 if (unlikely(jc == NULL)) { 772 return; 773 } 774 775 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 776 qatomic_set(&jc->array[i].tb, NULL); 777 } 778 } 779 780 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 781 void tcg_flush_softmmu_tlb(CPUState *cs) 782 { 783 #ifdef CONFIG_SOFTMMU 784 tlb_flush(cs); 785 #endif 786 } 787