xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 51e47cf8)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "exec/tb-flush.h"
51 #include "qemu/bitmap.h"
52 #include "qemu/qemu-print.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "qemu/timer.h"
56 #include "exec/log.h"
57 #include "sysemu/cpus.h"
58 #include "sysemu/cpu-timers.h"
59 #include "sysemu/tcg.h"
60 #include "qapi/error.h"
61 #include "hw/core/tcg-cpu-ops.h"
62 #include "tb-jmp-cache.h"
63 #include "tb-hash.h"
64 #include "tb-context.h"
65 #include "internal.h"
66 #include "perf.h"
67 
/*
 * Compile-time check: every possible CPU trace event bit must fit in
 * tb->trace_vcpu_dstate (field width in bytes times BITS_PER_BYTE).
 */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * File-scope TB context.  In this file dump_exec_info() reads its hash
 * table and its flush/invalidate counters.
 */
TBContext tb_ctx;
74 
75 /* Encode VAL as a signed leb128 sequence at P.
76    Return P incremented past the encoded value.  */
77 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
78 {
79     int more, byte;
80 
81     do {
82         byte = val & 0x7f;
83         val >>= 7;
84         more = !((val == 0 && (byte & 0x40) == 0)
85                  || (val == -1 && (byte & 0x40) != 0));
86         if (more) {
87             byte |= 0x80;
88         }
89         *p++ = byte;
90     } while (more);
91 
92     return p;
93 }
94 
95 /* Decode a signed leb128 sequence at *PP; increment *PP past the
96    decoded value.  Return the decoded value.  */
97 static target_long decode_sleb128(const uint8_t **pp)
98 {
99     const uint8_t *p = *pp;
100     target_long val = 0;
101     int byte, shift = 0;
102 
103     do {
104         byte = *p++;
105         val |= (target_ulong)(byte & 0x7f) << shift;
106         shift += 7;
107     } while (byte & 0x80);
108     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
109         val |= -(target_ulong)1 << shift;
110     }
111 
112     *pp = p;
113     return val;
114 }
115 
116 /* Encode the data collected about the instructions while compiling TB.
117    Place the data at BLOCK, and return the number of bytes consumed.
118 
119    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
120    which come from the target's insn_start data, followed by a uintptr_t
121    which comes from the host pc of the end of the code implementing the insn.
122 
123    Each line of the table is encoded as sleb128 deltas from the previous
124    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
125    That is, the first column is seeded with the guest pc, the last column
126    with the host pc, and the middle columns with zeros.  */
127 
128 static int encode_search(TranslationBlock *tb, uint8_t *block)
129 {
130     uint8_t *highwater = tcg_ctx->code_gen_highwater;
131     uint8_t *p = block;
132     int i, j, n;
133 
134     for (i = 0, n = tb->icount; i < n; ++i) {
135         target_ulong prev;
136 
137         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
138             if (i == 0) {
139                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
140             } else {
141                 prev = tcg_ctx->gen_insn_data[i - 1][j];
142             }
143             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
144         }
145         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
146         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
147 
148         /* Test for (pending) buffer overflow.  The assumption is that any
149            one row beginning below the high water mark cannot overrun
150            the buffer completely.  Thus we can test for overflow after
151            encoding a row without having to check during encoding.  */
152         if (unlikely(p > highwater)) {
153             return -1;
154         }
155     }
156 
157     return p - block;
158 }
159 
160 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
161                                    uint64_t *data)
162 {
163     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
164     const uint8_t *p = tb->tc.ptr + tb->tc.size;
165     int i, j, num_insns = tb->icount;
166 
167     host_pc -= GETPC_ADJ;
168 
169     if (host_pc < iter_pc) {
170         return -1;
171     }
172 
173     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
174     if (!(tb_cflags(tb) & CF_PCREL)) {
175         data[0] = tb->pc;
176     }
177 
178     /*
179      * Reconstruct the stored insn data while looking for the point
180      * at which the end of the insn exceeds host_pc.
181      */
182     for (i = 0; i < num_insns; ++i) {
183         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
184             data[j] += decode_sleb128(&p);
185         }
186         iter_pc += decode_sleb128(&p);
187         if (iter_pc > host_pc) {
188             return num_insns - i;
189         }
190     }
191     return -1;
192 }
193 
194 /*
195  * The cpu state corresponding to 'host_pc' is restored in
196  * preparation for exiting the TB.
197  */
198 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
199                                uintptr_t host_pc)
200 {
201     uint64_t data[TARGET_INSN_START_WORDS];
202 #ifdef CONFIG_PROFILER
203     TCGProfile *prof = &tcg_ctx->prof;
204     int64_t ti = profile_getclock();
205 #endif
206     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
207 
208     if (insns_left < 0) {
209         return;
210     }
211 
212     if (tb_cflags(tb) & CF_USE_ICOUNT) {
213         assert(icount_enabled());
214         /*
215          * Reset the cycle counter to the start of the block and
216          * shift if to the number of actually executed instructions.
217          */
218         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
219     }
220 
221     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
222 
223 #ifdef CONFIG_PROFILER
224     qatomic_set(&prof->restore_time,
225                 prof->restore_time + profile_getclock() - ti);
226     qatomic_set(&prof->restore_count, prof->restore_count + 1);
227 #endif
228 }
229 
230 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
231 {
232     /*
233      * The host_pc has to be in the rx region of the code buffer.
234      * If it is not we will not be able to resolve it here.
235      * The two cases where host_pc will not be correct are:
236      *
237      *  - fault during translation (instruction fetch)
238      *  - fault from helper (not using GETPC() macro)
239      *
240      * Either way we need return early as we can't resolve it here.
241      */
242     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
243         TranslationBlock *tb = tcg_tb_lookup(host_pc);
244         if (tb) {
245             cpu_restore_state_from_tb(cpu, tb, host_pc);
246             return true;
247         }
248     }
249     return false;
250 }
251 
252 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
253 {
254     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
255         TranslationBlock *tb = tcg_tb_lookup(host_pc);
256         if (tb) {
257             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
258         }
259     }
260     return false;
261 }
262 
/*
 * One-time page setup: runs page_size_init() followed by
 * page_table_config_init().
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
268 
269 /*
270  * Isolate the portion of code gen which can setjmp/longjmp.
271  * Return the size of the generated code, or negative on error.
272  */
273 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
274                            target_ulong pc, void *host_pc,
275                            int *max_insns, int64_t *ti)
276 {
277     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
278     if (unlikely(ret != 0)) {
279         return ret;
280     }
281 
282     tcg_func_start(tcg_ctx);
283 
284     tcg_ctx->cpu = env_cpu(env);
285     gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
286     assert(tb->size != 0);
287     tcg_ctx->cpu = NULL;
288     *max_insns = tb->icount;
289 
290 #ifdef CONFIG_PROFILER
291     qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
292     qatomic_set(&tcg_ctx->prof.interm_time,
293                 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
294     *ti = profile_getclock();
295 #endif
296 
297     return tcg_gen_code(tcg_ctx, tb, pc);
298 }
299 
300 /* Called with mmap_lock held for user mode emulation.  */
301 TranslationBlock *tb_gen_code(CPUState *cpu,
302                               target_ulong pc, target_ulong cs_base,
303                               uint32_t flags, int cflags)
304 {
305     CPUArchState *env = cpu->env_ptr;
306     TranslationBlock *tb, *existing_tb;
307     tb_page_addr_t phys_pc;
308     tcg_insn_unit *gen_code_buf;
309     int gen_code_size, search_size, max_insns;
310 #ifdef CONFIG_PROFILER
311     TCGProfile *prof = &tcg_ctx->prof;
312 #endif
313     int64_t ti;
314     void *host_pc;
315 
316     assert_memory_lock();
317     qemu_thread_jit_write();
318 
319     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
320 
321     if (phys_pc == -1) {
322         /* Generate a one-shot TB with 1 insn in it */
323         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
324     }
325 
326     max_insns = cflags & CF_COUNT_MASK;
327     if (max_insns == 0) {
328         max_insns = TCG_MAX_INSNS;
329     }
330     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
331 
332  buffer_overflow:
333     tb = tcg_tb_alloc(tcg_ctx);
334     if (unlikely(!tb)) {
335         /* flush must be done */
336         tb_flush(cpu);
337         mmap_unlock();
338         /* Make the execution loop process the flush as soon as possible.  */
339         cpu->exception_index = EXCP_INTERRUPT;
340         cpu_loop_exit(cpu);
341     }
342 
343     gen_code_buf = tcg_ctx->code_gen_ptr;
344     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
345     if (!(cflags & CF_PCREL)) {
346         tb->pc = pc;
347     }
348     tb->cs_base = cs_base;
349     tb->flags = flags;
350     tb->cflags = cflags;
351     tb->trace_vcpu_dstate = *cpu->trace_dstate;
352     tb_set_page_addr0(tb, phys_pc);
353     tb_set_page_addr1(tb, -1);
354     tcg_ctx->gen_tb = tb;
355  tb_overflow:
356 
357 #ifdef CONFIG_PROFILER
358     /* includes aborted translations because of exceptions */
359     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
360     ti = profile_getclock();
361 #endif
362 
363     trace_translate_block(tb, pc, tb->tc.ptr);
364 
365     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
366     if (unlikely(gen_code_size < 0)) {
367         switch (gen_code_size) {
368         case -1:
369             /*
370              * Overflow of code_gen_buffer, or the current slice of it.
371              *
372              * TODO: We don't need to re-do gen_intermediate_code, nor
373              * should we re-do the tcg optimization currently hidden
374              * inside tcg_gen_code.  All that should be required is to
375              * flush the TBs, allocate a new TB, re-initialize it per
376              * above, and re-do the actual code generation.
377              */
378             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
379                           "Restarting code generation for "
380                           "code_gen_buffer overflow\n");
381             goto buffer_overflow;
382 
383         case -2:
384             /*
385              * The code generated for the TranslationBlock is too large.
386              * The maximum size allowed by the unwind info is 64k.
387              * There may be stricter constraints from relocations
388              * in the tcg backend.
389              *
390              * Try again with half as many insns as we attempted this time.
391              * If a single insn overflows, there's a bug somewhere...
392              */
393             assert(max_insns > 1);
394             max_insns /= 2;
395             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
396                           "Restarting code generation with "
397                           "smaller translation block (max %d insns)\n",
398                           max_insns);
399             goto tb_overflow;
400 
401         default:
402             g_assert_not_reached();
403         }
404     }
405     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
406     if (unlikely(search_size < 0)) {
407         goto buffer_overflow;
408     }
409     tb->tc.size = gen_code_size;
410 
411     /*
412      * For CF_PCREL, attribute all executions of the generated code
413      * to its first mapping.
414      */
415     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
416 
417 #ifdef CONFIG_PROFILER
418     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
419     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
420     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
421     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
422 #endif
423 
424 #ifdef DEBUG_DISAS
425     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
426         qemu_log_in_addr_range(pc)) {
427         FILE *logfile = qemu_log_trylock();
428         if (logfile) {
429             int code_size, data_size;
430             const tcg_target_ulong *rx_data_gen_ptr;
431             size_t chunk_start;
432             int insn = 0;
433 
434             if (tcg_ctx->data_gen_ptr) {
435                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
436                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
437                 data_size = gen_code_size - code_size;
438             } else {
439                 rx_data_gen_ptr = 0;
440                 code_size = gen_code_size;
441                 data_size = 0;
442             }
443 
444             /* Dump header and the first instruction */
445             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
446             fprintf(logfile,
447                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
448                     tcg_ctx->gen_insn_data[insn][0]);
449             chunk_start = tcg_ctx->gen_insn_end_off[insn];
450             disas(logfile, tb->tc.ptr, chunk_start);
451 
452             /*
453              * Dump each instruction chunk, wrapping up empty chunks into
454              * the next instruction. The whole array is offset so the
455              * first entry is the beginning of the 2nd instruction.
456              */
457             while (insn < tb->icount) {
458                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
459                 if (chunk_end > chunk_start) {
460                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
461                             tcg_ctx->gen_insn_data[insn][0]);
462                     disas(logfile, tb->tc.ptr + chunk_start,
463                           chunk_end - chunk_start);
464                     chunk_start = chunk_end;
465                 }
466                 insn++;
467             }
468 
469             if (chunk_start < code_size) {
470                 fprintf(logfile, "  -- tb slow paths + alignment\n");
471                 disas(logfile, tb->tc.ptr + chunk_start,
472                       code_size - chunk_start);
473             }
474 
475             /* Finally dump any data we may have after the block */
476             if (data_size) {
477                 int i;
478                 fprintf(logfile, "  data: [size=%d]\n", data_size);
479                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
480                     if (sizeof(tcg_target_ulong) == 8) {
481                         fprintf(logfile,
482                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
483                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
484                     } else if (sizeof(tcg_target_ulong) == 4) {
485                         fprintf(logfile,
486                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
487                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
488                     } else {
489                         qemu_build_not_reached();
490                     }
491                 }
492             }
493             fprintf(logfile, "\n");
494             qemu_log_unlock(logfile);
495         }
496     }
497 #endif
498 
499     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
500         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
501                  CODE_GEN_ALIGN));
502 
503     /* init jump list */
504     qemu_spin_init(&tb->jmp_lock);
505     tb->jmp_list_head = (uintptr_t)NULL;
506     tb->jmp_list_next[0] = (uintptr_t)NULL;
507     tb->jmp_list_next[1] = (uintptr_t)NULL;
508     tb->jmp_dest[0] = (uintptr_t)NULL;
509     tb->jmp_dest[1] = (uintptr_t)NULL;
510 
511     /* init original jump addresses which have been set during tcg_gen_code() */
512     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
513         tb_reset_jump(tb, 0);
514     }
515     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
516         tb_reset_jump(tb, 1);
517     }
518 
519     /*
520      * If the TB is not associated with a physical RAM page then it must be
521      * a temporary one-insn TB, and we have nothing left to do. Return early
522      * before attempting to link to other TBs or add to the lookup table.
523      */
524     if (tb_page_addr0(tb) == -1) {
525         return tb;
526     }
527 
528     /*
529      * Insert TB into the corresponding region tree before publishing it
530      * through QHT. Otherwise rewinding happened in the TB might fail to
531      * lookup itself using host PC.
532      */
533     tcg_tb_insert(tb);
534 
535     /*
536      * No explicit memory barrier is required -- tb_link_page() makes the
537      * TB visible in a consistent state.
538      */
539     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
540     /* if the TB already exists, discard what we just translated */
541     if (unlikely(existing_tb != tb)) {
542         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
543 
544         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
545         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
546         tcg_tb_remove(tb);
547         return existing_tb;
548     }
549     return tb;
550 }
551 
552 /* user-mode: call with mmap_lock held */
553 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
554 {
555     TranslationBlock *tb;
556 
557     assert_memory_lock();
558 
559     tb = tcg_tb_lookup(retaddr);
560     if (tb) {
561         /* We can use retranslation to find the PC.  */
562         cpu_restore_state_from_tb(cpu, tb, retaddr);
563         tb_phys_invalidate(tb, -1);
564     } else {
565         /* The exception probably happened in a helper.  The CPU state should
566            have been saved before calling it. Fetch the PC from there.  */
567         CPUArchState *env = cpu->env_ptr;
568         target_ulong pc, cs_base;
569         tb_page_addr_t addr;
570         uint32_t flags;
571 
572         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
573         addr = get_page_addr_code(env, pc);
574         if (addr != -1) {
575             tb_invalidate_phys_range(addr, addr);
576         }
577     }
578 }
579 
580 #ifndef CONFIG_USER_ONLY
581 /*
582  * In deterministic execution mode, instructions doing device I/Os
583  * must be at the end of the TB.
584  *
585  * Called by softmmu_template.h, with iothread mutex not held.
586  */
587 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
588 {
589     TranslationBlock *tb;
590     CPUClass *cc;
591     uint32_t n;
592 
593     tb = tcg_tb_lookup(retaddr);
594     if (!tb) {
595         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
596                   (void *)retaddr);
597     }
598     cpu_restore_state_from_tb(cpu, tb, retaddr);
599 
600     /*
601      * Some guests must re-execute the branch when re-executing a delay
602      * slot instruction.  When this is the case, adjust icount and N
603      * to account for the re-execution of the branch.
604      */
605     n = 1;
606     cc = CPU_GET_CLASS(cpu);
607     if (cc->tcg_ops->io_recompile_replay_branch &&
608         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
609         cpu_neg(cpu)->icount_decr.u16.low++;
610         n = 2;
611     }
612 
613     /*
614      * Exit the loop and potentially generate a new TB executing the
615      * just the I/O insns. We also limit instrumentation to memory
616      * operations only (which execute after completion) so we don't
617      * double instrument the instruction.
618      */
619     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
620 
621     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
622         target_ulong pc = log_pc(cpu, tb);
623         if (qemu_log_in_addr_range(pc)) {
624             qemu_log("cpu_io_recompile: rewound execution of TB to "
625                      TARGET_FMT_lx "\n", pc);
626         }
627     }
628 
629     cpu_loop_exit_noexc(cpu);
630 }
631 
632 static void print_qht_statistics(struct qht_stats hst, GString *buf)
633 {
634     uint32_t hgram_opts;
635     size_t hgram_bins;
636     char *hgram;
637 
638     if (!hst.head_buckets) {
639         return;
640     }
641     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
642                            "(%0.2f%% head buckets used)\n",
643                            hst.used_head_buckets, hst.head_buckets,
644                            (double)hst.used_head_buckets /
645                            hst.head_buckets * 100);
646 
647     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
648     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
649     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
650         hgram_opts |= QDIST_PR_NODECIMAL;
651     }
652     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
653     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
654                            "Histogram: %s\n",
655                            qdist_avg(&hst.occupancy) * 100, hgram);
656     g_free(hgram);
657 
658     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
659     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
660     if (hgram_bins > 10) {
661         hgram_bins = 10;
662     } else {
663         hgram_bins = 0;
664         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
665     }
666     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
667     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
668                            "Histogram: %s\n",
669                            qdist_avg(&hst.chain), hgram);
670     g_free(hgram);
671 }
672 
673 struct tb_tree_stats {
674     size_t nb_tbs;
675     size_t host_size;
676     size_t target_size;
677     size_t max_target_size;
678     size_t direct_jmp_count;
679     size_t direct_jmp2_count;
680     size_t cross_page;
681 };
682 
683 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
684 {
685     const TranslationBlock *tb = value;
686     struct tb_tree_stats *tst = data;
687 
688     tst->nb_tbs++;
689     tst->host_size += tb->tc.size;
690     tst->target_size += tb->size;
691     if (tb->size > tst->max_target_size) {
692         tst->max_target_size = tb->size;
693     }
694     if (tb_page_addr1(tb) != -1) {
695         tst->cross_page++;
696     }
697     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
698         tst->direct_jmp_count++;
699         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
700             tst->direct_jmp2_count++;
701         }
702     }
703     return false;
704 }
705 
706 void dump_exec_info(GString *buf)
707 {
708     struct tb_tree_stats tst = {};
709     struct qht_stats hst;
710     size_t nb_tbs, flush_full, flush_part, flush_elide;
711 
712     tcg_tb_foreach(tb_tree_stats_iter, &tst);
713     nb_tbs = tst.nb_tbs;
714     /* XXX: avoid using doubles ? */
715     g_string_append_printf(buf, "Translation buffer state:\n");
716     /*
717      * Report total code size including the padding and TB structs;
718      * otherwise users might think "-accel tcg,tb-size" is not honoured.
719      * For avg host size we use the precise numbers from tb_tree_stats though.
720      */
721     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
722                            tcg_code_size(), tcg_code_capacity());
723     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
724     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
725                            nb_tbs ? tst.target_size / nb_tbs : 0,
726                            tst.max_target_size);
727     g_string_append_printf(buf, "TB avg host size    %zu bytes "
728                            "(expansion ratio: %0.1f)\n",
729                            nb_tbs ? tst.host_size / nb_tbs : 0,
730                            tst.target_size ?
731                            (double)tst.host_size / tst.target_size : 0);
732     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
733                            tst.cross_page,
734                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
735     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
736                            "(2 jumps=%zu %zu%%)\n",
737                            tst.direct_jmp_count,
738                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
739                            tst.direct_jmp2_count,
740                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
741 
742     qht_statistics_init(&tb_ctx.htable, &hst);
743     print_qht_statistics(hst, buf);
744     qht_statistics_destroy(&hst);
745 
746     g_string_append_printf(buf, "\nStatistics:\n");
747     g_string_append_printf(buf, "TB flush count      %u\n",
748                            qatomic_read(&tb_ctx.tb_flush_count));
749     g_string_append_printf(buf, "TB invalidate count %u\n",
750                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
751 
752     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
753     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
754     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
755     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
756     tcg_dump_info(buf);
757 }
758 
759 #else /* CONFIG_USER_ONLY */
760 
761 void cpu_interrupt(CPUState *cpu, int mask)
762 {
763     g_assert(qemu_mutex_iothread_locked());
764     cpu->interrupt_request |= mask;
765     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
766 }
767 
768 #endif /* CONFIG_USER_ONLY */
769 
770 /*
771  * Called by generic code at e.g. cpu reset after cpu creation,
772  * therefore we must be prepared to allocate the jump cache.
773  */
774 void tcg_flush_jmp_cache(CPUState *cpu)
775 {
776     CPUJumpCache *jc = cpu->tb_jmp_cache;
777 
778     /* During early initialization, the cache may not yet be allocated. */
779     if (unlikely(jc == NULL)) {
780         return;
781     }
782 
783     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
784         qatomic_set(&jc->array[i].tb, NULL);
785     }
786 }
787 
/*
 * This is a wrapper for common code that can not use CONFIG_SOFTMMU.
 * It calls tlb_flush(cs) when CONFIG_SOFTMMU is defined and is a no-op
 * otherwise.
 */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}
795