xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 5b5968c4)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "exec/log.h"
55 #include "sysemu/cpus.h"
56 #include "sysemu/cpu-timers.h"
57 #include "sysemu/tcg.h"
58 #include "qapi/error.h"
59 #include "hw/core/tcg-cpu-ops.h"
60 #include "tb-jmp-cache.h"
61 #include "tb-hash.h"
62 #include "tb-context.h"
63 #include "internal.h"
64 #include "perf.h"
65 
/*
 * Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate:
 * the build fails if CPU_TRACE_DSTATE_MAX_EVENTS is larger than the
 * number of bits in that TranslationBlock field.
 */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * Global TB context.  Within this file, dump_exec_info() reads its
 * hash table (htable) and its tb_flush_count / tb_phys_invalidate_count
 * counters.
 */
TBContext tb_ctx;
72 
73 /* Encode VAL as a signed leb128 sequence at P.
74    Return P incremented past the encoded value.  */
75 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
76 {
77     int more, byte;
78 
79     do {
80         byte = val & 0x7f;
81         val >>= 7;
82         more = !((val == 0 && (byte & 0x40) == 0)
83                  || (val == -1 && (byte & 0x40) != 0));
84         if (more) {
85             byte |= 0x80;
86         }
87         *p++ = byte;
88     } while (more);
89 
90     return p;
91 }
92 
93 /* Decode a signed leb128 sequence at *PP; increment *PP past the
94    decoded value.  Return the decoded value.  */
95 static target_long decode_sleb128(const uint8_t **pp)
96 {
97     const uint8_t *p = *pp;
98     target_long val = 0;
99     int byte, shift = 0;
100 
101     do {
102         byte = *p++;
103         val |= (target_ulong)(byte & 0x7f) << shift;
104         shift += 7;
105     } while (byte & 0x80);
106     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
107         val |= -(target_ulong)1 << shift;
108     }
109 
110     *pp = p;
111     return val;
112 }
113 
114 /* Encode the data collected about the instructions while compiling TB.
115    Place the data at BLOCK, and return the number of bytes consumed.
116 
117    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
118    which come from the target's insn_start data, followed by a uintptr_t
119    which comes from the host pc of the end of the code implementing the insn.
120 
121    Each line of the table is encoded as sleb128 deltas from the previous
122    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
123    That is, the first column is seeded with the guest pc, the last column
124    with the host pc, and the middle columns with zeros.  */
125 
126 static int encode_search(TranslationBlock *tb, uint8_t *block)
127 {
128     uint8_t *highwater = tcg_ctx->code_gen_highwater;
129     uint8_t *p = block;
130     int i, j, n;
131 
132     for (i = 0, n = tb->icount; i < n; ++i) {
133         target_ulong prev;
134 
135         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
136             if (i == 0) {
137                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
138             } else {
139                 prev = tcg_ctx->gen_insn_data[i - 1][j];
140             }
141             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
142         }
143         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
144         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
145 
146         /* Test for (pending) buffer overflow.  The assumption is that any
147            one row beginning below the high water mark cannot overrun
148            the buffer completely.  Thus we can test for overflow after
149            encoding a row without having to check during encoding.  */
150         if (unlikely(p > highwater)) {
151             return -1;
152         }
153     }
154 
155     return p - block;
156 }
157 
158 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
159                                    uint64_t *data)
160 {
161     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
162     const uint8_t *p = tb->tc.ptr + tb->tc.size;
163     int i, j, num_insns = tb->icount;
164 
165     host_pc -= GETPC_ADJ;
166 
167     if (host_pc < iter_pc) {
168         return -1;
169     }
170 
171     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
172     if (!TARGET_TB_PCREL) {
173         data[0] = tb_pc(tb);
174     }
175 
176     /*
177      * Reconstruct the stored insn data while looking for the point
178      * at which the end of the insn exceeds host_pc.
179      */
180     for (i = 0; i < num_insns; ++i) {
181         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
182             data[j] += decode_sleb128(&p);
183         }
184         iter_pc += decode_sleb128(&p);
185         if (iter_pc > host_pc) {
186             return num_insns - i;
187         }
188     }
189     return -1;
190 }
191 
192 /*
193  * The cpu state corresponding to 'host_pc' is restored in
194  * preparation for exiting the TB.
195  */
196 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
197                                uintptr_t host_pc)
198 {
199     uint64_t data[TARGET_INSN_START_WORDS];
200 #ifdef CONFIG_PROFILER
201     TCGProfile *prof = &tcg_ctx->prof;
202     int64_t ti = profile_getclock();
203 #endif
204     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
205 
206     if (insns_left < 0) {
207         return;
208     }
209 
210     if (tb_cflags(tb) & CF_USE_ICOUNT) {
211         assert(icount_enabled());
212         /*
213          * Reset the cycle counter to the start of the block and
214          * shift if to the number of actually executed instructions.
215          */
216         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
217     }
218 
219     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
220 
221 #ifdef CONFIG_PROFILER
222     qatomic_set(&prof->restore_time,
223                 prof->restore_time + profile_getclock() - ti);
224     qatomic_set(&prof->restore_count, prof->restore_count + 1);
225 #endif
226 }
227 
228 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
229 {
230     /*
231      * The host_pc has to be in the rx region of the code buffer.
232      * If it is not we will not be able to resolve it here.
233      * The two cases where host_pc will not be correct are:
234      *
235      *  - fault during translation (instruction fetch)
236      *  - fault from helper (not using GETPC() macro)
237      *
238      * Either way we need return early as we can't resolve it here.
239      */
240     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
241         TranslationBlock *tb = tcg_tb_lookup(host_pc);
242         if (tb) {
243             cpu_restore_state_from_tb(cpu, tb, host_pc);
244             return true;
245         }
246     }
247     return false;
248 }
249 
250 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
251 {
252     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
253         TranslationBlock *tb = tcg_tb_lookup(host_pc);
254         if (tb) {
255             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
256         }
257     }
258     return false;
259 }
260 
/*
 * Page-related start-up work: runs page_size_init() and then
 * page_table_config_init().
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
266 
/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * @env:       cpu architecture state handed to the translator
 * @tb:        the TB being filled in
 * @pc:        guest pc passed on to gen_intermediate_code() and
 *             tcg_gen_code()
 * @host_pc:   passed through unchanged to gen_intermediate_code()
 * @max_insns: in: upper bound on the instructions to translate;
 *             out: overwritten with tb->icount after translation
 * @ti:        profiling timestamp; only read and updated when
 *             CONFIG_PROFILER is defined
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    /*
     * A non-zero result means control came back here through a jump to
     * tcg_ctx->jmp_trans; hand that value to the caller as the result.
     */
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    /* tcg_ctx->cpu is only set for the duration of the front-end pass. */
    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    /* Report back how many instructions the TB actually ended up with. */
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    /* Restart the clock so the caller can time the code generation step. */
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}
297 
/*
 * Translate the guest code starting at @pc into a TranslationBlock.
 *
 * @cpu:     vCPU the translation is done for
 * @pc:      guest pc of the first instruction to translate
 * @cs_base: value recorded in tb->cs_base
 * @flags:   value recorded in tb->flags
 * @cflags:  value recorded in tb->cflags; its CF_COUNT_MASK bits give
 *           the maximum number of instructions (0 means TCG_MAX_INSNS)
 *
 * Returns the newly generated TB.  If tb_link_page() hands back a
 * different TB, the freshly generated code is discarded and that TB is
 * returned instead.  When no TB can be allocated, the TBs are flushed
 * and the function leaves through cpu_loop_exit() instead of returning.
 *
 * Called with mmap_lock held for user mode emulation.
 */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

    /*
     * Retry point after the code buffer overflowed: allocate a new TB
     * and initialise it again from scratch.
     */
 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible.  */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
#if !TARGET_TB_PCREL
    tb->pc = pc;
#endif
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->gen_tb = tb;
    /*
     * Retry point after the TB turned out too large: the same TB is
     * reused and only max_insns has been reduced.
     */
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    /* The search (unwind) data is placed directly after the generated code. */
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

    /*
     * For TARGET_TB_PCREL, attribute all executions of the generated
     * code to its first mapping.
     */
    perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            /*
             * If the backend emitted a data area, split the output into
             * the code part and the data part that follows it.
             */
            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction. The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            /* Whatever code remains after the last instruction chunk. */
            if (chunk_start < code_size) {
                fprintf(logfile, "  -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, "  data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /*
     * Advance the code generation pointer past the generated code and
     * its search data, rounded up to CODE_GEN_ALIGN.
     */
    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do. Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT. Otherwise rewinding happened in the TB might fail to
     * lookup itself using host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        /*
         * Rewind code_gen_ptr to in front of the TB structure --
         * presumably where tcg_tb_alloc() placed it; confirm against
         * the allocator -- so the space can be reused.
         */
        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}
549 
550 /* user-mode: call with mmap_lock held */
551 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
552 {
553     TranslationBlock *tb;
554 
555     assert_memory_lock();
556 
557     tb = tcg_tb_lookup(retaddr);
558     if (tb) {
559         /* We can use retranslation to find the PC.  */
560         cpu_restore_state_from_tb(cpu, tb, retaddr);
561         tb_phys_invalidate(tb, -1);
562     } else {
563         /* The exception probably happened in a helper.  The CPU state should
564            have been saved before calling it. Fetch the PC from there.  */
565         CPUArchState *env = cpu->env_ptr;
566         target_ulong pc, cs_base;
567         tb_page_addr_t addr;
568         uint32_t flags;
569 
570         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
571         addr = get_page_addr_code(env, pc);
572         if (addr != -1) {
573             tb_invalidate_phys_range(addr, addr + 1);
574         }
575     }
576 }
577 
578 #ifndef CONFIG_USER_ONLY
579 /*
580  * In deterministic execution mode, instructions doing device I/Os
581  * must be at the end of the TB.
582  *
583  * Called by softmmu_template.h, with iothread mutex not held.
584  */
585 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
586 {
587     TranslationBlock *tb;
588     CPUClass *cc;
589     uint32_t n;
590 
591     tb = tcg_tb_lookup(retaddr);
592     if (!tb) {
593         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
594                   (void *)retaddr);
595     }
596     cpu_restore_state_from_tb(cpu, tb, retaddr);
597 
598     /*
599      * Some guests must re-execute the branch when re-executing a delay
600      * slot instruction.  When this is the case, adjust icount and N
601      * to account for the re-execution of the branch.
602      */
603     n = 1;
604     cc = CPU_GET_CLASS(cpu);
605     if (cc->tcg_ops->io_recompile_replay_branch &&
606         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
607         cpu_neg(cpu)->icount_decr.u16.low++;
608         n = 2;
609     }
610 
611     /*
612      * Exit the loop and potentially generate a new TB executing the
613      * just the I/O insns. We also limit instrumentation to memory
614      * operations only (which execute after completion) so we don't
615      * double instrument the instruction.
616      */
617     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
618 
619     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
620         target_ulong pc = log_pc(cpu, tb);
621         if (qemu_log_in_addr_range(pc)) {
622             qemu_log("cpu_io_recompile: rewound execution of TB to "
623                      TARGET_FMT_lx "\n", pc);
624         }
625     }
626 
627     cpu_loop_exit_noexc(cpu);
628 }
629 
630 static void print_qht_statistics(struct qht_stats hst, GString *buf)
631 {
632     uint32_t hgram_opts;
633     size_t hgram_bins;
634     char *hgram;
635 
636     if (!hst.head_buckets) {
637         return;
638     }
639     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
640                            "(%0.2f%% head buckets used)\n",
641                            hst.used_head_buckets, hst.head_buckets,
642                            (double)hst.used_head_buckets /
643                            hst.head_buckets * 100);
644 
645     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
646     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
647     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
648         hgram_opts |= QDIST_PR_NODECIMAL;
649     }
650     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
651     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
652                            "Histogram: %s\n",
653                            qdist_avg(&hst.occupancy) * 100, hgram);
654     g_free(hgram);
655 
656     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
657     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
658     if (hgram_bins > 10) {
659         hgram_bins = 10;
660     } else {
661         hgram_bins = 0;
662         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
663     }
664     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
665     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
666                            "Histogram: %s\n",
667                            qdist_avg(&hst.chain), hgram);
668     g_free(hgram);
669 }
670 
/*
 * Totals accumulated over all TBs by tb_tree_stats_iter() and reported
 * by dump_exec_info().
 */
struct tb_tree_stats {
    size_t nb_tbs;            /* number of TBs visited */
    size_t host_size;         /* sum of tb->tc.size over all TBs */
    size_t target_size;       /* sum of tb->size over all TBs */
    size_t max_target_size;   /* largest tb->size seen */
    size_t direct_jmp_count;  /* TBs whose jmp_reset_offset[0] is valid */
    size_t direct_jmp2_count; /* TBs whose jmp_reset_offset[0] and [1] are valid */
    size_t cross_page;        /* TBs for which tb_page_addr1() is not -1 */
};
680 
681 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
682 {
683     const TranslationBlock *tb = value;
684     struct tb_tree_stats *tst = data;
685 
686     tst->nb_tbs++;
687     tst->host_size += tb->tc.size;
688     tst->target_size += tb->size;
689     if (tb->size > tst->max_target_size) {
690         tst->max_target_size = tb->size;
691     }
692     if (tb_page_addr1(tb) != -1) {
693         tst->cross_page++;
694     }
695     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
696         tst->direct_jmp_count++;
697         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
698             tst->direct_jmp2_count++;
699         }
700     }
701     return false;
702 }
703 
704 void dump_exec_info(GString *buf)
705 {
706     struct tb_tree_stats tst = {};
707     struct qht_stats hst;
708     size_t nb_tbs, flush_full, flush_part, flush_elide;
709 
710     tcg_tb_foreach(tb_tree_stats_iter, &tst);
711     nb_tbs = tst.nb_tbs;
712     /* XXX: avoid using doubles ? */
713     g_string_append_printf(buf, "Translation buffer state:\n");
714     /*
715      * Report total code size including the padding and TB structs;
716      * otherwise users might think "-accel tcg,tb-size" is not honoured.
717      * For avg host size we use the precise numbers from tb_tree_stats though.
718      */
719     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
720                            tcg_code_size(), tcg_code_capacity());
721     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
722     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
723                            nb_tbs ? tst.target_size / nb_tbs : 0,
724                            tst.max_target_size);
725     g_string_append_printf(buf, "TB avg host size    %zu bytes "
726                            "(expansion ratio: %0.1f)\n",
727                            nb_tbs ? tst.host_size / nb_tbs : 0,
728                            tst.target_size ?
729                            (double)tst.host_size / tst.target_size : 0);
730     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
731                            tst.cross_page,
732                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
733     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
734                            "(2 jumps=%zu %zu%%)\n",
735                            tst.direct_jmp_count,
736                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
737                            tst.direct_jmp2_count,
738                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
739 
740     qht_statistics_init(&tb_ctx.htable, &hst);
741     print_qht_statistics(hst, buf);
742     qht_statistics_destroy(&hst);
743 
744     g_string_append_printf(buf, "\nStatistics:\n");
745     g_string_append_printf(buf, "TB flush count      %u\n",
746                            qatomic_read(&tb_ctx.tb_flush_count));
747     g_string_append_printf(buf, "TB invalidate count %u\n",
748                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
749 
750     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
751     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
752     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
753     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
754     tcg_dump_info(buf);
755 }
756 
757 #else /* CONFIG_USER_ONLY */
758 
/*
 * CONFIG_USER_ONLY variant: record the interrupt bits in 'mask' as
 * pending on 'cpu'.
 *
 * The iothread mutex must be held by the caller; this is asserted.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    /*
     * Store -1 into the high half of icount_decr.
     * NOTE(review): presumably this is what makes executing code
     * notice the request and leave the TB -- confirm against the
     * code that checks icount_decr.
     */
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}
765 
766 #endif /* CONFIG_USER_ONLY */
767 
768 /*
769  * Called by generic code at e.g. cpu reset after cpu creation,
770  * therefore we must be prepared to allocate the jump cache.
771  */
772 void tcg_flush_jmp_cache(CPUState *cpu)
773 {
774     CPUJumpCache *jc = cpu->tb_jmp_cache;
775 
776     /* During early initialization, the cache may not yet be allocated. */
777     if (unlikely(jc == NULL)) {
778         return;
779     }
780 
781     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
782         qatomic_set(&jc->array[i].tb, NULL);
783     }
784 }
785 
/*
 * This is a wrapper for common code that can not use CONFIG_SOFTMMU.
 *
 * Calls tlb_flush() on 'cs' when CONFIG_SOFTMMU is defined; otherwise
 * the function body is empty and the call does nothing.
 */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}
793