xref: /openbmc/qemu/accel/tcg/translate-all.c (revision c1774bdb)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #include "trace.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg.h"
26 #if defined(CONFIG_USER_ONLY)
27 #include "qemu.h"
28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
29 #include <sys/param.h>
30 #if __FreeBSD_version >= 700104
31 #define HAVE_KINFO_GETVMMAP
32 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
33 #include <sys/proc.h>
34 #include <machine/profile.h>
35 #define _KERNEL
36 #include <sys/user.h>
37 #undef _KERNEL
38 #undef sigqueue
39 #include <libutil.h>
40 #endif
41 #endif
42 #else
43 #include "exec/ram_addr.h"
44 #endif
45 
46 #include "exec/cputlb.h"
47 #include "exec/translate-all.h"
48 #include "exec/translator.h"
49 #include "exec/tb-flush.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "qemu/timer.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal-common.h"
65 #include "internal-target.h"
66 #include "perf.h"
67 #include "tcg/insn-start-words.h"
68 
/*
 * Global translation-block context.  Within this file it is read by
 * dump_exec_info(), which reports statistics for its hash table
 * (tb_ctx.htable) and its flush/invalidate counters.
 */
TBContext tb_ctx;
70 
/*
 * Write VAL at P using the signed LEB128 variable-length encoding:
 * seven payload bits per byte, least significant group first, with
 * bit 7 set on every byte except the last.
 *
 * Returns the address just past the final byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int septet = val & 0x7f;
        int sign_bit = septet & 0x40;

        val >>= 7;

        /*
         * The sequence is complete once the remaining value is pure sign
         * extension of the group just extracted.
         */
        if ((val == 0 && sign_bit == 0) || (val == -1 && sign_bit != 0)) {
            *p++ = septet;
            return p;
        }

        /* More groups follow: flag it with the continuation bit. */
        *p++ = septet | 0x80;
    }
}
92 
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The result is a full 64-bit quantity: encode_sleb128() is fed 64-bit
 * deltas, so sign extension must be decided by the width of int64_t and
 * not by the guest word size.  Accumulation is done on an unsigned type
 * so that no negative or overflowing signed value is ever left-shifted.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (uint64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);

    /*
     * Bit 6 of the final byte is the sign.  When it is set and the
     * encoding did not already fill all 64 bits, replicate it upwards.
     */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
115 
116 /* Encode the data collected about the instructions while compiling TB.
117    Place the data at BLOCK, and return the number of bytes consumed.
118 
119    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
120    which come from the target's insn_start data, followed by a uintptr_t
121    which comes from the host pc of the end of the code implementing the insn.
122 
123    Each line of the table is encoded as sleb128 deltas from the previous
124    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
125    That is, the first column is seeded with the guest pc, the last column
126    with the host pc, and the middle columns with zeros.  */
127 
128 static int encode_search(TranslationBlock *tb, uint8_t *block)
129 {
130     uint8_t *highwater = tcg_ctx->code_gen_highwater;
131     uint64_t *insn_data = tcg_ctx->gen_insn_data;
132     uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
133     uint8_t *p = block;
134     int i, j, n;
135 
136     for (i = 0, n = tb->icount; i < n; ++i) {
137         uint64_t prev, curr;
138 
139         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
140             if (i == 0) {
141                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
142             } else {
143                 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
144             }
145             curr = insn_data[i * TARGET_INSN_START_WORDS + j];
146             p = encode_sleb128(p, curr - prev);
147         }
148         prev = (i == 0 ? 0 : insn_end_off[i - 1]);
149         curr = insn_end_off[i];
150         p = encode_sleb128(p, curr - prev);
151 
152         /* Test for (pending) buffer overflow.  The assumption is that any
153            one row beginning below the high water mark cannot overrun
154            the buffer completely.  Thus we can test for overflow after
155            encoding a row without having to check during encoding.  */
156         if (unlikely(p > highwater)) {
157             return -1;
158         }
159     }
160 
161     return p - block;
162 }
163 
164 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
165                                    uint64_t *data)
166 {
167     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
168     const uint8_t *p = tb->tc.ptr + tb->tc.size;
169     int i, j, num_insns = tb->icount;
170 
171     host_pc -= GETPC_ADJ;
172 
173     if (host_pc < iter_pc) {
174         return -1;
175     }
176 
177     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
178     if (!(tb_cflags(tb) & CF_PCREL)) {
179         data[0] = tb->pc;
180     }
181 
182     /*
183      * Reconstruct the stored insn data while looking for the point
184      * at which the end of the insn exceeds host_pc.
185      */
186     for (i = 0; i < num_insns; ++i) {
187         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
188             data[j] += decode_sleb128(&p);
189         }
190         iter_pc += decode_sleb128(&p);
191         if (iter_pc > host_pc) {
192             return num_insns - i;
193         }
194     }
195     return -1;
196 }
197 
198 /*
199  * The cpu state corresponding to 'host_pc' is restored in
200  * preparation for exiting the TB.
201  */
202 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
203                                uintptr_t host_pc)
204 {
205     uint64_t data[TARGET_INSN_START_WORDS];
206     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
207 
208     if (insns_left < 0) {
209         return;
210     }
211 
212     if (tb_cflags(tb) & CF_USE_ICOUNT) {
213         assert(icount_enabled());
214         /*
215          * Reset the cycle counter to the start of the block and
216          * shift if to the number of actually executed instructions.
217          */
218         cpu->neg.icount_decr.u16.low += insns_left;
219     }
220 
221     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
222 }
223 
224 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
225 {
226     /*
227      * The host_pc has to be in the rx region of the code buffer.
228      * If it is not we will not be able to resolve it here.
229      * The two cases where host_pc will not be correct are:
230      *
231      *  - fault during translation (instruction fetch)
232      *  - fault from helper (not using GETPC() macro)
233      *
234      * Either way we need return early as we can't resolve it here.
235      */
236     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
237         TranslationBlock *tb = tcg_tb_lookup(host_pc);
238         if (tb) {
239             cpu_restore_state_from_tb(cpu, tb, host_pc);
240             return true;
241         }
242     }
243     return false;
244 }
245 
246 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
247 {
248     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
249         TranslationBlock *tb = tcg_tb_lookup(host_pc);
250         if (tb) {
251             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
252         }
253     }
254     return false;
255 }
256 
/*
 * One-time page setup: initialise the page size state first, then the
 * page table configuration.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
262 
/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * @max_insns is in/out: it is handed to gen_intermediate_code() and, once
 * that returns, overwritten with the instruction count recorded in the TB.
 * @ti is not referenced by this function.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           vaddr pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    /*
     * Jump target for aborting translation (signal mask not saved).
     * A non-zero value means we arrived here through a longjmp on
     * tcg_ctx->jmp_trans; that value is passed straight to the caller.
     */
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    /* tcg_ctx->cpu is only set for the duration of the front-end pass. */
    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

    return tcg_gen_code(tcg_ctx, tb, pc);
}
286 
287 /* Called with mmap_lock held for user mode emulation.  */
288 TranslationBlock *tb_gen_code(CPUState *cpu,
289                               vaddr pc, uint64_t cs_base,
290                               uint32_t flags, int cflags)
291 {
292     CPUArchState *env = cpu_env(cpu);
293     TranslationBlock *tb, *existing_tb;
294     tb_page_addr_t phys_pc, phys_p2;
295     tcg_insn_unit *gen_code_buf;
296     int gen_code_size, search_size, max_insns;
297     int64_t ti;
298     void *host_pc;
299 
300     assert_memory_lock();
301     qemu_thread_jit_write();
302 
303     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
304 
305     if (phys_pc == -1) {
306         /* Generate a one-shot TB with 1 insn in it */
307         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
308     }
309 
310     max_insns = cflags & CF_COUNT_MASK;
311     if (max_insns == 0) {
312         max_insns = TCG_MAX_INSNS;
313     }
314     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
315 
316  buffer_overflow:
317     assert_no_pages_locked();
318     tb = tcg_tb_alloc(tcg_ctx);
319     if (unlikely(!tb)) {
320         /* flush must be done */
321         tb_flush(cpu);
322         mmap_unlock();
323         /* Make the execution loop process the flush as soon as possible.  */
324         cpu->exception_index = EXCP_INTERRUPT;
325         cpu_loop_exit(cpu);
326     }
327 
328     gen_code_buf = tcg_ctx->code_gen_ptr;
329     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
330     if (!(cflags & CF_PCREL)) {
331         tb->pc = pc;
332     }
333     tb->cs_base = cs_base;
334     tb->flags = flags;
335     tb->cflags = cflags;
336     tb_set_page_addr0(tb, phys_pc);
337     tb_set_page_addr1(tb, -1);
338     if (phys_pc != -1) {
339         tb_lock_page0(phys_pc);
340     }
341 
342     tcg_ctx->gen_tb = tb;
343     tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
344 #ifdef CONFIG_SOFTMMU
345     tcg_ctx->page_bits = TARGET_PAGE_BITS;
346     tcg_ctx->page_mask = TARGET_PAGE_MASK;
347     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
348 #endif
349     tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
350 #ifdef TCG_GUEST_DEFAULT_MO
351     tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
352 #else
353     tcg_ctx->guest_mo = TCG_MO_ALL;
354 #endif
355 
356  restart_translate:
357     trace_translate_block(tb, pc, tb->tc.ptr);
358 
359     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
360     if (unlikely(gen_code_size < 0)) {
361         switch (gen_code_size) {
362         case -1:
363             /*
364              * Overflow of code_gen_buffer, or the current slice of it.
365              *
366              * TODO: We don't need to re-do gen_intermediate_code, nor
367              * should we re-do the tcg optimization currently hidden
368              * inside tcg_gen_code.  All that should be required is to
369              * flush the TBs, allocate a new TB, re-initialize it per
370              * above, and re-do the actual code generation.
371              */
372             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
373                           "Restarting code generation for "
374                           "code_gen_buffer overflow\n");
375             tb_unlock_pages(tb);
376             tcg_ctx->gen_tb = NULL;
377             goto buffer_overflow;
378 
379         case -2:
380             /*
381              * The code generated for the TranslationBlock is too large.
382              * The maximum size allowed by the unwind info is 64k.
383              * There may be stricter constraints from relocations
384              * in the tcg backend.
385              *
386              * Try again with half as many insns as we attempted this time.
387              * If a single insn overflows, there's a bug somewhere...
388              */
389             assert(max_insns > 1);
390             max_insns /= 2;
391             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
392                           "Restarting code generation with "
393                           "smaller translation block (max %d insns)\n",
394                           max_insns);
395 
396             /*
397              * The half-sized TB may not cross pages.
398              * TODO: Fix all targets that cross pages except with
399              * the first insn, at which point this can't be reached.
400              */
401             phys_p2 = tb_page_addr1(tb);
402             if (unlikely(phys_p2 != -1)) {
403                 tb_unlock_page1(phys_pc, phys_p2);
404                 tb_set_page_addr1(tb, -1);
405             }
406             goto restart_translate;
407 
408         case -3:
409             /*
410              * We had a page lock ordering problem.  In order to avoid
411              * deadlock we had to drop the lock on page0, which means
412              * that everything we translated so far is compromised.
413              * Restart with locks held on both pages.
414              */
415             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
416                           "Restarting code generation with re-locked pages");
417             goto restart_translate;
418 
419         default:
420             g_assert_not_reached();
421         }
422     }
423     tcg_ctx->gen_tb = NULL;
424 
425     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
426     if (unlikely(search_size < 0)) {
427         tb_unlock_pages(tb);
428         goto buffer_overflow;
429     }
430     tb->tc.size = gen_code_size;
431 
432     /*
433      * For CF_PCREL, attribute all executions of the generated code
434      * to its first mapping.
435      */
436     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
437 
438     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
439         qemu_log_in_addr_range(pc)) {
440         FILE *logfile = qemu_log_trylock();
441         if (logfile) {
442             int code_size, data_size;
443             const tcg_target_ulong *rx_data_gen_ptr;
444             size_t chunk_start;
445             int insn = 0;
446 
447             if (tcg_ctx->data_gen_ptr) {
448                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
449                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
450                 data_size = gen_code_size - code_size;
451             } else {
452                 rx_data_gen_ptr = 0;
453                 code_size = gen_code_size;
454                 data_size = 0;
455             }
456 
457             /* Dump header and the first instruction */
458             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
459             fprintf(logfile,
460                     "  -- guest addr 0x%016" PRIx64 " + tb prologue\n",
461                     tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
462             chunk_start = tcg_ctx->gen_insn_end_off[insn];
463             disas(logfile, tb->tc.ptr, chunk_start);
464 
465             /*
466              * Dump each instruction chunk, wrapping up empty chunks into
467              * the next instruction. The whole array is offset so the
468              * first entry is the beginning of the 2nd instruction.
469              */
470             while (insn < tb->icount) {
471                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
472                 if (chunk_end > chunk_start) {
473                     fprintf(logfile, "  -- guest addr 0x%016" PRIx64 "\n",
474                             tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
475                     disas(logfile, tb->tc.ptr + chunk_start,
476                           chunk_end - chunk_start);
477                     chunk_start = chunk_end;
478                 }
479                 insn++;
480             }
481 
482             if (chunk_start < code_size) {
483                 fprintf(logfile, "  -- tb slow paths + alignment\n");
484                 disas(logfile, tb->tc.ptr + chunk_start,
485                       code_size - chunk_start);
486             }
487 
488             /* Finally dump any data we may have after the block */
489             if (data_size) {
490                 int i;
491                 fprintf(logfile, "  data: [size=%d]\n", data_size);
492                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
493                     if (sizeof(tcg_target_ulong) == 8) {
494                         fprintf(logfile,
495                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
496                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
497                     } else if (sizeof(tcg_target_ulong) == 4) {
498                         fprintf(logfile,
499                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
500                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
501                     } else {
502                         qemu_build_not_reached();
503                     }
504                 }
505             }
506             fprintf(logfile, "\n");
507             qemu_log_unlock(logfile);
508         }
509     }
510 
511     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
512         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
513                  CODE_GEN_ALIGN));
514 
515     /* init jump list */
516     qemu_spin_init(&tb->jmp_lock);
517     tb->jmp_list_head = (uintptr_t)NULL;
518     tb->jmp_list_next[0] = (uintptr_t)NULL;
519     tb->jmp_list_next[1] = (uintptr_t)NULL;
520     tb->jmp_dest[0] = (uintptr_t)NULL;
521     tb->jmp_dest[1] = (uintptr_t)NULL;
522 
523     /* init original jump addresses which have been set during tcg_gen_code() */
524     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
525         tb_reset_jump(tb, 0);
526     }
527     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
528         tb_reset_jump(tb, 1);
529     }
530 
531     /*
532      * If the TB is not associated with a physical RAM page then it must be
533      * a temporary one-insn TB, and we have nothing left to do. Return early
534      * before attempting to link to other TBs or add to the lookup table.
535      */
536     if (tb_page_addr0(tb) == -1) {
537         assert_no_pages_locked();
538         return tb;
539     }
540 
541     /*
542      * Insert TB into the corresponding region tree before publishing it
543      * through QHT. Otherwise rewinding happened in the TB might fail to
544      * lookup itself using host PC.
545      */
546     tcg_tb_insert(tb);
547 
548     /*
549      * No explicit memory barrier is required -- tb_link_page() makes the
550      * TB visible in a consistent state.
551      */
552     existing_tb = tb_link_page(tb);
553     assert_no_pages_locked();
554 
555     /* if the TB already exists, discard what we just translated */
556     if (unlikely(existing_tb != tb)) {
557         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
558 
559         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
560         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
561         tcg_tb_remove(tb);
562         return existing_tb;
563     }
564     return tb;
565 }
566 
567 /* user-mode: call with mmap_lock held */
568 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
569 {
570     TranslationBlock *tb;
571 
572     assert_memory_lock();
573 
574     tb = tcg_tb_lookup(retaddr);
575     if (tb) {
576         /* We can use retranslation to find the PC.  */
577         cpu_restore_state_from_tb(cpu, tb, retaddr);
578         tb_phys_invalidate(tb, -1);
579     } else {
580         /* The exception probably happened in a helper.  The CPU state should
581            have been saved before calling it. Fetch the PC from there.  */
582         CPUArchState *env = cpu_env(cpu);
583         vaddr pc;
584         uint64_t cs_base;
585         tb_page_addr_t addr;
586         uint32_t flags;
587 
588         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
589         addr = get_page_addr_code(env, pc);
590         if (addr != -1) {
591             tb_invalidate_phys_range(addr, addr);
592         }
593     }
594 }
595 
596 #ifndef CONFIG_USER_ONLY
597 /*
598  * In deterministic execution mode, instructions doing device I/Os
599  * must be at the end of the TB.
600  *
601  * Called by softmmu_template.h, with iothread mutex not held.
602  */
603 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
604 {
605     TranslationBlock *tb;
606     CPUClass *cc;
607     uint32_t n;
608 
609     tb = tcg_tb_lookup(retaddr);
610     if (!tb) {
611         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
612                   (void *)retaddr);
613     }
614     cpu_restore_state_from_tb(cpu, tb, retaddr);
615 
616     /*
617      * Some guests must re-execute the branch when re-executing a delay
618      * slot instruction.  When this is the case, adjust icount and N
619      * to account for the re-execution of the branch.
620      */
621     n = 1;
622     cc = CPU_GET_CLASS(cpu);
623     if (cc->tcg_ops->io_recompile_replay_branch &&
624         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
625         cpu->neg.icount_decr.u16.low++;
626         n = 2;
627     }
628 
629     /*
630      * Exit the loop and potentially generate a new TB executing the
631      * just the I/O insns. We also limit instrumentation to memory
632      * operations only (which execute after completion) so we don't
633      * double instrument the instruction.
634      */
635     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
636 
637     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
638         vaddr pc = log_pc(cpu, tb);
639         if (qemu_log_in_addr_range(pc)) {
640             qemu_log("cpu_io_recompile: rewound execution of TB to %016"
641                      VADDR_PRIx "\n", pc);
642         }
643     }
644 
645     cpu_loop_exit_noexc(cpu);
646 }
647 
648 static void print_qht_statistics(struct qht_stats hst, GString *buf)
649 {
650     uint32_t hgram_opts;
651     size_t hgram_bins;
652     char *hgram;
653 
654     if (!hst.head_buckets) {
655         return;
656     }
657     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
658                            "(%0.2f%% head buckets used)\n",
659                            hst.used_head_buckets, hst.head_buckets,
660                            (double)hst.used_head_buckets /
661                            hst.head_buckets * 100);
662 
663     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
664     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
665     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
666         hgram_opts |= QDIST_PR_NODECIMAL;
667     }
668     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
669     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
670                            "Histogram: %s\n",
671                            qdist_avg(&hst.occupancy) * 100, hgram);
672     g_free(hgram);
673 
674     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
675     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
676     if (hgram_bins > 10) {
677         hgram_bins = 10;
678     } else {
679         hgram_bins = 0;
680         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
681     }
682     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
683     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
684                            "Histogram: %s\n",
685                            qdist_avg(&hst.chain), hgram);
686     g_free(hgram);
687 }
688 
/* Totals gathered by tb_tree_stats_iter() over all visited TBs. */
struct tb_tree_stats {
    size_t nb_tbs;            /* number of TBs visited */
    size_t host_size;         /* sum of tb->tc.size */
    size_t target_size;       /* sum of tb->size */
    size_t max_target_size;   /* largest tb->size seen */
    size_t direct_jmp_count;  /* TBs whose jmp_reset_offset[0] is valid */
    size_t direct_jmp2_count; /* TBs whose jmp_reset_offset[0] and [1] are valid */
    size_t cross_page;        /* TBs with a second page (tb_page_addr1 != -1) */
};
698 
699 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
700 {
701     const TranslationBlock *tb = value;
702     struct tb_tree_stats *tst = data;
703 
704     tst->nb_tbs++;
705     tst->host_size += tb->tc.size;
706     tst->target_size += tb->size;
707     if (tb->size > tst->max_target_size) {
708         tst->max_target_size = tb->size;
709     }
710     if (tb_page_addr1(tb) != -1) {
711         tst->cross_page++;
712     }
713     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
714         tst->direct_jmp_count++;
715         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
716             tst->direct_jmp2_count++;
717         }
718     }
719     return false;
720 }
721 
722 void dump_exec_info(GString *buf)
723 {
724     struct tb_tree_stats tst = {};
725     struct qht_stats hst;
726     size_t nb_tbs, flush_full, flush_part, flush_elide;
727 
728     tcg_tb_foreach(tb_tree_stats_iter, &tst);
729     nb_tbs = tst.nb_tbs;
730     /* XXX: avoid using doubles ? */
731     g_string_append_printf(buf, "Translation buffer state:\n");
732     /*
733      * Report total code size including the padding and TB structs;
734      * otherwise users might think "-accel tcg,tb-size" is not honoured.
735      * For avg host size we use the precise numbers from tb_tree_stats though.
736      */
737     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
738                            tcg_code_size(), tcg_code_capacity());
739     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
740     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
741                            nb_tbs ? tst.target_size / nb_tbs : 0,
742                            tst.max_target_size);
743     g_string_append_printf(buf, "TB avg host size    %zu bytes "
744                            "(expansion ratio: %0.1f)\n",
745                            nb_tbs ? tst.host_size / nb_tbs : 0,
746                            tst.target_size ?
747                            (double)tst.host_size / tst.target_size : 0);
748     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
749                            tst.cross_page,
750                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
751     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
752                            "(2 jumps=%zu %zu%%)\n",
753                            tst.direct_jmp_count,
754                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
755                            tst.direct_jmp2_count,
756                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
757 
758     qht_statistics_init(&tb_ctx.htable, &hst);
759     print_qht_statistics(hst, buf);
760     qht_statistics_destroy(&hst);
761 
762     g_string_append_printf(buf, "\nStatistics:\n");
763     g_string_append_printf(buf, "TB flush count      %u\n",
764                            qatomic_read(&tb_ctx.tb_flush_count));
765     g_string_append_printf(buf, "TB invalidate count %u\n",
766                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
767 
768     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
769     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
770     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
771     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
772     tcg_dump_info(buf);
773 }
774 
775 #else /* CONFIG_USER_ONLY */
776 
777 void cpu_interrupt(CPUState *cpu, int mask)
778 {
779     g_assert(qemu_mutex_iothread_locked());
780     cpu->interrupt_request |= mask;
781     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
782 }
783 
784 #endif /* CONFIG_USER_ONLY */
785 
786 /*
787  * Called by generic code at e.g. cpu reset after cpu creation,
788  * therefore we must be prepared to allocate the jump cache.
789  */
790 void tcg_flush_jmp_cache(CPUState *cpu)
791 {
792     CPUJumpCache *jc = cpu->tb_jmp_cache;
793 
794     /* During early initialization, the cache may not yet be allocated. */
795     if (unlikely(jc == NULL)) {
796         return;
797     }
798 
799     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
800         qatomic_set(&jc->array[i].tb, NULL);
801     }
802 }
803 
804 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
805 void tcg_flush_softmmu_tlb(CPUState *cs)
806 {
807 #ifdef CONFIG_SOFTMMU
808     tlb_flush(cs);
809 #endif
810 }
811