xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 136cb9cc)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #include "trace.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg.h"
26 #if defined(CONFIG_USER_ONLY)
27 #include "qemu.h"
28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
29 #include <sys/param.h>
30 #if __FreeBSD_version >= 700104
31 #define HAVE_KINFO_GETVMMAP
32 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
33 #include <sys/proc.h>
34 #include <machine/profile.h>
35 #define _KERNEL
36 #include <sys/user.h>
37 #undef _KERNEL
38 #undef sigqueue
39 #include <libutil.h>
40 #endif
41 #endif
42 #else
43 #include "exec/ram_addr.h"
44 #endif
45 
46 #include "exec/cputlb.h"
47 #include "exec/translate-all.h"
48 #include "exec/translator.h"
49 #include "exec/tb-flush.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "qemu/timer.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 #include "perf.h"
66 #include "tcg/insn-start-words.h"
67 
/*
 * File-scope TB bookkeeping state.  Within this file it is read for the
 * TB hash table (htable) and for the flush / invalidate counters.
 */
TBContext tb_ctx;
69 
/*
 * Store VAL at P in signed LEB128 form: seven payload bits per byte,
 * least significant group first, with bit 7 set on every byte but the last.
 * Return the address just past the last byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign_bit = group & 0x40;

        val >>= 7;

        /*
         * The sequence is complete once what remains of VAL is nothing
         * but the sign extension of the group just extracted.
         */
        if ((val == 0 && !sign_bit) || (val == -1 && sign_bit)) {
            *p++ = group;
            return p;
        }

        /* More groups follow: flag this byte as a continuation. */
        *p++ = group | 0x80;
    }
}
91 
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The value is reconstructed as a full 64-bit quantity: sign extension is
 * applied relative to the 64-bit result, independent of the guest word
 * size, so that any int64_t written by encode_sleb128() round-trips.
 * The input is trusted; no bound is placed on the sequence length.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        /* Accumulate unsigned so that shifting into bit 63 is well defined. */
        val |= (uint64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);

    /* Bit 6 of the final byte is the sign; replicate it into the top bits. */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
114 
115 /* Encode the data collected about the instructions while compiling TB.
116    Place the data at BLOCK, and return the number of bytes consumed.
117 
118    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
119    which come from the target's insn_start data, followed by a uintptr_t
120    which comes from the host pc of the end of the code implementing the insn.
121 
122    Each line of the table is encoded as sleb128 deltas from the previous
123    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
124    That is, the first column is seeded with the guest pc, the last column
125    with the host pc, and the middle columns with zeros.  */
126 
127 static int encode_search(TranslationBlock *tb, uint8_t *block)
128 {
129     uint8_t *highwater = tcg_ctx->code_gen_highwater;
130     uint64_t *insn_data = tcg_ctx->gen_insn_data;
131     uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
132     uint8_t *p = block;
133     int i, j, n;
134 
135     for (i = 0, n = tb->icount; i < n; ++i) {
136         uint64_t prev, curr;
137 
138         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
139             if (i == 0) {
140                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
141             } else {
142                 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
143             }
144             curr = insn_data[i * TARGET_INSN_START_WORDS + j];
145             p = encode_sleb128(p, curr - prev);
146         }
147         prev = (i == 0 ? 0 : insn_end_off[i - 1]);
148         curr = insn_end_off[i];
149         p = encode_sleb128(p, curr - prev);
150 
151         /* Test for (pending) buffer overflow.  The assumption is that any
152            one row beginning below the high water mark cannot overrun
153            the buffer completely.  Thus we can test for overflow after
154            encoding a row without having to check during encoding.  */
155         if (unlikely(p > highwater)) {
156             return -1;
157         }
158     }
159 
160     return p - block;
161 }
162 
163 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
164                                    uint64_t *data)
165 {
166     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
167     const uint8_t *p = tb->tc.ptr + tb->tc.size;
168     int i, j, num_insns = tb->icount;
169 
170     host_pc -= GETPC_ADJ;
171 
172     if (host_pc < iter_pc) {
173         return -1;
174     }
175 
176     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
177     if (!(tb_cflags(tb) & CF_PCREL)) {
178         data[0] = tb->pc;
179     }
180 
181     /*
182      * Reconstruct the stored insn data while looking for the point
183      * at which the end of the insn exceeds host_pc.
184      */
185     for (i = 0; i < num_insns; ++i) {
186         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
187             data[j] += decode_sleb128(&p);
188         }
189         iter_pc += decode_sleb128(&p);
190         if (iter_pc > host_pc) {
191             return num_insns - i;
192         }
193     }
194     return -1;
195 }
196 
197 /*
198  * The cpu state corresponding to 'host_pc' is restored in
199  * preparation for exiting the TB.
200  */
201 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
202                                uintptr_t host_pc)
203 {
204     uint64_t data[TARGET_INSN_START_WORDS];
205     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
206 
207     if (insns_left < 0) {
208         return;
209     }
210 
211     if (tb_cflags(tb) & CF_USE_ICOUNT) {
212         assert(icount_enabled());
213         /*
214          * Reset the cycle counter to the start of the block and
215          * shift if to the number of actually executed instructions.
216          */
217         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
218     }
219 
220     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
221 }
222 
223 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
224 {
225     /*
226      * The host_pc has to be in the rx region of the code buffer.
227      * If it is not we will not be able to resolve it here.
228      * The two cases where host_pc will not be correct are:
229      *
230      *  - fault during translation (instruction fetch)
231      *  - fault from helper (not using GETPC() macro)
232      *
233      * Either way we need return early as we can't resolve it here.
234      */
235     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
236         TranslationBlock *tb = tcg_tb_lookup(host_pc);
237         if (tb) {
238             cpu_restore_state_from_tb(cpu, tb, host_pc);
239             return true;
240         }
241     }
242     return false;
243 }
244 
245 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
246 {
247     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
248         TranslationBlock *tb = tcg_tb_lookup(host_pc);
249         if (tb) {
250             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
251         }
252     }
253     return false;
254 }
255 
/* Run the page size initialisation followed by the page table configuration. */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
261 
/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * Once the guest front end has run, *MAX_INSNS is overwritten with
 * tb->icount.  TI is accepted but not used by this function.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           vaddr pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    /* Non-zero means control came back here through jmp_trans. */
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    /* tcg_ctx->cpu is only set for the duration of the front-end pass. */
    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

    return tcg_gen_code(tcg_ctx, tb, pc);
}
285 
286 /* Called with mmap_lock held for user mode emulation.  */
287 TranslationBlock *tb_gen_code(CPUState *cpu,
288                               vaddr pc, uint64_t cs_base,
289                               uint32_t flags, int cflags)
290 {
291     CPUArchState *env = cpu->env_ptr;
292     TranslationBlock *tb, *existing_tb;
293     tb_page_addr_t phys_pc, phys_p2;
294     tcg_insn_unit *gen_code_buf;
295     int gen_code_size, search_size, max_insns;
296     int64_t ti;
297     void *host_pc;
298 
299     assert_memory_lock();
300     qemu_thread_jit_write();
301 
302     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
303 
304     if (phys_pc == -1) {
305         /* Generate a one-shot TB with 1 insn in it */
306         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
307     }
308 
309     max_insns = cflags & CF_COUNT_MASK;
310     if (max_insns == 0) {
311         max_insns = TCG_MAX_INSNS;
312     }
313     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
314 
315  buffer_overflow:
316     assert_no_pages_locked();
317     tb = tcg_tb_alloc(tcg_ctx);
318     if (unlikely(!tb)) {
319         /* flush must be done */
320         tb_flush(cpu);
321         mmap_unlock();
322         /* Make the execution loop process the flush as soon as possible.  */
323         cpu->exception_index = EXCP_INTERRUPT;
324         cpu_loop_exit(cpu);
325     }
326 
327     gen_code_buf = tcg_ctx->code_gen_ptr;
328     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
329     if (!(cflags & CF_PCREL)) {
330         tb->pc = pc;
331     }
332     tb->cs_base = cs_base;
333     tb->flags = flags;
334     tb->cflags = cflags;
335     tb_set_page_addr0(tb, phys_pc);
336     tb_set_page_addr1(tb, -1);
337     if (phys_pc != -1) {
338         tb_lock_page0(phys_pc);
339     }
340 
341     tcg_ctx->gen_tb = tb;
342     tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
343 #ifdef CONFIG_SOFTMMU
344     tcg_ctx->page_bits = TARGET_PAGE_BITS;
345     tcg_ctx->page_mask = TARGET_PAGE_MASK;
346     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
347     tcg_ctx->tlb_fast_offset =
348         (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
349 #endif
350     tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
351 #ifdef TCG_GUEST_DEFAULT_MO
352     tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
353 #else
354     tcg_ctx->guest_mo = TCG_MO_ALL;
355 #endif
356 
357  restart_translate:
358     trace_translate_block(tb, pc, tb->tc.ptr);
359 
360     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
361     if (unlikely(gen_code_size < 0)) {
362         switch (gen_code_size) {
363         case -1:
364             /*
365              * Overflow of code_gen_buffer, or the current slice of it.
366              *
367              * TODO: We don't need to re-do gen_intermediate_code, nor
368              * should we re-do the tcg optimization currently hidden
369              * inside tcg_gen_code.  All that should be required is to
370              * flush the TBs, allocate a new TB, re-initialize it per
371              * above, and re-do the actual code generation.
372              */
373             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
374                           "Restarting code generation for "
375                           "code_gen_buffer overflow\n");
376             tb_unlock_pages(tb);
377             tcg_ctx->gen_tb = NULL;
378             goto buffer_overflow;
379 
380         case -2:
381             /*
382              * The code generated for the TranslationBlock is too large.
383              * The maximum size allowed by the unwind info is 64k.
384              * There may be stricter constraints from relocations
385              * in the tcg backend.
386              *
387              * Try again with half as many insns as we attempted this time.
388              * If a single insn overflows, there's a bug somewhere...
389              */
390             assert(max_insns > 1);
391             max_insns /= 2;
392             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
393                           "Restarting code generation with "
394                           "smaller translation block (max %d insns)\n",
395                           max_insns);
396 
397             /*
398              * The half-sized TB may not cross pages.
399              * TODO: Fix all targets that cross pages except with
400              * the first insn, at which point this can't be reached.
401              */
402             phys_p2 = tb_page_addr1(tb);
403             if (unlikely(phys_p2 != -1)) {
404                 tb_unlock_page1(phys_pc, phys_p2);
405                 tb_set_page_addr1(tb, -1);
406             }
407             goto restart_translate;
408 
409         case -3:
410             /*
411              * We had a page lock ordering problem.  In order to avoid
412              * deadlock we had to drop the lock on page0, which means
413              * that everything we translated so far is compromised.
414              * Restart with locks held on both pages.
415              */
416             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
417                           "Restarting code generation with re-locked pages");
418             goto restart_translate;
419 
420         default:
421             g_assert_not_reached();
422         }
423     }
424     tcg_ctx->gen_tb = NULL;
425 
426     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
427     if (unlikely(search_size < 0)) {
428         tb_unlock_pages(tb);
429         goto buffer_overflow;
430     }
431     tb->tc.size = gen_code_size;
432 
433     /*
434      * For CF_PCREL, attribute all executions of the generated code
435      * to its first mapping.
436      */
437     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
438 
439     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
440         qemu_log_in_addr_range(pc)) {
441         FILE *logfile = qemu_log_trylock();
442         if (logfile) {
443             int code_size, data_size;
444             const tcg_target_ulong *rx_data_gen_ptr;
445             size_t chunk_start;
446             int insn = 0;
447 
448             if (tcg_ctx->data_gen_ptr) {
449                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
450                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
451                 data_size = gen_code_size - code_size;
452             } else {
453                 rx_data_gen_ptr = 0;
454                 code_size = gen_code_size;
455                 data_size = 0;
456             }
457 
458             /* Dump header and the first instruction */
459             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
460             fprintf(logfile,
461                     "  -- guest addr 0x%016" PRIx64 " + tb prologue\n",
462                     tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
463             chunk_start = tcg_ctx->gen_insn_end_off[insn];
464             disas(logfile, tb->tc.ptr, chunk_start);
465 
466             /*
467              * Dump each instruction chunk, wrapping up empty chunks into
468              * the next instruction. The whole array is offset so the
469              * first entry is the beginning of the 2nd instruction.
470              */
471             while (insn < tb->icount) {
472                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
473                 if (chunk_end > chunk_start) {
474                     fprintf(logfile, "  -- guest addr 0x%016" PRIx64 "\n",
475                             tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
476                     disas(logfile, tb->tc.ptr + chunk_start,
477                           chunk_end - chunk_start);
478                     chunk_start = chunk_end;
479                 }
480                 insn++;
481             }
482 
483             if (chunk_start < code_size) {
484                 fprintf(logfile, "  -- tb slow paths + alignment\n");
485                 disas(logfile, tb->tc.ptr + chunk_start,
486                       code_size - chunk_start);
487             }
488 
489             /* Finally dump any data we may have after the block */
490             if (data_size) {
491                 int i;
492                 fprintf(logfile, "  data: [size=%d]\n", data_size);
493                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
494                     if (sizeof(tcg_target_ulong) == 8) {
495                         fprintf(logfile,
496                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
497                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
498                     } else if (sizeof(tcg_target_ulong) == 4) {
499                         fprintf(logfile,
500                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
501                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
502                     } else {
503                         qemu_build_not_reached();
504                     }
505                 }
506             }
507             fprintf(logfile, "\n");
508             qemu_log_unlock(logfile);
509         }
510     }
511 
512     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
513         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
514                  CODE_GEN_ALIGN));
515 
516     /* init jump list */
517     qemu_spin_init(&tb->jmp_lock);
518     tb->jmp_list_head = (uintptr_t)NULL;
519     tb->jmp_list_next[0] = (uintptr_t)NULL;
520     tb->jmp_list_next[1] = (uintptr_t)NULL;
521     tb->jmp_dest[0] = (uintptr_t)NULL;
522     tb->jmp_dest[1] = (uintptr_t)NULL;
523 
524     /* init original jump addresses which have been set during tcg_gen_code() */
525     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
526         tb_reset_jump(tb, 0);
527     }
528     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
529         tb_reset_jump(tb, 1);
530     }
531 
532     /*
533      * If the TB is not associated with a physical RAM page then it must be
534      * a temporary one-insn TB, and we have nothing left to do. Return early
535      * before attempting to link to other TBs or add to the lookup table.
536      */
537     if (tb_page_addr0(tb) == -1) {
538         assert_no_pages_locked();
539         return tb;
540     }
541 
542     /*
543      * Insert TB into the corresponding region tree before publishing it
544      * through QHT. Otherwise rewinding happened in the TB might fail to
545      * lookup itself using host PC.
546      */
547     tcg_tb_insert(tb);
548 
549     /*
550      * No explicit memory barrier is required -- tb_link_page() makes the
551      * TB visible in a consistent state.
552      */
553     existing_tb = tb_link_page(tb);
554     assert_no_pages_locked();
555 
556     /* if the TB already exists, discard what we just translated */
557     if (unlikely(existing_tb != tb)) {
558         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
559 
560         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
561         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
562         tcg_tb_remove(tb);
563         return existing_tb;
564     }
565     return tb;
566 }
567 
568 /* user-mode: call with mmap_lock held */
569 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
570 {
571     TranslationBlock *tb;
572 
573     assert_memory_lock();
574 
575     tb = tcg_tb_lookup(retaddr);
576     if (tb) {
577         /* We can use retranslation to find the PC.  */
578         cpu_restore_state_from_tb(cpu, tb, retaddr);
579         tb_phys_invalidate(tb, -1);
580     } else {
581         /* The exception probably happened in a helper.  The CPU state should
582            have been saved before calling it. Fetch the PC from there.  */
583         CPUArchState *env = cpu->env_ptr;
584         vaddr pc;
585         uint64_t cs_base;
586         tb_page_addr_t addr;
587         uint32_t flags;
588 
589         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
590         addr = get_page_addr_code(env, pc);
591         if (addr != -1) {
592             tb_invalidate_phys_range(addr, addr);
593         }
594     }
595 }
596 
597 #ifndef CONFIG_USER_ONLY
598 /*
599  * In deterministic execution mode, instructions doing device I/Os
600  * must be at the end of the TB.
601  *
602  * Called by softmmu_template.h, with iothread mutex not held.
603  */
604 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
605 {
606     TranslationBlock *tb;
607     CPUClass *cc;
608     uint32_t n;
609 
610     tb = tcg_tb_lookup(retaddr);
611     if (!tb) {
612         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
613                   (void *)retaddr);
614     }
615     cpu_restore_state_from_tb(cpu, tb, retaddr);
616 
617     /*
618      * Some guests must re-execute the branch when re-executing a delay
619      * slot instruction.  When this is the case, adjust icount and N
620      * to account for the re-execution of the branch.
621      */
622     n = 1;
623     cc = CPU_GET_CLASS(cpu);
624     if (cc->tcg_ops->io_recompile_replay_branch &&
625         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
626         cpu_neg(cpu)->icount_decr.u16.low++;
627         n = 2;
628     }
629 
630     /*
631      * Exit the loop and potentially generate a new TB executing the
632      * just the I/O insns. We also limit instrumentation to memory
633      * operations only (which execute after completion) so we don't
634      * double instrument the instruction.
635      */
636     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
637 
638     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
639         vaddr pc = log_pc(cpu, tb);
640         if (qemu_log_in_addr_range(pc)) {
641             qemu_log("cpu_io_recompile: rewound execution of TB to %016"
642                      VADDR_PRIx "\n", pc);
643         }
644     }
645 
646     cpu_loop_exit_noexc(cpu);
647 }
648 
649 static void print_qht_statistics(struct qht_stats hst, GString *buf)
650 {
651     uint32_t hgram_opts;
652     size_t hgram_bins;
653     char *hgram;
654 
655     if (!hst.head_buckets) {
656         return;
657     }
658     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
659                            "(%0.2f%% head buckets used)\n",
660                            hst.used_head_buckets, hst.head_buckets,
661                            (double)hst.used_head_buckets /
662                            hst.head_buckets * 100);
663 
664     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
665     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
666     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
667         hgram_opts |= QDIST_PR_NODECIMAL;
668     }
669     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
670     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
671                            "Histogram: %s\n",
672                            qdist_avg(&hst.occupancy) * 100, hgram);
673     g_free(hgram);
674 
675     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
676     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
677     if (hgram_bins > 10) {
678         hgram_bins = 10;
679     } else {
680         hgram_bins = 0;
681         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
682     }
683     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
684     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
685                            "Histogram: %s\n",
686                            qdist_avg(&hst.chain), hgram);
687     g_free(hgram);
688 }
689 
/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;              /* number of TBs visited */
    size_t host_size;           /* sum of tb->tc.size */
    size_t target_size;         /* sum of tb->size */
    size_t max_target_size;     /* largest tb->size seen */
    size_t direct_jmp_count;    /* TBs whose jmp_reset_offset[0] is valid */
    size_t direct_jmp2_count;   /* TBs whose jmp_reset_offset[0] and [1] are valid */
    size_t cross_page;          /* TBs with a second page address set */
};
699 
700 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
701 {
702     const TranslationBlock *tb = value;
703     struct tb_tree_stats *tst = data;
704 
705     tst->nb_tbs++;
706     tst->host_size += tb->tc.size;
707     tst->target_size += tb->size;
708     if (tb->size > tst->max_target_size) {
709         tst->max_target_size = tb->size;
710     }
711     if (tb_page_addr1(tb) != -1) {
712         tst->cross_page++;
713     }
714     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
715         tst->direct_jmp_count++;
716         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
717             tst->direct_jmp2_count++;
718         }
719     }
720     return false;
721 }
722 
723 void dump_exec_info(GString *buf)
724 {
725     struct tb_tree_stats tst = {};
726     struct qht_stats hst;
727     size_t nb_tbs, flush_full, flush_part, flush_elide;
728 
729     tcg_tb_foreach(tb_tree_stats_iter, &tst);
730     nb_tbs = tst.nb_tbs;
731     /* XXX: avoid using doubles ? */
732     g_string_append_printf(buf, "Translation buffer state:\n");
733     /*
734      * Report total code size including the padding and TB structs;
735      * otherwise users might think "-accel tcg,tb-size" is not honoured.
736      * For avg host size we use the precise numbers from tb_tree_stats though.
737      */
738     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
739                            tcg_code_size(), tcg_code_capacity());
740     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
741     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
742                            nb_tbs ? tst.target_size / nb_tbs : 0,
743                            tst.max_target_size);
744     g_string_append_printf(buf, "TB avg host size    %zu bytes "
745                            "(expansion ratio: %0.1f)\n",
746                            nb_tbs ? tst.host_size / nb_tbs : 0,
747                            tst.target_size ?
748                            (double)tst.host_size / tst.target_size : 0);
749     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
750                            tst.cross_page,
751                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
752     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
753                            "(2 jumps=%zu %zu%%)\n",
754                            tst.direct_jmp_count,
755                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
756                            tst.direct_jmp2_count,
757                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
758 
759     qht_statistics_init(&tb_ctx.htable, &hst);
760     print_qht_statistics(hst, buf);
761     qht_statistics_destroy(&hst);
762 
763     g_string_append_printf(buf, "\nStatistics:\n");
764     g_string_append_printf(buf, "TB flush count      %u\n",
765                            qatomic_read(&tb_ctx.tb_flush_count));
766     g_string_append_printf(buf, "TB invalidate count %u\n",
767                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
768 
769     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
770     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
771     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
772     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
773     tcg_dump_info(buf);
774 }
775 
776 #else /* CONFIG_USER_ONLY */
777 
/*
 * Set the bits of MASK in CPU's pending interrupt request word.
 * The iothread mutex must be held by the caller; this is asserted.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    /*
     * NOTE(review): presumably setting the high half of icount_decr to -1
     * makes currently executing translated code exit promptly -- confirm.
     */
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}
784 
785 #endif /* CONFIG_USER_ONLY */
786 
787 /*
788  * Called by generic code at e.g. cpu reset after cpu creation,
789  * therefore we must be prepared to allocate the jump cache.
790  */
791 void tcg_flush_jmp_cache(CPUState *cpu)
792 {
793     CPUJumpCache *jc = cpu->tb_jmp_cache;
794 
795     /* During early initialization, the cache may not yet be allocated. */
796     if (unlikely(jc == NULL)) {
797         return;
798     }
799 
800     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
801         qatomic_set(&jc->array[i].tb, NULL);
802     }
803 }
804 
/*
 * This is a wrapper for common code that can not use CONFIG_SOFTMMU.
 * It flushes the TLB of CS when built with CONFIG_SOFTMMU and compiles
 * to an empty function otherwise.
 */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}
812