xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 6c1e3906)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "exec/tb-flush.h"
51 #include "qemu/bitmap.h"
52 #include "qemu/qemu-print.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "qemu/timer.h"
56 #include "exec/log.h"
57 #include "sysemu/cpus.h"
58 #include "sysemu/cpu-timers.h"
59 #include "sysemu/tcg.h"
60 #include "qapi/error.h"
61 #include "hw/core/tcg-cpu-ops.h"
62 #include "tb-jmp-cache.h"
63 #include "tb-hash.h"
64 #include "tb-context.h"
65 #include "internal.h"
66 #include "perf.h"
67 
68 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
69 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
70                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
71                   * BITS_PER_BYTE);
72 
73 TBContext tb_ctx;
74 
75 /*
76  * Encode VAL as a signed leb128 sequence at P.
77  * Return P incremented past the encoded value.
78  */
79 static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
80 {
81     int more, byte;
82 
83     do {
84         byte = val & 0x7f;
85         val >>= 7;
86         more = !((val == 0 && (byte & 0x40) == 0)
87                  || (val == -1 && (byte & 0x40) != 0));
88         if (more) {
89             byte |= 0x80;
90         }
91         *p++ = byte;
92     } while (more);
93 
94     return p;
95 }
96 
97 /*
98  * Decode a signed leb128 sequence at *PP; increment *PP past the
99  * decoded value.  Return the decoded value.
100  */
101 static int64_t decode_sleb128(const uint8_t **pp)
102 {
103     const uint8_t *p = *pp;
104     int64_t val = 0;
105     int byte, shift = 0;
106 
107     do {
108         byte = *p++;
109         val |= (int64_t)(byte & 0x7f) << shift;
110         shift += 7;
111     } while (byte & 0x80);
112     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
113         val |= -(int64_t)1 << shift;
114     }
115 
116     *pp = p;
117     return val;
118 }
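
/*
 * Worked example (illustrative only, not used by the code): the value
 * 128 encodes as the two bytes { 0x80, 0x01 } -- low 7 bits first,
 * with the continuation bit set on the first byte -- while -2 encodes
 * as the single byte { 0x7e }, since bit 6 of the final byte carries
 * the sign.  decode_sleb128() reverses this: for { 0x7e } it
 * accumulates 0x7e, sees no continuation bit, and sign-extends from
 * bit 6 to recover -2.
 */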
119 
120 /* Encode the data collected about the instructions while compiling the TB.
121    Place the data at BLOCK, and return the number of bytes consumed.
122 
123    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
124    which come from the target's insn_start data, followed by a uintptr_t
125    which comes from the host pc of the end of the code implementing the insn.
126 
127    Each line of the table is encoded as sleb128 deltas from the previous
128    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
129    That is, the first column is seeded with the guest pc, the last column
130    with the host pc, and the middle columns with zeros.  */
131 
132 static int encode_search(TranslationBlock *tb, uint8_t *block)
133 {
134     uint8_t *highwater = tcg_ctx->code_gen_highwater;
135     uint8_t *p = block;
136     int i, j, n;
137 
138     for (i = 0, n = tb->icount; i < n; ++i) {
139         uint64_t prev;
140 
141         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
142             if (i == 0) {
143                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
144             } else {
145                 prev = tcg_ctx->gen_insn_data[i - 1][j];
146             }
147             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
148         }
149         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
150         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
151 
152         /* Test for (pending) buffer overflow.  The assumption is that any
153            one row beginning below the high water mark cannot overrun
154            the buffer completely.  Thus we can test for overflow after
155            encoding a row without having to check during encoding.  */
156         if (unlikely(p > highwater)) {
157             return -1;
158         }
159     }
160 
161     return p - block;
162 }
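
/*
 * Illustrative layout (made-up row contents): for a non-CF_PCREL block
 * with TARGET_INSN_START_WORDS == 1 and two guest insns, the encoded
 * search data is four sleb128 values:
 *
 *   row 0:  insn0_pc - tb->pc        insn0_end_off - 0
 *   row 1:  insn1_pc - insn0_pc      insn1_end_off - insn0_end_off
 *
 * i.e. every column is delta-encoded against the previous row, which
 * is exactly what cpu_unwind_data_from_tb() below re-accumulates.
 */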
163 
164 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
165                                    uint64_t *data)
166 {
167     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
168     const uint8_t *p = tb->tc.ptr + tb->tc.size;
169     int i, j, num_insns = tb->icount;
170 
171     host_pc -= GETPC_ADJ;
172 
173     if (host_pc < iter_pc) {
174         return -1;
175     }
176 
177     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
178     if (!(tb_cflags(tb) & CF_PCREL)) {
179         data[0] = tb->pc;
180     }
181 
182     /*
183      * Reconstruct the stored insn data while looking for the point
184      * at which the end of the insn exceeds host_pc.
185      */
186     for (i = 0; i < num_insns; ++i) {
187         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
188             data[j] += decode_sleb128(&p);
189         }
190         iter_pc += decode_sleb128(&p);
191         if (iter_pc > host_pc) {
192             return num_insns - i;
193         }
194     }
195     return -1;
196 }
197 
198 /*
199  * The cpu state corresponding to 'host_pc' is restored in
200  * preparation for exiting the TB.
201  */
202 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
203                                uintptr_t host_pc)
204 {
205     uint64_t data[TARGET_INSN_START_WORDS];
206 #ifdef CONFIG_PROFILER
207     TCGProfile *prof = &tcg_ctx->prof;
208     int64_t ti = profile_getclock();
209 #endif
210     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
211 
212     if (insns_left < 0) {
213         return;
214     }
215 
216     if (tb_cflags(tb) & CF_USE_ICOUNT) {
217         assert(icount_enabled());
218         /*
219          * Reset the cycle counter to the start of the block and
220          * shift it to the number of actually executed instructions.
221          */
222         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
223     }
224 
225     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
226 
227 #ifdef CONFIG_PROFILER
228     qatomic_set(&prof->restore_time,
229                 prof->restore_time + profile_getclock() - ti);
230     qatomic_set(&prof->restore_count, prof->restore_count + 1);
231 #endif
232 }
233 
234 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
235 {
236     /*
237      * The host_pc has to be in the rx region of the code buffer.
238      * If it is not, we will not be able to resolve it here.
239      * The two cases where host_pc will not be correct are:
240      *
241      *  - fault during translation (instruction fetch)
242      *  - fault from helper (not using GETPC() macro)
243      *
244      * Either way we need to return early as we can't resolve it here.
245      */
246     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
247         TranslationBlock *tb = tcg_tb_lookup(host_pc);
248         if (tb) {
249             cpu_restore_state_from_tb(cpu, tb, host_pc);
250             return true;
251         }
252     }
253     return false;
254 }
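
/*
 * Hypothetical usage sketch: only cpu_restore_state(), GETPC() and
 * env_cpu() below are real; the helper and exception names are made up.
 * A target helper that must synchronize the guest state with the
 * faulting instruction before raising an exception might do:
 *
 *     void helper_check_foo(CPUArchState *env, uint32_t arg)
 *     {
 *         if (!foo_is_valid(arg)) {
 *             cpu_restore_state(env_cpu(env), GETPC());
 *             do_raise_foo_exception(env);
 *         }
 *     }
 *
 * GETPC() captures the host return address inside the helper, which is
 * then mapped back to guest state via the unwind data decoded above.
 */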
255 
256 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
257 {
258     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
259         TranslationBlock *tb = tcg_tb_lookup(host_pc);
260         if (tb) {
261             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
262         }
263     }
264     return false;
265 }
266 
267 void page_init(void)
268 {
269     page_size_init();
270     page_table_config_init();
271 }
272 
273 /*
274  * Isolate the portion of code gen which can setjmp/longjmp.
275  * Return the size of the generated code, or negative on error.
276  */
277 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
278                            target_ulong pc, void *host_pc,
279                            int *max_insns, int64_t *ti)
280 {
281     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
282     if (unlikely(ret != 0)) {
283         return ret;
284     }
285 
286     tcg_func_start(tcg_ctx);
287 
288     tcg_ctx->cpu = env_cpu(env);
289     gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
290     assert(tb->size != 0);
291     tcg_ctx->cpu = NULL;
292     *max_insns = tb->icount;
293 
294 #ifdef CONFIG_PROFILER
295     qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
296     qatomic_set(&tcg_ctx->prof.interm_time,
297                 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
298     *ti = profile_getclock();
299 #endif
300 
301     return tcg_gen_code(tcg_ctx, tb, pc);
302 }
303 
304 /* Called with mmap_lock held for user mode emulation.  */
305 TranslationBlock *tb_gen_code(CPUState *cpu,
306                               target_ulong pc, target_ulong cs_base,
307                               uint32_t flags, int cflags)
308 {
309     CPUArchState *env = cpu->env_ptr;
310     TranslationBlock *tb, *existing_tb;
311     tb_page_addr_t phys_pc;
312     tcg_insn_unit *gen_code_buf;
313     int gen_code_size, search_size, max_insns;
314 #ifdef CONFIG_PROFILER
315     TCGProfile *prof = &tcg_ctx->prof;
316 #endif
317     int64_t ti;
318     void *host_pc;
319 
320     assert_memory_lock();
321     qemu_thread_jit_write();
322 
323     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
324 
325     if (phys_pc == -1) {
326         /* Generate a one-shot TB with 1 insn in it */
327         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
328     }
329 
330     max_insns = cflags & CF_COUNT_MASK;
331     if (max_insns == 0) {
332         max_insns = TCG_MAX_INSNS;
333     }
334     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
335 
336  buffer_overflow:
337     tb = tcg_tb_alloc(tcg_ctx);
338     if (unlikely(!tb)) {
339         /* flush must be done */
340         tb_flush(cpu);
341         mmap_unlock();
342         /* Make the execution loop process the flush as soon as possible.  */
343         cpu->exception_index = EXCP_INTERRUPT;
344         cpu_loop_exit(cpu);
345     }
346 
347     gen_code_buf = tcg_ctx->code_gen_ptr;
348     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
349     if (!(cflags & CF_PCREL)) {
350         tb->pc = pc;
351     }
352     tb->cs_base = cs_base;
353     tb->flags = flags;
354     tb->cflags = cflags;
355     tb->trace_vcpu_dstate = *cpu->trace_dstate;
356     tb_set_page_addr0(tb, phys_pc);
357     tb_set_page_addr1(tb, -1);
358     tcg_ctx->gen_tb = tb;
359     tcg_ctx->addr_type = TCG_TYPE_TL;
360 #ifdef CONFIG_SOFTMMU
361     tcg_ctx->page_bits = TARGET_PAGE_BITS;
362     tcg_ctx->page_mask = TARGET_PAGE_MASK;
363     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
364 #endif
365 
366  tb_overflow:
367 
368 #ifdef CONFIG_PROFILER
369     /* includes aborted translations because of exceptions */
370     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
371     ti = profile_getclock();
372 #endif
373 
374     trace_translate_block(tb, pc, tb->tc.ptr);
375 
376     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
377     if (unlikely(gen_code_size < 0)) {
378         switch (gen_code_size) {
379         case -1:
380             /*
381              * Overflow of code_gen_buffer, or the current slice of it.
382              *
383              * TODO: We don't need to re-do gen_intermediate_code, nor
384              * should we re-do the tcg optimization currently hidden
385              * inside tcg_gen_code.  All that should be required is to
386              * flush the TBs, allocate a new TB, re-initialize it per
387              * above, and re-do the actual code generation.
388              */
389             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
390                           "Restarting code generation for "
391                           "code_gen_buffer overflow\n");
392             goto buffer_overflow;
393 
394         case -2:
395             /*
396              * The code generated for the TranslationBlock is too large.
397              * The maximum size allowed by the unwind info is 64k.
398              * There may be stricter constraints from relocations
399              * in the tcg backend.
400              *
401              * Try again with half as many insns as we attempted this time.
402              * If a single insn overflows, there's a bug somewhere...
403              */
404             assert(max_insns > 1);
405             max_insns /= 2;
406             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
407                           "Restarting code generation with "
408                           "smaller translation block (max %d insns)\n",
409                           max_insns);
410             goto tb_overflow;
411 
412         default:
413             g_assert_not_reached();
414         }
415     }
416     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
417     if (unlikely(search_size < 0)) {
418         goto buffer_overflow;
419     }
420     tb->tc.size = gen_code_size;
421 
422     /*
423      * For CF_PCREL, attribute all executions of the generated code
424      * to its first mapping.
425      */
426     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
427 
428 #ifdef CONFIG_PROFILER
429     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
430     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
431     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
432     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
433 #endif
434 
435 #ifdef DEBUG_DISAS
436     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
437         qemu_log_in_addr_range(pc)) {
438         FILE *logfile = qemu_log_trylock();
439         if (logfile) {
440             int code_size, data_size;
441             const tcg_target_ulong *rx_data_gen_ptr;
442             size_t chunk_start;
443             int insn = 0;
444 
445             if (tcg_ctx->data_gen_ptr) {
446                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
447                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
448                 data_size = gen_code_size - code_size;
449             } else {
450                 rx_data_gen_ptr = 0;
451                 code_size = gen_code_size;
452                 data_size = 0;
453             }
454 
455             /* Dump header and the first instruction */
456             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
457             fprintf(logfile,
458                     "  -- guest addr 0x%016" PRIx64 " + tb prologue\n",
459                     tcg_ctx->gen_insn_data[insn][0]);
460             chunk_start = tcg_ctx->gen_insn_end_off[insn];
461             disas(logfile, tb->tc.ptr, chunk_start);
462 
463             /*
464              * Dump each instruction chunk, wrapping up empty chunks into
465              * the next instruction. The whole array is offset so the
466              * first entry is the beginning of the 2nd instruction.
467              */
468             while (insn < tb->icount) {
469                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
470                 if (chunk_end > chunk_start) {
471                     fprintf(logfile, "  -- guest addr 0x%016" PRIx64 "\n",
472                             tcg_ctx->gen_insn_data[insn][0]);
473                     disas(logfile, tb->tc.ptr + chunk_start,
474                           chunk_end - chunk_start);
475                     chunk_start = chunk_end;
476                 }
477                 insn++;
478             }
479 
480             if (chunk_start < code_size) {
481                 fprintf(logfile, "  -- tb slow paths + alignment\n");
482                 disas(logfile, tb->tc.ptr + chunk_start,
483                       code_size - chunk_start);
484             }
485 
486             /* Finally dump any data we may have after the block */
487             if (data_size) {
488                 int i;
489                 fprintf(logfile, "  data: [size=%d]\n", data_size);
490                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
491                     if (sizeof(tcg_target_ulong) == 8) {
492                         fprintf(logfile,
493                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
494                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
495                     } else if (sizeof(tcg_target_ulong) == 4) {
496                         fprintf(logfile,
497                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
498                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
499                     } else {
500                         qemu_build_not_reached();
501                     }
502                 }
503             }
504             fprintf(logfile, "\n");
505             qemu_log_unlock(logfile);
506         }
507     }
508 #endif
509 
510     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
511         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
512                  CODE_GEN_ALIGN));
513 
514     /* init jump list */
515     qemu_spin_init(&tb->jmp_lock);
516     tb->jmp_list_head = (uintptr_t)NULL;
517     tb->jmp_list_next[0] = (uintptr_t)NULL;
518     tb->jmp_list_next[1] = (uintptr_t)NULL;
519     tb->jmp_dest[0] = (uintptr_t)NULL;
520     tb->jmp_dest[1] = (uintptr_t)NULL;
521 
522     /* init original jump addresses which have been set during tcg_gen_code() */
523     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
524         tb_reset_jump(tb, 0);
525     }
526     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
527         tb_reset_jump(tb, 1);
528     }
529 
530     /*
531      * If the TB is not associated with a physical RAM page then it must be
532      * a temporary one-insn TB, and we have nothing left to do. Return early
533      * before attempting to link to other TBs or add to the lookup table.
534      */
535     if (tb_page_addr0(tb) == -1) {
536         return tb;
537     }
538 
539     /*
540      * Insert TB into the corresponding region tree before publishing it
541      * through QHT. Otherwise, an unwind that happens inside the TB might
542      * fail to look the TB up by its host PC.
543      */
544     tcg_tb_insert(tb);
545 
546     /*
547      * No explicit memory barrier is required -- tb_link_page() makes the
548      * TB visible in a consistent state.
549      */
550     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
551     /* if the TB already exists, discard what we just translated */
552     if (unlikely(existing_tb != tb)) {
553         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
554 
555         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
556         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
557         tcg_tb_remove(tb);
558         return existing_tb;
559     }
560     return tb;
561 }
562 
563 /* user-mode: call with mmap_lock held */
564 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
565 {
566     TranslationBlock *tb;
567 
568     assert_memory_lock();
569 
570     tb = tcg_tb_lookup(retaddr);
571     if (tb) {
572         /* We can use retranslation to find the PC.  */
573         cpu_restore_state_from_tb(cpu, tb, retaddr);
574         tb_phys_invalidate(tb, -1);
575     } else {
576         /* The exception probably happened in a helper.  The CPU state should
577            have been saved before calling it. Fetch the PC from there.  */
578         CPUArchState *env = cpu->env_ptr;
579         target_ulong pc, cs_base;
580         tb_page_addr_t addr;
581         uint32_t flags;
582 
583         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
584         addr = get_page_addr_code(env, pc);
585         if (addr != -1) {
586             tb_invalidate_phys_range(addr, addr);
587         }
588     }
589 }
590 
591 #ifndef CONFIG_USER_ONLY
592 /*
593  * In deterministic execution mode, instructions doing device I/Os
594  * must be at the end of the TB.
595  *
596  * Called by softmmu_template.h, with iothread mutex not held.
597  */
598 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
599 {
600     TranslationBlock *tb;
601     CPUClass *cc;
602     uint32_t n;
603 
604     tb = tcg_tb_lookup(retaddr);
605     if (!tb) {
606         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
607                   (void *)retaddr);
608     }
609     cpu_restore_state_from_tb(cpu, tb, retaddr);
610 
611     /*
612      * Some guests must re-execute the branch when re-executing a delay
613      * slot instruction.  When this is the case, adjust icount and N
614      * to account for the re-execution of the branch.
615      */
616     n = 1;
617     cc = CPU_GET_CLASS(cpu);
618     if (cc->tcg_ops->io_recompile_replay_branch &&
619         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
620         cpu_neg(cpu)->icount_decr.u16.low++;
621         n = 2;
622     }
623 
624     /*
625      * Exit the loop and potentially generate a new TB executing just
626      * the I/O insns. We also limit instrumentation to memory
627      * operations only (which execute after completion) so we don't
628      * double-instrument the instruction.
629      */
630     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
631 
632     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
633         target_ulong pc = log_pc(cpu, tb);
634         if (qemu_log_in_addr_range(pc)) {
635             qemu_log("cpu_io_recompile: rewound execution of TB to "
636                      TARGET_FMT_lx "\n", pc);
637         }
638     }
639 
640     cpu_loop_exit_noexc(cpu);
641 }
642 
643 static void print_qht_statistics(struct qht_stats hst, GString *buf)
644 {
645     uint32_t hgram_opts;
646     size_t hgram_bins;
647     char *hgram;
648 
649     if (!hst.head_buckets) {
650         return;
651     }
652     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
653                            "(%0.2f%% head buckets used)\n",
654                            hst.used_head_buckets, hst.head_buckets,
655                            (double)hst.used_head_buckets /
656                            hst.head_buckets * 100);
657 
658     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
659     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
660     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
661         hgram_opts |= QDIST_PR_NODECIMAL;
662     }
663     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
664     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
665                            "Histogram: %s\n",
666                            qdist_avg(&hst.occupancy) * 100, hgram);
667     g_free(hgram);
668 
669     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
670     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
671     if (hgram_bins > 10) {
672         hgram_bins = 10;
673     } else {
674         hgram_bins = 0;
675         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
676     }
677     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
678     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
679                            "Histogram: %s\n",
680                            qdist_avg(&hst.chain), hgram);
681     g_free(hgram);
682 }
683 
684 struct tb_tree_stats {
685     size_t nb_tbs;
686     size_t host_size;
687     size_t target_size;
688     size_t max_target_size;
689     size_t direct_jmp_count;
690     size_t direct_jmp2_count;
691     size_t cross_page;
692 };
693 
694 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
695 {
696     const TranslationBlock *tb = value;
697     struct tb_tree_stats *tst = data;
698 
699     tst->nb_tbs++;
700     tst->host_size += tb->tc.size;
701     tst->target_size += tb->size;
702     if (tb->size > tst->max_target_size) {
703         tst->max_target_size = tb->size;
704     }
705     if (tb_page_addr1(tb) != -1) {
706         tst->cross_page++;
707     }
708     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
709         tst->direct_jmp_count++;
710         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
711             tst->direct_jmp2_count++;
712         }
713     }
714     return false;
715 }
716 
717 void dump_exec_info(GString *buf)
718 {
719     struct tb_tree_stats tst = {};
720     struct qht_stats hst;
721     size_t nb_tbs, flush_full, flush_part, flush_elide;
722 
723     tcg_tb_foreach(tb_tree_stats_iter, &tst);
724     nb_tbs = tst.nb_tbs;
725     /* XXX: avoid using doubles ? */
726     g_string_append_printf(buf, "Translation buffer state:\n");
727     /*
728      * Report total code size including the padding and TB structs;
729      * otherwise users might think "-accel tcg,tb-size" is not honoured.
730      * For avg host size we use the precise numbers from tb_tree_stats though.
731      */
732     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
733                            tcg_code_size(), tcg_code_capacity());
734     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
735     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
736                            nb_tbs ? tst.target_size / nb_tbs : 0,
737                            tst.max_target_size);
738     g_string_append_printf(buf, "TB avg host size    %zu bytes "
739                            "(expansion ratio: %0.1f)\n",
740                            nb_tbs ? tst.host_size / nb_tbs : 0,
741                            tst.target_size ?
742                            (double)tst.host_size / tst.target_size : 0);
743     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
744                            tst.cross_page,
745                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
746     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
747                            "(2 jumps=%zu %zu%%)\n",
748                            tst.direct_jmp_count,
749                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
750                            tst.direct_jmp2_count,
751                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
752 
753     qht_statistics_init(&tb_ctx.htable, &hst);
754     print_qht_statistics(hst, buf);
755     qht_statistics_destroy(&hst);
756 
757     g_string_append_printf(buf, "\nStatistics:\n");
758     g_string_append_printf(buf, "TB flush count      %u\n",
759                            qatomic_read(&tb_ctx.tb_flush_count));
760     g_string_append_printf(buf, "TB invalidate count %u\n",
761                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
762 
763     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
764     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
765     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
766     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
767     tcg_dump_info(buf);
768 }
769 
770 #else /* CONFIG_USER_ONLY */
771 
772 void cpu_interrupt(CPUState *cpu, int mask)
773 {
774     g_assert(qemu_mutex_iothread_locked());
775     cpu->interrupt_request |= mask;
776     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
777 }
778 
779 #endif /* CONFIG_USER_ONLY */
780 
781 /*
782  * Called by generic code, e.g. at cpu reset right after cpu creation,
783  * so we must cope with the jump cache not having been allocated yet.
784  */
785 void tcg_flush_jmp_cache(CPUState *cpu)
786 {
787     CPUJumpCache *jc = cpu->tb_jmp_cache;
788 
789     /* During early initialization, the cache may not yet be allocated. */
790     if (unlikely(jc == NULL)) {
791         return;
792     }
793 
794     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
795         qatomic_set(&jc->array[i].tb, NULL);
796     }
797 }
798 
799 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
800 void tcg_flush_softmmu_tlb(CPUState *cs)
801 {
802 #ifdef CONFIG_SOFTMMU
803     tlb_flush(cs);
804 #endif
805 }
806