xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 89aafcf2)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "exec/tb-flush.h"
51 #include "qemu/bitmap.h"
52 #include "qemu/qemu-print.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "qemu/timer.h"
56 #include "exec/log.h"
57 #include "sysemu/cpus.h"
58 #include "sysemu/cpu-timers.h"
59 #include "sysemu/tcg.h"
60 #include "qapi/error.h"
61 #include "hw/core/tcg-cpu-ops.h"
62 #include "tb-jmp-cache.h"
63 #include "tb-hash.h"
64 #include "tb-context.h"
65 #include "internal.h"
66 #include "perf.h"
67 
68 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
69 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
70                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
71                   * BITS_PER_BYTE);
72 
73 TBContext tb_ctx;
74 
75 /*
76  * Encode VAL as a signed leb128 sequence at P.
77  * Return P incremented past the encoded value.
78  */
79 static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
80 {
81     int more, byte;
82 
83     do {
84         byte = val & 0x7f;
85         val >>= 7;
86         more = !((val == 0 && (byte & 0x40) == 0)
87                  || (val == -1 && (byte & 0x40) != 0));
88         if (more) {
89             byte |= 0x80;
90         }
91         *p++ = byte;
92     } while (more);
93 
94     return p;
95 }
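/*
 * For illustration: this is the standard DWARF-style signed LEB128
 * encoding, e.g. 300 encodes as the bytes { 0xac, 0x02 } and -2 as the
 * single byte { 0x7e }; decode_sleb128() below reverses the process.
 */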
96 
97 /*
98  * Decode a signed leb128 sequence at *PP; increment *PP past the
99  * decoded value.  Return the decoded value.
100  */
101 static int64_t decode_sleb128(const uint8_t **pp)
102 {
103     const uint8_t *p = *pp;
104     int64_t val = 0;
105     int byte, shift = 0;
106 
107     do {
108         byte = *p++;
109         val |= (int64_t)(byte & 0x7f) << shift;
110         shift += 7;
111     } while (byte & 0x80);
112     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
113         val |= -(int64_t)1 << shift;
114     }
115 
116     *pp = p;
117     return val;
118 }
119 
120 /* Encode the data collected about the instructions while compiling TB.
121    Place the data at BLOCK, and return the number of bytes consumed.
122 
123    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
124    which come from the target's insn_start data, followed by a uintptr_t
125    which comes from the host pc of the end of the code implementing the insn.
126 
127    Each line of the table is encoded as sleb128 deltas from the previous
128    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
129    That is, the first column is seeded with the guest pc, the last column
130    with the host pc, and the middle columns with zeros.  */
131 
132 static int encode_search(TranslationBlock *tb, uint8_t *block)
133 {
134     uint8_t *highwater = tcg_ctx->code_gen_highwater;
135     uint8_t *p = block;
136     int i, j, n;
137 
138     for (i = 0, n = tb->icount; i < n; ++i) {
139         uint64_t prev;
140 
141         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
142             if (i == 0) {
143                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
144             } else {
145                 prev = tcg_ctx->gen_insn_data[i - 1][j];
146             }
147             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
148         }
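        /*
         * The host-pc column stores gen_insn_end_off[], i.e. offsets from
         * tb->tc.ptr rather than absolute host addresses, so its delta
         * chain is seeded with 0; the decoder in cpu_unwind_data_from_tb()
         * supplies tb->tc.ptr as the base.
         */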
149         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
150         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
151 
152         /* Test for (pending) buffer overflow.  The assumption is that any
153            one row beginning below the high water mark cannot overrun
154            the buffer completely.  Thus we can test for overflow after
155            encoding a row without having to check during encoding.  */
156         if (unlikely(p > highwater)) {
157             return -1;
158         }
159     }
160 
161     return p - block;
162 }
163 
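/*
 * Find the guest insn that was executing at HOST_PC within TB: fill DATA
 * with that insn's insn_start words and return the number of insns that
 * had not yet completed (counting the one at HOST_PC), or -1 if HOST_PC
 * does not fall within this TB's generated code.
 */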
164 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
165                                    uint64_t *data)
166 {
167     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
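    /*
     * The search data written by encode_search() is placed immediately
     * after the generated code, so decoding starts at tc.ptr + tc.size.
     */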
168     const uint8_t *p = tb->tc.ptr + tb->tc.size;
169     int i, j, num_insns = tb->icount;
170 
171     host_pc -= GETPC_ADJ;
172 
173     if (host_pc < iter_pc) {
174         return -1;
175     }
176 
177     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
178     if (!(tb_cflags(tb) & CF_PCREL)) {
179         data[0] = tb->pc;
180     }
181 
182     /*
183      * Reconstruct the stored insn data while looking for the point
184      * at which the end of the insn exceeds host_pc.
185      */
186     for (i = 0; i < num_insns; ++i) {
187         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
188             data[j] += decode_sleb128(&p);
189         }
190         iter_pc += decode_sleb128(&p);
191         if (iter_pc > host_pc) {
192             return num_insns - i;
193         }
194     }
195     return -1;
196 }
197 
198 /*
199  * The cpu state corresponding to 'host_pc' is restored in
200  * preparation for exiting the TB.
201  */
202 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
203                                uintptr_t host_pc)
204 {
205     uint64_t data[TARGET_INSN_START_WORDS];
206 #ifdef CONFIG_PROFILER
207     TCGProfile *prof = &tcg_ctx->prof;
208     int64_t ti = profile_getclock();
209 #endif
210     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
211 
212     if (insns_left < 0) {
213         return;
214     }
215 
216     if (tb_cflags(tb) & CF_USE_ICOUNT) {
217         assert(icount_enabled());
218         /*
219          * Reset the cycle counter to the start of the block and
220          * shift it to the number of actually executed instructions.
221          */
222         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
223     }
224 
225     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
226 
227 #ifdef CONFIG_PROFILER
228     qatomic_set(&prof->restore_time,
229                 prof->restore_time + profile_getclock() - ti);
230     qatomic_set(&prof->restore_count, prof->restore_count + 1);
231 #endif
232 }
233 
234 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
235 {
236     /*
237      * The host_pc has to be in the rx region of the code buffer.
238      * If it is not, we will not be able to resolve it here.
239      * The two cases where host_pc will not be correct are:
240      *
241      *  - fault during translation (instruction fetch)
242      *  - fault from helper (not using GETPC() macro)
243      *
244      * Either way we need to return early as we can't resolve it here.
245      */
246     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
247         TranslationBlock *tb = tcg_tb_lookup(host_pc);
248         if (tb) {
249             cpu_restore_state_from_tb(cpu, tb, host_pc);
250             return true;
251         }
252     }
253     return false;
254 }
255 
256 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
257 {
258     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
259         TranslationBlock *tb = tcg_tb_lookup(host_pc);
260         if (tb) {
261             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
262         }
263     }
264     return false;
265 }
266 
267 void page_init(void)
268 {
269     page_size_init();
270     page_table_config_init();
271 }
272 
273 /*
274  * Isolate the portion of code gen which can setjmp/longjmp.
275  * Return the size of the generated code, or negative on error.
276  */
277 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
278                            target_ulong pc, void *host_pc,
279                            int *max_insns, int64_t *ti)
280 {
281     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
282     if (unlikely(ret != 0)) {
283         return ret;
284     }
285 
286     tcg_func_start(tcg_ctx);
287 
288     tcg_ctx->cpu = env_cpu(env);
289     gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
290     assert(tb->size != 0);
291     tcg_ctx->cpu = NULL;
292     *max_insns = tb->icount;
293 
294 #ifdef CONFIG_PROFILER
295     qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
296     qatomic_set(&tcg_ctx->prof.interm_time,
297                 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
298     *ti = profile_getclock();
299 #endif
300 
301     return tcg_gen_code(tcg_ctx, tb, pc);
302 }
303 
304 /* Called with mmap_lock held for user mode emulation.  */
305 TranslationBlock *tb_gen_code(CPUState *cpu,
306                               target_ulong pc, target_ulong cs_base,
307                               uint32_t flags, int cflags)
308 {
309     CPUArchState *env = cpu->env_ptr;
310     TranslationBlock *tb, *existing_tb;
311     tb_page_addr_t phys_pc;
312     tcg_insn_unit *gen_code_buf;
313     int gen_code_size, search_size, max_insns;
314 #ifdef CONFIG_PROFILER
315     TCGProfile *prof = &tcg_ctx->prof;
316 #endif
317     int64_t ti;
318     void *host_pc;
319 
320     assert_memory_lock();
321     qemu_thread_jit_write();
322 
323     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
324 
325     if (phys_pc == -1) {
326         /* Generate a one-shot TB with 1 insn in it */
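        /*
         * phys_pc == -1 means the guest PC is not backed by RAM (for
         * example, code executed from MMIO), so the TB cannot be keyed
         * by a physical page; it is never added to the hash table and is
         * dropped after use (see the tb_page_addr0(tb) == -1 early
         * return below).
         */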
327         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
328     }
329 
330     max_insns = cflags & CF_COUNT_MASK;
331     if (max_insns == 0) {
332         max_insns = TCG_MAX_INSNS;
333     }
334     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
335 
336  buffer_overflow:
337     tb = tcg_tb_alloc(tcg_ctx);
338     if (unlikely(!tb)) {
339         /* flush must be done */
340         tb_flush(cpu);
341         mmap_unlock();
342         /* Make the execution loop process the flush as soon as possible.  */
343         cpu->exception_index = EXCP_INTERRUPT;
344         cpu_loop_exit(cpu);
345     }
346 
347     gen_code_buf = tcg_ctx->code_gen_ptr;
348     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
349     if (!(cflags & CF_PCREL)) {
350         tb->pc = pc;
351     }
352     tb->cs_base = cs_base;
353     tb->flags = flags;
354     tb->cflags = cflags;
355     tb->trace_vcpu_dstate = *cpu->trace_dstate;
356     tb_set_page_addr0(tb, phys_pc);
357     tb_set_page_addr1(tb, -1);
358     tcg_ctx->gen_tb = tb;
359     tcg_ctx->addr_type = TCG_TYPE_TL;
360 #ifdef CONFIG_SOFTMMU
361     tcg_ctx->page_bits = TARGET_PAGE_BITS;
362     tcg_ctx->page_mask = TARGET_PAGE_MASK;
363     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
364 #endif
365 
366  tb_overflow:
367 
368 #ifdef CONFIG_PROFILER
369     /* includes aborted translations because of exceptions */
370     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
371     ti = profile_getclock();
372 #endif
373 
374     trace_translate_block(tb, pc, tb->tc.ptr);
375 
376     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
377     if (unlikely(gen_code_size < 0)) {
378         switch (gen_code_size) {
379         case -1:
380             /*
381              * Overflow of code_gen_buffer, or the current slice of it.
382              *
383              * TODO: We don't need to re-do gen_intermediate_code, nor
384              * should we re-do the tcg optimization currently hidden
385              * inside tcg_gen_code.  All that should be required is to
386              * flush the TBs, allocate a new TB, re-initialize it per
387              * above, and re-do the actual code generation.
388              */
389             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
390                           "Restarting code generation for "
391                           "code_gen_buffer overflow\n");
392             goto buffer_overflow;
393 
394         case -2:
395             /*
396              * The code generated for the TranslationBlock is too large.
397              * The maximum size allowed by the unwind info is 64k.
398              * There may be stricter constraints from relocations
399              * in the tcg backend.
400              *
401              * Try again with half as many insns as we attempted this time.
402              * If a single insn overflows, there's a bug somewhere...
403              */
404             assert(max_insns > 1);
405             max_insns /= 2;
406             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
407                           "Restarting code generation with "
408                           "smaller translation block (max %d insns)\n",
409                           max_insns);
410             goto tb_overflow;
411 
412         default:
413             g_assert_not_reached();
414         }
415     }
416     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
417     if (unlikely(search_size < 0)) {
418         goto buffer_overflow;
419     }
420     tb->tc.size = gen_code_size;
421 
422     /*
423      * For CF_PCREL, attribute all executions of the generated code
424      * to its first mapping.
425      */
426     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
427 
428 #ifdef CONFIG_PROFILER
429     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
430     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
431     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
432     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
433 #endif
434 
435     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
436         qemu_log_in_addr_range(pc)) {
437         FILE *logfile = qemu_log_trylock();
438         if (logfile) {
439             int code_size, data_size;
440             const tcg_target_ulong *rx_data_gen_ptr;
441             size_t chunk_start;
442             int insn = 0;
443 
444             if (tcg_ctx->data_gen_ptr) {
445                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
446                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
447                 data_size = gen_code_size - code_size;
448             } else {
449                 rx_data_gen_ptr = 0;
450                 code_size = gen_code_size;
451                 data_size = 0;
452             }
453 
454             /* Dump header and the first instruction */
455             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
456             fprintf(logfile,
457                     "  -- guest addr 0x%016" PRIx64 " + tb prologue\n",
458                     tcg_ctx->gen_insn_data[insn][0]);
459             chunk_start = tcg_ctx->gen_insn_end_off[insn];
460             disas(logfile, tb->tc.ptr, chunk_start);
461 
462             /*
463              * Dump each instruction chunk, wrapping up empty chunks into
464              * the next instruction. The whole array is offset so the
465              * first entry is the beginning of the 2nd instruction.
466              */
467             while (insn < tb->icount) {
468                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
469                 if (chunk_end > chunk_start) {
470                     fprintf(logfile, "  -- guest addr 0x%016" PRIx64 "\n",
471                             tcg_ctx->gen_insn_data[insn][0]);
472                     disas(logfile, tb->tc.ptr + chunk_start,
473                           chunk_end - chunk_start);
474                     chunk_start = chunk_end;
475                 }
476                 insn++;
477             }
478 
479             if (chunk_start < code_size) {
480                 fprintf(logfile, "  -- tb slow paths + alignment\n");
481                 disas(logfile, tb->tc.ptr + chunk_start,
482                       code_size - chunk_start);
483             }
484 
485             /* Finally dump any data we may have after the block */
486             if (data_size) {
487                 int i;
488                 fprintf(logfile, "  data: [size=%d]\n", data_size);
489                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
490                     if (sizeof(tcg_target_ulong) == 8) {
491                         fprintf(logfile,
492                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
493                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
494                     } else if (sizeof(tcg_target_ulong) == 4) {
495                         fprintf(logfile,
496                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
497                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
498                     } else {
499                         qemu_build_not_reached();
500                     }
501                 }
502             }
503             fprintf(logfile, "\n");
504             qemu_log_unlock(logfile);
505         }
506     }
507 
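    /*
     * Advance code_gen_ptr past both the generated code and the search
     * data appended above, keeping CODE_GEN_ALIGN alignment for the
     * next translation.
     */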
508     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
509         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
510                  CODE_GEN_ALIGN));
511 
512     /* init jump list */
513     qemu_spin_init(&tb->jmp_lock);
514     tb->jmp_list_head = (uintptr_t)NULL;
515     tb->jmp_list_next[0] = (uintptr_t)NULL;
516     tb->jmp_list_next[1] = (uintptr_t)NULL;
517     tb->jmp_dest[0] = (uintptr_t)NULL;
518     tb->jmp_dest[1] = (uintptr_t)NULL;
519 
520     /* init original jump addresses which have been set during tcg_gen_code() */
521     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
522         tb_reset_jump(tb, 0);
523     }
524     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
525         tb_reset_jump(tb, 1);
526     }
527 
528     /*
529      * If the TB is not associated with a physical RAM page then it must be
530      * a temporary one-insn TB, and we have nothing left to do. Return early
531      * before attempting to link to other TBs or add to the lookup table.
532      */
533     if (tb_page_addr0(tb) == -1) {
534         return tb;
535     }
536 
537     /*
538      * Insert TB into the corresponding region tree before publishing it
539      * through QHT. Otherwise, a rewind triggered while executing the TB
540      * might fail to look it up by host PC.
541      */
542     tcg_tb_insert(tb);
543 
544     /*
545      * No explicit memory barrier is required -- tb_link_page() makes the
546      * TB visible in a consistent state.
547      */
548     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
549     /* if the TB already exists, discard what we just translated */
550     if (unlikely(existing_tb != tb)) {
551         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
552 
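        /*
         * Roll code_gen_ptr back over the freshly generated code, its
         * search data, and the TB header that tcg_tb_alloc() placed in
         * the code buffer just before gen_code_buf, so the space is
         * reused for the next translation.
         */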
553         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
554         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
555         tcg_tb_remove(tb);
556         return existing_tb;
557     }
558     return tb;
559 }
560 
561 /* user-mode: call with mmap_lock held */
562 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
563 {
564     TranslationBlock *tb;
565 
566     assert_memory_lock();
567 
568     tb = tcg_tb_lookup(retaddr);
569     if (tb) {
570         /* We can use retranslation to find the PC.  */
571         cpu_restore_state_from_tb(cpu, tb, retaddr);
572         tb_phys_invalidate(tb, -1);
573     } else {
574         /* The exception probably happened in a helper.  The CPU state should
575            have been saved before calling it. Fetch the PC from there.  */
576         CPUArchState *env = cpu->env_ptr;
577         target_ulong pc, cs_base;
578         tb_page_addr_t addr;
579         uint32_t flags;
580 
581         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
582         addr = get_page_addr_code(env, pc);
583         if (addr != -1) {
584             tb_invalidate_phys_range(addr, addr);
585         }
586     }
587 }
588 
589 #ifndef CONFIG_USER_ONLY
590 /*
591  * In deterministic execution mode, instructions doing device I/Os
592  * must be at the end of the TB.
593  *
594  * Called by softmmu_template.h, with iothread mutex not held.
595  */
596 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
597 {
598     TranslationBlock *tb;
599     CPUClass *cc;
600     uint32_t n;
601 
602     tb = tcg_tb_lookup(retaddr);
603     if (!tb) {
604         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
605                   (void *)retaddr);
606     }
607     cpu_restore_state_from_tb(cpu, tb, retaddr);
608 
609     /*
610      * Some guests must re-execute the branch when re-executing a delay
611      * slot instruction.  When this is the case, adjust icount and N
612      * to account for the re-execution of the branch.
613      */
614     n = 1;
615     cc = CPU_GET_CLASS(cpu);
616     if (cc->tcg_ops->io_recompile_replay_branch &&
617         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
618         cpu_neg(cpu)->icount_decr.u16.low++;
619         n = 2;
620     }
621 
622     /*
623      * Exit the loop and potentially generate a new TB executing just
624      * the I/O insns. We also limit instrumentation to memory
625      * operations only (which execute after completion) so we don't
626      * double-instrument the instruction.
627      */
628     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
629 
630     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
631         target_ulong pc = log_pc(cpu, tb);
632         if (qemu_log_in_addr_range(pc)) {
633             qemu_log("cpu_io_recompile: rewound execution of TB to "
634                      TARGET_FMT_lx "\n", pc);
635         }
636     }
637 
638     cpu_loop_exit_noexc(cpu);
639 }
640 
641 static void print_qht_statistics(struct qht_stats hst, GString *buf)
642 {
643     uint32_t hgram_opts;
644     size_t hgram_bins;
645     char *hgram;
646 
647     if (!hst.head_buckets) {
648         return;
649     }
650     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
651                            "(%0.2f%% head buckets used)\n",
652                            hst.used_head_buckets, hst.head_buckets,
653                            (double)hst.used_head_buckets /
654                            hst.head_buckets * 100);
655 
656     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
657     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
658     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
659         hgram_opts |= QDIST_PR_NODECIMAL;
660     }
661     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
662     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
663                            "Histogram: %s\n",
664                            qdist_avg(&hst.occupancy) * 100, hgram);
665     g_free(hgram);
666 
667     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
668     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
669     if (hgram_bins > 10) {
670         hgram_bins = 10;
671     } else {
672         hgram_bins = 0;
673         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
674     }
675     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
676     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
677                            "Histogram: %s\n",
678                            qdist_avg(&hst.chain), hgram);
679     g_free(hgram);
680 }
681 
682 struct tb_tree_stats {
683     size_t nb_tbs;
684     size_t host_size;
685     size_t target_size;
686     size_t max_target_size;
687     size_t direct_jmp_count;
688     size_t direct_jmp2_count;
689     size_t cross_page;
690 };
691 
692 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
693 {
694     const TranslationBlock *tb = value;
695     struct tb_tree_stats *tst = data;
696 
697     tst->nb_tbs++;
698     tst->host_size += tb->tc.size;
699     tst->target_size += tb->size;
700     if (tb->size > tst->max_target_size) {
701         tst->max_target_size = tb->size;
702     }
703     if (tb_page_addr1(tb) != -1) {
704         tst->cross_page++;
705     }
706     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
707         tst->direct_jmp_count++;
708         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
709             tst->direct_jmp2_count++;
710         }
711     }
712     return false;
713 }
714 
715 void dump_exec_info(GString *buf)
716 {
717     struct tb_tree_stats tst = {};
718     struct qht_stats hst;
719     size_t nb_tbs, flush_full, flush_part, flush_elide;
720 
721     tcg_tb_foreach(tb_tree_stats_iter, &tst);
722     nb_tbs = tst.nb_tbs;
723     /* XXX: avoid using doubles ? */
724     g_string_append_printf(buf, "Translation buffer state:\n");
725     /*
726      * Report total code size including the padding and TB structs;
727      * otherwise users might think "-accel tcg,tb-size" is not honoured.
728      * For avg host size we use the precise numbers from tb_tree_stats though.
729      */
730     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
731                            tcg_code_size(), tcg_code_capacity());
732     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
733     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
734                            nb_tbs ? tst.target_size / nb_tbs : 0,
735                            tst.max_target_size);
736     g_string_append_printf(buf, "TB avg host size    %zu bytes "
737                            "(expansion ratio: %0.1f)\n",
738                            nb_tbs ? tst.host_size / nb_tbs : 0,
739                            tst.target_size ?
740                            (double)tst.host_size / tst.target_size : 0);
741     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
742                            tst.cross_page,
743                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
744     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
745                            "(2 jumps=%zu %zu%%)\n",
746                            tst.direct_jmp_count,
747                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
748                            tst.direct_jmp2_count,
749                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
750 
751     qht_statistics_init(&tb_ctx.htable, &hst);
752     print_qht_statistics(hst, buf);
753     qht_statistics_destroy(&hst);
754 
755     g_string_append_printf(buf, "\nStatistics:\n");
756     g_string_append_printf(buf, "TB flush count      %u\n",
757                            qatomic_read(&tb_ctx.tb_flush_count));
758     g_string_append_printf(buf, "TB invalidate count %u\n",
759                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
760 
761     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
762     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
763     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
764     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
765     tcg_dump_info(buf);
766 }
767 
768 #else /* CONFIG_USER_ONLY */
769 
770 void cpu_interrupt(CPUState *cpu, int mask)
771 {
772     g_assert(qemu_mutex_iothread_locked());
773     cpu->interrupt_request |= mask;
774     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
775 }
776 
777 #endif /* CONFIG_USER_ONLY */
778 
779 /*
780  * Called by generic code at e.g. cpu reset after cpu creation,
781  * therefore we must be prepared for the jump cache not to be allocated yet.
782  */
783 void tcg_flush_jmp_cache(CPUState *cpu)
784 {
785     CPUJumpCache *jc = cpu->tb_jmp_cache;
786 
787     /* During early initialization, the cache may not yet be allocated. */
788     if (unlikely(jc == NULL)) {
789         return;
790     }
791 
792     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
793         qatomic_set(&jc->array[i].tb, NULL);
794     }
795 }
796 
797 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
798 void tcg_flush_softmmu_tlb(CPUState *cs)
799 {
800 #ifdef CONFIG_SOFTMMU
801     tlb_flush(cs);
802 #endif
803 }
804