xref: /openbmc/qemu/accel/tcg/cpu-exec.c (revision 8905770b)
1 /*
2  *  emulator main execution loop
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/qemu-print.h"
22 #include "qapi/error.h"
23 #include "qapi/qapi-commands-machine.h"
24 #include "qapi/type-helpers.h"
25 #include "hw/core/tcg-cpu-ops.h"
26 #include "trace.h"
27 #include "disas/disas.h"
28 #include "exec/exec-all.h"
29 #include "tcg/tcg.h"
30 #include "qemu/atomic.h"
31 #include "qemu/compiler.h"
32 #include "qemu/timer.h"
33 #include "qemu/rcu.h"
34 #include "exec/log.h"
35 #include "qemu/main-loop.h"
36 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
37 #include "hw/i386/apic.h"
38 #endif
39 #include "sysemu/cpus.h"
40 #include "exec/cpu-all.h"
41 #include "sysemu/cpu-timers.h"
42 #include "sysemu/replay.h"
43 #include "sysemu/tcg.h"
44 #include "exec/helper-proto.h"
45 #include "tb-hash.h"
46 #include "tb-context.h"
47 #include "internal.h"
48 
49 /* -icount align implementation. */
50 
/*
 * Bookkeeping for "-icount align": tracks how far the guest clock has
 * drifted from the host clock across one cpu_exec() invocation.
 */
typedef struct SyncClocks {
    /*
     * Virtual clock minus real-time clock, in ns.  Positive values mean
     * the guest is ahead (align_clocks() sleeps on those); negative
     * values mean the guest is late (print_delay() reports those).
     */
    int64_t diff_clk;
    /* icount_extra + icount_decr.u16.low at the most recent sample. */
    int64_t last_cpu_icount;
    /* QEMU_CLOCK_VIRTUAL_RT reading (ns) taken in init_delay_params(). */
    int64_t realtime_clock;
} SyncClocks;
56 
57 #if !defined(CONFIG_USER_ONLY)
/*
 * The guest may run at most 3ms (expressed in ns) ahead of the host
 * before align_clocks() sleeps, so the difference between the two
 * clocks ends up oscillating around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
/*
 * print_delay() warns again once the lateness has dropped this many
 * seconds below the previously reported threshold.
 */
#define THRESHOLD_REDUCE 1.5
/* Minimum real-time interval, in ns, between two lateness warnings. */
#define MAX_DELAY_PRINT_RATE 2000000000LL
/* Upper bound on the total number of lateness warnings printed. */
#define MAX_NB_PRINTS 100

/* Smallest diff_clk observed by init_delay_params() so far. */
static int64_t max_delay;
/* Largest diff_clk observed by init_delay_params() so far. */
static int64_t max_advance;
69 
70 static void align_clocks(SyncClocks *sc, CPUState *cpu)
71 {
72     int64_t cpu_icount;
73 
74     if (!icount_align_option) {
75         return;
76     }
77 
78     cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
79     sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
80     sc->last_cpu_icount = cpu_icount;
81 
82     if (sc->diff_clk > VM_CLOCK_ADVANCE) {
83 #ifndef _WIN32
84         struct timespec sleep_delay, rem_delay;
85         sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
86         sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
87         if (nanosleep(&sleep_delay, &rem_delay) < 0) {
88             sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
89         } else {
90             sc->diff_clk = 0;
91         }
92 #else
93         Sleep(sc->diff_clk / SCALE_MS);
94         sc->diff_clk = 0;
95 #endif
96     }
97 }
98 
99 static void print_delay(const SyncClocks *sc)
100 {
101     static float threshold_delay;
102     static int64_t last_realtime_clock;
103     static int nb_prints;
104 
105     if (icount_align_option &&
106         sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
107         nb_prints < MAX_NB_PRINTS) {
108         if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
109             (-sc->diff_clk / (float)1000000000LL <
110              (threshold_delay - THRESHOLD_REDUCE))) {
111             threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
112             qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
113                         threshold_delay - 1,
114                         threshold_delay);
115             nb_prints++;
116             last_realtime_clock = sc->realtime_clock;
117         }
118     }
119 }
120 
121 static void init_delay_params(SyncClocks *sc, CPUState *cpu)
122 {
123     if (!icount_align_option) {
124         return;
125     }
126     sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
127     sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
128     sc->last_cpu_icount
129         = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
130     if (sc->diff_clk < max_delay) {
131         max_delay = sc->diff_clk;
132     }
133     if (sc->diff_clk > max_advance) {
134         max_advance = sc->diff_clk;
135     }
136 
137     /* Print every 2s max if the guest is late. We limit the number
138        of printed messages to NB_PRINT_MAX(currently 100) */
139     print_delay(sc);
140 }
141 #else
142 static void align_clocks(SyncClocks *sc, const CPUState *cpu)
143 {
144 }
145 
146 static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
147 {
148 }
149 #endif /* CONFIG USER ONLY */
150 
151 uint32_t curr_cflags(CPUState *cpu)
152 {
153     uint32_t cflags = cpu->tcg_cflags;
154 
155     /*
156      * Record gdb single-step.  We should be exiting the TB by raising
157      * EXCP_DEBUG, but to simplify other tests, disable chaining too.
158      *
159      * For singlestep and -d nochain, suppress goto_tb so that
160      * we can log -d cpu,exec after every TB.
161      */
162     if (unlikely(cpu->singlestep_enabled)) {
163         cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
164     } else if (singlestep) {
165         cflags |= CF_NO_GOTO_TB | 1;
166     } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
167         cflags |= CF_NO_GOTO_TB;
168     }
169 
170     return cflags;
171 }
172 
173 /* Might cause an exception, so have a longjmp destination ready */
174 static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
175                                           target_ulong cs_base,
176                                           uint32_t flags, uint32_t cflags)
177 {
178     TranslationBlock *tb;
179     uint32_t hash;
180 
181     /* we should never be trying to look up an INVALID tb */
182     tcg_debug_assert(!(cflags & CF_INVALID));
183 
184     hash = tb_jmp_cache_hash_func(pc);
185     tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
186 
187     if (likely(tb &&
188                tb->pc == pc &&
189                tb->cs_base == cs_base &&
190                tb->flags == flags &&
191                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
192                tb_cflags(tb) == cflags)) {
193         return tb;
194     }
195     tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
196     if (tb == NULL) {
197         return NULL;
198     }
199     qatomic_set(&cpu->tb_jmp_cache[hash], tb);
200     return tb;
201 }
202 
203 static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
204                                 const TranslationBlock *tb)
205 {
206     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
207         && qemu_log_in_addr_range(pc)) {
208 
209         qemu_log_mask(CPU_LOG_EXEC,
210                       "Trace %d: %p [" TARGET_FMT_lx
211                       "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
212                       cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
213                       tb->flags, tb->cflags, lookup_symbol(pc));
214 
215 #if defined(DEBUG_DISAS)
216         if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
217             FILE *logfile = qemu_log_trylock();
218             if (logfile) {
219                 int flags = 0;
220 
221                 if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
222                     flags |= CPU_DUMP_FPU;
223                 }
224 #if defined(TARGET_I386)
225                 flags |= CPU_DUMP_CCOP;
226 #endif
227                 cpu_dump_state(cpu, logfile, flags);
228                 qemu_log_unlock(logfile);
229             }
230         }
231 #endif /* DEBUG_DISAS */
232     }
233 }
234 
235 static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
236                                   uint32_t *cflags)
237 {
238     CPUBreakpoint *bp;
239     bool match_page = false;
240 
241     if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
242         return false;
243     }
244 
245     /*
246      * Singlestep overrides breakpoints.
247      * This requirement is visible in the record-replay tests, where
248      * we would fail to make forward progress in reverse-continue.
249      *
250      * TODO: gdb singlestep should only override gdb breakpoints,
251      * so that one could (gdb) singlestep into the guest kernel's
252      * architectural breakpoint handler.
253      */
254     if (cpu->singlestep_enabled) {
255         return false;
256     }
257 
258     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
259         /*
260          * If we have an exact pc match, trigger the breakpoint.
261          * Otherwise, note matches within the page.
262          */
263         if (pc == bp->pc) {
264             bool match_bp = false;
265 
266             if (bp->flags & BP_GDB) {
267                 match_bp = true;
268             } else if (bp->flags & BP_CPU) {
269 #ifdef CONFIG_USER_ONLY
270                 g_assert_not_reached();
271 #else
272                 CPUClass *cc = CPU_GET_CLASS(cpu);
273                 assert(cc->tcg_ops->debug_check_breakpoint);
274                 match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
275 #endif
276             }
277 
278             if (match_bp) {
279                 cpu->exception_index = EXCP_DEBUG;
280                 return true;
281             }
282         } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
283             match_page = true;
284         }
285     }
286 
287     /*
288      * Within the same page as a breakpoint, single-step,
289      * returning to helper_lookup_tb_ptr after each insn looking
290      * for the actual breakpoint.
291      *
292      * TODO: Perhaps better to record all of the TBs associated
293      * with a given virtual page that contains a breakpoint, and
294      * then invalidate them when a new overlapping breakpoint is
295      * set on the page.  Non-overlapping TBs would not be
296      * invalidated, nor would any TB need to be invalidated as
297      * breakpoints are removed.
298      */
299     if (match_page) {
300         *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
301     }
302     return false;
303 }
304 
305 /**
306  * helper_lookup_tb_ptr: quick check for next tb
307  * @env: current cpu state
308  *
309  * Look for an existing TB matching the current cpu state.
310  * If found, return the code pointer.  If not found, return
311  * the tcg epilogue so that we return into cpu_tb_exec.
312  */
313 const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
314 {
315     CPUState *cpu = env_cpu(env);
316     TranslationBlock *tb;
317     target_ulong cs_base, pc;
318     uint32_t flags, cflags;
319 
320     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
321 
322     cflags = curr_cflags(cpu);
323     if (check_for_breakpoints(cpu, pc, &cflags)) {
324         cpu_loop_exit(cpu);
325     }
326 
327     tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
328     if (tb == NULL) {
329         return tcg_code_gen_epilogue;
330     }
331 
332     log_cpu_exec(pc, cpu, tb);
333 
334     return tb->tc.ptr;
335 }
336 
337 /* Execute a TB, and fix up the CPU state afterwards if necessary */
338 /*
339  * Disable CFI checks.
340  * TCG creates binary blobs at runtime, with the transformed code.
341  * A TB is a blob of binary code, created at runtime and called with an
342  * indirect function call. Since such function did not exist at compile time,
343  * the CFI runtime has no way to verify its signature and would fail.
344  * TCG is not considered a security-sensitive part of QEMU so this does not
345  * affect the impact of CFI in environment with high security requirements
346  */
347 static inline TranslationBlock * QEMU_DISABLE_CFI
348 cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
349 {
350     CPUArchState *env = cpu->env_ptr;
351     uintptr_t ret;
352     TranslationBlock *last_tb;
353     const void *tb_ptr = itb->tc.ptr;
354 
355     log_cpu_exec(itb->pc, cpu, itb);
356 
357     qemu_thread_jit_execute();
358     ret = tcg_qemu_tb_exec(env, tb_ptr);
359     cpu->can_do_io = 1;
360     /*
361      * TODO: Delay swapping back to the read-write region of the TB
362      * until we actually need to modify the TB.  The read-only copy,
363      * coming from the rx region, shares the same host TLB entry as
364      * the code that executed the exit_tb opcode that arrived here.
365      * If we insist on touching both the RX and the RW pages, we
366      * double the host TLB pressure.
367      */
368     last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
369     *tb_exit = ret & TB_EXIT_MASK;
370 
371     trace_exec_tb_exit(last_tb, *tb_exit);
372 
373     if (*tb_exit > TB_EXIT_IDX1) {
374         /* We didn't start executing this TB (eg because the instruction
375          * counter hit zero); we must restore the guest PC to the address
376          * of the start of the TB.
377          */
378         CPUClass *cc = CPU_GET_CLASS(cpu);
379         qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
380                                "Stopped execution of TB chain before %p ["
381                                TARGET_FMT_lx "] %s\n",
382                                last_tb->tc.ptr, last_tb->pc,
383                                lookup_symbol(last_tb->pc));
384         if (cc->tcg_ops->synchronize_from_tb) {
385             cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
386         } else {
387             assert(cc->set_pc);
388             cc->set_pc(cpu, last_tb->pc);
389         }
390     }
391 
392     /*
393      * If gdb single-step, and we haven't raised another exception,
394      * raise a debug exception.  Single-step with another exception
395      * is handled in cpu_handle_exception.
396      */
397     if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
398         cpu->exception_index = EXCP_DEBUG;
399         cpu_loop_exit(cpu);
400     }
401 
402     return last_tb;
403 }
404 
405 
406 static void cpu_exec_enter(CPUState *cpu)
407 {
408     CPUClass *cc = CPU_GET_CLASS(cpu);
409 
410     if (cc->tcg_ops->cpu_exec_enter) {
411         cc->tcg_ops->cpu_exec_enter(cpu);
412     }
413 }
414 
415 static void cpu_exec_exit(CPUState *cpu)
416 {
417     CPUClass *cc = CPU_GET_CLASS(cpu);
418 
419     if (cc->tcg_ops->cpu_exec_exit) {
420         cc->tcg_ops->cpu_exec_exit(cpu);
421     }
422 }
423 
424 void cpu_exec_step_atomic(CPUState *cpu)
425 {
426     CPUArchState *env = cpu->env_ptr;
427     TranslationBlock *tb;
428     target_ulong cs_base, pc;
429     uint32_t flags, cflags;
430     int tb_exit;
431 
432     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
433         start_exclusive();
434         g_assert(cpu == current_cpu);
435         g_assert(!cpu->running);
436         cpu->running = true;
437 
438         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
439 
440         cflags = curr_cflags(cpu);
441         /* Execute in a serial context. */
442         cflags &= ~CF_PARALLEL;
443         /* After 1 insn, return and release the exclusive lock. */
444         cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
445         /*
446          * No need to check_for_breakpoints here.
447          * We only arrive in cpu_exec_step_atomic after beginning execution
448          * of an insn that includes an atomic operation we can't handle.
449          * Any breakpoint for this insn will have been recognized earlier.
450          */
451 
452         tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
453         if (tb == NULL) {
454             mmap_lock();
455             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
456             mmap_unlock();
457         }
458 
459         cpu_exec_enter(cpu);
460         /* execute the generated code */
461         trace_exec_tb(tb, pc);
462         cpu_tb_exec(cpu, tb, &tb_exit);
463         cpu_exec_exit(cpu);
464     } else {
465         /*
466          * The mmap_lock is dropped by tb_gen_code if it runs out of
467          * memory.
468          */
469 #ifndef CONFIG_SOFTMMU
470         clear_helper_retaddr();
471         tcg_debug_assert(!have_mmap_lock());
472 #endif
473         if (qemu_mutex_iothread_locked()) {
474             qemu_mutex_unlock_iothread();
475         }
476         assert_no_pages_locked();
477         qemu_plugin_disable_mem_helpers(cpu);
478     }
479 
480     /*
481      * As we start the exclusive region before codegen we must still
482      * be in the region if we longjump out of either the codegen or
483      * the execution.
484      */
485     g_assert(cpu_in_exclusive_context(cpu));
486     cpu->running = false;
487     end_exclusive();
488 }
489 
490 struct tb_desc {
491     target_ulong pc;
492     target_ulong cs_base;
493     CPUArchState *env;
494     tb_page_addr_t phys_page1;
495     uint32_t flags;
496     uint32_t cflags;
497     uint32_t trace_vcpu_dstate;
498 };
499 
500 static bool tb_lookup_cmp(const void *p, const void *d)
501 {
502     const TranslationBlock *tb = p;
503     const struct tb_desc *desc = d;
504 
505     if (tb->pc == desc->pc &&
506         tb->page_addr[0] == desc->phys_page1 &&
507         tb->cs_base == desc->cs_base &&
508         tb->flags == desc->flags &&
509         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
510         tb_cflags(tb) == desc->cflags) {
511         /* check next page if needed */
512         if (tb->page_addr[1] == -1) {
513             return true;
514         } else {
515             tb_page_addr_t phys_page2;
516             target_ulong virt_page2;
517 
518             virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
519             phys_page2 = get_page_addr_code(desc->env, virt_page2);
520             if (tb->page_addr[1] == phys_page2) {
521                 return true;
522             }
523         }
524     }
525     return false;
526 }
527 
528 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
529                                    target_ulong cs_base, uint32_t flags,
530                                    uint32_t cflags)
531 {
532     tb_page_addr_t phys_pc;
533     struct tb_desc desc;
534     uint32_t h;
535 
536     desc.env = cpu->env_ptr;
537     desc.cs_base = cs_base;
538     desc.flags = flags;
539     desc.cflags = cflags;
540     desc.trace_vcpu_dstate = *cpu->trace_dstate;
541     desc.pc = pc;
542     phys_pc = get_page_addr_code(desc.env, pc);
543     if (phys_pc == -1) {
544         return NULL;
545     }
546     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
547     h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
548     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
549 }
550 
551 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
552 {
553     if (TCG_TARGET_HAS_direct_jump) {
554         uintptr_t offset = tb->jmp_target_arg[n];
555         uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
556         uintptr_t jmp_rx = tc_ptr + offset;
557         uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
558         tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr);
559     } else {
560         tb->jmp_target_arg[n] = addr;
561     }
562 }
563 
564 static inline void tb_add_jump(TranslationBlock *tb, int n,
565                                TranslationBlock *tb_next)
566 {
567     uintptr_t old;
568 
569     qemu_thread_jit_write();
570     assert(n < ARRAY_SIZE(tb->jmp_list_next));
571     qemu_spin_lock(&tb_next->jmp_lock);
572 
573     /* make sure the destination TB is valid */
574     if (tb_next->cflags & CF_INVALID) {
575         goto out_unlock_next;
576     }
577     /* Atomically claim the jump destination slot only if it was NULL */
578     old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
579                           (uintptr_t)tb_next);
580     if (old) {
581         goto out_unlock_next;
582     }
583 
584     /* patch the native jump address */
585     tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
586 
587     /* add in TB jmp list */
588     tb->jmp_list_next[n] = tb_next->jmp_list_head;
589     tb_next->jmp_list_head = (uintptr_t)tb | n;
590 
591     qemu_spin_unlock(&tb_next->jmp_lock);
592 
593     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
594                            "Linking TBs %p [" TARGET_FMT_lx
595                            "] index %d -> %p [" TARGET_FMT_lx "]\n",
596                            tb->tc.ptr, tb->pc, n,
597                            tb_next->tc.ptr, tb_next->pc);
598     return;
599 
600  out_unlock_next:
601     qemu_spin_unlock(&tb_next->jmp_lock);
602     return;
603 }
604 
605 static inline bool cpu_handle_halt(CPUState *cpu)
606 {
607 #ifndef CONFIG_USER_ONLY
608     if (cpu->halted) {
609 #if defined(TARGET_I386)
610         if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
611             X86CPU *x86_cpu = X86_CPU(cpu);
612             qemu_mutex_lock_iothread();
613             apic_poll_irq(x86_cpu->apic_state);
614             cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
615             qemu_mutex_unlock_iothread();
616         }
617 #endif /* TARGET_I386 */
618         if (!cpu_has_work(cpu)) {
619             return true;
620         }
621 
622         cpu->halted = 0;
623     }
624 #endif /* !CONFIG_USER_ONLY */
625 
626     return false;
627 }
628 
629 static inline void cpu_handle_debug_exception(CPUState *cpu)
630 {
631     CPUClass *cc = CPU_GET_CLASS(cpu);
632     CPUWatchpoint *wp;
633 
634     if (!cpu->watchpoint_hit) {
635         QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
636             wp->flags &= ~BP_WATCHPOINT_HIT;
637         }
638     }
639 
640     if (cc->tcg_ops->debug_excp_handler) {
641         cc->tcg_ops->debug_excp_handler(cpu);
642     }
643 }
644 
645 static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
646 {
647     if (cpu->exception_index < 0) {
648 #ifndef CONFIG_USER_ONLY
649         if (replay_has_exception()
650             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
651             /* Execute just one insn to trigger exception pending in the log */
652             cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
653                 | CF_NOIRQ | 1;
654         }
655 #endif
656         return false;
657     }
658     if (cpu->exception_index >= EXCP_INTERRUPT) {
659         /* exit request from the cpu execution loop */
660         *ret = cpu->exception_index;
661         if (*ret == EXCP_DEBUG) {
662             cpu_handle_debug_exception(cpu);
663         }
664         cpu->exception_index = -1;
665         return true;
666     } else {
667 #if defined(CONFIG_USER_ONLY)
668         /* if user mode only, we simulate a fake exception
669            which will be handled outside the cpu execution
670            loop */
671 #if defined(TARGET_I386)
672         CPUClass *cc = CPU_GET_CLASS(cpu);
673         cc->tcg_ops->fake_user_interrupt(cpu);
674 #endif /* TARGET_I386 */
675         *ret = cpu->exception_index;
676         cpu->exception_index = -1;
677         return true;
678 #else
679         if (replay_exception()) {
680             CPUClass *cc = CPU_GET_CLASS(cpu);
681             qemu_mutex_lock_iothread();
682             cc->tcg_ops->do_interrupt(cpu);
683             qemu_mutex_unlock_iothread();
684             cpu->exception_index = -1;
685 
686             if (unlikely(cpu->singlestep_enabled)) {
687                 /*
688                  * After processing the exception, ensure an EXCP_DEBUG is
689                  * raised when single-stepping so that GDB doesn't miss the
690                  * next instruction.
691                  */
692                 *ret = EXCP_DEBUG;
693                 cpu_handle_debug_exception(cpu);
694                 return true;
695             }
696         } else if (!replay_has_interrupt()) {
697             /* give a chance to iothread in replay mode */
698             *ret = EXCP_INTERRUPT;
699             return true;
700         }
701 #endif
702     }
703 
704     return false;
705 }
706 
707 #ifndef CONFIG_USER_ONLY
/*
 * Tell whether an interrupt has to be recorded for replay.
 *
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later, so on TARGET_I386 a request carrying
 * that bit is not recorded.  Every other request is.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return (interrupt_request & CPU_INTERRUPT_POLL) == 0;
#else
    return true;
#endif
}
721 #endif /* !CONFIG_USER_ONLY */
722 
723 static inline bool cpu_handle_interrupt(CPUState *cpu,
724                                         TranslationBlock **last_tb)
725 {
726     /*
727      * If we have requested custom cflags with CF_NOIRQ we should
728      * skip checking here. Any pending interrupts will get picked up
729      * by the next TB we execute under normal cflags.
730      */
731     if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
732         return false;
733     }
734 
735     /* Clear the interrupt flag now since we're processing
736      * cpu->interrupt_request and cpu->exit_request.
737      * Ensure zeroing happens before reading cpu->exit_request or
738      * cpu->interrupt_request (see also smp_wmb in cpu_exit())
739      */
740     qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);
741 
742     if (unlikely(qatomic_read(&cpu->interrupt_request))) {
743         int interrupt_request;
744         qemu_mutex_lock_iothread();
745         interrupt_request = cpu->interrupt_request;
746         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
747             /* Mask out external interrupts for this step. */
748             interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
749         }
750         if (interrupt_request & CPU_INTERRUPT_DEBUG) {
751             cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
752             cpu->exception_index = EXCP_DEBUG;
753             qemu_mutex_unlock_iothread();
754             return true;
755         }
756 #if !defined(CONFIG_USER_ONLY)
757         if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
758             /* Do nothing */
759         } else if (interrupt_request & CPU_INTERRUPT_HALT) {
760             replay_interrupt();
761             cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
762             cpu->halted = 1;
763             cpu->exception_index = EXCP_HLT;
764             qemu_mutex_unlock_iothread();
765             return true;
766         }
767 #if defined(TARGET_I386)
768         else if (interrupt_request & CPU_INTERRUPT_INIT) {
769             X86CPU *x86_cpu = X86_CPU(cpu);
770             CPUArchState *env = &x86_cpu->env;
771             replay_interrupt();
772             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
773             do_cpu_init(x86_cpu);
774             cpu->exception_index = EXCP_HALTED;
775             qemu_mutex_unlock_iothread();
776             return true;
777         }
778 #else
779         else if (interrupt_request & CPU_INTERRUPT_RESET) {
780             replay_interrupt();
781             cpu_reset(cpu);
782             qemu_mutex_unlock_iothread();
783             return true;
784         }
785 #endif /* !TARGET_I386 */
786         /* The target hook has 3 exit conditions:
787            False when the interrupt isn't processed,
788            True when it is, and we should restart on a new TB,
789            and via longjmp via cpu_loop_exit.  */
790         else {
791             CPUClass *cc = CPU_GET_CLASS(cpu);
792 
793             if (cc->tcg_ops->cpu_exec_interrupt &&
794                 cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
795                 if (need_replay_interrupt(interrupt_request)) {
796                     replay_interrupt();
797                 }
798                 /*
799                  * After processing the interrupt, ensure an EXCP_DEBUG is
800                  * raised when single-stepping so that GDB doesn't miss the
801                  * next instruction.
802                  */
803                 if (unlikely(cpu->singlestep_enabled)) {
804                     cpu->exception_index = EXCP_DEBUG;
805                     qemu_mutex_unlock_iothread();
806                     return true;
807                 }
808                 cpu->exception_index = -1;
809                 *last_tb = NULL;
810             }
811             /* The target hook may have updated the 'cpu->interrupt_request';
812              * reload the 'interrupt_request' value */
813             interrupt_request = cpu->interrupt_request;
814         }
815 #endif /* !CONFIG_USER_ONLY */
816         if (interrupt_request & CPU_INTERRUPT_EXITTB) {
817             cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
818             /* ensure that no TB jump will be modified as
819                the program flow was changed */
820             *last_tb = NULL;
821         }
822 
823         /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
824         qemu_mutex_unlock_iothread();
825     }
826 
827     /* Finally, check if we need to exit to the main loop.  */
828     if (unlikely(qatomic_read(&cpu->exit_request))
829         || (icount_enabled()
830             && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
831             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
832         qatomic_set(&cpu->exit_request, 0);
833         if (cpu->exception_index == -1) {
834             cpu->exception_index = EXCP_INTERRUPT;
835         }
836         return true;
837     }
838 
839     return false;
840 }
841 
842 static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
843                                     TranslationBlock **last_tb, int *tb_exit)
844 {
845     int32_t insns_left;
846 
847     trace_exec_tb(tb, tb->pc);
848     tb = cpu_tb_exec(cpu, tb, tb_exit);
849     if (*tb_exit != TB_EXIT_REQUESTED) {
850         *last_tb = tb;
851         return;
852     }
853 
854     *last_tb = NULL;
855     insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
856     if (insns_left < 0) {
857         /* Something asked us to stop executing chained TBs; just
858          * continue round the main loop. Whatever requested the exit
859          * will also have set something else (eg exit_request or
860          * interrupt_request) which will be handled by
861          * cpu_handle_interrupt.  cpu_handle_interrupt will also
862          * clear cpu->icount_decr.u16.high.
863          */
864         return;
865     }
866 
867     /* Instruction counter expired.  */
868     assert(icount_enabled());
869 #ifndef CONFIG_USER_ONLY
870     /* Ensure global icount has gone forward */
871     icount_update(cpu);
872     /* Refill decrementer and continue execution.  */
873     insns_left = MIN(0xffff, cpu->icount_budget);
874     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
875     cpu->icount_extra = cpu->icount_budget - insns_left;
876 
877     /*
878      * If the next tb has more instructions than we have left to
879      * execute we need to ensure we find/generate a TB with exactly
880      * insns_left instructions in it.
881      */
882     if (insns_left > 0 && insns_left < tb->icount)  {
883         assert(insns_left <= CF_COUNT_MASK);
884         assert(cpu->icount_extra == 0);
885         cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
886     }
887 #endif
888 }
889 
890 /* main execution loop */
891 
/*
 * cpu_exec: run translated blocks for @cpu until cpu_handle_exception()
 * reports that the loop has to stop.
 *
 * Returns EXCP_HALTED straight away when cpu_handle_halt() is true.
 * Otherwise returns 'ret', which this function only ever hands to
 * cpu_handle_exception(), so the value comes from there.
 *
 * The sigsetjmp() below is the re-entry point for anything that
 * siglongjmp()s to cpu->jmp_env (NOTE(review): presumably the
 * cpu_loop_exit() family, which is not visible here -- confirm).  On
 * re-entry the non-zero branch cleans up and execution falls into the
 * same exception/interrupt loops again.
 */
892 int cpu_exec(CPUState *cpu)
893 {
894     int ret;
895     SyncClocks sc = { 0 };
896 
897     /* replay_interrupt may need current_cpu */
898     current_cpu = cpu;
899 
    /* Early out: taken before the RCU read lock and cpu_exec_enter(). */
900     if (cpu_handle_halt(cpu)) {
901         return EXCP_HALTED;
902     }
903 
    /* Paired with rcu_read_unlock() at the end of this function. */
904     rcu_read_lock();
905 
    /* Paired with cpu_exec_exit() at the end of this function. */
906     cpu_exec_enter(cpu);
907 
908     /* Calculate difference between guest clock and host clock.
909      * This delay includes the delay of the last cycle, so
910      * what we have to do is sleep until it is 0. As for the
911      * advance/delay we gain here, we try to fix it next time.
912      */
913     init_delay_params(&sc, cpu);
914 
915     /* prepare setjmp context for exception handling */
916     if (sigsetjmp(cpu->jmp_env, 0) != 0) {
        /* Only reached via siglongjmp(); the first pass skips this block. */
917 #if defined(__clang__)
918         /*
919          * Some compilers wrongly smash all local variables after
920          * siglongjmp (the spec requires that only non-volatile locals
921          * which are changed between the sigsetjmp and siglongjmp are
922          * permitted to be trashed). There were bug reports for gcc
923          * 4.5.0 and clang.  The bug is fixed in all versions of gcc
924          * that we support, but is still unfixed in clang:
925          *   https://bugs.llvm.org/show_bug.cgi?id=21183
926          *
927          * Reload an essential local variable here for those compilers.
928          * Newer versions of gcc would complain about this code (-Wclobbered),
929          * so we only perform the workaround for clang.
930          */
931         cpu = current_cpu;
932 #else
933         /* Non-buggy compilers preserve this; assert the correct value. */
934         g_assert(cpu == current_cpu);
935 #endif
936 
937 #ifndef CONFIG_SOFTMMU
938         clear_helper_retaddr();
939         tcg_debug_assert(!have_mmap_lock());
940 #endif
        /* Drop the iothread lock only if the interrupted code still held it. */
941         if (qemu_mutex_iothread_locked()) {
942             qemu_mutex_unlock_iothread();
943         }
944         qemu_plugin_disable_mem_helpers(cpu);
945 
946         assert_no_pages_locked();
947     }
948 
949     /* if an exception is pending, we execute it here */
950     while (!cpu_handle_exception(cpu, &ret)) {
        /*
         * last_tb and tb_exit are filled in by cpu_loop_exec_tb() and
         * passed to tb_add_jump() on the following inner iteration.  Both
         * start out cleared every time the outer loop comes round.
         */
951         TranslationBlock *last_tb = NULL;
952         int tb_exit = 0;
953 
954         while (!cpu_handle_interrupt(cpu, &last_tb)) {
955             TranslationBlock *tb;
956             target_ulong cs_base, pc;
957             uint32_t flags, cflags;
958 
959             cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);
960 
961             /*
962              * When requested, use an exact setting for cflags for the next
963              * execution.  This is used for icount, precise smc, and stop-
964              * after-access watchpoints.  Since this request should never
965              * have CF_INVALID set, -1 is a convenient invalid value that
966              * does not require tcg headers for cpu_common_reset.
967              */
968             cflags = cpu->cflags_next_tb;
969             if (cflags == -1) {
970                 cflags = curr_cflags(cpu);
971             } else {
                /* The request is one-shot: consume it. */
972                 cpu->cflags_next_tb = -1;
973             }
974 
            /*
             * A hit leaves only the inner loop; the outer loop then calls
             * cpu_handle_exception() again.
             */
975             if (check_for_breakpoints(cpu, pc, &cflags)) {
976                 break;
977             }
978 
979             tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
980             if (tb == NULL) {
                /* Lookup missed: generate the TB with mmap_lock held. */
981                 mmap_lock();
982                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
983                 mmap_unlock();
984                 /*
985                  * We add the TB in the virtual pc hash table
986                  * for the fast lookup
987                  */
988                 qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
989             }
990 
991 #ifndef CONFIG_USER_ONLY
992             /*
993              * We don't take care of direct jumps when address mapping
994              * changes in system emulation.  So it's not safe to make a
995              * direct jump to a TB spanning two pages because the mapping
996              * for the second page can change.
997              */
998             if (tb->page_addr[1] != -1) {
999                 last_tb = NULL;
1000             }
1001 #endif
1002             /* See if we can patch the calling TB. */
1003             if (last_tb) {
1004                 tb_add_jump(last_tb, tb_exit, tb);
1005             }
1006 
1007             cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
1008 
1009             /* Try to align the host and virtual clocks
1010                if the guest is in advance */
1011             align_clocks(&sc, cpu);
1012         }
1013     }
1014 
    /* Normal exit: undo cpu_exec_enter() and rcu_read_lock() from above. */
1015     cpu_exec_exit(cpu);
1016     rcu_read_unlock();
1017 
1018     return ret;
1019 }
1020 
1021 void tcg_exec_realizefn(CPUState *cpu, Error **errp)
1022 {
1023     static bool tcg_target_initialized;
1024     CPUClass *cc = CPU_GET_CLASS(cpu);
1025 
1026     if (!tcg_target_initialized) {
1027         cc->tcg_ops->initialize();
1028         tcg_target_initialized = true;
1029     }
1030     tlb_init(cpu);
1031     qemu_plugin_vcpu_init_hook(cpu);
1032 
1033 #ifndef CONFIG_USER_ONLY
1034     tcg_iommu_init_notifier_list(cpu);
1035 #endif /* !CONFIG_USER_ONLY */
1036 }
1037 
1038 /* undo the initializations in reverse order */
1039 void tcg_exec_unrealizefn(CPUState *cpu)
1040 {
1041 #ifndef CONFIG_USER_ONLY
1042     tcg_iommu_free_notifier_list(cpu);
1043 #endif /* !CONFIG_USER_ONLY */
1044 
1045     qemu_plugin_vcpu_exit_hook(cpu);
1046     tlb_destroy(cpu);
1047 }
1048 
1049 #ifndef CONFIG_USER_ONLY
1050 
1051 void dump_drift_info(GString *buf)
1052 {
1053     if (!icount_enabled()) {
1054         return;
1055     }
1056 
1057     g_string_append_printf(buf, "Host - Guest clock  %"PRIi64" ms\n",
1058                            (cpu_get_clock() - icount_get()) / SCALE_MS);
1059     if (icount_align_option) {
1060         g_string_append_printf(buf, "Max guest delay     %"PRIi64" ms\n",
1061                                -max_delay / SCALE_MS);
1062         g_string_append_printf(buf, "Max guest advance   %"PRIi64" ms\n",
1063                                max_advance / SCALE_MS);
1064     } else {
1065         g_string_append_printf(buf, "Max guest delay     NA\n");
1066         g_string_append_printf(buf, "Max guest advance   NA\n");
1067     }
1068 }
1069 
1070 HumanReadableText *qmp_x_query_jit(Error **errp)
1071 {
1072     g_autoptr(GString) buf = g_string_new("");
1073 
1074     if (!tcg_enabled()) {
1075         error_setg(errp, "JIT information is only available with accel=tcg");
1076         return NULL;
1077     }
1078 
1079     dump_exec_info(buf);
1080     dump_drift_info(buf);
1081 
1082     return human_readable_text_from_str(buf);
1083 }
1084 
1085 HumanReadableText *qmp_x_query_opcount(Error **errp)
1086 {
1087     g_autoptr(GString) buf = g_string_new("");
1088 
1089     if (!tcg_enabled()) {
1090         error_setg(errp, "Opcode count information is only available with accel=tcg");
1091         return NULL;
1092     }
1093 
1094     dump_opcount_info(buf);
1095 
1096     return human_readable_text_from_str(buf);
1097 }
1098 
1099 #ifdef CONFIG_PROFILER
1100 
/*
 * Value printed as "async time" by qmp_x_query_profile(), which resets it
 * to zero after each report.  NOTE(review): presumably nanoseconds, since
 * the report divides it by NANOSECONDS_PER_SECOND -- confirm against the
 * code that accumulates it.
 */
1101 int64_t dev_time;
1102 
1103 HumanReadableText *qmp_x_query_profile(Error **errp)
1104 {
1105     g_autoptr(GString) buf = g_string_new("");
1106     static int64_t last_cpu_exec_time;
1107     int64_t cpu_exec_time;
1108     int64_t delta;
1109 
1110     cpu_exec_time = tcg_cpu_exec_time();
1111     delta = cpu_exec_time - last_cpu_exec_time;
1112 
1113     g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
1114                            dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
1115     g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
1116                            delta, delta / (double)NANOSECONDS_PER_SECOND);
1117     last_cpu_exec_time = cpu_exec_time;
1118     dev_time = 0;
1119 
1120     return human_readable_text_from_str(buf);
1121 }
1122 #else
1123 HumanReadableText *qmp_x_query_profile(Error **errp)
1124 {
1125     error_setg(errp, "Internal profiler not compiled");
1126     return NULL;
1127 }
1128 #endif
1129 
1130 #endif /* !CONFIG_USER_ONLY */
1131