xref: /openbmc/qemu/accel/tcg/cpu-exec.c (revision bbdbc47b5c6907e065f84e751d127dae3cebfd54)
1 /*
2  *  emulator main execution loop
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/qemu-print.h"
22 #include "qapi/error.h"
23 #include "qapi/type-helpers.h"
24 #include "hw/core/cpu.h"
25 #include "accel/tcg/cpu-ops.h"
26 #include "accel/tcg/helper-retaddr.h"
27 #include "trace.h"
28 #include "disas/disas.h"
29 #include "exec/cpu-common.h"
30 #include "exec/cpu-interrupt.h"
31 #include "exec/page-protection.h"
32 #include "exec/mmap-lock.h"
33 #include "exec/translation-block.h"
34 #include "tcg/tcg.h"
35 #include "qemu/atomic.h"
36 #include "qemu/rcu.h"
37 #include "exec/log.h"
38 #include "qemu/main-loop.h"
39 #include "exec/icount.h"
40 #include "exec/replay-core.h"
41 #include "system/tcg.h"
42 #include "exec/helper-proto-common.h"
43 #include "tb-jmp-cache.h"
44 #include "tb-hash.h"
45 #include "tb-context.h"
46 #include "tb-internal.h"
47 #include "internal-common.h"
48 
49 /* -icount align implementation. */
50 
/*
 * Bookkeeping shared by init_delay_params(), align_clocks() and
 * print_delay() when aligning the guest clock with the host clock.
 */
typedef struct SyncClocks {
    /*
     * QEMU_CLOCK_VIRTUAL minus QEMU_CLOCK_VIRTUAL_RT, in nanoseconds.
     * Positive: the guest is ahead; negative: the guest is late.
     */
    int64_t diff_clk;
    /* icount_extra + icount_decr.u16.low as sampled at the last update. */
    int64_t last_cpu_icount;
    /* QEMU_CLOCK_VIRTUAL_RT reading (ns) taken by init_delay_params(). */
    int64_t realtime_clock;
} SyncClocks;
56 
57 #if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
/* Advance (ns) above which align_clocks() puts the thread to sleep. */
#define VM_CLOCK_ADVANCE 3000000
/* Hysteresis (seconds) below the last reported delay before re-reporting. */
#define THRESHOLD_REDUCE 1.5
/* Minimum interval (ns) between two "guest is late" warnings. */
#define MAX_DELAY_PRINT_RATE 2000000000LL
/* Upper bound on the number of "guest is late" warnings printed. */
#define MAX_NB_PRINTS 100

/* Smallest diff_clk observed by init_delay_params() (most late, ns). */
int64_t max_delay;
/* Largest diff_clk observed by init_delay_params() (most ahead, ns). */
int64_t max_advance;
69 
70 static void align_clocks(SyncClocks *sc, CPUState *cpu)
71 {
72     int64_t cpu_icount;
73 
74     if (!icount_align_option) {
75         return;
76     }
77 
78     cpu_icount = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
79     sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
80     sc->last_cpu_icount = cpu_icount;
81 
82     if (sc->diff_clk > VM_CLOCK_ADVANCE) {
83 #ifndef _WIN32
84         struct timespec sleep_delay, rem_delay;
85         sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
86         sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
87         if (nanosleep(&sleep_delay, &rem_delay) < 0) {
88             sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
89         } else {
90             sc->diff_clk = 0;
91         }
92 #else
93         Sleep(sc->diff_clk / SCALE_MS);
94         sc->diff_clk = 0;
95 #endif
96     }
97 }
98 
99 static void print_delay(const SyncClocks *sc)
100 {
101     static float threshold_delay;
102     static int64_t last_realtime_clock;
103     static int nb_prints;
104 
105     if (icount_align_option &&
106         sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
107         nb_prints < MAX_NB_PRINTS) {
108         if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
109             (-sc->diff_clk / (float)1000000000LL <
110              (threshold_delay - THRESHOLD_REDUCE))) {
111             threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
112             qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
113                         threshold_delay - 1,
114                         threshold_delay);
115             nb_prints++;
116             last_realtime_clock = sc->realtime_clock;
117         }
118     }
119 }
120 
121 static void init_delay_params(SyncClocks *sc, CPUState *cpu)
122 {
123     if (!icount_align_option) {
124         return;
125     }
126     sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
127     sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
128     sc->last_cpu_icount
129         = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
130     if (sc->diff_clk < max_delay) {
131         max_delay = sc->diff_clk;
132     }
133     if (sc->diff_clk > max_advance) {
134         max_advance = sc->diff_clk;
135     }
136 
137     /* Print every 2s max if the guest is late. We limit the number
138        of printed messages to NB_PRINT_MAX(currently 100) */
139     print_delay(sc);
140 }
141 #else
142 static void align_clocks(SyncClocks *sc, const CPUState *cpu)
143 {
144 }
145 
146 static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
147 {
148 }
149 #endif /* CONFIG USER ONLY */
150 
/*
 * Search key handed to tb_lookup_cmp() through qht_lookup_custom().
 */
struct tb_desc {
    /* CPU state (pc, cs_base, flags, cflags) the TB must match. */
    TCGTBCPUState s;
    /* Env used to translate the second page when a TB spans two pages. */
    CPUArchState *env;
    /* Result of get_page_addr_code() for s.pc. */
    tb_page_addr_t page_addr0;
};
156 
157 static bool tb_lookup_cmp(const void *p, const void *d)
158 {
159     const TranslationBlock *tb = p;
160     const struct tb_desc *desc = d;
161 
162     if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->s.pc) &&
163         tb_page_addr0(tb) == desc->page_addr0 &&
164         tb->cs_base == desc->s.cs_base &&
165         tb->flags == desc->s.flags &&
166         tb_cflags(tb) == desc->s.cflags) {
167         /* check next page if needed */
168         tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
169         if (tb_phys_page1 == -1) {
170             return true;
171         } else {
172             tb_page_addr_t phys_page1;
173             vaddr virt_page1;
174 
175             /*
176              * We know that the first page matched, and an otherwise valid TB
177              * encountered an incomplete instruction at the end of that page,
178              * therefore we know that generating a new TB from the current PC
179              * must also require reading from the next page -- even if the
180              * second pages do not match, and therefore the resulting insn
181              * is different for the new TB.  Therefore any exception raised
182              * here by the faulting lookup is not premature.
183              */
184             virt_page1 = TARGET_PAGE_ALIGN(desc->s.pc);
185             phys_page1 = get_page_addr_code(desc->env, virt_page1);
186             if (tb_phys_page1 == phys_page1) {
187                 return true;
188             }
189         }
190     }
191     return false;
192 }
193 
194 static TranslationBlock *tb_htable_lookup(CPUState *cpu, TCGTBCPUState s)
195 {
196     tb_page_addr_t phys_pc;
197     struct tb_desc desc;
198     uint32_t h;
199 
200     desc.s = s;
201     desc.env = cpu_env(cpu);
202     phys_pc = get_page_addr_code(desc.env, s.pc);
203     if (phys_pc == -1) {
204         return NULL;
205     }
206     desc.page_addr0 = phys_pc;
207     h = tb_hash_func(phys_pc, (s.cflags & CF_PCREL ? 0 : s.pc),
208                      s.flags, s.cs_base, s.cflags);
209     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
210 }
211 
212 /**
213  * tb_lookup:
214  * @cpu: CPU that will execute the returned translation block
215  * @pc: guest PC
216  * @cs_base: arch-specific value associated with translation block
217  * @flags: arch-specific translation block flags
218  * @cflags: CF_* flags
219  *
220  * Look up a translation block inside the QHT using @pc, @cs_base, @flags and
221  * @cflags. Uses @cpu's tb_jmp_cache. Might cause an exception, so have a
222  * longjmp destination ready.
223  *
224  * Returns: an existing translation block or NULL.
225  */
226 static inline TranslationBlock *tb_lookup(CPUState *cpu, TCGTBCPUState s)
227 {
228     TranslationBlock *tb;
229     CPUJumpCache *jc;
230     uint32_t hash;
231 
232     /* we should never be trying to look up an INVALID tb */
233     tcg_debug_assert(!(s.cflags & CF_INVALID));
234 
235     hash = tb_jmp_cache_hash_func(s.pc);
236     jc = cpu->tb_jmp_cache;
237 
238     tb = qatomic_read(&jc->array[hash].tb);
239     if (likely(tb &&
240                jc->array[hash].pc == s.pc &&
241                tb->cs_base == s.cs_base &&
242                tb->flags == s.flags &&
243                tb_cflags(tb) == s.cflags)) {
244         goto hit;
245     }
246 
247     tb = tb_htable_lookup(cpu, s);
248     if (tb == NULL) {
249         return NULL;
250     }
251 
252     jc->array[hash].pc = s.pc;
253     qatomic_set(&jc->array[hash].tb, tb);
254 
255 hit:
256     /*
257      * As long as tb is not NULL, the contents are consistent.  Therefore,
258      * the virtual PC has to match for non-CF_PCREL translations.
259      */
260     assert((tb_cflags(tb) & CF_PCREL) || tb->pc == s.pc);
261     return tb;
262 }
263 
264 static void log_cpu_exec(vaddr pc, CPUState *cpu,
265                          const TranslationBlock *tb)
266 {
267     if (qemu_log_in_addr_range(pc)) {
268         qemu_log_mask(CPU_LOG_EXEC,
269                       "Trace %d: %p [%08" PRIx64
270                       "/%016" VADDR_PRIx "/%08x/%08x] %s\n",
271                       cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
272                       tb->flags, tb->cflags, lookup_symbol(pc));
273 
274         if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
275             FILE *logfile = qemu_log_trylock();
276             if (logfile) {
277                 int flags = CPU_DUMP_CCOP;
278 
279                 if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
280                     flags |= CPU_DUMP_FPU;
281                 }
282                 if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
283                     flags |= CPU_DUMP_VPU;
284                 }
285                 cpu_dump_state(cpu, logfile, flags);
286                 qemu_log_unlock(logfile);
287             }
288         }
289     }
290 }
291 
292 static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
293                                        uint32_t *cflags)
294 {
295     CPUBreakpoint *bp;
296     bool match_page = false;
297 
298     /*
299      * Singlestep overrides breakpoints.
300      * This requirement is visible in the record-replay tests, where
301      * we would fail to make forward progress in reverse-continue.
302      *
303      * TODO: gdb singlestep should only override gdb breakpoints,
304      * so that one could (gdb) singlestep into the guest kernel's
305      * architectural breakpoint handler.
306      */
307     if (cpu->singlestep_enabled) {
308         return false;
309     }
310 
311     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
312         /*
313          * If we have an exact pc match, trigger the breakpoint.
314          * Otherwise, note matches within the page.
315          */
316         if (pc == bp->pc) {
317             bool match_bp = false;
318 
319             if (bp->flags & BP_GDB) {
320                 match_bp = true;
321             } else if (bp->flags & BP_CPU) {
322 #ifdef CONFIG_USER_ONLY
323                 g_assert_not_reached();
324 #else
325                 const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
326                 assert(tcg_ops->debug_check_breakpoint);
327                 match_bp = tcg_ops->debug_check_breakpoint(cpu);
328 #endif
329             }
330 
331             if (match_bp) {
332                 cpu->exception_index = EXCP_DEBUG;
333                 return true;
334             }
335         } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
336             match_page = true;
337         }
338     }
339 
340     /*
341      * Within the same page as a breakpoint, single-step,
342      * returning to helper_lookup_tb_ptr after each insn looking
343      * for the actual breakpoint.
344      *
345      * TODO: Perhaps better to record all of the TBs associated
346      * with a given virtual page that contains a breakpoint, and
347      * then invalidate them when a new overlapping breakpoint is
348      * set on the page.  Non-overlapping TBs would not be
349      * invalidated, nor would any TB need to be invalidated as
350      * breakpoints are removed.
351      */
352     if (match_page) {
353         *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
354     }
355     return false;
356 }
357 
358 static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
359                                          uint32_t *cflags)
360 {
361     return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
362         check_for_breakpoints_slow(cpu, pc, cflags);
363 }
364 
365 /**
366  * helper_lookup_tb_ptr: quick check for next tb
367  * @env: current cpu state
368  *
369  * Look for an existing TB matching the current cpu state.
370  * If found, return the code pointer.  If not found, return
371  * the tcg epilogue so that we return into cpu_tb_exec.
372  */
373 const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
374 {
375     CPUState *cpu = env_cpu(env);
376     TranslationBlock *tb;
377 
378     /*
379      * By definition we've just finished a TB, so I/O is OK.
380      * Avoid the possibility of calling cpu_io_recompile() if
381      * a page table walk triggered by tb_lookup() calling
382      * probe_access_internal() happens to touch an MMIO device.
383      * The next TB, if we chain to it, will clear the flag again.
384      */
385     cpu->neg.can_do_io = true;
386 
387     TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
388     s.cflags = curr_cflags(cpu);
389 
390     if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
391         cpu_loop_exit(cpu);
392     }
393 
394     tb = tb_lookup(cpu, s);
395     if (tb == NULL) {
396         return tcg_code_gen_epilogue;
397     }
398 
399     if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
400         log_cpu_exec(s.pc, cpu, tb);
401     }
402 
403     return tb->tc.ptr;
404 }
405 
406 /* Return the current PC from CPU, which may be cached in TB. */
407 static vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
408 {
409     if (tb_cflags(tb) & CF_PCREL) {
410         return cpu->cc->get_pc(cpu);
411     } else {
412         return tb->pc;
413     }
414 }
415 
416 /* Execute a TB, and fix up the CPU state afterwards if necessary */
417 /*
418  * Disable CFI checks.
419  * TCG creates binary blobs at runtime, with the transformed code.
420  * A TB is a blob of binary code, created at runtime and called with an
421  * indirect function call. Since such function did not exist at compile time,
422  * the CFI runtime has no way to verify its signature and would fail.
423  * TCG is not considered a security-sensitive part of QEMU so this does not
424  * affect the impact of CFI in environment with high security requirements
425  */
426 static inline TranslationBlock * QEMU_DISABLE_CFI
427 cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
428 {
429     uintptr_t ret;
430     TranslationBlock *last_tb;
431     const void *tb_ptr = itb->tc.ptr;
432 
433     if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
434         log_cpu_exec(log_pc(cpu, itb), cpu, itb);
435     }
436 
437     qemu_thread_jit_execute();
438     ret = tcg_qemu_tb_exec(cpu_env(cpu), tb_ptr);
439     cpu->neg.can_do_io = true;
440     qemu_plugin_disable_mem_helpers(cpu);
441     /*
442      * TODO: Delay swapping back to the read-write region of the TB
443      * until we actually need to modify the TB.  The read-only copy,
444      * coming from the rx region, shares the same host TLB entry as
445      * the code that executed the exit_tb opcode that arrived here.
446      * If we insist on touching both the RX and the RW pages, we
447      * double the host TLB pressure.
448      */
449     last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
450     *tb_exit = ret & TB_EXIT_MASK;
451 
452     trace_exec_tb_exit(last_tb, *tb_exit);
453 
454     if (*tb_exit > TB_EXIT_IDX1) {
455         /* We didn't start executing this TB (eg because the instruction
456          * counter hit zero); we must restore the guest PC to the address
457          * of the start of the TB.
458          */
459         CPUClass *cc = cpu->cc;
460         const TCGCPUOps *tcg_ops = cc->tcg_ops;
461 
462         if (tcg_ops->synchronize_from_tb) {
463             tcg_ops->synchronize_from_tb(cpu, last_tb);
464         } else {
465             tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
466             assert(cc->set_pc);
467             cc->set_pc(cpu, last_tb->pc);
468         }
469         if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
470             vaddr pc = log_pc(cpu, last_tb);
471             if (qemu_log_in_addr_range(pc)) {
472                 qemu_log("Stopped execution of TB chain before %p [%016"
473                          VADDR_PRIx "] %s\n",
474                          last_tb->tc.ptr, pc, lookup_symbol(pc));
475             }
476         }
477     }
478 
479     /*
480      * If gdb single-step, and we haven't raised another exception,
481      * raise a debug exception.  Single-step with another exception
482      * is handled in cpu_handle_exception.
483      */
484     if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
485         cpu->exception_index = EXCP_DEBUG;
486         cpu_loop_exit(cpu);
487     }
488 
489     return last_tb;
490 }
491 
492 
493 static void cpu_exec_enter(CPUState *cpu)
494 {
495     const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
496 
497     if (tcg_ops->cpu_exec_enter) {
498         tcg_ops->cpu_exec_enter(cpu);
499     }
500 }
501 
502 static void cpu_exec_exit(CPUState *cpu)
503 {
504     const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
505 
506     if (tcg_ops->cpu_exec_exit) {
507         tcg_ops->cpu_exec_exit(cpu);
508     }
509 }
510 
511 static void cpu_exec_longjmp_cleanup(CPUState *cpu)
512 {
513     /* Non-buggy compilers preserve this; assert the correct value. */
514     g_assert(cpu == current_cpu);
515 
516 #ifdef CONFIG_USER_ONLY
517     clear_helper_retaddr();
518     if (have_mmap_lock()) {
519         mmap_unlock();
520     }
521 #else
522     /*
523      * For softmmu, a tlb_fill fault during translation will land here,
524      * and we need to release any page locks held.  In system mode we
525      * have one tcg_ctx per thread, so we know it was this cpu doing
526      * the translation.
527      *
528      * Alternative 1: Install a cleanup to be called via an exception
529      * handling safe longjmp.  It seems plausible that all our hosts
530      * support such a thing.  We'd have to properly register unwind info
531      * for the JIT for EH, rather that just for GDB.
532      *
533      * Alternative 2: Set and restore cpu->jmp_env in tb_gen_code to
534      * capture the cpu_loop_exit longjmp, perform the cleanup, and
535      * jump again to arrive here.
536      */
537     if (tcg_ctx->gen_tb) {
538         tb_unlock_pages(tcg_ctx->gen_tb);
539         tcg_ctx->gen_tb = NULL;
540     }
541 #endif
542     if (bql_locked()) {
543         bql_unlock();
544     }
545     assert_no_pages_locked();
546 }
547 
548 void cpu_exec_step_atomic(CPUState *cpu)
549 {
550     TranslationBlock *tb;
551     int tb_exit;
552 
553     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
554         start_exclusive();
555         g_assert(cpu == current_cpu);
556         g_assert(!cpu->running);
557         cpu->running = true;
558 
559         TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
560         s.cflags = curr_cflags(cpu);
561 
562         /* Execute in a serial context. */
563         s.cflags &= ~CF_PARALLEL;
564         /* After 1 insn, return and release the exclusive lock. */
565         s.cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
566         /*
567          * No need to check_for_breakpoints here.
568          * We only arrive in cpu_exec_step_atomic after beginning execution
569          * of an insn that includes an atomic operation we can't handle.
570          * Any breakpoint for this insn will have been recognized earlier.
571          */
572 
573         tb = tb_lookup(cpu, s);
574         if (tb == NULL) {
575             mmap_lock();
576             tb = tb_gen_code(cpu, s);
577             mmap_unlock();
578         }
579 
580         cpu_exec_enter(cpu);
581         /* execute the generated code */
582         trace_exec_tb(tb, s.pc);
583         cpu_tb_exec(cpu, tb, &tb_exit);
584         cpu_exec_exit(cpu);
585     } else {
586         cpu_exec_longjmp_cleanup(cpu);
587     }
588 
589     /*
590      * As we start the exclusive region before codegen we must still
591      * be in the region if we longjump out of either the codegen or
592      * the execution.
593      */
594     g_assert(cpu_in_exclusive_context(cpu));
595     cpu->running = false;
596     end_exclusive();
597 }
598 
599 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
600 {
601     /*
602      * Get the rx view of the structure, from which we find the
603      * executable code address, and tb_target_set_jmp_target can
604      * produce a pc-relative displacement to jmp_target_addr[n].
605      */
606     const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb);
607     uintptr_t offset = tb->jmp_insn_offset[n];
608     uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset;
609     uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
610 
611     tb->jmp_target_addr[n] = addr;
612     tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw);
613 }
614 
615 static inline void tb_add_jump(TranslationBlock *tb, int n,
616                                TranslationBlock *tb_next)
617 {
618     uintptr_t old;
619 
620     qemu_thread_jit_write();
621     assert(n < ARRAY_SIZE(tb->jmp_list_next));
622     qemu_spin_lock(&tb_next->jmp_lock);
623 
624     /* make sure the destination TB is valid */
625     if (tb_next->cflags & CF_INVALID) {
626         goto out_unlock_next;
627     }
628     /* Atomically claim the jump destination slot only if it was NULL */
629     old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
630                           (uintptr_t)tb_next);
631     if (old) {
632         goto out_unlock_next;
633     }
634 
635     /* patch the native jump address */
636     tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
637 
638     /* add in TB jmp list */
639     tb->jmp_list_next[n] = tb_next->jmp_list_head;
640     tb_next->jmp_list_head = (uintptr_t)tb | n;
641 
642     qemu_spin_unlock(&tb_next->jmp_lock);
643 
644     qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
645                   tb->tc.ptr, n, tb_next->tc.ptr);
646     return;
647 
648  out_unlock_next:
649     qemu_spin_unlock(&tb_next->jmp_lock);
650 }
651 
652 static inline bool cpu_handle_halt(CPUState *cpu)
653 {
654 #ifndef CONFIG_USER_ONLY
655     if (cpu->halted) {
656         const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
657         bool leave_halt = tcg_ops->cpu_exec_halt(cpu);
658 
659         if (!leave_halt) {
660             return true;
661         }
662 
663         cpu->halted = 0;
664     }
665 #endif /* !CONFIG_USER_ONLY */
666 
667     return false;
668 }
669 
670 static inline void cpu_handle_debug_exception(CPUState *cpu)
671 {
672     const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
673     CPUWatchpoint *wp;
674 
675     if (!cpu->watchpoint_hit) {
676         QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
677             wp->flags &= ~BP_WATCHPOINT_HIT;
678         }
679     }
680 
681     if (tcg_ops->debug_excp_handler) {
682         tcg_ops->debug_excp_handler(cpu);
683     }
684 }
685 
686 static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
687 {
688     if (cpu->exception_index < 0) {
689 #ifndef CONFIG_USER_ONLY
690         if (replay_has_exception()
691             && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
692             /* Execute just one insn to trigger exception pending in the log */
693             cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
694                 | CF_NOIRQ | 1;
695         }
696 #endif
697         return false;
698     }
699 
700     if (cpu->exception_index >= EXCP_INTERRUPT) {
701         /* exit request from the cpu execution loop */
702         *ret = cpu->exception_index;
703         if (*ret == EXCP_DEBUG) {
704             cpu_handle_debug_exception(cpu);
705         }
706         cpu->exception_index = -1;
707         return true;
708     }
709 
710 #if defined(CONFIG_USER_ONLY)
711     /*
712      * If user mode only, we simulate a fake exception which will be
713      * handled outside the cpu execution loop.
714      */
715     const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
716     if (tcg_ops->fake_user_interrupt) {
717         tcg_ops->fake_user_interrupt(cpu);
718     }
719     *ret = cpu->exception_index;
720     cpu->exception_index = -1;
721     return true;
722 #else
723     if (replay_exception()) {
724         const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
725 
726         bql_lock();
727         tcg_ops->do_interrupt(cpu);
728         bql_unlock();
729         cpu->exception_index = -1;
730 
731         if (unlikely(cpu->singlestep_enabled)) {
732             /*
733              * After processing the exception, ensure an EXCP_DEBUG is
734              * raised when single-stepping so that GDB doesn't miss the
735              * next instruction.
736              */
737             *ret = EXCP_DEBUG;
738             cpu_handle_debug_exception(cpu);
739             return true;
740         }
741     } else if (!replay_has_interrupt()) {
742         /* give a chance to iothread in replay mode */
743         *ret = EXCP_INTERRUPT;
744         return true;
745     }
746 #endif
747 
748     return false;
749 }
750 
751 static inline bool icount_exit_request(CPUState *cpu)
752 {
753     if (!icount_enabled()) {
754         return false;
755     }
756     if (cpu->cflags_next_tb != -1 && !(cpu->cflags_next_tb & CF_USE_ICOUNT)) {
757         return false;
758     }
759     return cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0;
760 }
761 
/*
 * Service cpu->interrupt_request and cpu->exit_request before the next
 * TB is executed.
 *
 * @cpu: the CPU being run
 * @last_tb: in/out; set to NULL whenever program flow changed, so that
 *           the caller does not use the previous TB afterwards
 *
 * Returns true when the caller must stop executing TBs (an exception
 * index has been set, or an exit/reset was processed); false to carry
 * on with the next TB.  The BQL is taken while interrupt_request is
 * examined and is released on every return path of this function.
 */
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here. Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_set_mb(&cpu->neg.icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        /* Local snapshot; may be masked below and reloaded after hooks. */
        int interrupt_request;
        bql_lock();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        /* A debug request is turned into EXCP_DEBUG before anything else. */
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            bql_unlock();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            /* Halt request: mark the cpu halted and leave with EXCP_HLT. */
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            bql_unlock();
            return true;
        } else {
            const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

            /* Reset request: delegate to the target's reset hook. */
            if (interrupt_request & CPU_INTERRUPT_RESET) {
                replay_interrupt();
                tcg_ops->cpu_exec_reset(cpu);
                bql_unlock();
                return true;
            }

            /*
             * The target hook has 3 exit conditions:
             * False when the interrupt isn't processed,
             * True when it is, and we should restart on a new TB,
             * and via longjmp via cpu_loop_exit.
             */
            if (tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                /* Record the interrupt unless the target opts out. */
                if (!tcg_ops->need_replay_interrupt ||
                    tcg_ops->need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    bql_unlock();
                    return true;
                }
                cpu->exception_index = -1;
                /* Restart on a new TB: forget the previous one. */
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        bql_unlock();
    }

    /* Finally, check if we need to exit to the main loop.  */
    if (unlikely(qatomic_read(&cpu->exit_request)) || icount_exit_request(cpu)) {
        qatomic_set(&cpu->exit_request, 0);
        /* Do not overwrite an exception index that is already pending. */
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}
866 
867 static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
868                                     vaddr pc, TranslationBlock **last_tb,
869                                     int *tb_exit)
870 {
871     trace_exec_tb(tb, pc);
872     tb = cpu_tb_exec(cpu, tb, tb_exit);
873     if (*tb_exit != TB_EXIT_REQUESTED) {
874         *last_tb = tb;
875         return;
876     }
877 
878     *last_tb = NULL;
879     if (cpu_loop_exit_requested(cpu)) {
880         /* Something asked us to stop executing chained TBs; just
881          * continue round the main loop. Whatever requested the exit
882          * will also have set something else (eg exit_request or
883          * interrupt_request) which will be handled by
884          * cpu_handle_interrupt.  cpu_handle_interrupt will also
885          * clear cpu->icount_decr.u16.high.
886          */
887         return;
888     }
889 
890     /* Instruction counter expired.  */
891     assert(icount_enabled());
892 #ifndef CONFIG_USER_ONLY
893     /* Ensure global icount has gone forward */
894     icount_update(cpu);
895     /* Refill decrementer and continue execution.  */
896     int32_t insns_left = MIN(0xffff, cpu->icount_budget);
897     cpu->neg.icount_decr.u16.low = insns_left;
898     cpu->icount_extra = cpu->icount_budget - insns_left;
899 
900     /*
901      * If the next tb has more instructions than we have left to
902      * execute we need to ensure we find/generate a TB with exactly
903      * insns_left instructions in it.
904      */
905     if (insns_left > 0 && insns_left < tb->icount)  {
906         assert(insns_left <= CF_COUNT_MASK);
907         assert(cpu->icount_extra == 0);
908         cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
909     }
910 #endif
911 }
912 
913 /* main execution loop */
914 
915 static int __attribute__((noinline))
916 cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
917 {
918     int ret;
919 
920     /* if an exception is pending, we execute it here */
921     while (!cpu_handle_exception(cpu, &ret)) {
922         TranslationBlock *last_tb = NULL;
923         int tb_exit = 0;
924 
925         while (!cpu_handle_interrupt(cpu, &last_tb)) {
926             TranslationBlock *tb;
927             TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
928             s.cflags = cpu->cflags_next_tb;
929 
930             /*
931              * When requested, use an exact setting for cflags for the next
932              * execution.  This is used for icount, precise smc, and stop-
933              * after-access watchpoints.  Since this request should never
934              * have CF_INVALID set, -1 is a convenient invalid value that
935              * does not require tcg headers for cpu_common_reset.
936              */
937             if (s.cflags == -1) {
938                 s.cflags = curr_cflags(cpu);
939             } else {
940                 cpu->cflags_next_tb = -1;
941             }
942 
943             if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
944                 break;
945             }
946 
947             tb = tb_lookup(cpu, s);
948             if (tb == NULL) {
949                 CPUJumpCache *jc;
950                 uint32_t h;
951 
952                 mmap_lock();
953                 tb = tb_gen_code(cpu, s);
954                 mmap_unlock();
955 
956                 /*
957                  * We add the TB in the virtual pc hash table
958                  * for the fast lookup
959                  */
960                 h = tb_jmp_cache_hash_func(s.pc);
961                 jc = cpu->tb_jmp_cache;
962                 jc->array[h].pc = s.pc;
963                 qatomic_set(&jc->array[h].tb, tb);
964             }
965 
966 #ifndef CONFIG_USER_ONLY
967             /*
968              * We don't take care of direct jumps when address mapping
969              * changes in system emulation.  So it's not safe to make a
970              * direct jump to a TB spanning two pages because the mapping
971              * for the second page can change.
972              */
973             if (tb_page_addr1(tb) != -1) {
974                 last_tb = NULL;
975             }
976 #endif
977             /* See if we can patch the calling TB. */
978             if (last_tb) {
979                 tb_add_jump(last_tb, tb_exit, tb);
980             }
981 
982             cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit);
983 
984             /* Try to align the host and virtual clocks
985                if the guest is in advance */
986             align_clocks(sc, cpu);
987         }
988     }
989     return ret;
990 }
991 
/*
 * Arm cpu->jmp_env with sigsetjmp() and then enter the main execution loop.
 *
 * sigsetjmp() is called with savemask == 0.  When it returns non-zero,
 * i.e. control came back through a jump to cpu->jmp_env,
 * cpu_exec_longjmp_cleanup() runs before cpu_exec_loop() is entered again.
 *
 * Returns the value returned by cpu_exec_loop().
 *
 * NOTE(review): the function is deliberately tiny and has no locals of its
 * own; presumably this is to keep the sigsetjmp() frame trivial -- confirm
 * before restructuring.
 */
992 static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
993 {
994     /* Prepare the sigsetjmp context used for exception handling. */
995     if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
996         cpu_exec_longjmp_cleanup(cpu);
997     }
998 
999     return cpu_exec_loop(cpu, sc);
1000 }
1001 
1002 int cpu_exec(CPUState *cpu)
1003 {
1004     int ret;
1005     SyncClocks sc = { 0 };
1006 
1007     /* replay_interrupt may need current_cpu */
1008     current_cpu = cpu;
1009 
1010     if (cpu_handle_halt(cpu)) {
1011         return EXCP_HALTED;
1012     }
1013 
1014     RCU_READ_LOCK_GUARD();
1015     cpu_exec_enter(cpu);
1016 
1017     /*
1018      * Calculate difference between guest clock and host clock.
1019      * This delay includes the delay of the last cycle, so
1020      * what we have to do is sleep until it is 0. As for the
1021      * advance/delay we gain here, we try to fix it next time.
1022      */
1023     init_delay_params(&sc, cpu);
1024 
1025     ret = cpu_exec_setjmp(cpu, &sc);
1026 
1027     cpu_exec_exit(cpu);
1028     return ret;
1029 }
1030 
1031 bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
1032 {
1033     static bool tcg_target_initialized;
1034 
1035     if (!tcg_target_initialized) {
1036         /* Check mandatory TCGCPUOps handlers */
1037         const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
1038 #ifndef CONFIG_USER_ONLY
1039         assert(tcg_ops->cpu_exec_halt);
1040         assert(tcg_ops->cpu_exec_interrupt);
1041         assert(tcg_ops->cpu_exec_reset);
1042 #endif /* !CONFIG_USER_ONLY */
1043         assert(tcg_ops->translate_code);
1044         assert(tcg_ops->get_tb_cpu_state);
1045         assert(tcg_ops->mmu_index);
1046         tcg_ops->initialize();
1047         tcg_target_initialized = true;
1048     }
1049 
1050     cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
1051     tlb_init(cpu);
1052 #ifndef CONFIG_USER_ONLY
1053     tcg_iommu_init_notifier_list(cpu);
1054 #endif /* !CONFIG_USER_ONLY */
1055     /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
1056 
1057     return true;
1058 }
1059 
1060 /* undo the initializations in reverse order */
1061 void tcg_exec_unrealizefn(CPUState *cpu)
1062 {
1063 #ifndef CONFIG_USER_ONLY
1064     tcg_iommu_free_notifier_list(cpu);
1065 #endif /* !CONFIG_USER_ONLY */
1066 
1067     tlb_destroy(cpu);
1068     g_free_rcu(cpu->tb_jmp_cache, rcu);
1069 }
1070