1 /*
2 * emulator main execution loop
3 *
4 * Copyright (c) 2003-2005 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/qemu-print.h"
22 #include "qapi/error.h"
23 #include "qapi/type-helpers.h"
24 #include "hw/core/cpu.h"
25 #include "accel/tcg/cpu-ops.h"
26 #include "accel/tcg/helper-retaddr.h"
27 #include "trace.h"
28 #include "disas/disas.h"
29 #include "exec/cpu-common.h"
30 #include "exec/cpu-interrupt.h"
31 #include "exec/page-protection.h"
32 #include "exec/mmap-lock.h"
33 #include "exec/translation-block.h"
34 #include "tcg/tcg.h"
35 #include "qemu/atomic.h"
36 #include "qemu/rcu.h"
37 #include "exec/log.h"
38 #include "qemu/main-loop.h"
39 #include "exec/icount.h"
40 #include "exec/replay-core.h"
41 #include "system/tcg.h"
42 #include "exec/helper-proto-common.h"
43 #include "tcg-accel-ops.h"
44 #include "tb-jmp-cache.h"
45 #include "tb-hash.h"
46 #include "tb-context.h"
47 #include "tb-internal.h"
48 #include "internal-common.h"
49
50 /* -icount align implementation. */
51
/*
 * Bookkeeping used to keep the guest's virtual clock aligned with the
 * host clock while executing translated code.
 */
typedef struct SyncClocks {
    /* Virtual clock minus real-time clock, in ns; positive = guest ahead. */
    int64_t diff_clk;
    /* icount_extra + icount_decr.u16.low as sampled at the last update. */
    int64_t last_cpu_icount;
    /* QEMU_CLOCK_VIRTUAL_RT reading (ns) taken by init_delay_params(). */
    int64_t realtime_clock;
} SyncClocks;
57
58 #if !defined(CONFIG_USER_ONLY)
/*
 * Allow the guest to run at most 3ms ahead of the host clock.
 * The difference between the two clocks can therefore oscillate
 * around 0.  The value is expressed in ns, like SyncClocks.diff_clk.
 */
#define VM_CLOCK_ADVANCE 3000000
/*
 * The guest's lateness (in seconds) must drop this far below the last
 * reported threshold before print_delay() reports again.
 */
#define THRESHOLD_REDUCE 1.5
/* Minimum real-time interval, in ns, between two lateness warnings. */
#define MAX_DELAY_PRINT_RATE 2000000000LL
/* Upper bound on the number of lateness warnings printed. */
#define MAX_NB_PRINTS 100

/* Smallest (most negative) diff_clk recorded by init_delay_params(). */
int64_t max_delay;
/* Largest diff_clk recorded by init_delay_params(). */
int64_t max_advance;
70
/*
 * align_clocks:
 * @sc: clock synchronisation state, updated in place
 * @cpu: CPU whose remaining instruction budget is sampled
 *
 * Fold the instructions executed since the previous call into
 * sc->diff_clk and, when the guest has run more than VM_CLOCK_ADVANCE
 * ns ahead, put the host thread to sleep for that amount.  Does nothing
 * unless icount_align_option is set.
 */
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t icount_now;

    if (!icount_align_option) {
        return;
    }

    icount_now = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - icount_now);
    sc->last_cpu_icount = icount_now;

    /* Nothing to do while the guest is within the permitted advance. */
    if (sc->diff_clk <= VM_CLOCK_ADVANCE) {
        return;
    }

#ifndef _WIN32
    struct timespec request, remaining;

    request.tv_sec = sc->diff_clk / 1000000000LL;
    request.tv_nsec = sc->diff_clk % 1000000000LL;
    if (nanosleep(&request, &remaining) < 0) {
        /* Sleep did not complete: carry the unslept time forward. */
        sc->diff_clk = remaining.tv_sec * 1000000000LL + remaining.tv_nsec;
    } else {
        sc->diff_clk = 0;
    }
#else
    Sleep(sc->diff_clk / SCALE_MS);
    sc->diff_clk = 0;
#endif
}
99
print_delay(const SyncClocks * sc)100 static void print_delay(const SyncClocks *sc)
101 {
102 static float threshold_delay;
103 static int64_t last_realtime_clock;
104 static int nb_prints;
105
106 if (icount_align_option &&
107 sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
108 nb_prints < MAX_NB_PRINTS) {
109 if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
110 (-sc->diff_clk / (float)1000000000LL <
111 (threshold_delay - THRESHOLD_REDUCE))) {
112 threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
113 qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
114 threshold_delay - 1,
115 threshold_delay);
116 nb_prints++;
117 last_realtime_clock = sc->realtime_clock;
118 }
119 }
120 }
121
/*
 * init_delay_params:
 * @sc: clock synchronisation state to initialise
 * @cpu: CPU whose remaining instruction budget is sampled
 *
 * Sample the real-time and virtual clocks, record their difference and
 * the current instruction budget in @sc, and update the global
 * max_delay / max_advance extremes.  Does nothing unless
 * icount_align_option is set.
 */
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    int64_t rt_now;
    int64_t drift;

    if (!icount_align_option) {
        return;
    }

    rt_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    drift = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - rt_now;

    sc->realtime_clock = rt_now;
    sc->diff_clk = drift;
    sc->last_cpu_icount = cpu->icount_extra + cpu->neg.icount_decr.u16.low;

    if (drift < max_delay) {
        max_delay = drift;
    }
    if (drift > max_advance) {
        max_advance = drift;
    }

    /*
     * Report lateness at most once every 2s; the number of messages is
     * capped at MAX_NB_PRINTS (currently 100).
     */
    print_delay(sc);
}
142 #else
/* CONFIG_USER_ONLY variant: clock alignment is compiled out, so no-op. */
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}
146
/* CONFIG_USER_ONLY variant: there is no delay state to set up, so no-op. */
static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
150 #endif /* CONFIG USER ONLY */
151
/* Lookup key handed to tb_lookup_cmp() through qht_lookup_custom(). */
struct tb_desc {
    /* CPU state (pc, cs_base, flags, cflags) a candidate TB must match. */
    TCGTBCPUState s;
    /* CPU environment, used to resolve the address of a second page. */
    CPUArchState *env;
    /* Result of get_page_addr_code() for s.pc. */
    tb_page_addr_t page_addr0;
};
157
tb_lookup_cmp(const void * p,const void * d)158 static bool tb_lookup_cmp(const void *p, const void *d)
159 {
160 const TranslationBlock *tb = p;
161 const struct tb_desc *desc = d;
162
163 if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->s.pc) &&
164 tb_page_addr0(tb) == desc->page_addr0 &&
165 tb->cs_base == desc->s.cs_base &&
166 tb->flags == desc->s.flags &&
167 tb_cflags(tb) == desc->s.cflags) {
168 /* check next page if needed */
169 tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
170 if (tb_phys_page1 == -1) {
171 return true;
172 } else {
173 tb_page_addr_t phys_page1;
174 vaddr virt_page1;
175
176 /*
177 * We know that the first page matched, and an otherwise valid TB
178 * encountered an incomplete instruction at the end of that page,
179 * therefore we know that generating a new TB from the current PC
180 * must also require reading from the next page -- even if the
181 * second pages do not match, and therefore the resulting insn
182 * is different for the new TB. Therefore any exception raised
183 * here by the faulting lookup is not premature.
184 */
185 virt_page1 = TARGET_PAGE_ALIGN(desc->s.pc);
186 phys_page1 = get_page_addr_code(desc->env, virt_page1);
187 if (tb_phys_page1 == phys_page1) {
188 return true;
189 }
190 }
191 }
192 return false;
193 }
194
tb_htable_lookup(CPUState * cpu,TCGTBCPUState s)195 static TranslationBlock *tb_htable_lookup(CPUState *cpu, TCGTBCPUState s)
196 {
197 tb_page_addr_t phys_pc;
198 struct tb_desc desc;
199 uint32_t h;
200
201 desc.s = s;
202 desc.env = cpu_env(cpu);
203 phys_pc = get_page_addr_code(desc.env, s.pc);
204 if (phys_pc == -1) {
205 return NULL;
206 }
207 desc.page_addr0 = phys_pc;
208 h = tb_hash_func(phys_pc, (s.cflags & CF_PCREL ? 0 : s.pc),
209 s.flags, s.cs_base, s.cflags);
210 return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
211 }
212
213 /**
214 * tb_lookup:
215 * @cpu: CPU that will execute the returned translation block
216 * @pc: guest PC
217 * @cs_base: arch-specific value associated with translation block
218 * @flags: arch-specific translation block flags
219 * @cflags: CF_* flags
220 *
221 * Look up a translation block inside the QHT using @pc, @cs_base, @flags and
222 * @cflags. Uses @cpu's tb_jmp_cache. Might cause an exception, so have a
223 * longjmp destination ready.
224 *
225 * Returns: an existing translation block or NULL.
226 */
tb_lookup(CPUState * cpu,TCGTBCPUState s)227 static inline TranslationBlock *tb_lookup(CPUState *cpu, TCGTBCPUState s)
228 {
229 TranslationBlock *tb;
230 CPUJumpCache *jc;
231 uint32_t hash;
232
233 /* we should never be trying to look up an INVALID tb */
234 tcg_debug_assert(!(s.cflags & CF_INVALID));
235
236 hash = tb_jmp_cache_hash_func(s.pc);
237 jc = cpu->tb_jmp_cache;
238
239 tb = qatomic_read(&jc->array[hash].tb);
240 if (likely(tb &&
241 jc->array[hash].pc == s.pc &&
242 tb->cs_base == s.cs_base &&
243 tb->flags == s.flags &&
244 tb_cflags(tb) == s.cflags)) {
245 goto hit;
246 }
247
248 tb = tb_htable_lookup(cpu, s);
249 if (tb == NULL) {
250 return NULL;
251 }
252
253 jc->array[hash].pc = s.pc;
254 qatomic_set(&jc->array[hash].tb, tb);
255
256 hit:
257 /*
258 * As long as tb is not NULL, the contents are consistent. Therefore,
259 * the virtual PC has to match for non-CF_PCREL translations.
260 */
261 assert((tb_cflags(tb) & CF_PCREL) || tb->pc == s.pc);
262 return tb;
263 }
264
/*
 * log_cpu_exec:
 * @pc: guest PC at which execution of @tb starts
 * @cpu: CPU about to execute @tb
 * @tb: translation block being entered
 *
 * When @pc lies in the logged address range, emit a CPU_LOG_EXEC trace
 * line and, if CPU_LOG_TB_CPU is enabled, dump the CPU state.
 */
static void log_cpu_exec(vaddr pc, CPUState *cpu,
                         const TranslationBlock *tb)
{
    FILE *logfile;
    int dump_flags;

    if (!qemu_log_in_addr_range(pc)) {
        return;
    }

    qemu_log_mask(CPU_LOG_EXEC,
                  "Trace %d: %p [%08" PRIx64
                  "/%016" VADDR_PRIx "/%08x/%08x] %s\n",
                  cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                  tb->flags, tb->cflags, lookup_symbol(pc));

    if (!qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
        return;
    }

    logfile = qemu_log_trylock();
    if (!logfile) {
        return;
    }

    /* Optional register classes are added on top of the base dump. */
    dump_flags = CPU_DUMP_CCOP;
    if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
        dump_flags |= CPU_DUMP_FPU;
    }
    if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
        dump_flags |= CPU_DUMP_VPU;
    }

    cpu_dump_state(cpu, logfile, dump_flags);
    qemu_log_unlock(logfile);
}
292
/*
 * check_for_breakpoints_slow:
 * @cpu: CPU whose breakpoint list is scanned
 * @pc: guest PC about to be executed
 * @cflags: in/out CF_* flags for the next TB
 *
 * Returns true, with cpu->exception_index set to EXCP_DEBUG, when a
 * breakpoint at @pc triggers.  Otherwise returns false; if some
 * breakpoint shares @pc's page, *@cflags is rewritten so that the next
 * TB holds a single instruction and is not chained via goto_tb.
 */
static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
                                       uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool bp_on_page = false;

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        bool triggered = false;

        if (pc != bp->pc) {
            /* Not an exact hit; note whether it lies within our page. */
            if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
                bp_on_page = true;
            }
            continue;
        }

        /* Exact pc match: decide whether this breakpoint fires. */
        if (bp->flags & BP_GDB) {
            triggered = true;
        } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
            g_assert_not_reached();
#else
            const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
            assert(tcg_ops->debug_check_breakpoint);
            triggered = tcg_ops->debug_check_breakpoint(cpu);
#endif
        }

        if (triggered) {
            cpu->exception_index = EXCP_DEBUG;
            return true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (bp_on_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
    }
    return false;
}
358
/*
 * Fast-path wrapper: with no breakpoints installed on @cpu there is
 * nothing to check, so only fall into the slow path when the list is
 * non-empty.  Return value and @cflags handling are those of
 * check_for_breakpoints_slow().
 */
static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
                                         uint32_t *cflags)
{
    if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
        return false;
    }
    return check_for_breakpoints_slow(cpu, pc, cflags);
}
365
366 /**
367 * helper_lookup_tb_ptr: quick check for next tb
368 * @env: current cpu state
369 *
370 * Look for an existing TB matching the current cpu state.
371 * If found, return the code pointer. If not found, return
372 * the tcg epilogue so that we return into cpu_tb_exec.
373 */
HELPER(lookup_tb_ptr)374 const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
375 {
376 CPUState *cpu = env_cpu(env);
377 TranslationBlock *tb;
378
379 /*
380 * By definition we've just finished a TB, so I/O is OK.
381 * Avoid the possibility of calling cpu_io_recompile() if
382 * a page table walk triggered by tb_lookup() calling
383 * probe_access_internal() happens to touch an MMIO device.
384 * The next TB, if we chain to it, will clear the flag again.
385 */
386 cpu->neg.can_do_io = true;
387
388 TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
389 s.cflags = curr_cflags(cpu);
390
391 if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
392 cpu_loop_exit(cpu);
393 }
394
395 tb = tb_lookup(cpu, s);
396 if (tb == NULL) {
397 return tcg_code_gen_epilogue;
398 }
399
400 if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
401 log_cpu_exec(s.pc, cpu, tb);
402 }
403
404 return tb->tc.ptr;
405 }
406
407 /* Return the current PC from CPU, which may be cached in TB. */
log_pc(CPUState * cpu,const TranslationBlock * tb)408 static vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
409 {
410 if (tb_cflags(tb) & CF_PCREL) {
411 return cpu->cc->get_pc(cpu);
412 } else {
413 return tb->pc;
414 }
415 }
416
/*
 * Execute a TB, and fix up the CPU state afterwards if necessary.
 *
 * @cpu: CPU on which to run the translated code
 * @itb: translation block whose host code (itb->tc.ptr) is entered
 * @tb_exit: out parameter; receives the TB_EXIT_MASK bits of the value
 *           returned by tcg_qemu_tb_exec()
 *
 * Returns the TB pointer carried in the remaining bits of that value,
 * converted to its read-write view.  Does not return when gdb
 * single-stepping raises EXCP_DEBUG through cpu_loop_exit().
 */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call. Since such function did not exist at compile time,
 * the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU so this does not
 * affect the impact of CFI in environment with high security requirements
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
    }

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(cpu_env(cpu), tb_ptr);
    /* Back in C code: I/O is permitted again. */
    cpu->neg.can_do_io = true;
    qemu_plugin_disable_mem_helpers(cpu);
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    /* The return value packs a TB pointer with an exit code in the low bits. */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = cpu->cc;
        const TCGCPUOps *tcg_ops = cc->tcg_ops;

        if (tcg_ops->synchronize_from_tb) {
            tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            /* Without the hook, the pc is taken from the TB itself. */
            tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
            vaddr pc = log_pc(cpu, last_tb);
            if (qemu_log_in_addr_range(pc)) {
                qemu_log("Stopped execution of TB chain before %p [%016"
                         VADDR_PRIx "] %s\n",
                         last_tb->tc.ptr, pc, lookup_symbol(pc));
            }
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}
492
493
/* Invoke the optional cpu_exec_enter hook of @cpu's TCG ops, if any. */
static void cpu_exec_enter(CPUState *cpu)
{
    const TCGCPUOps *ops = cpu->cc->tcg_ops;

    if (!ops->cpu_exec_enter) {
        return;
    }
    ops->cpu_exec_enter(cpu);
}
502
/* Invoke the optional cpu_exec_exit hook of @cpu's TCG ops, if any. */
static void cpu_exec_exit(CPUState *cpu)
{
    const TCGCPUOps *ops = cpu->cc->tcg_ops;

    if (!ops->cpu_exec_exit) {
        return;
    }
    ops->cpu_exec_exit(cpu);
}
511
/*
 * cpu_exec_longjmp_cleanup:
 * @cpu: CPU whose jmp_env was the longjmp target; must be current_cpu
 *
 * Release whatever may still be held when execution or translation was
 * abandoned via longjmp: the mmap lock (user mode) or the pages locked
 * for an in-progress translation (system mode), and the BQL.
 */
static void cpu_exec_longjmp_cleanup(CPUState *cpu)
{
    /* Non-buggy compilers preserve this; assert the correct value. */
    g_assert(cpu == current_cpu);

#ifdef CONFIG_USER_ONLY
    clear_helper_retaddr();
    if (have_mmap_lock()) {
        mmap_unlock();
    }
#else
    /*
     * For softmmu, a tlb_fill fault during translation will land here,
     * and we need to release any page locks held.  In system mode we
     * have one tcg_ctx per thread, so we know it was this cpu doing
     * the translation.
     *
     * Alternative 1: Install a cleanup to be called via an exception
     * handling safe longjmp.  It seems plausible that all our hosts
     * support such a thing.  We'd have to properly register unwind info
     * for the JIT for EH, rather than just for GDB.
     *
     * Alternative 2: Set and restore cpu->jmp_env in tb_gen_code to
     * capture the cpu_loop_exit longjmp, perform the cleanup, and
     * jump again to arrive here.
     */
    if (tcg_ctx->gen_tb) {
        tb_unlock_pages(tcg_ctx->gen_tb);
        tcg_ctx->gen_tb = NULL;
    }
#endif
    if (bql_locked()) {
        bql_unlock();
    }
    assert_no_pages_locked();
}
548
/*
 * cpu_exec_step_atomic:
 * @cpu: CPU to step; must be current_cpu and must not be running
 *
 * Execute a single guest instruction on @cpu inside an exclusive
 * section (start_exclusive() ... end_exclusive()), using a TB looked up
 * or generated with CF_PARALLEL cleared and an instruction count of 1.
 */
void cpu_exec_step_atomic(CPUState *cpu)
{
    TranslationBlock *tb;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
        s.cflags = curr_cflags(cpu);

        /* Execute in a serial context. */
        s.cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        s.cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, s);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, s);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, s.pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        /* Arrived here via longjmp out of translation or execution. */
        cpu_exec_longjmp_cleanup(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}
599
/*
 * tb_set_jmp_target:
 * @tb: translation block whose direct jump is being retargeted
 * @n: index of the jump slot within @tb
 * @addr: new destination address for that jump
 *
 * Record @addr in tb->jmp_target_addr[n] and let the backend patch the
 * jump instruction accordingly.
 */
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    /*
     * The backend works on the rx view of the structure, from which it
     * finds the executable code address and can produce a pc-relative
     * displacement to jmp_target_addr[n].
     */
    const TranslationBlock *tb_rx = tcg_splitwx_to_rx(tb);
    /* Address of the jump insn in the executable mapping ... */
    uintptr_t insn_rx = (uintptr_t)tb->tc.ptr + tb->jmp_insn_offset[n];
    /* ... and the same insn as seen through the writable mapping. */
    uintptr_t insn_rw = insn_rx - tcg_splitwx_diff;

    tb->jmp_target_addr[n] = addr;
    tb_target_set_jmp_target(tb_rx, n, insn_rx, insn_rw);
}
615
/*
 * tb_add_jump:
 * @tb: translation block whose exit is to be chained
 * @n: index of the exit (jump slot) in @tb
 * @tb_next: translation block to jump to
 *
 * Chain exit @n of @tb directly to @tb_next.  Nothing is changed when
 * @tb_next is marked CF_INVALID or when the slot already has a
 * destination.  The work is done under tb_next->jmp_lock.
 */
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list; the slot index is stored in the low bits */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
                  tb->tc.ptr, n, tb_next->tc.ptr);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
}
652
/*
 * cpu_handle_halt:
 * @cpu: CPU to examine
 *
 * Returns true when @cpu is halted and its cpu_exec_halt hook says it
 * must stay halted.  Otherwise the halted flag is cleared (if it was
 * set) and false is returned.  Always false in user-only builds.
 */
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
        const TCGCPUOps *ops = cpu->cc->tcg_ops;

        if (!ops->cpu_exec_halt(cpu)) {
            return true;
        }

        /* The hook allowed us to wake up. */
        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}
670
/*
 * cpu_handle_debug_exception:
 * @cpu: CPU that took EXCP_DEBUG
 *
 * If no watchpoint is recorded as hit, clear BP_WATCHPOINT_HIT on every
 * watchpoint of @cpu; then call the optional debug_excp_handler hook.
 */
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    const TCGCPUOps *ops = cpu->cc->tcg_ops;

    if (!cpu->watchpoint_hit) {
        CPUWatchpoint *wp;

        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (!ops->debug_excp_handler) {
        return;
    }
    ops->debug_excp_handler(cpu);
}
686
/*
 * cpu_handle_exception:
 * @cpu: CPU whose pending exception_index is processed
 * @ret: out parameter; set to the exit code when true is returned
 *
 * Returns true when the execution loop must stop and hand *@ret back to
 * its caller; returns false when execution should carry on.
 */
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    /* No exception pending. */
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
                | CF_NOIRQ | 1;
        }
#endif
        return false;
    }

    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    }

#if defined(CONFIG_USER_ONLY)
    /*
     * If user mode only, we simulate a fake exception which will be
     * handled outside the cpu execution loop.
     */
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
    if (tcg_ops->fake_user_interrupt) {
        tcg_ops->fake_user_interrupt(cpu);
    }
    *ret = cpu->exception_index;
    cpu->exception_index = -1;
    return true;
#else
    if (replay_exception()) {
        const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

        /* The target's do_interrupt hook is called with the BQL held. */
        bql_lock();
        tcg_ops->do_interrupt(cpu);
        bql_unlock();
        cpu->exception_index = -1;

        if (unlikely(cpu->singlestep_enabled)) {
            /*
             * After processing the exception, ensure an EXCP_DEBUG is
             * raised when single-stepping so that GDB doesn't miss the
             * next instruction.
             */
            *ret = EXCP_DEBUG;
            cpu_handle_debug_exception(cpu);
            return true;
        }
    } else if (!replay_has_interrupt()) {
        /* give a chance to iothread in replay mode */
        *ret = EXCP_INTERRUPT;
        return true;
    }
#endif

    return false;
}
751
/*
 * tcg_kick_vcpu_thread:
 * @cpu: CPU to kick
 *
 * Request that @cpu leave translated code: set cpu->exit_request and
 * then store -1 into icount_decr.u16.high, both with release semantics.
 */
void tcg_kick_vcpu_thread(CPUState *cpu)
{
    /*
     * Ensure cpu_exec will see the reason why the exit request was set.
     * FIXME: this is not always needed.  Other accelerators instead
     * read interrupt_request and set exit_request on demand from the
     * CPU thread; see kvm_arch_pre_run() for example.
     */
    qatomic_store_release(&cpu->exit_request, true);

    /* Ensure cpu_exec will see the exit request after TCG has exited. */
    qatomic_store_release(&cpu->neg.icount_decr.u16.high, -1);
}
765
/*
 * icount_exit_request:
 * @cpu: CPU to examine
 *
 * Returns true when icount is enabled, the next TB is not explicitly
 * requested without CF_USE_ICOUNT, and the instruction budget
 * (icount_decr.u16.low + icount_extra) is exhausted.
 */
static inline bool icount_exit_request(CPUState *cpu)
{
    if (!icount_enabled()) {
        return false;
    }

    /* Either no custom cflags are pending, or they still count insns. */
    if (cpu->cflags_next_tb == -1 || (cpu->cflags_next_tb & CF_USE_ICOUNT)) {
        return cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0;
    }

    return false;
}
776
/*
 * cpu_handle_interrupt:
 * @cpu: CPU whose pending interrupt and exit requests are processed
 * @last_tb: in/out; set to NULL whenever the previously executed TB
 *           must not be chained to the next one
 *
 * Returns true when the inner TB loop of cpu_exec_loop() must be left
 * so that cpu_handle_exception() runs again; returns false when the
 * next TB may be executed.
 */
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here.  Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also store-release in
     * tcg_kick_vcpu_thread())
     */
    qatomic_set_mb(&cpu->neg.icount_decr.u16.high, 0);

#ifdef CONFIG_USER_ONLY
    assert(!cpu_test_interrupt(cpu, ~0));
#else
    if (unlikely(cpu_test_interrupt(cpu, ~0))) {
        /* Every exit path from this block must drop the BQL again. */
        bql_lock();
        if (cpu_test_interrupt(cpu, CPU_INTERRUPT_DEBUG)) {
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_DEBUG);
            cpu->exception_index = EXCP_DEBUG;
            bql_unlock();
            return true;
        }
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HALT)) {
            replay_interrupt();
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_HALT);
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            bql_unlock();
            return true;
        } else {
            const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
            int interrupt_request = cpu->interrupt_request;

            if (cpu_test_interrupt(cpu, CPU_INTERRUPT_RESET)) {
                replay_interrupt();
                tcg_ops->cpu_exec_reset(cpu);
                bql_unlock();
                return true;
            }

            if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
                /* Mask out external interrupts for this step. */
                interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
            }

            /*
             * The target hook has 3 exit conditions:
             * False when the interrupt isn't processed,
             * True when it is, and we should restart on a new TB,
             * and via longjmp via cpu_loop_exit.
             */
            if (tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (!tcg_ops->need_replay_interrupt ||
                    tcg_ops->need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    bql_unlock();
                    return true;
                }
                cpu->exception_index = -1;
                *last_tb = NULL;
            }
        }
        if (cpu_test_interrupt(cpu, CPU_INTERRUPT_EXITTB)) {
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_EXITTB);
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        bql_unlock();
    }
#endif /* !CONFIG_USER_ONLY */

    /*
     * Finally, check if we need to exit to the main loop.
     * The corresponding store-release is in cpu_exit.
     */
    if (unlikely(qatomic_load_acquire(&cpu->exit_request)) ||
        icount_exit_request(cpu)) {
        /* Do not overwrite an exception that is already pending. */
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}
883
/*
 * cpu_loop_exec_tb:
 * @cpu: CPU executing the code
 * @tb: translation block to run
 * @pc: guest PC of @tb, used for tracing
 * @last_tb: out; the TB that was exited, or NULL when the exit was
 *           TB_EXIT_REQUESTED
 * @tb_exit: out; exit code reported by cpu_tb_exec()
 *
 * Run @tb.  When it stopped because the instruction counter expired
 * (and not because an exit was requested), refill the decrementer from
 * cpu->icount_budget and, if needed, request a shorter next TB.
 */
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    vaddr pc, TranslationBlock **last_tb,
                                    int *tb_exit)
{
    trace_exec_tb(tb, pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    if (cpu_loop_exit_requested(cpu)) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt. cpu_handle_interrupt will also
         * clear cpu->neg.icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution.  */
    int32_t insns_left = MIN(0xffff, cpu->icount_budget);
    cpu->neg.icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount) {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}
929
/*
 * main execution loop
 *
 * @cpu: CPU to run
 * @sc: clock alignment state, passed on to align_clocks()
 *
 * Repeatedly find (or generate) and execute translation blocks until
 * cpu_handle_exception() asks to stop.  Returns the exit code that
 * cpu_handle_exception() stored.
 *
 * NOTE(review): noinline presumably keeps this function's locals out of
 * the frame that calls sigsetjmp() in cpu_exec_setjmp() -- confirm.
 */

static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
    int ret;

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        /* No TB has run yet in this pass, so there is nothing to chain. */
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
            s.cflags = cpu->cflags_next_tb;

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            if (s.cflags == -1) {
                s.cflags = curr_cflags(cpu);
            } else {
                /* The request is one-shot: consume it. */
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
                break;
            }

            tb = tb_lookup(cpu, s);
            if (tb == NULL) {
                CPUJumpCache *jc;
                uint32_t h;

                mmap_lock();
                tb = tb_gen_code(cpu, s);
                mmap_unlock();

                /*
                 * We add the TB in the virtual pc hash table
                 * for the fast lookup
                 */
                h = tb_jmp_cache_hash_func(s.pc);
                jc = cpu->tb_jmp_cache;
                jc->array[h].pc = s.pc;
                qatomic_set(&jc->array[h].tb, tb);
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb_page_addr1(tb) != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is in advance */
            align_clocks(sc, cpu);
        }
    }
    return ret;
}
1008
/*
 * cpu_exec_setjmp:
 * @cpu: CPU to run
 * @sc: clock alignment state
 *
 * Arm cpu->jmp_env and run the execution loop.  When a longjmp lands
 * back here, release held resources and enter the loop again.
 * Returns the value returned by cpu_exec_loop().
 */
static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
{
    /* Prepare setjmp context for exception handling. */
    if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
        cpu_exec_longjmp_cleanup(cpu);
    }

    return cpu_exec_loop(cpu, sc);
}
1018
/*
 * cpu_exec:
 * @cpu: CPU to run
 *
 * Entry point for executing guest code on @cpu.  Returns EXCP_HALTED
 * immediately when the CPU is to remain halted; otherwise runs the
 * execution loop and returns its exit code.
 */
int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    RCU_READ_LOCK_GUARD();
    cpu_exec_enter(cpu);

    /*
     * Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0. As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    ret = cpu_exec_setjmp(cpu, &sc);

    cpu_exec_exit(cpu);
    return ret;
}
1047
tcg_exec_realizefn(CPUState * cpu,Error ** errp)1048 bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
1049 {
1050 static bool tcg_target_initialized;
1051
1052 if (!tcg_target_initialized) {
1053 /* Check mandatory TCGCPUOps handlers */
1054 const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
1055 #ifndef CONFIG_USER_ONLY
1056 assert(tcg_ops->cpu_exec_halt);
1057 assert(tcg_ops->cpu_exec_interrupt);
1058 assert(tcg_ops->cpu_exec_reset);
1059 assert(tcg_ops->pointer_wrap);
1060 #endif /* !CONFIG_USER_ONLY */
1061 assert(tcg_ops->translate_code);
1062 assert(tcg_ops->get_tb_cpu_state);
1063 assert(tcg_ops->mmu_index);
1064 tcg_ops->initialize();
1065 tcg_target_initialized = true;
1066 }
1067
1068 cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
1069 tlb_init(cpu);
1070 #ifndef CONFIG_USER_ONLY
1071 tcg_iommu_init_notifier_list(cpu);
1072 #endif /* !CONFIG_USER_ONLY */
1073 /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
1074
1075 return true;
1076 }
1077
/*
 * tcg_exec_unrealizefn:
 * @cpu: CPU being unrealized
 *
 * Undo the per-CPU initializations of tcg_exec_realizefn() in reverse
 * order: IOMMU notifier list (system mode), TLB, then the jump cache,
 * which is freed through g_free_rcu().
 */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    tlb_destroy(cpu);
    g_free_rcu(cpu->tb_jmp_cache, rcu);
}
1088