xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 587adaca)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 
23 #define NO_CPU_IO_DEFS
24 #include "trace.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #if defined(CONFIG_USER_ONLY)
29 #include "qemu.h"
30 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
31 #include <sys/param.h>
32 #if __FreeBSD_version >= 700104
33 #define HAVE_KINFO_GETVMMAP
34 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
35 #include <sys/proc.h>
36 #include <machine/profile.h>
37 #define _KERNEL
38 #include <sys/user.h>
39 #undef _KERNEL
40 #undef sigqueue
41 #include <libutil.h>
42 #endif
43 #endif
44 #else
45 #include "exec/ram_addr.h"
46 #endif
47 
48 #include "exec/cputlb.h"
49 #include "exec/translate-all.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "exec/log.h"
55 #include "sysemu/cpus.h"
56 #include "sysemu/cpu-timers.h"
57 #include "sysemu/tcg.h"
58 #include "qapi/error.h"
59 #include "hw/core/tcg-cpu-ops.h"
60 #include "tb-hash.h"
61 #include "tb-context.h"
62 #include "internal.h"
63 
/*
 * Compile-time debug switches.  Uncomment one of the defines below to turn
 * the matching diagnostics on.  Every switch is mirrored by a *_GATE
 * constant (0 or 1) so that debug code can be written as a plain
 * "if (DEBUG_..._GATE)" and stays compiled even when the switch is off.
 */
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
/* make various TB consistency checks */
/* #define DEBUG_TB_CHECK */

#if defined(DEBUG_TB_INVALIDATE)
#define DEBUG_TB_INVALIDATE_GATE 1
#else
#define DEBUG_TB_INVALIDATE_GATE 0
#endif

#if defined(DEBUG_TB_FLUSH)
#define DEBUG_TB_FLUSH_GATE 1
#else
#define DEBUG_TB_FLUSH_GATE 0
#endif

/* TB consistency checks only implemented for usermode emulation.  */
#if !defined(CONFIG_USER_ONLY)
#undef DEBUG_TB_CHECK
#endif

#if defined(DEBUG_TB_CHECK)
#define DEBUG_TB_CHECK_GATE 1
#else
#define DEBUG_TB_CHECK_GATE 0
#endif
91 
/*
 * Access to the various translation structures needs to be serialised via
 * locks for consistency.
 * In user-mode emulation, access to the memory-related structures is
 * protected by mmap_lock, so assert_memory_lock() checks that it is held.
 * In !user-mode we use per-page locks and the assertion expands to nothing.
 */
#ifndef CONFIG_SOFTMMU
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#else
#define assert_memory_lock()
#endif
103 
/*
 * NOTE(review): not referenced in this part of the file.  Presumably the
 * value of PageDesc.code_write_count at which a code bitmap starts being
 * used for a page -- confirm against the users of this constant.
 */
#define SMC_BITMAP_USE_THRESHOLD 10
105 
/*
 * Per-page descriptor stored in the bottom level of the l1_map radix tree
 * (see page_find_alloc()).
 */
typedef struct PageDesc {
    /*
     * list of TBs intersecting this ram page.  This is the head of a list
     * of tagged pointers walked with PAGE_FOR_EACH_TB(): bit 0 of each link
     * selects which page_next[] slot of the TB holds the following link.
     */
    uintptr_t first_tb;
#ifdef CONFIG_SOFTMMU
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap.
       Both fields are reset by invalidate_page_bitmap(). */
    unsigned long *code_bitmap;
    unsigned int code_write_count;
#else
    /* NOTE(review): presumably the PAGE_* protection flags managed through
       page_set_flags()/page_get_flags() -- confirm. */
    unsigned long flags;
    /* NOTE(review): not used in this part of the file. */
    void *target_data;
#endif
#ifndef CONFIG_USER_ONLY
    /* Per-page spinlock taken by page_lock(); set up in page_find_alloc(). */
    QemuSpin lock;
#endif
} PageDesc;
122 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page; also used as the key of the entry in
 *          &page_collection.tree
 * @locked: whether the page is locked through this entry; set by
 *          page_entry_trylock()/do_page_entry_lock() and cleared by
 *          page_entry_unlock()
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
141 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index.
 *          Values are &struct page_entry's, released by page_entry_destroy()
 *          when the tree is destroyed.
 * @max:    Pointer to the page in @tree with the highest page index, or
 *          NULL while the collection is empty
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};
166 
/*
 * list iterators for lists of tagged pointers in TranslationBlock
 *
 * Each link is a uintptr_t holding a TranslationBlock pointer whose bit 0
 * is a tag.  While iterating, @tb receives the untagged pointer and @n the
 * tag; the next link is then read from tb->field[n].  Iteration stops when
 * the untagged pointer is NULL.  @head is evaluated twice and @tb/@n must
 * be assignable lvalues.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Walk the TBs linked from a PageDesc through TranslationBlock.page_next[]. */
#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)

/* Walk the TBs linked from head_tb->jmp_list_head through jmp_list_next[]. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
178 
/*
 * Width, in bits, of the address space covered by l1_map.
 *
 * In user mode the map is keyed by guest virtual addresses; in system mode
 * it is keyed by ram offsets, limited to what fits in a host long.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#ifdef CONFIG_USER_ONLY
# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#elif HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
195 
/*
 * Size of the L2 (and L3, etc) page tables.
 * Every table below the top level is indexed by V_L2_BITS bits of the page
 * index and therefore holds V_L2_SIZE entries.
 */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)
199 
/*
 * Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate:
 * the build fails if CPU_TRACE_DSTATE_MAX_EVENTS exceeds the number of
 * bits in that field.
 */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);
204 
/*
 * L1 Mapping properties
 *
 * All three are computed once by page_table_config_init():
 *  v_l1_size   - number of l1_map entries actually in use
 *  v_l1_shift  - right shift applied to a page index to get its l1_map slot
 *  v_l2_levels - number of intermediate pointer tables between l1_map and
 *                the PageDesc arrays
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/* The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 *
 * NOTE(review): in the code these two bounds constrain the width of the
 * top-level (l1_map) index computed in page_table_config_init(); the
 * PageDesc arrays themselves always hold V_L2_SIZE entries.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Root of the page-descriptor radix tree, sized for the widest top level. */
static void *l1_map[V_L1_MAX_SIZE];

/* Global TB state; this file uses its htable and tb_flush_count members. */
TBContext tb_ctx;
222 
223 static void page_table_config_init(void)
224 {
225     uint32_t v_l1_bits;
226 
227     assert(TARGET_PAGE_BITS);
228     /* The bits remaining after N lower levels of page tables.  */
229     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
230     if (v_l1_bits < V_L1_MIN_BITS) {
231         v_l1_bits += V_L2_BITS;
232     }
233 
234     v_l1_size = 1 << v_l1_bits;
235     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
236     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
237 
238     assert(v_l1_bits <= V_L1_MAX_BITS);
239     assert(v_l1_shift % V_L2_BITS == 0);
240     assert(v_l2_levels >= 0);
241 }
242 
243 /* Encode VAL as a signed leb128 sequence at P.
244    Return P incremented past the encoded value.  */
245 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
246 {
247     int more, byte;
248 
249     do {
250         byte = val & 0x7f;
251         val >>= 7;
252         more = !((val == 0 && (byte & 0x40) == 0)
253                  || (val == -1 && (byte & 0x40) != 0));
254         if (more) {
255             byte |= 0x80;
256         }
257         *p++ = byte;
258     } while (more);
259 
260     return p;
261 }
262 
263 /* Decode a signed leb128 sequence at *PP; increment *PP past the
264    decoded value.  Return the decoded value.  */
265 static target_long decode_sleb128(const uint8_t **pp)
266 {
267     const uint8_t *p = *pp;
268     target_long val = 0;
269     int byte, shift = 0;
270 
271     do {
272         byte = *p++;
273         val |= (target_ulong)(byte & 0x7f) << shift;
274         shift += 7;
275     } while (byte & 0x80);
276     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
277         val |= -(target_ulong)1 << shift;
278     }
279 
280     *pp = p;
281     return val;
282 }
283 
284 /* Encode the data collected about the instructions while compiling TB.
285    Place the data at BLOCK, and return the number of bytes consumed.
286 
287    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
288    which come from the target's insn_start data, followed by a uintptr_t
289    which comes from the host pc of the end of the code implementing the insn.
290 
291    Each line of the table is encoded as sleb128 deltas from the previous
292    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
293    That is, the first column is seeded with the guest pc, the last column
294    with the host pc, and the middle columns with zeros.  */
295 
296 static int encode_search(TranslationBlock *tb, uint8_t *block)
297 {
298     uint8_t *highwater = tcg_ctx->code_gen_highwater;
299     uint8_t *p = block;
300     int i, j, n;
301 
302     for (i = 0, n = tb->icount; i < n; ++i) {
303         target_ulong prev;
304 
305         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
306             if (i == 0) {
307                 prev = (j == 0 ? tb->pc : 0);
308             } else {
309                 prev = tcg_ctx->gen_insn_data[i - 1][j];
310             }
311             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
312         }
313         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
314         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
315 
316         /* Test for (pending) buffer overflow.  The assumption is that any
317            one row beginning below the high water mark cannot overrun
318            the buffer completely.  Thus we can test for overflow after
319            encoding a row without having to check during encoding.  */
320         if (unlikely(p > highwater)) {
321             return -1;
322         }
323     }
324 
325     return p - block;
326 }
327 
328 /* The cpu state corresponding to 'searched_pc' is restored.
329  * When reset_icount is true, current TB will be interrupted and
330  * icount should be recalculated.
331  */
332 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
333                                      uintptr_t searched_pc, bool reset_icount)
334 {
335     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
336     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
337     CPUArchState *env = cpu->env_ptr;
338     const uint8_t *p = tb->tc.ptr + tb->tc.size;
339     int i, j, num_insns = tb->icount;
340 #ifdef CONFIG_PROFILER
341     TCGProfile *prof = &tcg_ctx->prof;
342     int64_t ti = profile_getclock();
343 #endif
344 
345     searched_pc -= GETPC_ADJ;
346 
347     if (searched_pc < host_pc) {
348         return -1;
349     }
350 
351     /* Reconstruct the stored insn data while looking for the point at
352        which the end of the insn exceeds the searched_pc.  */
353     for (i = 0; i < num_insns; ++i) {
354         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
355             data[j] += decode_sleb128(&p);
356         }
357         host_pc += decode_sleb128(&p);
358         if (host_pc > searched_pc) {
359             goto found;
360         }
361     }
362     return -1;
363 
364  found:
365     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
366         assert(icount_enabled());
367         /* Reset the cycle counter to the start of the block
368            and shift if to the number of actually executed instructions */
369         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
370     }
371     restore_state_to_opc(env, tb, data);
372 
373 #ifdef CONFIG_PROFILER
374     qatomic_set(&prof->restore_time,
375                 prof->restore_time + profile_getclock() - ti);
376     qatomic_set(&prof->restore_count, prof->restore_count + 1);
377 #endif
378     return 0;
379 }
380 
/* Tear down the resources owned by @tb: here, its jmp_lock spinlock. */
void tb_destroy(TranslationBlock *tb)
{
    qemu_spin_destroy(&tb->jmp_lock);
}
385 
386 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
387 {
388     /*
389      * The host_pc has to be in the rx region of the code buffer.
390      * If it is not we will not be able to resolve it here.
391      * The two cases where host_pc will not be correct are:
392      *
393      *  - fault during translation (instruction fetch)
394      *  - fault from helper (not using GETPC() macro)
395      *
396      * Either way we need return early as we can't resolve it here.
397      */
398     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
399         TranslationBlock *tb = tcg_tb_lookup(host_pc);
400         if (tb) {
401             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
402             return true;
403         }
404     }
405     return false;
406 }
407 
/*
 * Initialise the page tables: set up the host/target page sizes and the
 * l1_map geometry.
 *
 * On BSD user-mode builds, additionally mark every range already mapped in
 * the host process as PAGE_RESERVED, using kinfo_getvmmap() when available
 * and the Linux-compat /proc maps file otherwise.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            int n;

            mmap_lock();

            /*
             * Loop on the fscanf() result rather than on feof(): feof()
             * never becomes true after a read error, and a line that does
             * not match the format is not consumed, so either case would
             * otherwise spin forever.
             */
            do {
                unsigned long startaddr, endaddr;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2) {
                    if (h2g_valid(startaddr)) {
                        startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                        if (h2g_valid(endaddr)) {
                            endaddr = h2g(endaddr);
                        } else {
                            endaddr = ~0ul;
                        }
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    }
                } else if (n != EOF) {
                    /* Malformed line: drop the rest of it and carry on.  */
                    int c;

                    do {
                        c = fgetc(f);
                    } while (c != EOF && c != '\n');
                }
            } while (n != EOF);

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
478 
479 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
480 {
481     PageDesc *pd;
482     void **lp;
483     int i;
484 
485     /* Level 1.  Always allocated.  */
486     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
487 
488     /* Level 2..N-1.  */
489     for (i = v_l2_levels; i > 0; i--) {
490         void **p = qatomic_rcu_read(lp);
491 
492         if (p == NULL) {
493             void *existing;
494 
495             if (!alloc) {
496                 return NULL;
497             }
498             p = g_new0(void *, V_L2_SIZE);
499             existing = qatomic_cmpxchg(lp, NULL, p);
500             if (unlikely(existing)) {
501                 g_free(p);
502                 p = existing;
503             }
504         }
505 
506         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
507     }
508 
509     pd = qatomic_rcu_read(lp);
510     if (pd == NULL) {
511         void *existing;
512 
513         if (!alloc) {
514             return NULL;
515         }
516         pd = g_new0(PageDesc, V_L2_SIZE);
517 #ifndef CONFIG_USER_ONLY
518         {
519             int i;
520 
521             for (i = 0; i < V_L2_SIZE; i++) {
522                 qemu_spin_init(&pd[i].lock);
523             }
524         }
525 #endif
526         existing = qatomic_cmpxchg(lp, NULL, pd);
527         if (unlikely(existing)) {
528 #ifndef CONFIG_USER_ONLY
529             {
530                 int i;
531 
532                 for (i = 0; i < V_L2_SIZE; i++) {
533                     qemu_spin_destroy(&pd[i].lock);
534                 }
535             }
536 #endif
537             g_free(pd);
538             pd = existing;
539         }
540     }
541 
542     return pd + (index & (V_L2_SIZE - 1));
543 }
544 
/*
 * Look up the PageDesc for page @index without allocating anything.
 * Returns NULL if no descriptor exists for that page.
 */
static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
549 
/* Forward declaration; page_lock_tb() below uses it before its definition. */
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
552 
553 /* In user-mode page locks aren't used; mmap_lock is enough */
554 #ifdef CONFIG_USER_ONLY
555 
/* With no per-page lock, "page locked" means "mmap_lock is held". */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

/* No-op in user mode. */
static inline void page_lock(PageDesc *pd)
{ }

/* No-op in user mode. */
static inline void page_unlock(PageDesc *pd)
{ }

/* No-op in user mode. */
static inline void page_lock_tb(const TranslationBlock *tb)
{ }

/* No-op in user mode. */
static inline void page_unlock_tb(const TranslationBlock *tb)
{ }

/* User mode locks nothing and always returns NULL. */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

/* No-op in user mode; @set is not dereferenced. */
void page_collection_unlock(struct page_collection *set)
{ }
578 #else /* !CONFIG_USER_ONLY */
579 
580 #ifdef CONFIG_DEBUG_TCG
581 
582 static __thread GHashTable *ht_pages_locked_debug;
583 
584 static void ht_pages_locked_debug_init(void)
585 {
586     if (ht_pages_locked_debug) {
587         return;
588     }
589     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
590 }
591 
592 static bool page_is_locked(const PageDesc *pd)
593 {
594     PageDesc *found;
595 
596     ht_pages_locked_debug_init();
597     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
598     return !!found;
599 }
600 
/*
 * Debug-only bookkeeping for taking a page lock: record @pd in the calling
 * thread's table, asserting it is not already recorded there.
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
607 
/*
 * Debug-only bookkeeping for releasing a page lock: remove @pd from the
 * calling thread's table, asserting that it was recorded there.
 */
static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}
617 
618 static void
619 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
620 {
621     if (unlikely(!page_is_locked(pd))) {
622         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
623                      pd, file, line);
624         abort();
625     }
626 }
627 
628 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
629 
/* Debug-only: assert that the calling thread has no page recorded as locked. */
void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}
635 
636 #else /* !CONFIG_DEBUG_TCG */
637 
/* Without CONFIG_DEBUG_TCG, lock tracking compiles away to nothing. */
#define assert_page_locked(pd)

/* No-op when lock tracking is disabled. */
static inline void page_lock__debug(const PageDesc *pd)
{
}

/* No-op when lock tracking is disabled. */
static inline void page_unlock__debug(const PageDesc *pd)
{
}
647 
648 #endif /* CONFIG_DEBUG_TCG */
649 
/*
 * Take the spinlock of @pd.  The debug bookkeeping runs first, so a second
 * lock attempt on the same page by this thread trips its assertion in
 * CONFIG_DEBUG_TCG builds before the spinlock is touched.
 */
static inline void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}
655 
/* Release the spinlock of @pd, then update the debug bookkeeping. */
static inline void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}
661 
/*
 * lock the page(s) of a TB in the correct acquisition order
 *
 * Delegates to page_lock_pair() with alloc == 0 and without asking for the
 * PageDesc pointers back.
 */
static inline void page_lock_tb(const TranslationBlock *tb)
{
    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
}
667 
668 static inline void page_unlock_tb(const TranslationBlock *tb)
669 {
670     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
671 
672     page_unlock(p1);
673     if (unlikely(tb->page_addr[1] != -1)) {
674         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
675 
676         if (p2 != p1) {
677             page_unlock(p2);
678         }
679     }
680 }
681 
682 static inline struct page_entry *
683 page_entry_new(PageDesc *pd, tb_page_addr_t index)
684 {
685     struct page_entry *pe = g_malloc(sizeof(*pe));
686 
687     pe->index = index;
688     pe->pd = pd;
689     pe->locked = false;
690     return pe;
691 }
692 
/*
 * Value-destroy callback of page_collection.tree: unlock the page and free
 * the entry.  The entry is asserted to be locked at this point.
 */
static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}
701 
702 /* returns false on success */
703 static bool page_entry_trylock(struct page_entry *pe)
704 {
705     bool busy;
706 
707     busy = qemu_spin_trylock(&pe->pd->lock);
708     if (!busy) {
709         g_assert(!pe->locked);
710         pe->locked = true;
711         page_lock__debug(pe->pd);
712     }
713     return busy;
714 }
715 
/*
 * Take the page lock of @pe with page_lock() and mark the entry locked.
 * The entry must not already be marked locked.
 */
static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}
722 
/*
 * g_tree_foreach() callback: lock the page of the entry passed as @value.
 * @key and @data are unused.  Returns FALSE so that traversal continues.
 */
static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}
730 
731 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
732 {
733     struct page_entry *pe = value;
734 
735     if (pe->locked) {
736         pe->locked = false;
737         page_unlock(pe->pd);
738     }
739     return FALSE;
740 }
741 
742 /*
743  * Trylock a page, and if successful, add the page to a collection.
744  * Returns true ("busy") if the page could not be locked; false otherwise.
745  */
746 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
747 {
748     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
749     struct page_entry *pe;
750     PageDesc *pd;
751 
752     pe = g_tree_lookup(set->tree, &index);
753     if (pe) {
754         return false;
755     }
756 
757     pd = page_find(index);
758     if (pd == NULL) {
759         return false;
760     }
761 
762     pe = page_entry_new(pd, index);
763     g_tree_insert(set->tree, &pe->index, pe);
764 
765     /*
766      * If this is either (1) the first insertion or (2) a page whose index
767      * is higher than any other so far, just lock the page and move on.
768      */
769     if (set->max == NULL || pe->index > set->max->index) {
770         set->max = pe;
771         do_page_entry_lock(pe);
772         return false;
773     }
774     /*
775      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
776      * locks in order.
777      */
778     return page_entry_trylock(pe);
779 }
780 
781 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
782 {
783     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
784     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
785 
786     if (a == b) {
787         return 0;
788     } else if (a < b) {
789         return -1;
790     }
791     return 1;
792 }
793 
/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * Returns a newly allocated collection holding every page that was locked;
 * release it with page_collection_unlock().
 *
 * NOTE(review): the loop below runs while index <= end, i.e. the page that
 * contains @end is included, although the range above is written half-open.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    /* From here on @start and @end are page indexes, not addresses. */
    start >>= TARGET_PAGE_BITS;
    end   >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    /* Entries are unlocked and freed by page_entry_destroy on tree teardown. */
    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /*
     * Lock every page collected so far.  The tree is empty on the first
     * pass; after a retry it holds the entries unlocked just before the
     * jump.
     */
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            /* No descriptor for this page: nothing to lock. */
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* busy: drop all locks, and reacquire in order */
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* Also collect the page(s) of every TB linked from this page. */
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb->page_addr[0]) ||
                (tb->page_addr[1] != -1 &&
                 page_trylock_add(set, tb->page_addr[1]))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
843 
/*
 * Unlock every page held by @set and free the collection, including @set
 * itself.  @set must not be used afterwards.
 */
void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}
850 
851 #endif /* !CONFIG_USER_ONLY */
852 
853 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
854                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
855 {
856     PageDesc *p1, *p2;
857     tb_page_addr_t page1;
858     tb_page_addr_t page2;
859 
860     assert_memory_lock();
861     g_assert(phys1 != -1);
862 
863     page1 = phys1 >> TARGET_PAGE_BITS;
864     page2 = phys2 >> TARGET_PAGE_BITS;
865 
866     p1 = page_find_alloc(page1, alloc);
867     if (ret_p1) {
868         *ret_p1 = p1;
869     }
870     if (likely(phys2 == -1)) {
871         page_lock(p1);
872         return;
873     } else if (page1 == page2) {
874         page_lock(p1);
875         if (ret_p2) {
876             *ret_p2 = p1;
877         }
878         return;
879     }
880     p2 = page_find_alloc(page2, alloc);
881     if (ret_p2) {
882         *ret_p2 = p2;
883     }
884     if (page1 < page2) {
885         page_lock(p1);
886         page_lock(p2);
887     } else {
888         page_lock(p2);
889         page_lock(p1);
890     }
891 }
892 
893 static bool tb_cmp(const void *ap, const void *bp)
894 {
895     const TranslationBlock *a = ap;
896     const TranslationBlock *b = bp;
897 
898     return a->pc == b->pc &&
899         a->cs_base == b->cs_base &&
900         a->flags == b->flags &&
901         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
902         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
903         a->page_addr[0] == b->page_addr[0] &&
904         a->page_addr[1] == b->page_addr[1];
905 }
906 
907 void tb_htable_init(void)
908 {
909     unsigned int mode = QHT_MODE_AUTO_RESIZE;
910 
911     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
912 }
913 
/*
 * Drop the code bitmap of page @p and reset its write counter.
 * Only has an effect in CONFIG_SOFTMMU builds; otherwise it just checks
 * the lock.
 *
 * call with @p->lock held
 */
static inline void invalidate_page_bitmap(PageDesc *p)
{
    assert_page_locked(p);
#ifdef CONFIG_SOFTMMU
    g_free(p->code_bitmap);
    p->code_bitmap = NULL;
    p->code_write_count = 0;
#endif
}
924 
925 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
926 static void page_flush_tb_1(int level, void **lp)
927 {
928     int i;
929 
930     if (*lp == NULL) {
931         return;
932     }
933     if (level == 0) {
934         PageDesc *pd = *lp;
935 
936         for (i = 0; i < V_L2_SIZE; ++i) {
937             page_lock(&pd[i]);
938             pd[i].first_tb = (uintptr_t)NULL;
939             invalidate_page_bitmap(pd + i);
940             page_unlock(&pd[i]);
941         }
942     } else {
943         void **pp = *lp;
944 
945         for (i = 0; i < V_L2_SIZE; ++i) {
946             page_flush_tb_1(level - 1, pp + i);
947         }
948     }
949 }
950 
951 static void page_flush_tb(void)
952 {
953     int i, l1_sz = v_l1_size;
954 
955     for (i = 0; i < l1_sz; i++) {
956         page_flush_tb_1(v_l2_levels, l1_map + i);
957     }
958 }
959 
960 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
961 {
962     const TranslationBlock *tb = value;
963     size_t *size = data;
964 
965     *size += tb->tc.size;
966     return false;
967 }
968 
969 /* flush all the translation blocks */
970 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
971 {
972     bool did_flush = false;
973 
974     mmap_lock();
975     /* If it is already been done on request of another CPU,
976      * just retry.
977      */
978     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
979         goto done;
980     }
981     did_flush = true;
982 
983     if (DEBUG_TB_FLUSH_GATE) {
984         size_t nb_tbs = tcg_nb_tbs();
985         size_t host_size = 0;
986 
987         tcg_tb_foreach(tb_host_size_iter, &host_size);
988         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
989                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
990     }
991 
992     CPU_FOREACH(cpu) {
993         cpu_tb_jmp_cache_clear(cpu);
994     }
995 
996     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
997     page_flush_tb();
998 
999     tcg_region_reset_all();
1000     /* XXX: flush processor icache at this point if cache flush is
1001        expensive */
1002     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1003 
1004 done:
1005     mmap_unlock();
1006     if (did_flush) {
1007         qemu_plugin_flush_cb();
1008     }
1009 }
1010 
1011 void tb_flush(CPUState *cpu)
1012 {
1013     if (tcg_enabled()) {
1014         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1015 
1016         if (cpu_in_exclusive_context(cpu)) {
1017             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1018         } else {
1019             async_safe_run_on_cpu(cpu, do_tb_flush,
1020                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1021         }
1022     }
1023 }
1024 
1025 /*
1026  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1027  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1028  * and let the optimizer get rid of them by wrapping their user-only callers
1029  * with if (DEBUG_TB_CHECK_GATE).
1030  */
1031 #ifdef CONFIG_USER_ONLY
1032 
1033 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1034 {
1035     TranslationBlock *tb = p;
1036     target_ulong addr = *(target_ulong *)userp;
1037 
1038     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1039         printf("ERROR invalidate: address=" TARGET_FMT_lx
1040                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1041     }
1042 }
1043 
1044 /* verify that all the pages have correct rights for code
1045  *
1046  * Called with mmap_lock held.
1047  */
1048 static void tb_invalidate_check(target_ulong address)
1049 {
1050     address &= TARGET_PAGE_MASK;
1051     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1052 }
1053 
1054 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1055 {
1056     TranslationBlock *tb = p;
1057     int flags1, flags2;
1058 
1059     flags1 = page_get_flags(tb->pc);
1060     flags2 = page_get_flags(tb->pc + tb->size - 1);
1061     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1062         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1063                (long)tb->pc, tb->size, flags1, flags2);
1064     }
1065 }
1066 
/*
 * Verify that all the pages have correct rights for code: run
 * do_tb_page_check() on every TB in the hash table, which reports any TB
 * whose first or last byte lies on a page still flagged PAGE_WRITE.
 */
static void tb_page_check(void)
{
    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
}
1072 
1073 #endif /* CONFIG_USER_ONLY */
1074 
/*
 * Unlink @tb from the list of TBs attached to page descriptor @pd.
 *
 * The list is singly linked through tb->page_next[]; the index to follow
 * for each element is produced by PAGE_FOR_EACH_TB as @n1.  @tb must be on
 * the list: reaching the end without finding it aborts.
 *
 * user-mode: call with mmap_lock held
 * !user-mode: call with @pd->lock held
 */
static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    uintptr_t *pprev;
    unsigned int n1;

    assert_page_locked(pd);
    /* pprev always points at the link slot that references the current TB */
    pprev = &pd->first_tb;
    PAGE_FOR_EACH_TB(pd, tb1, n1) {
        if (tb1 == tb) {
            /* splice the element out by making its predecessor skip it */
            *pprev = tb1->page_next[n1];
            return;
        }
        pprev = &tb1->page_next[n1];
    }
    g_assert_not_reached();
}
1096 
/*
 * remove @orig from its @n_orig-th jump list
 *
 * orig->jmp_dest[n_orig] holds the destination TB pointer with its LSB used
 * as a "no more chaining" marker.  The marker is set first, then the
 * destination's jmp_lock is taken to unlink @orig from the destination's
 * list of incoming jumps.  Returns without doing anything further if there
 * is no destination, or if the destination unlinked @orig in the meantime.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        /* this jump slot was never chained to another TB */
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jump_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            /* unlink by making the previous link slot skip this entry */
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
1145 
1146 /* reset the jump entry 'n' of a TB so that it is not chained to
1147    another TB */
1148 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1149 {
1150     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1151     tb_set_jmp_target(tb, n, addr);
1152 }
1153 
/*
 * remove any jumps to the TB
 *
 * Under @dest->jmp_lock, every TB that currently jumps to @dest has the
 * corresponding jump reset and its recorded destination cleared; the list
 * of incoming jumps is then emptied.
 */
static inline void tb_jmp_unlink(TranslationBlock *dest)
{
    TranslationBlock *tb;
    int n;

    qemu_spin_lock(&dest->jmp_lock);

    TB_FOR_EACH_JMP(dest, tb, n) {
        tb_reset_jump(tb, n);
        /* AND with 1: drops the destination pointer, keeps only the LSB */
        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
        /* No need to clear the list entry; setting the dest ptr is enough */
    }
    dest->jmp_list_head = (uintptr_t)NULL;

    qemu_spin_unlock(&dest->jmp_lock);
}
1171 
/*
 * Invalidate @tb: mark it CF_INVALID, remove it from the TB hash table,
 * optionally from its page lists, from every vCPU's jump cache, and undo
 * all jump chaining to and from it.
 *
 * @rm_from_page_list: when true, also unlink @tb from the PageDesc list of
 * each page it occupies.
 *
 * In user-mode, call with mmap_lock held.
 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 * locks held.
 */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    CPUState *cpu;
    PageDesc *p;
    uint32_t h;
    tb_page_addr_t phys_pc;
    /* sampled before CF_INVALID is set below; used to compute the hash */
    uint32_t orig_cflags = tb_cflags(tb);

    assert_memory_lock();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                     tb->trace_vcpu_dstate);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        /*
         * Not in the hash table, so skip the remaining steps.
         * NOTE(review): presumably it was already removed elsewhere -- confirm.
         */
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(p, tb);
        invalidate_page_bitmap(p);
        /* page_addr[1] == -1 means the TB does not span a second page */
        if (tb->page_addr[1] != -1) {
            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
            tb_page_remove(p, tb);
            invalidate_page_bitmap(p);
        }
    }

    /* remove the TB from each vCPU's jump cache */
    h = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
        }
    }

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    /* statistics: count of TBs invalidated through this path */
    qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
               tcg_ctx->tb_phys_invalidate_count + 1);
}
1230 
/*
 * Invalidate @tb, including removal from its page lists.  The "__locked"
 * suffix suggests the caller already holds the relevant page locks, as
 * do_tb_phys_invalidate() requires when rm_from_page_list is true.
 *
 * The invalidation is bracketed by qemu_thread_jit_write() and
 * qemu_thread_jit_execute().
 * NOTE(review): presumably these switch the JIT buffer between writable and
 * executable for this thread -- confirm against their definitions.
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    qemu_thread_jit_write();
    do_tb_phys_invalidate(tb, true);
    qemu_thread_jit_execute();
}
1237 
1238 /* invalidate one TB
1239  *
1240  * Called with mmap_lock held in user-mode.
1241  */
1242 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1243 {
1244     if (page_addr == -1 && tb->page_addr[0] != -1) {
1245         page_lock_tb(tb);
1246         do_tb_phys_invalidate(tb, true);
1247         page_unlock_tb(tb);
1248     } else {
1249         do_tb_phys_invalidate(tb, false);
1250     }
1251 }
1252 
1253 #ifdef CONFIG_SOFTMMU
1254 /* call with @p->lock held */
1255 static void build_page_bitmap(PageDesc *p)
1256 {
1257     int n, tb_start, tb_end;
1258     TranslationBlock *tb;
1259 
1260     assert_page_locked(p);
1261     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1262 
1263     PAGE_FOR_EACH_TB(p, tb, n) {
1264         /* NOTE: this is subtle as a TB may span two physical pages */
1265         if (n == 0) {
1266             /* NOTE: tb_end may be after the end of the page, but
1267                it is not a problem */
1268             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1269             tb_end = tb_start + tb->size;
1270             if (tb_end > TARGET_PAGE_SIZE) {
1271                 tb_end = TARGET_PAGE_SIZE;
1272              }
1273         } else {
1274             tb_start = 0;
1275             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1276         }
1277         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1278     }
1279 }
1280 #endif
1281 
/*
 * add the tb in the target page and protect it if necessary
 *
 * @p: descriptor of the page that receives the TB
 * @tb: TB being attached
 * @n: which of the TB's (up to two) pages @p is; selects the page_addr[]
 *     and page_next[] slot and is stored in the low bit of p->first_tb
 * @page_addr: address recorded in tb->page_addr[@n]
 *
 * Called with mmap_lock held for user-mode emulation.
 * Called with @p->lock held in !user-mode.
 */
static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
                               unsigned int n, tb_page_addr_t page_addr)
{
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    assert_page_locked(p);

    /* push the TB at the head of the page's list */
    tb->page_addr[n] = page_addr;
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    /* must be sampled before first_tb is overwritten just below */
    page_already_protected = p->first_tb != (uintptr_t)NULL;
#endif
    p->first_tb = (uintptr_t)tb | n;
    invalidate_page_bitmap(p);

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        /*
         * Visit every target page inside the host page: accumulate their
         * flags and clear PAGE_WRITE on each one.
         */
        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find(addr >> TARGET_PAGE_BITS);
            if (!p2) {
                continue;
            }
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
          }
        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
        if (DEBUG_TB_INVALIDATE_GATE) {
            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
        }
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif
}
1339 
/*
 * Add a new TB and link it to the physical page tables. phys_page2 is
 * (-1) to indicate that only one page contains the TB.
 *
 * @tb: fully initialized TB to register; must not have CF_INVALID set
 * @phys_pc: physical address of the TB's first guest byte
 * @phys_page2: address of the second page the TB spans, or -1
 *
 * Called with mmap_lock held for user-mode emulation.
 *
 * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
 * Note that in !user-mode, another thread might have already added a TB
 * for the same block of guest code that @tb corresponds to. In that case,
 * the caller should discard the original @tb, and use instead the returned TB.
 */
static TranslationBlock *
tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
             tb_page_addr_t phys_page2)
{
    PageDesc *p;
    PageDesc *p2 = NULL;
    void *existing_tb = NULL;
    uint32_t h;

    assert_memory_lock();
    tcg_debug_assert(!(tb->cflags & CF_INVALID));

    /*
     * Add the TB to the page list, acquiring first the pages's locks.
     * We keep the locks held until after inserting the TB in the hash table,
     * so that if the insertion fails we know for sure that the TBs are still
     * in the page descriptors.
     * Note that inserting into the hash table first isn't an option, since
     * we can only insert TBs that are fully initialized.
     */
    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (p2) {
        tb_page_add(p2, tb, 1, phys_page2);
    } else {
        /* single-page TB: mark the second page slot as unused */
        tb->page_addr[1] = -1;
    }

    /* add in the hash table */
    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                     tb->trace_vcpu_dstate);
    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);

    /* remove TB from the page(s) if we couldn't insert it */
    if (unlikely(existing_tb)) {
        tb_page_remove(p, tb);
        invalidate_page_bitmap(p);
        if (p2) {
            tb_page_remove(p2, tb);
            invalidate_page_bitmap(p2);
        }
        /* hand the already-registered TB back to the caller */
        tb = existing_tb;
    }

    /* unlock p2 only if it is a distinct page, so p is not unlocked twice */
    if (p2 && p2 != p) {
        page_unlock(p2);
    }
    page_unlock(p);

#ifdef CONFIG_USER_ONLY
    if (DEBUG_TB_CHECK_GATE) {
        tb_page_check();
    }
#endif
    return tb;
}
1407 
/*
 * Translate the guest code starting at @pc into a new TranslationBlock and
 * register it.
 *
 * @cpu: vCPU on whose behalf the translation is done
 * @pc, @cs_base, @flags: stored in the TB fields of the same name
 * @cflags: compile flags; the CF_COUNT_MASK bits give the maximum number of
 *          guest insns to translate (0 selects the largest allowed count)
 *
 * Returns the new TB, or the existing TB reported by tb_link_page() when an
 * equivalent one was already registered (the fresh translation is then
 * discarded).  If no TB can be allocated, a flush is requested and the
 * function leaves through cpu_loop_exit().
 *
 * Called with mmap_lock held for user mode emulation.
 */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti;
#endif

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code(env, pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    /* derive the insn budget: requested count, capped, 1 when single-stepping */
    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
    if (max_insns > TCG_MAX_INSNS) {
        max_insns = TCG_MAX_INSNS;
    }
    if (cpu->singlestep_enabled || singlestep) {
        max_insns = 1;
    }

    /* restart point when the code buffer overflowed: allocate a fresh TB */
 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible.  */
        cpu->exception_index = EXCP_INTERRUPT;
        /* NOTE(review): cpu_loop_exit() is expected not to return -- confirm */
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    tb->pc = pc;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tcg_ctx->tb_cflags = cflags;
    /* restart point when the TB was too large: same TB, smaller max_insns */
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    /*
     * A non-zero value here is treated exactly like a tcg_gen_code() error
     * code (see error_return below).
     * NOTE(review): presumably delivered by a siglongjmp to jmp_trans during
     * translation -- confirm.
     */
    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(gen_code_size != 0)) {
        goto error_return;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(cpu, tb, max_insns);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    /* remember how many insns were actually translated for a possible retry */
    max_insns = tb->icount;

    trace_translate_block(tb, tb->pc, tb->tc.ptr);

    /* generate machine code */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->tb_count, prof->tb_count + 1);
    qatomic_set(&prof->interm_time,
                prof->interm_time + profile_getclock() - ti);
    ti = profile_getclock();
#endif

    gen_code_size = tcg_gen_code(tcg_ctx, tb);
    if (unlikely(gen_code_size < 0)) {
 error_return:
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    /* the search data is written right after the generated code */
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(tb->pc)) {
        FILE *logfile = qemu_log_lock();
        int code_size, data_size;
        const tcg_target_ulong *rx_data_gen_ptr;
        size_t chunk_start;
        int insn = 0;

        /* split the output into code and trailing data, if any data exists */
        if (tcg_ctx->data_gen_ptr) {
            rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
            code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
            data_size = gen_code_size - code_size;
        } else {
            rx_data_gen_ptr = 0;
            code_size = gen_code_size;
            data_size = 0;
        }

        /* Dump header and the first instruction */
        qemu_log("OUT: [size=%d]\n", gen_code_size);
        qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                 tcg_ctx->gen_insn_data[insn][0]);
        chunk_start = tcg_ctx->gen_insn_end_off[insn];
        log_disas(tb->tc.ptr, chunk_start);

        /*
         * Dump each instruction chunk, wrapping up empty chunks into
         * the next instruction. The whole array is offset so the
         * first entry is the beginning of the 2nd instruction.
         */
        while (insn < tb->icount) {
            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
            if (chunk_end > chunk_start) {
                qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
                         tcg_ctx->gen_insn_data[insn][0]);
                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
                chunk_start = chunk_end;
            }
            insn++;
        }

        if (chunk_start < code_size) {
            qemu_log("  -- tb slow paths + alignment\n");
            log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
        }

        /* Finally dump any data we may have after the block */
        if (data_size) {
            int i;
            qemu_log("  data: [size=%d]\n", data_size);
            for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                } else if (sizeof(tcg_target_ulong) == 4) {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                } else {
                    qemu_build_not_reached();
                }
            }
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* advance the generation pointer past the code and the search data */
    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then
     * it must be a temporary one-insn TB, and we have nothing to do
     * except fill in the page_addr[] fields. Return early before
     * attempting to link to other TBs or add to the lookup table.
     */
    if (phys_pc == -1) {
        tb->page_addr[0] = tb->page_addr[1] = -1;
        return tb;
    }

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        /*
         * Rewind code_gen_ptr to before the TB descriptor.
         * NOTE(review): presumably mirrors the placement done by
         * tcg_tb_alloc() -- confirm.
         */
        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tb_destroy(tb);
        return existing_tb;
    }
    tcg_tb_insert(tb);
    return tb;
}
1683 
/*
 * Invalidate every TB on page @p that intersects [@start, @end[.
 *
 * @pages: the locked page collection; released here before leaving through
 *         cpu_loop_exit_noexc() in the precise-SMC case
 * @p: page descriptor to scan; must be non-NULL
 * @start, @end: physical address range being modified
 * @retaddr: host return address of the faulting store, or 0 when the call
 *           does not originate from a vCPU write; only used when
 *           TARGET_HAS_PRECISE_SMC is defined
 *
 * With TARGET_HAS_PRECISE_SMC, if the TB containing @retaddr is among the
 * invalidated ones (and is not a single-insn TB), the CPU state is restored
 * and the function exits the cpu loop so that one insn is executed next.
 *
 * user-mode: call with mmap_lock held.
 * !user-mode: call with all @pages locked.
 */
static void
tb_invalidate_phys_page_range__locked(struct page_collection *pages,
                                      PageDesc *p, tb_page_addr_t start,
                                      tb_page_addr_t end,
                                      uintptr_t retaddr)
{
    TranslationBlock *tb;
    tb_page_addr_t tb_start, tb_end;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    CPUState *cpu = current_cpu;
    CPUArchState *env = NULL;
    /* true until the TB for @retaddr has been looked up (done lazily below) */
    bool current_tb_not_found = retaddr != 0;
    bool current_tb_modified = false;
    TranslationBlock *current_tb = NULL;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    uint32_t current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    assert_page_locked(p);

#if defined(TARGET_HAS_PRECISE_SMC)
    if (cpu != NULL) {
        env = cpu->env_ptr;
    }
#endif

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all
       the code */
    PAGE_FOR_EACH_TB(p, tb, n) {
        assert_page_locked(p);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            /* @p is the TB's second page: it covers the start of the page */
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        /* the TB overlaps [start, end[ */
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = false;
                /* now we have a real cpu fault */
                current_tb = tcg_tb_lookup(retaddr);
            }
            if (current_tb == tb &&
                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
                /*
                 * If we are modifying the current TB, we must stop
                 * its execution. We could be more precise by checking
                 * that the modification is after the current PC, but it
                 * would require a specialized function to partially
                 * restore the CPU state.
                 */
                current_tb_modified = true;
                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            tb_phys_invalidate__locked(tb);
        }
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        tlb_unprotect_code(start);
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* drop every lock before leaving the function via the cpu loop exit */
        page_collection_unlock(pages);
        /* Force execution of one insn next time.  */
        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
        mmap_unlock();
        cpu_loop_exit_noexc(cpu);
    }
#endif
}
1774 
1775 /*
1776  * Invalidate all TBs which intersect with the target physical address range
1777  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1778  * 'is_cpu_write_access' should be true if called from a real cpu write
1779  * access: the virtual CPU will exit the current TB if code is modified inside
1780  * this TB.
1781  *
1782  * Called with mmap_lock held for user-mode emulation
1783  */
1784 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1785 {
1786     struct page_collection *pages;
1787     PageDesc *p;
1788 
1789     assert_memory_lock();
1790 
1791     p = page_find(start >> TARGET_PAGE_BITS);
1792     if (p == NULL) {
1793         return;
1794     }
1795     pages = page_collection_lock(start, end);
1796     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1797     page_collection_unlock(pages);
1798 }
1799 
1800 /*
1801  * Invalidate all TBs which intersect with the target physical address range
1802  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1803  * 'is_cpu_write_access' should be true if called from a real cpu write
1804  * access: the virtual CPU will exit the current TB if code is modified inside
1805  * this TB.
1806  *
1807  * Called with mmap_lock held for user-mode emulation.
1808  */
1809 #ifdef CONFIG_SOFTMMU
1810 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1811 #else
1812 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1813 #endif
1814 {
1815     struct page_collection *pages;
1816     tb_page_addr_t next;
1817 
1818     assert_memory_lock();
1819 
1820     pages = page_collection_lock(start, end);
1821     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1822          start < end;
1823          start = next, next += TARGET_PAGE_SIZE) {
1824         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1825         tb_page_addr_t bound = MIN(next, end);
1826 
1827         if (pd == NULL) {
1828             continue;
1829         }
1830         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1831     }
1832     page_collection_unlock(pages);
1833 }
1834 
1835 #ifdef CONFIG_SOFTMMU
/*
 * Fast-path invalidation for a small write of @len bytes at @start.
 *
 * len must be <= 8 and start must be a multiple of len.
 * Called via softmmu_template.h when code areas are written to with
 * iothread mutex not held.
 *
 * Once a page has seen SMC_BITMAP_USE_THRESHOLD such writes without a code
 * bitmap, the bitmap is built.  When a bitmap exists, TBs are invalidated
 * only if one of the written bytes is marked as code; without a bitmap the
 * range is always invalidated.
 *
 * @retaddr is forwarded to tb_invalidate_phys_page_range__locked().
 *
 * Call with all @pages in the range [@start, @start + len[ locked.
 */
void tb_invalidate_phys_page_fast(struct page_collection *pages,
                                  tb_page_addr_t start, int len,
                                  uintptr_t retaddr)
{
    PageDesc *p;

    assert_memory_lock();

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p) {
        return;
    }

    assert_page_locked(p);
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
        build_page_bitmap(p);
    }
    if (p->code_bitmap) {
        unsigned int nr;
        unsigned long b;

        /* nr: byte offset of the write within the page == bit index */
        nr = start & ~TARGET_PAGE_MASK;
        /* shift so that bit 0 of b corresponds to the first written byte */
        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
        /* test the @len bits that cover the written bytes */
        if (b & ((1 << len) - 1)) {
            goto do_invalidate;
        }
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
                                              retaddr);
    }
}
1875 #else
/*
 * Invalidate every TB attached to the target page containing @addr.
 *
 * Called with mmap_lock held. If pc is not 0 then it indicates the
 * host PC of the faulting store instruction that caused this invalidate.
 * Returns true if the caller needs to abort execution of the current
 * TB (because it was modified by this store and the guest CPU has
 * precise-SMC semantics).
 *
 * Returns false when the page has no descriptor, and always when
 * TARGET_HAS_PRECISE_SMC is not defined.
 */
static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUState *cpu = current_cpu;
    CPUArchState *env = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    uint32_t current_flags = 0;
#endif

    assert_memory_lock();

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        return false;
    }

#ifdef TARGET_HAS_PRECISE_SMC
    /* only look up the executing TB if the page holds code and pc is known */
    if (p->first_tb && pc != 0) {
        current_tb = tcg_tb_lookup(pc);
    }
    if (cpu != NULL) {
        env = cpu->env_ptr;
    }
#endif
    assert_page_locked(p);
    PAGE_FOR_EACH_TB(p, tb, n) {
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        /*
         * Passing the page address (not -1) makes tb_phys_invalidate() leave
         * the page lists alone; this page's list is dropped in one go below.
         */
        tb_phys_invalidate(tb, addr);
    }
    p->first_tb = (uintptr_t)NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* Force execution of one insn next time.  */
        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
        return true;
    }
#endif

    return false;
}
1943 #endif
1944 
1945 /* user-mode: call with mmap_lock held */
1946 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1947 {
1948     TranslationBlock *tb;
1949 
1950     assert_memory_lock();
1951 
1952     tb = tcg_tb_lookup(retaddr);
1953     if (tb) {
1954         /* We can use retranslation to find the PC.  */
1955         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1956         tb_phys_invalidate(tb, -1);
1957     } else {
1958         /* The exception probably happened in a helper.  The CPU state should
1959            have been saved before calling it. Fetch the PC from there.  */
1960         CPUArchState *env = cpu->env_ptr;
1961         target_ulong pc, cs_base;
1962         tb_page_addr_t addr;
1963         uint32_t flags;
1964 
1965         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1966         addr = get_page_addr_code(env, pc);
1967         if (addr != -1) {
1968             tb_invalidate_phys_range(addr, addr + 1);
1969         }
1970     }
1971 }
1972 
1973 #ifndef CONFIG_USER_ONLY
1974 /*
1975  * In deterministic execution mode, instructions doing device I/Os
1976  * must be at the end of the TB.
1977  *
1978  * Called by softmmu_template.h, with iothread mutex not held.
1979  */
1980 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1981 {
1982     TranslationBlock *tb;
1983     CPUClass *cc;
1984     uint32_t n;
1985 
1986     tb = tcg_tb_lookup(retaddr);
1987     if (!tb) {
1988         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1989                   (void *)retaddr);
1990     }
1991     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1992 
1993     /*
1994      * Some guests must re-execute the branch when re-executing a delay
1995      * slot instruction.  When this is the case, adjust icount and N
1996      * to account for the re-execution of the branch.
1997      */
1998     n = 1;
1999     cc = CPU_GET_CLASS(cpu);
2000     if (cc->tcg_ops->io_recompile_replay_branch &&
2001         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
2002         cpu_neg(cpu)->icount_decr.u16.low++;
2003         n = 2;
2004     }
2005 
2006     /*
2007      * Exit the loop and potentially generate a new TB executing the
2008      * just the I/O insns. We also limit instrumentation to memory
2009      * operations only (which execute after completion) so we don't
2010      * double instrument the instruction.
2011      */
2012     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
2013 
2014     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
2015                            "cpu_io_recompile: rewound execution of TB to "
2016                            TARGET_FMT_lx "\n", tb->pc);
2017 
2018     cpu_loop_exit_noexc(cpu);
2019 }
2020 
2021 static void print_qht_statistics(struct qht_stats hst)
2022 {
2023     uint32_t hgram_opts;
2024     size_t hgram_bins;
2025     char *hgram;
2026 
2027     if (!hst.head_buckets) {
2028         return;
2029     }
2030     qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2031                 hst.used_head_buckets, hst.head_buckets,
2032                 (double)hst.used_head_buckets / hst.head_buckets * 100);
2033 
2034     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2035     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2036     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2037         hgram_opts |= QDIST_PR_NODECIMAL;
2038     }
2039     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2040     qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2041                 qdist_avg(&hst.occupancy) * 100, hgram);
2042     g_free(hgram);
2043 
2044     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2045     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2046     if (hgram_bins > 10) {
2047         hgram_bins = 10;
2048     } else {
2049         hgram_bins = 0;
2050         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2051     }
2052     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2053     qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2054                 qdist_avg(&hst.chain), hgram);
2055     g_free(hgram);
2056 }
2057 
/* Totals accumulated by tb_tree_stats_iter() over the visited TBs. */
struct tb_tree_stats {
    size_t nb_tbs;            /* number of TBs visited */
    size_t host_size;         /* sum of tb->tc.size */
    size_t target_size;       /* sum of tb->size */
    size_t max_target_size;   /* largest tb->size seen */
    size_t direct_jmp_count;  /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count; /* TBs where jmp_reset_offset[1] is valid too */
    size_t cross_page;        /* TBs whose page_addr[1] is not -1 */
};
2067 
2068 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2069 {
2070     const TranslationBlock *tb = value;
2071     struct tb_tree_stats *tst = data;
2072 
2073     tst->nb_tbs++;
2074     tst->host_size += tb->tc.size;
2075     tst->target_size += tb->size;
2076     if (tb->size > tst->max_target_size) {
2077         tst->max_target_size = tb->size;
2078     }
2079     if (tb->page_addr[1] != -1) {
2080         tst->cross_page++;
2081     }
2082     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2083         tst->direct_jmp_count++;
2084         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2085             tst->direct_jmp2_count++;
2086         }
2087     }
2088     return false;
2089 }
2090 
2091 void dump_exec_info(void)
2092 {
2093     struct tb_tree_stats tst = {};
2094     struct qht_stats hst;
2095     size_t nb_tbs, flush_full, flush_part, flush_elide;
2096 
2097     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2098     nb_tbs = tst.nb_tbs;
2099     /* XXX: avoid using doubles ? */
2100     qemu_printf("Translation buffer state:\n");
2101     /*
2102      * Report total code size including the padding and TB structs;
2103      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2104      * For avg host size we use the precise numbers from tb_tree_stats though.
2105      */
2106     qemu_printf("gen code size       %zu/%zu\n",
2107                 tcg_code_size(), tcg_code_capacity());
2108     qemu_printf("TB count            %zu\n", nb_tbs);
2109     qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2110                 nb_tbs ? tst.target_size / nb_tbs : 0,
2111                 tst.max_target_size);
2112     qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2113                 nb_tbs ? tst.host_size / nb_tbs : 0,
2114                 tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2115     qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2116                 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2117     qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2118                 tst.direct_jmp_count,
2119                 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2120                 tst.direct_jmp2_count,
2121                 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2122 
2123     qht_statistics_init(&tb_ctx.htable, &hst);
2124     print_qht_statistics(hst);
2125     qht_statistics_destroy(&hst);
2126 
2127     qemu_printf("\nStatistics:\n");
2128     qemu_printf("TB flush count      %u\n",
2129                 qatomic_read(&tb_ctx.tb_flush_count));
2130     qemu_printf("TB invalidate count %zu\n",
2131                 tcg_tb_phys_invalidate_count());
2132 
2133     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2134     qemu_printf("TLB full flushes    %zu\n", flush_full);
2135     qemu_printf("TLB partial flushes %zu\n", flush_part);
2136     qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2137     tcg_dump_info();
2138 }
2139 
/* Dump the TCG op counters; simply forwards to tcg_dump_op_count(). */
void dump_opcount_info(void)
{
    tcg_dump_op_count();
}
2144 
2145 #else /* CONFIG_USER_ONLY */
2146 
2147 void cpu_interrupt(CPUState *cpu, int mask)
2148 {
2149     g_assert(qemu_mutex_iothread_locked());
2150     cpu->interrupt_request |= mask;
2151     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2152 }
2153 
2154 /*
2155  * Walks guest process memory "regions" one by one
2156  * and calls callback function 'fn' for each region.
2157  */
2158 struct walk_memory_regions_data {
2159     walk_memory_regions_fn fn;
2160     void *priv;
2161     target_ulong start;
2162     int prot;
2163 };
2164 
2165 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2166                                    target_ulong end, int new_prot)
2167 {
2168     if (data->start != -1u) {
2169         int rc = data->fn(data->priv, data->start, end, data->prot);
2170         if (rc != 0) {
2171             return rc;
2172         }
2173     }
2174 
2175     data->start = (new_prot ? end : -1u);
2176     data->prot = new_prot;
2177 
2178     return 0;
2179 }
2180 
2181 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2182                                  target_ulong base, int level, void **lp)
2183 {
2184     target_ulong pa;
2185     int i, rc;
2186 
2187     if (*lp == NULL) {
2188         return walk_memory_regions_end(data, base, 0);
2189     }
2190 
2191     if (level == 0) {
2192         PageDesc *pd = *lp;
2193 
2194         for (i = 0; i < V_L2_SIZE; ++i) {
2195             int prot = pd[i].flags;
2196 
2197             pa = base | (i << TARGET_PAGE_BITS);
2198             if (prot != data->prot) {
2199                 rc = walk_memory_regions_end(data, pa, prot);
2200                 if (rc != 0) {
2201                     return rc;
2202                 }
2203             }
2204         }
2205     } else {
2206         void **pp = *lp;
2207 
2208         for (i = 0; i < V_L2_SIZE; ++i) {
2209             pa = base | ((target_ulong)i <<
2210                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2211             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2212             if (rc != 0) {
2213                 return rc;
2214             }
2215         }
2216     }
2217 
2218     return 0;
2219 }
2220 
2221 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2222 {
2223     struct walk_memory_regions_data data;
2224     uintptr_t i, l1_sz = v_l1_size;
2225 
2226     data.fn = fn;
2227     data.priv = priv;
2228     data.start = -1u;
2229     data.prot = 0;
2230 
2231     for (i = 0; i < l1_sz; i++) {
2232         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2233         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2234         if (rc != 0) {
2235             return rc;
2236         }
2237     }
2238 
2239     return walk_memory_regions_end(&data, 0, 0);
2240 }
2241 
2242 static int dump_region(void *priv, target_ulong start,
2243     target_ulong end, unsigned long prot)
2244 {
2245     FILE *f = (FILE *)priv;
2246 
2247     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2248         " "TARGET_FMT_lx" %c%c%c\n",
2249         start, end, end - start,
2250         ((prot & PAGE_READ) ? 'r' : '-'),
2251         ((prot & PAGE_WRITE) ? 'w' : '-'),
2252         ((prot & PAGE_EXEC) ? 'x' : '-'));
2253 
2254     return 0;
2255 }
2256 
2257 /* dump memory mappings */
2258 void page_dump(FILE *f)
2259 {
2260     const int length = sizeof(target_ulong) * 2;
2261     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2262             length, "start", length, "end", length, "size", "prot");
2263     walk_memory_regions(f, dump_region);
2264 }
2265 
2266 int page_get_flags(target_ulong address)
2267 {
2268     PageDesc *p;
2269 
2270     p = page_find(address >> TARGET_PAGE_BITS);
2271     if (!p) {
2272         return 0;
2273     }
2274     return p->flags;
2275 }
2276 
2277 /* Modify the flags of a page and invalidate the code if necessary.
2278    The flag PAGE_WRITE_ORG is positioned automatically depending
2279    on PAGE_WRITE.  The mmap_lock should already be held.  */
2280 void page_set_flags(target_ulong start, target_ulong end, int flags)
2281 {
2282     target_ulong addr, len;
2283     bool reset_target_data;
2284 
2285     /* This function should never be called with addresses outside the
2286        guest address space.  If this assert fires, it probably indicates
2287        a missing call to h2g_valid.  */
2288     assert(end - 1 <= GUEST_ADDR_MAX);
2289     assert(start < end);
2290     /* Only set PAGE_ANON with new mappings. */
2291     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2292     assert_memory_lock();
2293 
2294     start = start & TARGET_PAGE_MASK;
2295     end = TARGET_PAGE_ALIGN(end);
2296 
2297     if (flags & PAGE_WRITE) {
2298         flags |= PAGE_WRITE_ORG;
2299     }
2300     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2301     flags &= ~PAGE_RESET;
2302 
2303     for (addr = start, len = end - start;
2304          len != 0;
2305          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2306         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2307 
2308         /* If the write protection bit is set, then we invalidate
2309            the code inside.  */
2310         if (!(p->flags & PAGE_WRITE) &&
2311             (flags & PAGE_WRITE) &&
2312             p->first_tb) {
2313             tb_invalidate_phys_page(addr, 0);
2314         }
2315         if (reset_target_data) {
2316             g_free(p->target_data);
2317             p->target_data = NULL;
2318             p->flags = flags;
2319         } else {
2320             /* Using mprotect on a page does not change MAP_ANON. */
2321             p->flags = (p->flags & PAGE_ANON) | flags;
2322         }
2323     }
2324 }
2325 
2326 void *page_get_target_data(target_ulong address)
2327 {
2328     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2329     return p ? p->target_data : NULL;
2330 }
2331 
2332 void *page_alloc_target_data(target_ulong address, size_t size)
2333 {
2334     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2335     void *ret = NULL;
2336 
2337     if (p->flags & PAGE_VALID) {
2338         ret = p->target_data;
2339         if (!ret) {
2340             p->target_data = ret = g_malloc0(size);
2341         }
2342     }
2343     return ret;
2344 }
2345 
2346 int page_check_range(target_ulong start, target_ulong len, int flags)
2347 {
2348     PageDesc *p;
2349     target_ulong end;
2350     target_ulong addr;
2351 
2352     /* This function should never be called with addresses outside the
2353        guest address space.  If this assert fires, it probably indicates
2354        a missing call to h2g_valid.  */
2355     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2356         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2357     }
2358 
2359     if (len == 0) {
2360         return 0;
2361     }
2362     if (start + len - 1 < start) {
2363         /* We've wrapped around.  */
2364         return -1;
2365     }
2366 
2367     /* must do before we loose bits in the next step */
2368     end = TARGET_PAGE_ALIGN(start + len);
2369     start = start & TARGET_PAGE_MASK;
2370 
2371     for (addr = start, len = end - start;
2372          len != 0;
2373          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2374         p = page_find(addr >> TARGET_PAGE_BITS);
2375         if (!p) {
2376             return -1;
2377         }
2378         if (!(p->flags & PAGE_VALID)) {
2379             return -1;
2380         }
2381 
2382         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2383             return -1;
2384         }
2385         if (flags & PAGE_WRITE) {
2386             if (!(p->flags & PAGE_WRITE_ORG)) {
2387                 return -1;
2388             }
2389             /* unprotect the page if it was put read-only because it
2390                contains translated code */
2391             if (!(p->flags & PAGE_WRITE)) {
2392                 if (!page_unprotect(addr, 0)) {
2393                     return -1;
2394                 }
2395             }
2396         }
2397     }
2398     return 0;
2399 }
2400 
2401 /* called from signal handler: invalidate the code and unprotect the
2402  * page. Return 0 if the fault was not handled, 1 if it was handled,
2403  * and 2 if it was handled but the caller must cause the TB to be
2404  * immediately exited. (We can only return 2 if the 'pc' argument is
2405  * non-zero.)
2406  */
2407 int page_unprotect(target_ulong address, uintptr_t pc)
2408 {
2409     unsigned int prot;
2410     bool current_tb_invalidated;
2411     PageDesc *p;
2412     target_ulong host_start, host_end, addr;
2413 
2414     /* Technically this isn't safe inside a signal handler.  However we
2415        know this only ever happens in a synchronous SEGV handler, so in
2416        practice it seems to be ok.  */
2417     mmap_lock();
2418 
2419     p = page_find(address >> TARGET_PAGE_BITS);
2420     if (!p) {
2421         mmap_unlock();
2422         return 0;
2423     }
2424 
2425     /* if the page was really writable, then we change its
2426        protection back to writable */
2427     if (p->flags & PAGE_WRITE_ORG) {
2428         current_tb_invalidated = false;
2429         if (p->flags & PAGE_WRITE) {
2430             /* If the page is actually marked WRITE then assume this is because
2431              * this thread raced with another one which got here first and
2432              * set the page to PAGE_WRITE and did the TB invalidate for us.
2433              */
2434 #ifdef TARGET_HAS_PRECISE_SMC
2435             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2436             if (current_tb) {
2437                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2438             }
2439 #endif
2440         } else {
2441             host_start = address & qemu_host_page_mask;
2442             host_end = host_start + qemu_host_page_size;
2443 
2444             prot = 0;
2445             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2446                 p = page_find(addr >> TARGET_PAGE_BITS);
2447                 p->flags |= PAGE_WRITE;
2448                 prot |= p->flags;
2449 
2450                 /* and since the content will be modified, we must invalidate
2451                    the corresponding translated code. */
2452                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2453 #ifdef CONFIG_USER_ONLY
2454                 if (DEBUG_TB_CHECK_GATE) {
2455                     tb_invalidate_check(addr);
2456                 }
2457 #endif
2458             }
2459             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2460                      prot & PAGE_BITS);
2461         }
2462         mmap_unlock();
2463         /* If current TB was invalidated return to main loop */
2464         return current_tb_invalidated ? 2 : 1;
2465     }
2466     mmap_unlock();
2467     return 0;
2468 }
2469 #endif /* CONFIG_USER_ONLY */
2470 
2471 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
2472 void tcg_flush_softmmu_tlb(CPUState *cs)
2473 {
2474 #ifdef CONFIG_SOFTMMU
2475     tlb_flush(cs);
2476 #endif
2477 }
2478