xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 306c8721)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-hash.h"
62 #include "tb-context.h"
63 #include "internal.h"
64 
65 /* #define DEBUG_TB_INVALIDATE */
66 /* #define DEBUG_TB_FLUSH */
67 /* make various TB consistency checks */
68 /* #define DEBUG_TB_CHECK */
69 
70 #ifdef DEBUG_TB_INVALIDATE
71 #define DEBUG_TB_INVALIDATE_GATE 1
72 #else
73 #define DEBUG_TB_INVALIDATE_GATE 0
74 #endif
75 
76 #ifdef DEBUG_TB_FLUSH
77 #define DEBUG_TB_FLUSH_GATE 1
78 #else
79 #define DEBUG_TB_FLUSH_GATE 0
80 #endif
81 
82 #if !defined(CONFIG_USER_ONLY)
83 /* TB consistency checks only implemented for usermode emulation.  */
84 #undef DEBUG_TB_CHECK
85 #endif
86 
87 #ifdef DEBUG_TB_CHECK
88 #define DEBUG_TB_CHECK_GATE 1
89 #else
90 #define DEBUG_TB_CHECK_GATE 0
91 #endif
92 
93 /* Access to the various translation structures needs to be serialised via locks
94  * for consistency.
95  * In user-mode emulation, access to the memory-related structures is protected
96  * by mmap_lock.
97  * In !user-mode we use per-page locks.
98  */
99 #ifdef CONFIG_SOFTMMU
100 #define assert_memory_lock()
101 #else
102 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
103 #endif
104 
105 #define SMC_BITMAP_USE_THRESHOLD 10
106 
107 typedef struct PageDesc {
108     /* list of TBs intersecting this ram page */
109     uintptr_t first_tb;
110 #ifdef CONFIG_SOFTMMU
111     /* In order to optimize self-modifying code, we count the number
112        of lookups we do to a given page so that a bitmap can be used. */
113     unsigned long *code_bitmap;
114     unsigned int code_write_count;
115 #else
116     unsigned long flags;
117     void *target_data;
118 #endif
119 #ifndef CONFIG_USER_ONLY
120     QemuSpin lock;
121 #endif
122 } PageDesc;
123 
124 /**
125  * struct page_entry - page descriptor entry
126  * @pd:     pointer to the &struct PageDesc of the page this entry represents
127  * @index:  page index of the page
128  * @locked: whether the page is locked
129  *
130  * This struct helps us keep track of the locked state of a page, without
131  * bloating &struct PageDesc.
132  *
133  * A page lock protects accesses to all fields of &struct PageDesc.
134  *
135  * See also: &struct page_collection.
136  */
137 struct page_entry {
138     PageDesc *pd;
139     tb_page_addr_t index;
140     bool locked;
141 };
142 
143 /**
144  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
145  * @tree:   Binary search tree (BST) of the pages, with key == page index
146  * @max:    Pointer to the page in @tree with the highest page index
147  *
148  * To avoid deadlock we lock pages in ascending order of page index.
149  * When operating on a set of pages, we need to keep track of them so that
150  * we can lock them in order and also unlock them later. For this we collect
151  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
152  * @tree implementation we use does not provide an O(1) operation to obtain the
153  * highest-ranked element, we use @max to keep track of the inserted page
154  * with the highest index. This is valuable because if a page is not in
155  * the tree and its index is higher than @max's, then we can lock it
156  * without breaking the locking order rule.
157  *
158  * Note on naming: 'struct page_set' would be shorter, but we already have a few
159  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
160  *
161  * See also: page_collection_lock().
162  */
163 struct page_collection {
164     GTree *tree;
165     struct page_entry *max;
166 };
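
/*
 * Illustrative example: if pages with indexes 3, 5 and 9 are already in the
 * collection, @max points at 9.  A new page with index 12 can be locked
 * immediately without violating the ascending-order rule, whereas a new page
 * with index 4 must be trylock'ed; if that trylock fails, all locks are
 * dropped and reacquired in ascending order.  See page_trylock_add() and
 * page_collection_lock() below.
 */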
167 
168 /* list iterators for lists of tagged pointers in TranslationBlock */
169 #define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
170     for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
171          tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
172              tb = (TranslationBlock *)((uintptr_t)tb & ~1))
173 
174 #define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
175     TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
176 
177 #define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
178     TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
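
/*
 * Note on the tagged pointers traversed above: each list entry stores a
 * TranslationBlock pointer with the list index (0 or 1) encoded in the low
 * bit.  For example, a PageDesc whose first_tb equals ((uintptr_t)tb | 1)
 * means that @tb is linked on this page through tb->page_next[1], i.e. the
 * page is the second of the (up to) two pages spanned by the TB.
 */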
179 
180 /*
181  * In system mode we want L1_MAP to be based on ram offsets,
182  * while in user mode we want it to be based on virtual addresses.
183  *
184  * TODO: For user mode, see the caveat re host vs guest virtual
185  * address spaces near GUEST_ADDR_MAX.
186  */
187 #if !defined(CONFIG_USER_ONLY)
188 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
189 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
190 #else
191 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
192 #endif
193 #else
194 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
195 #endif
196 
197 /* Size of the L2 (and L3, etc) page tables.  */
198 #define V_L2_BITS 10
199 #define V_L2_SIZE (1 << V_L2_BITS)
200 
201 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
202 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
203                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
204                   * BITS_PER_BYTE);
205 
206 /*
207  * L1 Mapping properties
208  */
209 static int v_l1_size;
210 static int v_l1_shift;
211 static int v_l2_levels;
212 
213 /* The bottom level has pointers to PageDesc, and is indexed by
214  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
215  */
216 #define V_L1_MIN_BITS 4
217 #define V_L1_MAX_BITS (V_L2_BITS + 3)
218 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
219 
220 static void *l1_map[V_L1_MAX_SIZE];
221 
222 TBContext tb_ctx;
223 
224 static void page_table_config_init(void)
225 {
226     uint32_t v_l1_bits;
227 
228     assert(TARGET_PAGE_BITS);
229     /* The bits remaining after N lower levels of page tables.  */
230     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
231     if (v_l1_bits < V_L1_MIN_BITS) {
232         v_l1_bits += V_L2_BITS;
233     }
234 
235     v_l1_size = 1 << v_l1_bits;
236     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
237     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
238 
239     assert(v_l1_bits <= V_L1_MAX_BITS);
240     assert(v_l1_shift % V_L2_BITS == 0);
241     assert(v_l2_levels >= 0);
242 }
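
/*
 * Worked example, assuming L1_MAP_ADDR_SPACE_BITS == 48 and
 * TARGET_PAGE_BITS == 12 (the actual values vary per configuration):
 * 36 index bits remain, 36 % V_L2_BITS == 6 >= V_L1_MIN_BITS, so
 * v_l1_bits = 6, v_l1_size = 64, v_l1_shift = 48 - 12 - 6 = 30 and
 * v_l2_levels = 2.  A lookup then walks a 64-entry L1 table, two
 * 1024-entry intermediate levels and a final 1024-entry block of PageDesc.
 */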
243 
244 /* Encode VAL as a signed leb128 sequence at P.
245    Return P incremented past the encoded value.  */
246 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
247 {
248     int more, byte;
249 
250     do {
251         byte = val & 0x7f;
252         val >>= 7;
253         more = !((val == 0 && (byte & 0x40) == 0)
254                  || (val == -1 && (byte & 0x40) != 0));
255         if (more) {
256             byte |= 0x80;
257         }
258         *p++ = byte;
259     } while (more);
260 
261     return p;
262 }
263 
264 /* Decode a signed leb128 sequence at *PP; increment *PP past the
265    decoded value.  Return the decoded value.  */
266 static target_long decode_sleb128(const uint8_t **pp)
267 {
268     const uint8_t *p = *pp;
269     target_long val = 0;
270     int byte, shift = 0;
271 
272     do {
273         byte = *p++;
274         val |= (target_ulong)(byte & 0x7f) << shift;
275         shift += 7;
276     } while (byte & 0x80);
277     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
278         val |= -(target_ulong)1 << shift;
279     }
280 
281     *pp = p;
282     return val;
283 }
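
/*
 * For illustration, a few values and their sleb128 encodings as produced by
 * encode_sleb128() and reversed by decode_sleb128():
 *   -1   ->  0x7f
 *   64   ->  0xc0 0x00   (a second byte is needed; otherwise bit 6 of the
 *                         first byte would be read back as a sign bit)
 *   128  ->  0x80 0x01
 */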
284 
285 /* Encode the data collected about the instructions while compiling TB.
286    Place the data at BLOCK, and return the number of bytes consumed.
287 
288    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
289    which come from the target's insn_start data, followed by a uintptr_t
290    which comes from the host pc of the end of the code implementing the insn.
291 
292    Each line of the table is encoded as sleb128 deltas from the previous
293    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
294    That is, the first column is seeded with the guest pc, the last column
295    with the host pc, and the middle columns with zeros.  */
296 
297 static int encode_search(TranslationBlock *tb, uint8_t *block)
298 {
299     uint8_t *highwater = tcg_ctx->code_gen_highwater;
300     uint8_t *p = block;
301     int i, j, n;
302 
303     for (i = 0, n = tb->icount; i < n; ++i) {
304         target_ulong prev;
305 
306         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
307             if (i == 0) {
308                 prev = (j == 0 ? tb->pc : 0);
309             } else {
310                 prev = tcg_ctx->gen_insn_data[i - 1][j];
311             }
312             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
313         }
314         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
315         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
316 
317         /* Test for (pending) buffer overflow.  The assumption is that any
318            one row beginning below the high water mark cannot overrun
319            the buffer completely.  Thus we can test for overflow after
320            encoding a row without having to check during encoding.  */
321         if (unlikely(p > highwater)) {
322             return -1;
323         }
324     }
325 
326     return p - block;
327 }
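
/*
 * For illustration (hypothetical numbers, assuming TARGET_INSN_START_WORDS
 * == 1): a TB at guest pc 0x1000 with two insns at 0x1000 and 0x1004, whose
 * generated host code ends at offsets 0x30 and 0x58, is encoded as the
 * sleb128 stream
 *     0x00 0x30    (insn 0: guest pc delta 0 from the tb->pc seed,
 *                   host delta 0x30)
 *     0x04 0x28    (insn 1: guest pc delta 4, host delta 0x28)
 * cpu_restore_state_from_tb() below accumulates these deltas until the
 * host pc passes searched_pc.
 */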
328 
329 /* The cpu state corresponding to 'searched_pc' is restored.
330  * When reset_icount is true, the current TB will be interrupted and
331  * icount should be recalculated.
332  */
333 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
334                                      uintptr_t searched_pc, bool reset_icount)
335 {
336     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
337     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
338     CPUArchState *env = cpu->env_ptr;
339     const uint8_t *p = tb->tc.ptr + tb->tc.size;
340     int i, j, num_insns = tb->icount;
341 #ifdef CONFIG_PROFILER
342     TCGProfile *prof = &tcg_ctx->prof;
343     int64_t ti = profile_getclock();
344 #endif
345 
346     searched_pc -= GETPC_ADJ;
347 
348     if (searched_pc < host_pc) {
349         return -1;
350     }
351 
352     /* Reconstruct the stored insn data while looking for the point at
353        which the end of the insn exceeds the searched_pc.  */
354     for (i = 0; i < num_insns; ++i) {
355         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
356             data[j] += decode_sleb128(&p);
357         }
358         host_pc += decode_sleb128(&p);
359         if (host_pc > searched_pc) {
360             goto found;
361         }
362     }
363     return -1;
364 
365  found:
366     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
367         assert(icount_enabled());
368         /* Reset the cycle counter to the start of the block
369            and shift it to the number of actually executed instructions */
370         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
371     }
372     restore_state_to_opc(env, tb, data);
373 
374 #ifdef CONFIG_PROFILER
375     qatomic_set(&prof->restore_time,
376                 prof->restore_time + profile_getclock() - ti);
377     qatomic_set(&prof->restore_count, prof->restore_count + 1);
378 #endif
379     return 0;
380 }
381 
382 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
383 {
384     /*
385      * The host_pc has to be in the rx region of the code buffer.
386      * If it is not, we will not be able to resolve it here.
387      * The two cases where host_pc will not be correct are:
388      *
389      *  - fault during translation (instruction fetch)
390      *  - fault from helper (not using GETPC() macro)
391      *
392      * Either way we need to return early as we can't resolve it here.
393      */
394     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
395         TranslationBlock *tb = tcg_tb_lookup(host_pc);
396         if (tb) {
397             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
398             return true;
399         }
400     }
401     return false;
402 }
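
/*
 * Illustrative usage sketch (not a function from this file): a typical
 * caller is a helper that detects a guest fault and wants the CPU state
 * synchronized before raising an exception, along the lines of
 *
 *     void helper_check_foo(CPUArchState *env)
 *     {
 *         if (foo_faulted(env)) {               // hypothetical check
 *             cpu_restore_state(env_cpu(env), GETPC(), true);
 *             raise_foo_exception(env);         // hypothetical helper
 *         }
 *     }
 *
 * The important part is passing GETPC(), so that host_pc points back into
 * the code buffer and the lookup above can succeed.
 */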
403 
404 void page_init(void)
405 {
406     page_size_init();
407     page_table_config_init();
408 
409 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
410     {
411 #ifdef HAVE_KINFO_GETVMMAP
412         struct kinfo_vmentry *freep;
413         int i, cnt;
414 
415         freep = kinfo_getvmmap(getpid(), &cnt);
416         if (freep) {
417             mmap_lock();
418             for (i = 0; i < cnt; i++) {
419                 unsigned long startaddr, endaddr;
420 
421                 startaddr = freep[i].kve_start;
422                 endaddr = freep[i].kve_end;
423                 if (h2g_valid(startaddr)) {
424                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
425 
426                     if (h2g_valid(endaddr)) {
427                         endaddr = h2g(endaddr);
428                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
429                     } else {
430 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
431                         endaddr = ~0ul;
432                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
433 #endif
434                     }
435                 }
436             }
437             free(freep);
438             mmap_unlock();
439         }
440 #else
441         FILE *f;
442 
443         last_brk = (unsigned long)sbrk(0);
444 
445         f = fopen("/compat/linux/proc/self/maps", "r");
446         if (f) {
447             mmap_lock();
448 
449             do {
450                 unsigned long startaddr, endaddr;
451                 int n;
452 
453                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
454 
455                 if (n == 2 && h2g_valid(startaddr)) {
456                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
457 
458                     if (h2g_valid(endaddr)) {
459                         endaddr = h2g(endaddr);
460                     } else {
461                         endaddr = ~0ul;
462                     }
463                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
464                 }
465             } while (!feof(f));
466 
467             fclose(f);
468             mmap_unlock();
469         }
470 #endif
471     }
472 #endif
473 }
474 
475 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
476 {
477     PageDesc *pd;
478     void **lp;
479     int i;
480 
481     /* Level 1.  Always allocated.  */
482     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
483 
484     /* Level 2..N-1.  */
485     for (i = v_l2_levels; i > 0; i--) {
486         void **p = qatomic_rcu_read(lp);
487 
488         if (p == NULL) {
489             void *existing;
490 
491             if (!alloc) {
492                 return NULL;
493             }
494             p = g_new0(void *, V_L2_SIZE);
495             existing = qatomic_cmpxchg(lp, NULL, p);
496             if (unlikely(existing)) {
497                 g_free(p);
498                 p = existing;
499             }
500         }
501 
502         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
503     }
504 
505     pd = qatomic_rcu_read(lp);
506     if (pd == NULL) {
507         void *existing;
508 
509         if (!alloc) {
510             return NULL;
511         }
512         pd = g_new0(PageDesc, V_L2_SIZE);
513 #ifndef CONFIG_USER_ONLY
514         {
515             int i;
516 
517             for (i = 0; i < V_L2_SIZE; i++) {
518                 qemu_spin_init(&pd[i].lock);
519             }
520         }
521 #endif
522         existing = qatomic_cmpxchg(lp, NULL, pd);
523         if (unlikely(existing)) {
524 #ifndef CONFIG_USER_ONLY
525             {
526                 int i;
527 
528                 for (i = 0; i < V_L2_SIZE; i++) {
529                     qemu_spin_destroy(&pd[i].lock);
530                 }
531             }
532 #endif
533             g_free(pd);
534             pd = existing;
535         }
536     }
537 
538     return pd + (index & (V_L2_SIZE - 1));
539 }
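
/*
 * With the 48-bit example configuration above (v_l1_shift == 30,
 * v_l2_levels == 2), a page index is consumed as:
 *     L1 slot       = (index >> 30) & (v_l1_size - 1)
 *     level-2 slot  = (index >> 20) & (V_L2_SIZE - 1)
 *     level-1 slot  = (index >> 10) & (V_L2_SIZE - 1)
 *     PageDesc slot =  index        & (V_L2_SIZE - 1)
 * Missing intermediate tables are allocated lazily and published with a
 * compare-and-swap, so concurrent allocators converge on a single table.
 */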
540 
541 static inline PageDesc *page_find(tb_page_addr_t index)
542 {
543     return page_find_alloc(index, 0);
544 }
545 
546 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
547                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
548 
549 /* In user-mode page locks aren't used; mmap_lock is enough */
550 #ifdef CONFIG_USER_ONLY
551 
552 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
553 
554 static inline void page_lock(PageDesc *pd)
555 { }
556 
557 static inline void page_unlock(PageDesc *pd)
558 { }
559 
560 static inline void page_lock_tb(const TranslationBlock *tb)
561 { }
562 
563 static inline void page_unlock_tb(const TranslationBlock *tb)
564 { }
565 
566 struct page_collection *
567 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
568 {
569     return NULL;
570 }
571 
572 void page_collection_unlock(struct page_collection *set)
573 { }
574 #else /* !CONFIG_USER_ONLY */
575 
576 #ifdef CONFIG_DEBUG_TCG
577 
578 static __thread GHashTable *ht_pages_locked_debug;
579 
580 static void ht_pages_locked_debug_init(void)
581 {
582     if (ht_pages_locked_debug) {
583         return;
584     }
585     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
586 }
587 
588 static bool page_is_locked(const PageDesc *pd)
589 {
590     PageDesc *found;
591 
592     ht_pages_locked_debug_init();
593     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
594     return !!found;
595 }
596 
597 static void page_lock__debug(PageDesc *pd)
598 {
599     ht_pages_locked_debug_init();
600     g_assert(!page_is_locked(pd));
601     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
602 }
603 
604 static void page_unlock__debug(const PageDesc *pd)
605 {
606     bool removed;
607 
608     ht_pages_locked_debug_init();
609     g_assert(page_is_locked(pd));
610     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
611     g_assert(removed);
612 }
613 
614 static void
615 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
616 {
617     if (unlikely(!page_is_locked(pd))) {
618         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
619                      pd, file, line);
620         abort();
621     }
622 }
623 
624 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
625 
626 void assert_no_pages_locked(void)
627 {
628     ht_pages_locked_debug_init();
629     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
630 }
631 
632 #else /* !CONFIG_DEBUG_TCG */
633 
634 #define assert_page_locked(pd)
635 
636 static inline void page_lock__debug(const PageDesc *pd)
637 {
638 }
639 
640 static inline void page_unlock__debug(const PageDesc *pd)
641 {
642 }
643 
644 #endif /* CONFIG_DEBUG_TCG */
645 
646 static inline void page_lock(PageDesc *pd)
647 {
648     page_lock__debug(pd);
649     qemu_spin_lock(&pd->lock);
650 }
651 
652 static inline void page_unlock(PageDesc *pd)
653 {
654     qemu_spin_unlock(&pd->lock);
655     page_unlock__debug(pd);
656 }
657 
658 /* lock the page(s) of a TB in the correct acquisition order */
659 static inline void page_lock_tb(const TranslationBlock *tb)
660 {
661     page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
662 }
663 
664 static inline void page_unlock_tb(const TranslationBlock *tb)
665 {
666     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
667 
668     page_unlock(p1);
669     if (unlikely(tb->page_addr[1] != -1)) {
670         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
671 
672         if (p2 != p1) {
673             page_unlock(p2);
674         }
675     }
676 }
677 
678 static inline struct page_entry *
679 page_entry_new(PageDesc *pd, tb_page_addr_t index)
680 {
681     struct page_entry *pe = g_malloc(sizeof(*pe));
682 
683     pe->index = index;
684     pe->pd = pd;
685     pe->locked = false;
686     return pe;
687 }
688 
689 static void page_entry_destroy(gpointer p)
690 {
691     struct page_entry *pe = p;
692 
693     g_assert(pe->locked);
694     page_unlock(pe->pd);
695     g_free(pe);
696 }
697 
698 /* returns false on success */
699 static bool page_entry_trylock(struct page_entry *pe)
700 {
701     bool busy;
702 
703     busy = qemu_spin_trylock(&pe->pd->lock);
704     if (!busy) {
705         g_assert(!pe->locked);
706         pe->locked = true;
707         page_lock__debug(pe->pd);
708     }
709     return busy;
710 }
711 
712 static void do_page_entry_lock(struct page_entry *pe)
713 {
714     page_lock(pe->pd);
715     g_assert(!pe->locked);
716     pe->locked = true;
717 }
718 
719 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
720 {
721     struct page_entry *pe = value;
722 
723     do_page_entry_lock(pe);
724     return FALSE;
725 }
726 
727 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
728 {
729     struct page_entry *pe = value;
730 
731     if (pe->locked) {
732         pe->locked = false;
733         page_unlock(pe->pd);
734     }
735     return FALSE;
736 }
737 
738 /*
739  * Trylock a page, and if successful, add the page to a collection.
740  * Returns true ("busy") if the page could not be locked; false otherwise.
741  */
742 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
743 {
744     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
745     struct page_entry *pe;
746     PageDesc *pd;
747 
748     pe = g_tree_lookup(set->tree, &index);
749     if (pe) {
750         return false;
751     }
752 
753     pd = page_find(index);
754     if (pd == NULL) {
755         return false;
756     }
757 
758     pe = page_entry_new(pd, index);
759     g_tree_insert(set->tree, &pe->index, pe);
760 
761     /*
762      * If this is either (1) the first insertion or (2) a page whose index
763      * is higher than any other so far, just lock the page and move on.
764      */
765     if (set->max == NULL || pe->index > set->max->index) {
766         set->max = pe;
767         do_page_entry_lock(pe);
768         return false;
769     }
770     /*
771      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
772      * locks in order.
773      */
774     return page_entry_trylock(pe);
775 }
776 
777 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
778 {
779     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
780     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
781 
782     if (a == b) {
783         return 0;
784     } else if (a < b) {
785         return -1;
786     }
787     return 1;
788 }
789 
790 /*
791  * Lock a range of pages ([@start,@end[) as well as the pages of all
792  * intersecting TBs.
793  * Locking order: acquire locks in ascending order of page index.
794  */
795 struct page_collection *
796 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
797 {
798     struct page_collection *set = g_malloc(sizeof(*set));
799     tb_page_addr_t index;
800     PageDesc *pd;
801 
802     start >>= TARGET_PAGE_BITS;
803     end   >>= TARGET_PAGE_BITS;
804     g_assert(start <= end);
805 
806     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
807                                 page_entry_destroy);
808     set->max = NULL;
809     assert_no_pages_locked();
810 
811  retry:
812     g_tree_foreach(set->tree, page_entry_lock, NULL);
813 
814     for (index = start; index <= end; index++) {
815         TranslationBlock *tb;
816         int n;
817 
818         pd = page_find(index);
819         if (pd == NULL) {
820             continue;
821         }
822         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
823             g_tree_foreach(set->tree, page_entry_unlock, NULL);
824             goto retry;
825         }
826         assert_page_locked(pd);
827         PAGE_FOR_EACH_TB(pd, tb, n) {
828             if (page_trylock_add(set, tb->page_addr[0]) ||
829                 (tb->page_addr[1] != -1 &&
830                  page_trylock_add(set, tb->page_addr[1]))) {
831                 /* drop all locks, and reacquire in order */
832                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
833                 goto retry;
834             }
835         }
836     }
837     return set;
838 }
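
/*
 * Typical usage (see tb_invalidate_phys_page_range() below): lock the
 * affected range, operate on the pages and their TBs while the locks are
 * held, then release everything in one call:
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... invalidate or otherwise modify TBs intersecting [start, end[ ...
 *     page_collection_unlock(pages);
 */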
839 
840 void page_collection_unlock(struct page_collection *set)
841 {
842     /* entries are unlocked and freed via page_entry_destroy */
843     g_tree_destroy(set->tree);
844     g_free(set);
845 }
846 
847 #endif /* !CONFIG_USER_ONLY */
848 
849 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
850                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
851 {
852     PageDesc *p1, *p2;
853     tb_page_addr_t page1;
854     tb_page_addr_t page2;
855 
856     assert_memory_lock();
857     g_assert(phys1 != -1);
858 
859     page1 = phys1 >> TARGET_PAGE_BITS;
860     page2 = phys2 >> TARGET_PAGE_BITS;
861 
862     p1 = page_find_alloc(page1, alloc);
863     if (ret_p1) {
864         *ret_p1 = p1;
865     }
866     if (likely(phys2 == -1)) {
867         page_lock(p1);
868         return;
869     } else if (page1 == page2) {
870         page_lock(p1);
871         if (ret_p2) {
872             *ret_p2 = p1;
873         }
874         return;
875     }
876     p2 = page_find_alloc(page2, alloc);
877     if (ret_p2) {
878         *ret_p2 = p2;
879     }
880     if (page1 < page2) {
881         page_lock(p1);
882         page_lock(p2);
883     } else {
884         page_lock(p2);
885         page_lock(p1);
886     }
887 }
888 
889 static bool tb_cmp(const void *ap, const void *bp)
890 {
891     const TranslationBlock *a = ap;
892     const TranslationBlock *b = bp;
893 
894     return a->pc == b->pc &&
895         a->cs_base == b->cs_base &&
896         a->flags == b->flags &&
897         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
898         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
899         a->page_addr[0] == b->page_addr[0] &&
900         a->page_addr[1] == b->page_addr[1];
901 }
902 
903 void tb_htable_init(void)
904 {
905     unsigned int mode = QHT_MODE_AUTO_RESIZE;
906 
907     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
908 }
909 
910 /* call with @p->lock held */
911 static inline void invalidate_page_bitmap(PageDesc *p)
912 {
913     assert_page_locked(p);
914 #ifdef CONFIG_SOFTMMU
915     g_free(p->code_bitmap);
916     p->code_bitmap = NULL;
917     p->code_write_count = 0;
918 #endif
919 }
920 
921 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
922 static void page_flush_tb_1(int level, void **lp)
923 {
924     int i;
925 
926     if (*lp == NULL) {
927         return;
928     }
929     if (level == 0) {
930         PageDesc *pd = *lp;
931 
932         for (i = 0; i < V_L2_SIZE; ++i) {
933             page_lock(&pd[i]);
934             pd[i].first_tb = (uintptr_t)NULL;
935             invalidate_page_bitmap(pd + i);
936             page_unlock(&pd[i]);
937         }
938     } else {
939         void **pp = *lp;
940 
941         for (i = 0; i < V_L2_SIZE; ++i) {
942             page_flush_tb_1(level - 1, pp + i);
943         }
944     }
945 }
946 
947 static void page_flush_tb(void)
948 {
949     int i, l1_sz = v_l1_size;
950 
951     for (i = 0; i < l1_sz; i++) {
952         page_flush_tb_1(v_l2_levels, l1_map + i);
953     }
954 }
955 
956 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
957 {
958     const TranslationBlock *tb = value;
959     size_t *size = data;
960 
961     *size += tb->tc.size;
962     return false;
963 }
964 
965 /* flush all the translation blocks */
966 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
967 {
968     bool did_flush = false;
969 
970     mmap_lock();
971     /* If it has already been done on request of another CPU,
972      * just retry.
973      */
974     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
975         goto done;
976     }
977     did_flush = true;
978 
979     if (DEBUG_TB_FLUSH_GATE) {
980         size_t nb_tbs = tcg_nb_tbs();
981         size_t host_size = 0;
982 
983         tcg_tb_foreach(tb_host_size_iter, &host_size);
984         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
985                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
986     }
987 
988     CPU_FOREACH(cpu) {
989         cpu_tb_jmp_cache_clear(cpu);
990     }
991 
992     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
993     page_flush_tb();
994 
995     tcg_region_reset_all();
996     /* XXX: flush processor icache at this point if cache flush is
997        expensive */
998     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
999 
1000 done:
1001     mmap_unlock();
1002     if (did_flush) {
1003         qemu_plugin_flush_cb();
1004     }
1005 }
1006 
1007 void tb_flush(CPUState *cpu)
1008 {
1009     if (tcg_enabled()) {
1010         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1011 
1012         if (cpu_in_exclusive_context(cpu)) {
1013             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1014         } else {
1015             async_safe_run_on_cpu(cpu, do_tb_flush,
1016                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1017         }
1018     }
1019 }
1020 
1021 /*
1022  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1023  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1024  * and let the optimizer get rid of them by wrapping their user-only callers
1025  * with if (DEBUG_TB_CHECK_GATE).
1026  */
1027 #ifdef CONFIG_USER_ONLY
1028 
1029 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1030 {
1031     TranslationBlock *tb = p;
1032     target_ulong addr = *(target_ulong *)userp;
1033 
1034     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1035         printf("ERROR invalidate: address=" TARGET_FMT_lx
1036                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1037     }
1038 }
1039 
1040 /* verify that all the pages have correct rights for code
1041  *
1042  * Called with mmap_lock held.
1043  */
1044 static void tb_invalidate_check(target_ulong address)
1045 {
1046     address &= TARGET_PAGE_MASK;
1047     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1048 }
1049 
1050 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1051 {
1052     TranslationBlock *tb = p;
1053     int flags1, flags2;
1054 
1055     flags1 = page_get_flags(tb->pc);
1056     flags2 = page_get_flags(tb->pc + tb->size - 1);
1057     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1058         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1059                (long)tb->pc, tb->size, flags1, flags2);
1060     }
1061 }
1062 
1063 /* verify that all the pages have correct rights for code */
1064 static void tb_page_check(void)
1065 {
1066     qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1067 }
1068 
1069 #endif /* CONFIG_USER_ONLY */
1070 
1071 /*
1072  * user-mode: call with mmap_lock held
1073  * !user-mode: call with @pd->lock held
1074  */
1075 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1076 {
1077     TranslationBlock *tb1;
1078     uintptr_t *pprev;
1079     unsigned int n1;
1080 
1081     assert_page_locked(pd);
1082     pprev = &pd->first_tb;
1083     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1084         if (tb1 == tb) {
1085             *pprev = tb1->page_next[n1];
1086             return;
1087         }
1088         pprev = &tb1->page_next[n1];
1089     }
1090     g_assert_not_reached();
1091 }
1092 
1093 /* remove @orig from its @n_orig-th jump list */
1094 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1095 {
1096     uintptr_t ptr, ptr_locked;
1097     TranslationBlock *dest;
1098     TranslationBlock *tb;
1099     uintptr_t *pprev;
1100     int n;
1101 
1102     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1103     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1104     dest = (TranslationBlock *)(ptr & ~1);
1105     if (dest == NULL) {
1106         return;
1107     }
1108 
1109     qemu_spin_lock(&dest->jmp_lock);
1110     /*
1111      * While acquiring the lock, the jump might have been removed if the
1112      * destination TB was invalidated; check again.
1113      */
1114     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1115     if (ptr_locked != ptr) {
1116         qemu_spin_unlock(&dest->jmp_lock);
1117         /*
1118          * The only possibility is that the jump was unlinked via
1119          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
1120          * because we set the LSB above.
1121          */
1122         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1123         return;
1124     }
1125     /*
1126      * We first acquired the lock, and since the destination pointer matches,
1127      * we know for sure that @orig is in the jmp list.
1128      */
1129     pprev = &dest->jmp_list_head;
1130     TB_FOR_EACH_JMP(dest, tb, n) {
1131         if (tb == orig && n == n_orig) {
1132             *pprev = tb->jmp_list_next[n];
1133             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1134             qemu_spin_unlock(&dest->jmp_lock);
1135             return;
1136         }
1137         pprev = &tb->jmp_list_next[n];
1138     }
1139     g_assert_not_reached();
1140 }
1141 
1142 /* reset the jump entry 'n' of a TB so that it is not chained to
1143    another TB */
1144 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1145 {
1146     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1147     tb_set_jmp_target(tb, n, addr);
1148 }
1149 
1150 /* remove any jumps to the TB */
1151 static inline void tb_jmp_unlink(TranslationBlock *dest)
1152 {
1153     TranslationBlock *tb;
1154     int n;
1155 
1156     qemu_spin_lock(&dest->jmp_lock);
1157 
1158     TB_FOR_EACH_JMP(dest, tb, n) {
1159         tb_reset_jump(tb, n);
1160         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1161         /* No need to clear the list entry; setting the dest ptr is enough */
1162     }
1163     dest->jmp_list_head = (uintptr_t)NULL;
1164 
1165     qemu_spin_unlock(&dest->jmp_lock);
1166 }
1167 
1168 /*
1169  * In user-mode, call with mmap_lock held.
1170  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1171  * locks held.
1172  */
1173 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1174 {
1175     CPUState *cpu;
1176     PageDesc *p;
1177     uint32_t h;
1178     tb_page_addr_t phys_pc;
1179     uint32_t orig_cflags = tb_cflags(tb);
1180 
1181     assert_memory_lock();
1182 
1183     /* make sure no further incoming jumps will be chained to this TB */
1184     qemu_spin_lock(&tb->jmp_lock);
1185     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1186     qemu_spin_unlock(&tb->jmp_lock);
1187 
1188     /* remove the TB from the hash list */
1189     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1190     h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
1191                      tb->trace_vcpu_dstate);
1192     if (!qht_remove(&tb_ctx.htable, tb, h)) {
1193         return;
1194     }
1195 
1196     /* remove the TB from the page list */
1197     if (rm_from_page_list) {
1198         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1199         tb_page_remove(p, tb);
1200         invalidate_page_bitmap(p);
1201         if (tb->page_addr[1] != -1) {
1202             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1203             tb_page_remove(p, tb);
1204             invalidate_page_bitmap(p);
1205         }
1206     }
1207 
1208     /* remove the TB from each CPU's tb_jmp_cache */
1209     h = tb_jmp_cache_hash_func(tb->pc);
1210     CPU_FOREACH(cpu) {
1211         if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1212             qatomic_set(&cpu->tb_jmp_cache[h], NULL);
1213         }
1214     }
1215 
1216     /* remove this TB from the two jump lists */
1217     tb_remove_from_jmp_list(tb, 0);
1218     tb_remove_from_jmp_list(tb, 1);
1219 
1220     /* remove any remaining jumps to this TB */
1221     tb_jmp_unlink(tb);
1222 
1223     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
1224                 tb_ctx.tb_phys_invalidate_count + 1);
1225 }
1226 
1227 static void tb_phys_invalidate__locked(TranslationBlock *tb)
1228 {
1229     qemu_thread_jit_write();
1230     do_tb_phys_invalidate(tb, true);
1231     qemu_thread_jit_execute();
1232 }
1233 
1234 /* invalidate one TB
1235  *
1236  * Called with mmap_lock held in user-mode.
1237  */
1238 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1239 {
1240     if (page_addr == -1 && tb->page_addr[0] != -1) {
1241         page_lock_tb(tb);
1242         do_tb_phys_invalidate(tb, true);
1243         page_unlock_tb(tb);
1244     } else {
1245         do_tb_phys_invalidate(tb, false);
1246     }
1247 }
1248 
1249 #ifdef CONFIG_SOFTMMU
1250 /* call with @p->lock held */
1251 static void build_page_bitmap(PageDesc *p)
1252 {
1253     int n, tb_start, tb_end;
1254     TranslationBlock *tb;
1255 
1256     assert_page_locked(p);
1257     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1258 
1259     PAGE_FOR_EACH_TB(p, tb, n) {
1260         /* NOTE: this is subtle as a TB may span two physical pages */
1261         if (n == 0) {
1262             /* NOTE: tb_end may be after the end of the page, but
1263                it is not a problem */
1264             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1265             tb_end = tb_start + tb->size;
1266             if (tb_end > TARGET_PAGE_SIZE) {
1267                 tb_end = TARGET_PAGE_SIZE;
1268              }
1269         } else {
1270             tb_start = 0;
1271             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1272         }
1273         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1274     }
1275 }
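
/*
 * For illustration (hypothetical numbers): with 4 KiB target pages, a TB
 * whose pc sits at offset 0xff0 in its first page and whose guest size is
 * 0x30 bytes marks [0xff0, 0x1000) in the first page's bitmap (n == 0,
 * clamped at the page end) and [0, 0x20) in the second page's bitmap
 * (n == 1).
 */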
1276 #endif
1277 
1278 /* add the tb to the target page and protect it if necessary
1279  *
1280  * Called with mmap_lock held for user-mode emulation.
1281  * Called with @p->lock held in !user-mode.
1282  */
1283 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1284                                unsigned int n, tb_page_addr_t page_addr)
1285 {
1286 #ifndef CONFIG_USER_ONLY
1287     bool page_already_protected;
1288 #endif
1289 
1290     assert_page_locked(p);
1291 
1292     tb->page_addr[n] = page_addr;
1293     tb->page_next[n] = p->first_tb;
1294 #ifndef CONFIG_USER_ONLY
1295     page_already_protected = p->first_tb != (uintptr_t)NULL;
1296 #endif
1297     p->first_tb = (uintptr_t)tb | n;
1298     invalidate_page_bitmap(p);
1299 
1300 #if defined(CONFIG_USER_ONLY)
1301     /* translator_loop() must have made all TB pages non-writable */
1302     assert(!(p->flags & PAGE_WRITE));
1303 #else
1304     /* If some code is already present, then the page is already
1305        protected, so we only need to protect it when this is the
1306        first TB allocated in the physical page. */
1307     if (!page_already_protected) {
1308         tlb_protect_code(page_addr);
1309     }
1310 #endif
1311 }
1312 
1313 /*
1314  * Add a new TB and link it to the physical page tables. phys_page2 is
1315  * (-1) to indicate that only one page contains the TB.
1316  *
1317  * Called with mmap_lock held for user-mode emulation.
1318  *
1319  * Returns a pointer to @tb, or a pointer to an existing TB that matches @tb.
1320  * Note that in !user-mode, another thread might have already added a TB
1321  * for the same block of guest code that @tb corresponds to. In that case,
1322  * the caller should discard the original @tb, and use instead the returned TB.
1323  */
1324 static TranslationBlock *
1325 tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1326              tb_page_addr_t phys_page2)
1327 {
1328     PageDesc *p;
1329     PageDesc *p2 = NULL;
1330     void *existing_tb = NULL;
1331     uint32_t h;
1332 
1333     assert_memory_lock();
1334     tcg_debug_assert(!(tb->cflags & CF_INVALID));
1335 
1336     /*
1337      * Add the TB to the page list, first acquiring the pages' locks.
1338      * We keep the locks held until after inserting the TB in the hash table,
1339      * so that if the insertion fails we know for sure that the TBs are still
1340      * in the page descriptors.
1341      * Note that inserting into the hash table first isn't an option, since
1342      * we can only insert TBs that are fully initialized.
1343      */
1344     page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1345     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1346     if (p2) {
1347         tb_page_add(p2, tb, 1, phys_page2);
1348     } else {
1349         tb->page_addr[1] = -1;
1350     }
1351 
1352     /* add in the hash table */
1353     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
1354                      tb->trace_vcpu_dstate);
1355     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1356 
1357     /* remove TB from the page(s) if we couldn't insert it */
1358     if (unlikely(existing_tb)) {
1359         tb_page_remove(p, tb);
1360         invalidate_page_bitmap(p);
1361         if (p2) {
1362             tb_page_remove(p2, tb);
1363             invalidate_page_bitmap(p2);
1364         }
1365         tb = existing_tb;
1366     }
1367 
1368     if (p2 && p2 != p) {
1369         page_unlock(p2);
1370     }
1371     page_unlock(p);
1372 
1373 #ifdef CONFIG_USER_ONLY
1374     if (DEBUG_TB_CHECK_GATE) {
1375         tb_page_check();
1376     }
1377 #endif
1378     return tb;
1379 }
1380 
1381 /* Called with mmap_lock held for user mode emulation.  */
1382 TranslationBlock *tb_gen_code(CPUState *cpu,
1383                               target_ulong pc, target_ulong cs_base,
1384                               uint32_t flags, int cflags)
1385 {
1386     CPUArchState *env = cpu->env_ptr;
1387     TranslationBlock *tb, *existing_tb;
1388     tb_page_addr_t phys_pc, phys_page2;
1389     target_ulong virt_page2;
1390     tcg_insn_unit *gen_code_buf;
1391     int gen_code_size, search_size, max_insns;
1392 #ifdef CONFIG_PROFILER
1393     TCGProfile *prof = &tcg_ctx->prof;
1394     int64_t ti;
1395 #endif
1396     void *host_pc;
1397 
1398     assert_memory_lock();
1399     qemu_thread_jit_write();
1400 
1401     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
1402 
1403     if (phys_pc == -1) {
1404         /* Generate a one-shot TB with 1 insn in it */
1405         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1406     }
1407 
1408     max_insns = cflags & CF_COUNT_MASK;
1409     if (max_insns == 0) {
1410         max_insns = TCG_MAX_INSNS;
1411     }
1412     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
1413 
1414  buffer_overflow:
1415     tb = tcg_tb_alloc(tcg_ctx);
1416     if (unlikely(!tb)) {
1417         /* flush must be done */
1418         tb_flush(cpu);
1419         mmap_unlock();
1420         /* Make the execution loop process the flush as soon as possible.  */
1421         cpu->exception_index = EXCP_INTERRUPT;
1422         cpu_loop_exit(cpu);
1423     }
1424 
1425     gen_code_buf = tcg_ctx->code_gen_ptr;
1426     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1427     tb->pc = pc;
1428     tb->cs_base = cs_base;
1429     tb->flags = flags;
1430     tb->cflags = cflags;
1431     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1432     tcg_ctx->tb_cflags = cflags;
1433  tb_overflow:
1434 
1435 #ifdef CONFIG_PROFILER
1436     /* includes aborted translations because of exceptions */
1437     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1438     ti = profile_getclock();
1439 #endif
1440 
1441     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1442     if (unlikely(gen_code_size != 0)) {
1443         goto error_return;
1444     }
1445 
1446     tcg_func_start(tcg_ctx);
1447 
1448     tcg_ctx->cpu = env_cpu(env);
1449     gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
1450     assert(tb->size != 0);
1451     tcg_ctx->cpu = NULL;
1452     max_insns = tb->icount;
1453 
1454     trace_translate_block(tb, tb->pc, tb->tc.ptr);
1455 
1456     /* generate machine code */
1457     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1458     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1459     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1460     if (TCG_TARGET_HAS_direct_jump) {
1461         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1462         tcg_ctx->tb_jmp_target_addr = NULL;
1463     } else {
1464         tcg_ctx->tb_jmp_insn_offset = NULL;
1465         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1466     }
1467 
1468 #ifdef CONFIG_PROFILER
1469     qatomic_set(&prof->tb_count, prof->tb_count + 1);
1470     qatomic_set(&prof->interm_time,
1471                 prof->interm_time + profile_getclock() - ti);
1472     ti = profile_getclock();
1473 #endif
1474 
1475     gen_code_size = tcg_gen_code(tcg_ctx, tb);
1476     if (unlikely(gen_code_size < 0)) {
1477  error_return:
1478         switch (gen_code_size) {
1479         case -1:
1480             /*
1481              * Overflow of code_gen_buffer, or the current slice of it.
1482              *
1483              * TODO: We don't need to re-do gen_intermediate_code, nor
1484              * should we re-do the tcg optimization currently hidden
1485              * inside tcg_gen_code.  All that should be required is to
1486              * flush the TBs, allocate a new TB, re-initialize it per
1487              * above, and re-do the actual code generation.
1488              */
1489             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1490                           "Restarting code generation for "
1491                           "code_gen_buffer overflow\n");
1492             goto buffer_overflow;
1493 
1494         case -2:
1495             /*
1496              * The code generated for the TranslationBlock is too large.
1497              * The maximum size allowed by the unwind info is 64k.
1498              * There may be stricter constraints from relocations
1499              * in the tcg backend.
1500              *
1501              * Try again with half as many insns as we attempted this time.
1502              * If a single insn overflows, there's a bug somewhere...
1503              */
1504             assert(max_insns > 1);
1505             max_insns /= 2;
1506             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1507                           "Restarting code generation with "
1508                           "smaller translation block (max %d insns)\n",
1509                           max_insns);
1510             goto tb_overflow;
1511 
1512         default:
1513             g_assert_not_reached();
1514         }
1515     }
1516     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1517     if (unlikely(search_size < 0)) {
1518         goto buffer_overflow;
1519     }
1520     tb->tc.size = gen_code_size;
1521 
1522 #ifdef CONFIG_PROFILER
1523     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1524     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1525     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1526     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1527 #endif
1528 
1529 #ifdef DEBUG_DISAS
1530     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1531         qemu_log_in_addr_range(tb->pc)) {
1532         FILE *logfile = qemu_log_trylock();
1533         if (logfile) {
1534             int code_size, data_size;
1535             const tcg_target_ulong *rx_data_gen_ptr;
1536             size_t chunk_start;
1537             int insn = 0;
1538 
1539             if (tcg_ctx->data_gen_ptr) {
1540                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
1541                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
1542                 data_size = gen_code_size - code_size;
1543             } else {
1544                 rx_data_gen_ptr = 0;
1545                 code_size = gen_code_size;
1546                 data_size = 0;
1547             }
1548 
1549             /* Dump header and the first instruction */
1550             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
1551             fprintf(logfile,
1552                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
1553                     tcg_ctx->gen_insn_data[insn][0]);
1554             chunk_start = tcg_ctx->gen_insn_end_off[insn];
1555             disas(logfile, tb->tc.ptr, chunk_start);
1556 
1557             /*
1558              * Dump each instruction chunk, wrapping up empty chunks into
1559              * the next instruction. The whole array is offset so the
1560              * first entry is the beginning of the 2nd instruction.
1561              */
1562             while (insn < tb->icount) {
1563                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
1564                 if (chunk_end > chunk_start) {
1565                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
1566                             tcg_ctx->gen_insn_data[insn][0]);
1567                     disas(logfile, tb->tc.ptr + chunk_start,
1568                           chunk_end - chunk_start);
1569                     chunk_start = chunk_end;
1570                 }
1571                 insn++;
1572             }
1573 
1574             if (chunk_start < code_size) {
1575                 fprintf(logfile, "  -- tb slow paths + alignment\n");
1576                 disas(logfile, tb->tc.ptr + chunk_start,
1577                       code_size - chunk_start);
1578             }
1579 
1580             /* Finally dump any data we may have after the block */
1581             if (data_size) {
1582                 int i;
1583                 fprintf(logfile, "  data: [size=%d]\n", data_size);
1584                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
1585                     if (sizeof(tcg_target_ulong) == 8) {
1586                         fprintf(logfile,
1587                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
1588                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1589                     } else if (sizeof(tcg_target_ulong) == 4) {
1590                         fprintf(logfile,
1591                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
1592                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1593                     } else {
1594                         qemu_build_not_reached();
1595                     }
1596                 }
1597             }
1598             fprintf(logfile, "\n");
1599             qemu_log_unlock(logfile);
1600         }
1601     }
1602 #endif
1603 
1604     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
1605         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1606                  CODE_GEN_ALIGN));
1607 
1608     /* init jump list */
1609     qemu_spin_init(&tb->jmp_lock);
1610     tb->jmp_list_head = (uintptr_t)NULL;
1611     tb->jmp_list_next[0] = (uintptr_t)NULL;
1612     tb->jmp_list_next[1] = (uintptr_t)NULL;
1613     tb->jmp_dest[0] = (uintptr_t)NULL;
1614     tb->jmp_dest[1] = (uintptr_t)NULL;
1615 
1616     /* init original jump addresses which have been set during tcg_gen_code() */
1617     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1618         tb_reset_jump(tb, 0);
1619     }
1620     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1621         tb_reset_jump(tb, 1);
1622     }
1623 
1624     /*
1625      * If the TB is not associated with a physical RAM page then
1626      * it must be a temporary one-insn TB, and we have nothing to do
1627      * except fill in the page_addr[] fields. Return early before
1628      * attempting to link to other TBs or add to the lookup table.
1629      */
1630     if (phys_pc == -1) {
1631         tb->page_addr[0] = tb->page_addr[1] = -1;
1632         return tb;
1633     }
1634 
1635     /*
1636      * Insert TB into the corresponding region tree before publishing it
1637      * through QHT. Otherwise a rewind happening in the TB might fail to
1638      * look itself up using the host PC.
1639      */
1640     tcg_tb_insert(tb);
1641 
1642     /* check next page if needed */
1643     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1644     phys_page2 = -1;
1645     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1646         phys_page2 = get_page_addr_code(env, virt_page2);
1647     }
1648     /*
1649      * No explicit memory barrier is required -- tb_link_page() makes the
1650      * TB visible in a consistent state.
1651      */
1652     existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1653     /* if the TB already exists, discard what we just translated */
1654     if (unlikely(existing_tb != tb)) {
1655         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1656 
1657         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1658         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1659         tcg_tb_remove(tb);
1660         return existing_tb;
1661     }
1662     return tb;
1663 }
1664 
1665 /*
1666  * @p must be non-NULL.
1667  * user-mode: call with mmap_lock held.
1668  * !user-mode: call with all @pages locked.
1669  */
1670 static void
1671 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1672                                       PageDesc *p, tb_page_addr_t start,
1673                                       tb_page_addr_t end,
1674                                       uintptr_t retaddr)
1675 {
1676     TranslationBlock *tb;
1677     tb_page_addr_t tb_start, tb_end;
1678     int n;
1679 #ifdef TARGET_HAS_PRECISE_SMC
1680     CPUState *cpu = current_cpu;
1681     CPUArchState *env = NULL;
1682     bool current_tb_not_found = retaddr != 0;
1683     bool current_tb_modified = false;
1684     TranslationBlock *current_tb = NULL;
1685     target_ulong current_pc = 0;
1686     target_ulong current_cs_base = 0;
1687     uint32_t current_flags = 0;
1688 #endif /* TARGET_HAS_PRECISE_SMC */
1689 
1690     assert_page_locked(p);
1691 
1692 #if defined(TARGET_HAS_PRECISE_SMC)
1693     if (cpu != NULL) {
1694         env = cpu->env_ptr;
1695     }
1696 #endif
1697 
1698     /* we remove all the TBs in the range [start, end[ */
1699     /* XXX: see if in some cases it could be faster to invalidate all
1700        the code */
1701     PAGE_FOR_EACH_TB(p, tb, n) {
1702         assert_page_locked(p);
1703         /* NOTE: this is subtle as a TB may span two physical pages */
1704         if (n == 0) {
1705             /* NOTE: tb_end may be after the end of the page, but
1706                it is not a problem */
1707             tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1708             tb_end = tb_start + tb->size;
1709         } else {
1710             tb_start = tb->page_addr[1];
1711             tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1712         }
1713         if (!(tb_end <= start || tb_start >= end)) {
1714 #ifdef TARGET_HAS_PRECISE_SMC
1715             if (current_tb_not_found) {
1716                 current_tb_not_found = false;
1717                 /* now we have a real cpu fault */
1718                 current_tb = tcg_tb_lookup(retaddr);
1719             }
1720             if (current_tb == tb &&
1721                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1722                 /*
1723                  * If we are modifying the current TB, we must stop
1724                  * its execution. We could be more precise by checking
1725                  * that the modification is after the current PC, but it
1726                  * would require a specialized function to partially
1727                  * restore the CPU state.
1728                  */
1729                 current_tb_modified = true;
1730                 cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
1731                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1732                                      &current_flags);
1733             }
1734 #endif /* TARGET_HAS_PRECISE_SMC */
1735             tb_phys_invalidate__locked(tb);
1736         }
1737     }
1738 #if !defined(CONFIG_USER_ONLY)
1739     /* if no code remaining, no need to continue to use slow writes */
1740     if (!p->first_tb) {
1741         invalidate_page_bitmap(p);
1742         tlb_unprotect_code(start);
1743     }
1744 #endif
1745 #ifdef TARGET_HAS_PRECISE_SMC
1746     if (current_tb_modified) {
1747         page_collection_unlock(pages);
1748         /* Force execution of one insn next time.  */
1749         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1750         mmap_unlock();
1751         cpu_loop_exit_noexc(cpu);
1752     }
1753 #endif
1754 }
1755 
1756 /*
1757  * Invalidate all TBs which intersect with the target physical address range
1758  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1759  * Because the __locked() helper is invoked here with a zero retaddr, the
1760  * currently executing TB is not restarted even if the modified code lies
1761  * inside it.
1762  *
1763  * Called with mmap_lock held for user-mode emulation.
1764  */
1765 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1766 {
1767     struct page_collection *pages;
1768     PageDesc *p;
1769 
1770     assert_memory_lock();
1771 
1772     p = page_find(start >> TARGET_PAGE_BITS);
1773     if (p == NULL) {
1774         return;
1775     }
1776     pages = page_collection_lock(start, end);
1777     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1778     page_collection_unlock(pages);
1779 }
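
/*
 * A minimal usage sketch (hypothetical caller): drop every TB that overlaps
 * a small store whose bytes are known to stay within one physical page.
 *
 *     tb_invalidate_phys_page_range(ram_addr, ram_addr + len);
 */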
1780 
1781 /*
1782  * Invalidate all TBs which intersect with the target physical address range
1783  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1784  * As above, the __locked() helper is invoked with a zero retaddr, so the
1785  * currently executing TB is not restarted even if the modified code lies
1786  * inside it.
1787  *
1788  * Called with mmap_lock held for user-mode emulation.
1789  */
1790 #ifdef CONFIG_SOFTMMU
1791 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1792 #else
1793 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1794 #endif
1795 {
1796     struct page_collection *pages;
1797     tb_page_addr_t next;
1798 
1799     assert_memory_lock();
1800 
1801     pages = page_collection_lock(start, end);
1802     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1803          start < end;
1804          start = next, next += TARGET_PAGE_SIZE) {
1805         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1806         tb_page_addr_t bound = MIN(next, end);
1807 
1808         if (pd == NULL) {
1809             continue;
1810         }
1811         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1812     }
1813     page_collection_unlock(pages);
1814 }
1815 
1816 #ifdef CONFIG_SOFTMMU
1817 /* len must be <= 8 and start must be a multiple of len.
1818  * Called via softmmu_template.h when code areas are written to with
1819  * iothread mutex not held.
1820  *
1821  * Call with all @pages in the range [@start, @start + len[ locked.
1822  */
1823 void tb_invalidate_phys_page_fast(struct page_collection *pages,
1824                                   tb_page_addr_t start, int len,
1825                                   uintptr_t retaddr)
1826 {
1827     PageDesc *p;
1828 
1829     assert_memory_lock();
1830 
1831     p = page_find(start >> TARGET_PAGE_BITS);
1832     if (!p) {
1833         return;
1834     }
1835 
1836     assert_page_locked(p);
1837     if (!p->code_bitmap &&
1838         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
1839         build_page_bitmap(p);
1840     }
1841     if (p->code_bitmap) {
1842         unsigned int nr;
1843         unsigned long b;
1844 
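        /*
         * code_bitmap has one bit per byte of this page that is covered by
         * a TB (filled in by build_page_bitmap() above); check whether any
         * of the @len bytes being written overlap translated code before
         * falling back to a full range invalidation.
         */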
1845         nr = start & ~TARGET_PAGE_MASK;
1846         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
1847         if (b & ((1 << len) - 1)) {
1848             goto do_invalidate;
1849         }
1850     } else {
1851     do_invalidate:
1852         tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
1853                                               retaddr);
1854     }
1855 }
1856 #else
1857 /* Called with mmap_lock held. If pc is not 0 then it indicates the
1858  * host PC of the faulting store instruction that caused this invalidate.
1859  * Returns true if the caller needs to abort execution of the current
1860  * TB (because it was modified by this store and the guest CPU has
1861  * precise-SMC semantics).
1862  */
1863 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
1864 {
1865     TranslationBlock *tb;
1866     PageDesc *p;
1867     int n;
1868 #ifdef TARGET_HAS_PRECISE_SMC
1869     TranslationBlock *current_tb = NULL;
1870     CPUState *cpu = current_cpu;
1871     CPUArchState *env = NULL;
1872     int current_tb_modified = 0;
1873     target_ulong current_pc = 0;
1874     target_ulong current_cs_base = 0;
1875     uint32_t current_flags = 0;
1876 #endif
1877 
1878     assert_memory_lock();
1879 
1880     addr &= TARGET_PAGE_MASK;
1881     p = page_find(addr >> TARGET_PAGE_BITS);
1882     if (!p) {
1883         return false;
1884     }
1885 
1886 #ifdef TARGET_HAS_PRECISE_SMC
1887     if (p->first_tb && pc != 0) {
1888         current_tb = tcg_tb_lookup(pc);
1889     }
1890     if (cpu != NULL) {
1891         env = cpu->env_ptr;
1892     }
1893 #endif
1894     assert_page_locked(p);
1895     PAGE_FOR_EACH_TB(p, tb, n) {
1896 #ifdef TARGET_HAS_PRECISE_SMC
1897         if (current_tb == tb &&
1898             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1899             /*
1900              * If we are modifying the current TB, we must stop its
1901              * execution. We could be more precise by checking that the
1902              * modification is after the current PC, but it would require
1903              * a specialized function to partially restore the CPU state.
1904              */
1905             current_tb_modified = 1;
1906             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
1907             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1908                                  &current_flags);
1909         }
1910 #endif /* TARGET_HAS_PRECISE_SMC */
1911         tb_phys_invalidate(tb, addr);
1912     }
1913     p->first_tb = (uintptr_t)NULL;
1914 #ifdef TARGET_HAS_PRECISE_SMC
1915     if (current_tb_modified) {
1916         /* Force execution of one insn next time.  */
1917         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1918         return true;
1919     }
1920 #endif
1921 
1922     return false;
1923 }
1924 #endif
1925 
1926 /* user-mode: call with mmap_lock held */
1927 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1928 {
1929     TranslationBlock *tb;
1930 
1931     assert_memory_lock();
1932 
1933     tb = tcg_tb_lookup(retaddr);
1934     if (tb) {
1935         /* We can use retranslation to find the PC.  */
1936         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1937         tb_phys_invalidate(tb, -1);
1938     } else {
1939         /* The exception probably happened in a helper.  The CPU state should
1940            have been saved before calling it. Fetch the PC from there.  */
1941         CPUArchState *env = cpu->env_ptr;
1942         target_ulong pc, cs_base;
1943         tb_page_addr_t addr;
1944         uint32_t flags;
1945 
1946         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1947         addr = get_page_addr_code(env, pc);
1948         if (addr != -1) {
1949             tb_invalidate_phys_range(addr, addr + 1);
1950         }
1951     }
1952 }
1953 
1954 #ifndef CONFIG_USER_ONLY
1955 /*
1956  * In deterministic execution mode, instructions doing device I/Os
1957  * must be at the end of the TB.
1958  *
1959  * Called by softmmu_template.h, with iothread mutex not held.
1960  */
1961 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1962 {
1963     TranslationBlock *tb;
1964     CPUClass *cc;
1965     uint32_t n;
1966 
1967     tb = tcg_tb_lookup(retaddr);
1968     if (!tb) {
1969         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1970                   (void *)retaddr);
1971     }
1972     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1973 
1974     /*
1975      * Some guests must re-execute the branch when re-executing a delay
1976      * slot instruction.  When this is the case, adjust icount and N
1977      * to account for the re-execution of the branch.
1978      */
1979     n = 1;
1980     cc = CPU_GET_CLASS(cpu);
1981     if (cc->tcg_ops->io_recompile_replay_branch &&
1982         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1983         cpu_neg(cpu)->icount_decr.u16.low++;
1984         n = 2;
1985     }
1986 
1987     /*
1988      * Exit the loop and potentially generate a new TB executing just
1989      * the I/O insns. We also limit instrumentation to memory
1990      * operations only (which execute after completion) so we don't
1991      * double instrument the instruction.
1992      */
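    /* The low CF_COUNT_MASK bits of the new cflags carry the insn count @n. */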
1993     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1994 
1995     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
1996                            "cpu_io_recompile: rewound execution of TB to "
1997                            TARGET_FMT_lx "\n", tb->pc);
1998 
1999     cpu_loop_exit_noexc(cpu);
2000 }
2001 
2002 static void print_qht_statistics(struct qht_stats hst, GString *buf)
2003 {
2004     uint32_t hgram_opts;
2005     size_t hgram_bins;
2006     char *hgram;
2007 
2008     if (!hst.head_buckets) {
2009         return;
2010     }
2011     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
2012                            "(%0.2f%% head buckets used)\n",
2013                            hst.used_head_buckets, hst.head_buckets,
2014                            (double)hst.used_head_buckets /
2015                            hst.head_buckets * 100);
2016 
2017     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2018     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2019     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2020         hgram_opts |= QDIST_PR_NODECIMAL;
2021     }
2022     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2023     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
2024                            "Histogram: %s\n",
2025                            qdist_avg(&hst.occupancy) * 100, hgram);
2026     g_free(hgram);
2027 
2028     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2029     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2030     if (hgram_bins > 10) {
2031         hgram_bins = 10;
2032     } else {
2033         hgram_bins = 0;
2034         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2035     }
2036     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2037     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
2038                            "Histogram: %s\n",
2039                            qdist_avg(&hst.chain), hgram);
2040     g_free(hgram);
2041 }
2042 
2043 struct tb_tree_stats {
2044     size_t nb_tbs;
2045     size_t host_size;
2046     size_t target_size;
2047     size_t max_target_size;
2048     size_t direct_jmp_count;
2049     size_t direct_jmp2_count;
2050     size_t cross_page;
2051 };
2052 
2053 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2054 {
2055     const TranslationBlock *tb = value;
2056     struct tb_tree_stats *tst = data;
2057 
2058     tst->nb_tbs++;
2059     tst->host_size += tb->tc.size;
2060     tst->target_size += tb->size;
2061     if (tb->size > tst->max_target_size) {
2062         tst->max_target_size = tb->size;
2063     }
2064     if (tb->page_addr[1] != -1) {
2065         tst->cross_page++;
2066     }
2067     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2068         tst->direct_jmp_count++;
2069         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2070             tst->direct_jmp2_count++;
2071         }
2072     }
2073     return false;
2074 }
2075 
2076 void dump_exec_info(GString *buf)
2077 {
2078     struct tb_tree_stats tst = {};
2079     struct qht_stats hst;
2080     size_t nb_tbs, flush_full, flush_part, flush_elide;
2081 
2082     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2083     nb_tbs = tst.nb_tbs;
2084     /* XXX: avoid using doubles ? */
2085     g_string_append_printf(buf, "Translation buffer state:\n");
2086     /*
2087      * Report total code size including the padding and TB structs;
2088      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2089      * For avg host size we use the precise numbers from tb_tree_stats though.
2090      */
2091     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
2092                            tcg_code_size(), tcg_code_capacity());
2093     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
2094     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
2095                            nb_tbs ? tst.target_size / nb_tbs : 0,
2096                            tst.max_target_size);
2097     g_string_append_printf(buf, "TB avg host size    %zu bytes "
2098                            "(expansion ratio: %0.1f)\n",
2099                            nb_tbs ? tst.host_size / nb_tbs : 0,
2100                            tst.target_size ?
2101                            (double)tst.host_size / tst.target_size : 0);
2102     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
2103                            tst.cross_page,
2104                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2105     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
2106                            "(2 jumps=%zu %zu%%)\n",
2107                            tst.direct_jmp_count,
2108                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2109                            tst.direct_jmp2_count,
2110                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2111 
2112     qht_statistics_init(&tb_ctx.htable, &hst);
2113     print_qht_statistics(hst, buf);
2114     qht_statistics_destroy(&hst);
2115 
2116     g_string_append_printf(buf, "\nStatistics:\n");
2117     g_string_append_printf(buf, "TB flush count      %u\n",
2118                            qatomic_read(&tb_ctx.tb_flush_count));
2119     g_string_append_printf(buf, "TB invalidate count %u\n",
2120                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
2121 
2122     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2123     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
2124     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
2125     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
2126     tcg_dump_info(buf);
2127 }
2128 
2129 #else /* CONFIG_USER_ONLY */
2130 
2131 void cpu_interrupt(CPUState *cpu, int mask)
2132 {
2133     g_assert(qemu_mutex_iothread_locked());
2134     cpu->interrupt_request |= mask;
2135     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2136 }
2137 
2138 /*
2139  * Walks guest process memory "regions" one by one
2140  * and calls callback function 'fn' for each region.
2141  */
2142 struct walk_memory_regions_data {
2143     walk_memory_regions_fn fn;
2144     void *priv;
2145     target_ulong start;
2146     int prot;
2147 };
2148 
2149 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2150                                    target_ulong end, int new_prot)
2151 {
2152     if (data->start != -1u) {
2153         int rc = data->fn(data->priv, data->start, end, data->prot);
2154         if (rc != 0) {
2155             return rc;
2156         }
2157     }
2158 
2159     data->start = (new_prot ? end : -1u);
2160     data->prot = new_prot;
2161 
2162     return 0;
2163 }
2164 
2165 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2166                                  target_ulong base, int level, void **lp)
2167 {
2168     target_ulong pa;
2169     int i, rc;
2170 
2171     if (*lp == NULL) {
2172         return walk_memory_regions_end(data, base, 0);
2173     }
2174 
2175     if (level == 0) {
2176         PageDesc *pd = *lp;
2177 
2178         for (i = 0; i < V_L2_SIZE; ++i) {
2179             int prot = pd[i].flags;
2180 
2181             pa = base | (i << TARGET_PAGE_BITS);
2182             if (prot != data->prot) {
2183                 rc = walk_memory_regions_end(data, pa, prot);
2184                 if (rc != 0) {
2185                     return rc;
2186                 }
2187             }
2188         }
2189     } else {
2190         void **pp = *lp;
2191 
2192         for (i = 0; i < V_L2_SIZE; ++i) {
2193             pa = base | ((target_ulong)i <<
2194                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2195             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2196             if (rc != 0) {
2197                 return rc;
2198             }
2199         }
2200     }
2201 
2202     return 0;
2203 }
2204 
2205 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2206 {
2207     struct walk_memory_regions_data data;
2208     uintptr_t i, l1_sz = v_l1_size;
2209 
2210     data.fn = fn;
2211     data.priv = priv;
2212     data.start = -1u;
2213     data.prot = 0;
2214 
2215     for (i = 0; i < l1_sz; i++) {
2216         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2217         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2218         if (rc != 0) {
2219             return rc;
2220         }
2221     }
2222 
2223     return walk_memory_regions_end(&data, 0, 0);
2224 }
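
/*
 * A minimal sketch of a walk_memory_regions() callback (hypothetical helper,
 * not part of this file): count the regions mapped with PAGE_EXEC.
 *
 *     static int count_exec_region(void *priv, target_ulong start,
 *                                  target_ulong end, unsigned long prot)
 *     {
 *         if (prot & PAGE_EXEC) {
 *             (*(size_t *)priv)++;
 *         }
 *         return 0;
 *     }
 *
 *     size_t n_exec = 0;
 *     walk_memory_regions(&n_exec, count_exec_region);
 */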
2225 
2226 static int dump_region(void *priv, target_ulong start,
2227                        target_ulong end, unsigned long prot)
2228 {
2229     FILE *f = (FILE *)priv;
2230 
2231     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2232         " "TARGET_FMT_lx" %c%c%c\n",
2233         start, end, end - start,
2234         ((prot & PAGE_READ) ? 'r' : '-'),
2235         ((prot & PAGE_WRITE) ? 'w' : '-'),
2236         ((prot & PAGE_EXEC) ? 'x' : '-'));
2237 
2238     return 0;
2239 }
2240 
2241 /* dump memory mappings */
2242 void page_dump(FILE *f)
2243 {
2244     const int length = sizeof(target_ulong) * 2;
2245     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2246             length, "start", length, "end", length, "size", "prot");
2247     walk_memory_regions(f, dump_region);
2248 }
2249 
2250 int page_get_flags(target_ulong address)
2251 {
2252     PageDesc *p;
2253 
2254     p = page_find(address >> TARGET_PAGE_BITS);
2255     if (!p) {
2256         return 0;
2257     }
2258     return p->flags;
2259 }
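
/*
 * A minimal usage sketch (hypothetical caller): refuse to touch a guest
 * address that is not mapped readable.
 *
 *     if (!(page_get_flags(addr) & PAGE_READ)) {
 *         return -1;   // report the fault to the guest
 *     }
 */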
2260 
2261 /*
2262  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
2263  * By default, they are not kept.
2264  */
2265 #ifndef PAGE_TARGET_STICKY
2266 #define PAGE_TARGET_STICKY  0
2267 #endif
2268 #define PAGE_STICKY  (PAGE_ANON | PAGE_TARGET_STICKY)
2269 
2270 /* Modify the flags of a page and invalidate the code if necessary.
2271    The flag PAGE_WRITE_ORG is set automatically depending
2272    on PAGE_WRITE.  The mmap_lock should already be held.  */
2273 void page_set_flags(target_ulong start, target_ulong end, int flags)
2274 {
2275     target_ulong addr, len;
2276     bool reset_target_data;
2277 
2278     /* This function should never be called with addresses outside the
2279        guest address space.  If this assert fires, it probably indicates
2280        a missing call to h2g_valid.  */
2281     assert(end - 1 <= GUEST_ADDR_MAX);
2282     assert(start < end);
2283     /* Only set PAGE_ANON with new mappings. */
2284     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2285     assert_memory_lock();
2286 
2287     start = start & TARGET_PAGE_MASK;
2288     end = TARGET_PAGE_ALIGN(end);
2289 
2290     if (flags & PAGE_WRITE) {
2291         flags |= PAGE_WRITE_ORG;
2292     }
2293     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2294     flags &= ~PAGE_RESET;
2295 
2296     for (addr = start, len = end - start;
2297          len != 0;
2298          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2299         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2300 
2301         /* If the page is being switched from non-writable to writable,
2302            invalidate any code it contains.  */
2303         if (!(p->flags & PAGE_WRITE) &&
2304             (flags & PAGE_WRITE) &&
2305             p->first_tb) {
2306             tb_invalidate_phys_page(addr, 0);
2307         }
2308         if (reset_target_data) {
2309             g_free(p->target_data);
2310             p->target_data = NULL;
2311             p->flags = flags;
2312         } else {
2313             /* Using mprotect on a page does not change sticky bits. */
2314             p->flags = (p->flags & PAGE_STICKY) | flags;
2315         }
2316     }
2317 }
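
/*
 * A minimal sketch of how an mmap-style caller might flag a freshly created
 * anonymous mapping (hypothetical values; note the assertion above requires
 * PAGE_RESET whenever PAGE_ANON is set):
 *
 *     page_set_flags(start, start + len,
 *                    prot | PAGE_VALID | PAGE_ANON | PAGE_RESET);
 */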
2318 
2319 void page_reset_target_data(target_ulong start, target_ulong end)
2320 {
2321     target_ulong addr, len;
2322 
2323     /*
2324      * This function should never be called with addresses outside the
2325      * guest address space.  If this assert fires, it probably indicates
2326      * a missing call to h2g_valid.
2327      */
2328     assert(end - 1 <= GUEST_ADDR_MAX);
2329     assert(start < end);
2330     assert_memory_lock();
2331 
2332     start = start & TARGET_PAGE_MASK;
2333     end = TARGET_PAGE_ALIGN(end);
2334 
2335     for (addr = start, len = end - start;
2336          len != 0;
2337          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2338         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2339 
2340         g_free(p->target_data);
2341         p->target_data = NULL;
2342     }
2343 }
2344 
2345 void *page_get_target_data(target_ulong address)
2346 {
2347     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2348     return p ? p->target_data : NULL;
2349 }
2350 
2351 void *page_alloc_target_data(target_ulong address, size_t size)
2352 {
2353     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2354     void *ret = NULL;
2355 
2356     if (p->flags & PAGE_VALID) {
2357         ret = p->target_data;
2358         if (!ret) {
2359             p->target_data = ret = g_malloc0(size);
2360         }
2361     }
2362     return ret;
2363 }
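
/*
 * A minimal usage sketch (hypothetical target code; TARGET_DATA_SIZE is an
 * illustrative constant): lazily allocate per-page metadata, which only
 * succeeds while the page is mapped (PAGE_VALID).
 *
 *     void *data = page_alloc_target_data(addr, TARGET_DATA_SIZE);
 *     if (data == NULL) {
 *         // the page is not valid; treat the access as a fault
 *     }
 */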
2364 
2365 int page_check_range(target_ulong start, target_ulong len, int flags)
2366 {
2367     PageDesc *p;
2368     target_ulong end;
2369     target_ulong addr;
2370 
2371     /* This function should never be called with addresses outside the
2372        guest address space.  If this assert fires, it probably indicates
2373        a missing call to h2g_valid.  */
2374     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2375         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2376     }
2377 
2378     if (len == 0) {
2379         return 0;
2380     }
2381     if (start + len - 1 < start) {
2382         /* We've wrapped around.  */
2383         return -1;
2384     }
2385 
2386     /* must do this before we lose bits in the next step */
2387     end = TARGET_PAGE_ALIGN(start + len);
2388     start = start & TARGET_PAGE_MASK;
2389 
2390     for (addr = start, len = end - start;
2391          len != 0;
2392          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2393         p = page_find(addr >> TARGET_PAGE_BITS);
2394         if (!p) {
2395             return -1;
2396         }
2397         if (!(p->flags & PAGE_VALID)) {
2398             return -1;
2399         }
2400 
2401         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2402             return -1;
2403         }
2404         if (flags & PAGE_WRITE) {
2405             if (!(p->flags & PAGE_WRITE_ORG)) {
2406                 return -1;
2407             }
2408             /* unprotect the page if it was put read-only because it
2409                contains translated code */
2410             if (!(p->flags & PAGE_WRITE)) {
2411                 if (!page_unprotect(addr, 0)) {
2412                     return -1;
2413                 }
2414             }
2415         }
2416     }
2417     return 0;
2418 }
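
/*
 * A minimal usage sketch (hypothetical caller): verify that a guest buffer
 * may be read and written before copying data into it.
 *
 *     if (page_check_range(guest_addr, size, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -1;   // report EFAULT to the guest
 *     }
 */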
2419 
2420 void page_protect(tb_page_addr_t page_addr)
2421 {
2422     target_ulong addr;
2423     PageDesc *p;
2424     int prot;
2425 
2426     p = page_find(page_addr >> TARGET_PAGE_BITS);
2427     if (p && (p->flags & PAGE_WRITE)) {
2428         /*
2429          * Force the host page to be non-writable (writes will take a page fault +
2430          * mprotect overhead).
2431          */
2432         page_addr &= qemu_host_page_mask;
2433         prot = 0;
2434         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
2435              addr += TARGET_PAGE_SIZE) {
2436 
2437             p = page_find(addr >> TARGET_PAGE_BITS);
2438             if (!p) {
2439                 continue;
2440             }
2441             prot |= p->flags;
2442             p->flags &= ~PAGE_WRITE;
2443         }
2444         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
2445                  (prot & PAGE_BITS) & ~PAGE_WRITE);
2446         if (DEBUG_TB_INVALIDATE_GATE) {
2447             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
2448         }
2449     }
2450 }
2451 
2452 /* called from signal handler: invalidate the code and unprotect the
2453  * page. Return 0 if the fault was not handled, 1 if it was handled,
2454  * and 2 if it was handled but the caller must cause the TB to be
2455  * immediately exited. (We can only return 2 if the 'pc' argument is
2456  * non-zero.)
2457  */
2458 int page_unprotect(target_ulong address, uintptr_t pc)
2459 {
2460     unsigned int prot;
2461     bool current_tb_invalidated;
2462     PageDesc *p;
2463     target_ulong host_start, host_end, addr;
2464 
2465     /* Technically this isn't safe inside a signal handler.  However we
2466        know this only ever happens in a synchronous SEGV handler, so in
2467        practice it seems to be ok.  */
2468     mmap_lock();
2469 
2470     p = page_find(address >> TARGET_PAGE_BITS);
2471     if (!p) {
2472         mmap_unlock();
2473         return 0;
2474     }
2475 
2476     /* if the page was really writable, then we change its
2477        protection back to writable */
2478     if (p->flags & PAGE_WRITE_ORG) {
2479         current_tb_invalidated = false;
2480         if (p->flags & PAGE_WRITE) {
2481             /* If the page is actually marked WRITE then assume this is because
2482              * this thread raced with another one which got here first and
2483              * set the page to PAGE_WRITE and did the TB invalidate for us.
2484              */
2485 #ifdef TARGET_HAS_PRECISE_SMC
2486             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2487             if (current_tb) {
2488                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2489             }
2490 #endif
2491         } else {
2492             host_start = address & qemu_host_page_mask;
2493             host_end = host_start + qemu_host_page_size;
2494 
2495             prot = 0;
2496             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2497                 p = page_find(addr >> TARGET_PAGE_BITS);
2498                 p->flags |= PAGE_WRITE;
2499                 prot |= p->flags;
2500 
2501                 /* and since the content will be modified, we must invalidate
2502                    the corresponding translated code. */
2503                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2504 #ifdef CONFIG_USER_ONLY
2505                 if (DEBUG_TB_CHECK_GATE) {
2506                     tb_invalidate_check(addr);
2507                 }
2508 #endif
2509             }
2510             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2511                      prot & PAGE_BITS);
2512         }
2513         mmap_unlock();
2514         /* If the current TB was invalidated, return to the main loop.  */
2515         return current_tb_invalidated ? 2 : 1;
2516     }
2517     mmap_unlock();
2518     return 0;
2519 }
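
/*
 * A minimal sketch of how a SEGV handler might consume the return value
 * (hypothetical caller; see the comment above page_unprotect()):
 *
 *     switch (page_unprotect(guest_addr, host_pc)) {
 *     case 0:
 *         break;                      // not handled: deliver the signal
 *     case 1:
 *         return;                     // handled: retry the faulting access
 *     case 2:
 *         cpu_loop_exit_noexc(cpu);   // handled: current TB must be exited
 *     }
 */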
2520 #endif /* CONFIG_USER_ONLY */
2521 
2522 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2523 void tcg_flush_softmmu_tlb(CPUState *cs)
2524 {
2525 #ifdef CONFIG_SOFTMMU
2526     tlb_flush(cs);
2527 #endif
2528 }
2529