xref: /openbmc/qemu/accel/tcg/translate-all.c (revision f7bbb156)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 
66 /* #define DEBUG_TB_INVALIDATE */
67 /* #define DEBUG_TB_FLUSH */
68 /* make various TB consistency checks */
69 /* #define DEBUG_TB_CHECK */
70 
71 #ifdef DEBUG_TB_INVALIDATE
72 #define DEBUG_TB_INVALIDATE_GATE 1
73 #else
74 #define DEBUG_TB_INVALIDATE_GATE 0
75 #endif
76 
77 #ifdef DEBUG_TB_FLUSH
78 #define DEBUG_TB_FLUSH_GATE 1
79 #else
80 #define DEBUG_TB_FLUSH_GATE 0
81 #endif
82 
83 #if !defined(CONFIG_USER_ONLY)
84 /* TB consistency checks only implemented for usermode emulation.  */
85 #undef DEBUG_TB_CHECK
86 #endif
87 
88 #ifdef DEBUG_TB_CHECK
89 #define DEBUG_TB_CHECK_GATE 1
90 #else
91 #define DEBUG_TB_CHECK_GATE 0
92 #endif
93 
94 /* Access to the various translation structures needs to be serialised via locks
95  * for consistency.
96  * In user-mode emulation, access to the memory-related structures is protected
97  * with mmap_lock.
98  * In !user-mode we use per-page locks.
99  */
100 #ifdef CONFIG_SOFTMMU
101 #define assert_memory_lock()
102 #else
103 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
104 #endif
105 
106 typedef struct PageDesc {
107     /* list of TBs intersecting this ram page */
108     uintptr_t first_tb;
109 #ifdef CONFIG_USER_ONLY
110     unsigned long flags;
111     void *target_data;
112 #endif
113 #ifdef CONFIG_SOFTMMU
114     QemuSpin lock;
115 #endif
116 } PageDesc;
117 
118 /**
119  * struct page_entry - page descriptor entry
120  * @pd:     pointer to the &struct PageDesc of the page this entry represents
121  * @index:  page index of the page
122  * @locked: whether the page is locked
123  *
124  * This struct helps us keep track of the locked state of a page, without
125  * bloating &struct PageDesc.
126  *
127  * A page lock protects accesses to all fields of &struct PageDesc.
128  *
129  * See also: &struct page_collection.
130  */
131 struct page_entry {
132     PageDesc *pd;
133     tb_page_addr_t index;
134     bool locked;
135 };
136 
137 /**
138  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
139  * @tree:   Binary search tree (BST) of the pages, with key == page index
140  * @max:    Pointer to the page in @tree with the highest page index
141  *
142  * To avoid deadlock we lock pages in ascending order of page index.
143  * When operating on a set of pages, we need to keep track of them so that
144  * we can lock them in order and also unlock them later. For this we collect
145  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
146  * @tree implementation we use does not provide an O(1) operation to obtain the
147  * highest-ranked element, we use @max to keep track of the inserted page
148  * with the highest index. This is valuable because if a page is not in
149  * the tree and its index is higher than @max's, then we can lock it
150  * without breaking the locking order rule.
151  *
152  * Note on naming: 'struct page_set' would be shorter, but we already have a few
153  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
154  *
155  * See also: page_collection_lock().
156  */
157 struct page_collection {
158     GTree *tree;
159     struct page_entry *max;
160 };
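
/*
 * Typical usage (an illustrative sketch; see tb_invalidate_phys_range()
 * below for a real caller):
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... operate on the TBs of the locked pages ...
 *     page_collection_unlock(pages);
 *
 * page_collection_unlock() drops every page lock and frees the entries
 * via page_entry_destroy().
 */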
161 
162 /* list iterators for lists of tagged pointers in TranslationBlock */
163 #define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
164     for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
165          tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
166              tb = (TranslationBlock *)((uintptr_t)tb & ~1))
167 
168 #define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
169     TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
170 
171 #define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
172     TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
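
/*
 * Note on the tagged pointers iterated above: the low bit of each link
 * selects which of a TB's two slots (page_next[0]/[1] or
 * jmp_list_next[0]/[1]) continues the chain, and the remaining bits are
 * the TranslationBlock pointer itself.  For example, a PageDesc whose
 * first_tb equals ((uintptr_t)tb | 1) reaches @tb as that TB's second
 * page, and the walk continues through tb->page_next[1].
 */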
173 
174 /*
175  * In system mode we want L1_MAP to be based on ram offsets,
176  * while in user mode we want it to be based on virtual addresses.
177  *
178  * TODO: For user mode, see the caveat re host vs guest virtual
179  * address spaces near GUEST_ADDR_MAX.
180  */
181 #if !defined(CONFIG_USER_ONLY)
182 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
183 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
184 #else
185 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
186 #endif
187 #else
188 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
189 #endif
190 
191 /* Size of the L2 (and L3, etc) page tables.  */
192 #define V_L2_BITS 10
193 #define V_L2_SIZE (1 << V_L2_BITS)
194 
195 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
196 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
197                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
198                   * BITS_PER_BYTE);
199 
200 /*
201  * L1 Mapping properties
202  */
203 static int v_l1_size;
204 static int v_l1_shift;
205 static int v_l2_levels;
206 
207 /* The bottom level has pointers to PageDesc, and is indexed by
208  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
209  */
210 #define V_L1_MIN_BITS 4
211 #define V_L1_MAX_BITS (V_L2_BITS + 3)
212 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
213 
214 static void *l1_map[V_L1_MAX_SIZE];
215 
216 TBContext tb_ctx;
217 
218 static void page_table_config_init(void)
219 {
220     uint32_t v_l1_bits;
221 
222     assert(TARGET_PAGE_BITS);
223     /* The bits remaining after N lower levels of page tables.  */
224     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
225     if (v_l1_bits < V_L1_MIN_BITS) {
226         v_l1_bits += V_L2_BITS;
227     }
228 
229     v_l1_size = 1 << v_l1_bits;
230     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
231     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
232 
233     assert(v_l1_bits <= V_L1_MAX_BITS);
234     assert(v_l1_shift % V_L2_BITS == 0);
235     assert(v_l2_levels >= 0);
236 }
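
/*
 * Worked example of the sizing above, assuming 64 bits of address space to
 * cover (L1_MAP_ADDR_SPACE_BITS == 64) and 4 KiB target pages
 * (TARGET_PAGE_BITS == 12):
 *
 *   64 - 12 = 52 bits of page index remain;
 *   52 % 10 == 2 is below V_L1_MIN_BITS, so v_l1_bits = 2 + 10 = 12;
 *   v_l1_size   = 1 << 12 = 4096 entries used in l1_map;
 *   v_l1_shift  = 52 - 12 = 40;
 *   v_l2_levels = 40 / 10 - 1 = 3 intermediate levels of V_L2_SIZE entries,
 *   below which sits the bottom-level PageDesc array (see page_find_alloc).
 */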
237 
238 /* Encode VAL as a signed leb128 sequence at P.
239    Return P incremented past the encoded value.  */
240 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
241 {
242     int more, byte;
243 
244     do {
245         byte = val & 0x7f;
246         val >>= 7;
247         more = !((val == 0 && (byte & 0x40) == 0)
248                  || (val == -1 && (byte & 0x40) != 0));
249         if (more) {
250             byte |= 0x80;
251         }
252         *p++ = byte;
253     } while (more);
254 
255     return p;
256 }
257 
258 /* Decode a signed leb128 sequence at *PP; increment *PP past the
259    decoded value.  Return the decoded value.  */
260 static target_long decode_sleb128(const uint8_t **pp)
261 {
262     const uint8_t *p = *pp;
263     target_long val = 0;
264     int byte, shift = 0;
265 
266     do {
267         byte = *p++;
268         val |= (target_ulong)(byte & 0x7f) << shift;
269         shift += 7;
270     } while (byte & 0x80);
271     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
272         val |= -(target_ulong)1 << shift;
273     }
274 
275     *pp = p;
276     return val;
277 }
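
/*
 * Round-trip example for the two helpers above: a delta of 200 encodes as
 * the two bytes 0xc8 0x01 (0xc8 carries the low seven bits plus the
 * continuation bit, 0x01 the next seven bits), while -5 fits in the single
 * byte 0x7b because bit 6 already supplies the sign.  decode_sleb128()
 * reverses this, sign-extending from bit 6 of the final byte when needed.
 */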
278 
279 /* Encode the data collected about the instructions while compiling TB.
280    Place the data at BLOCK, and return the number of bytes consumed.
281 
282    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
283    which come from the target's insn_start data, followed by a uintptr_t
284    which comes from the host pc of the end of the code implementing the insn.
285 
286    Each line of the table is encoded as sleb128 deltas from the previous
287    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
288    That is, the first column is seeded with the guest pc, the last column
289    with the host pc, and the middle columns with zeros.  */
290 
291 static int encode_search(TranslationBlock *tb, uint8_t *block)
292 {
293     uint8_t *highwater = tcg_ctx->code_gen_highwater;
294     uint8_t *p = block;
295     int i, j, n;
296 
297     for (i = 0, n = tb->icount; i < n; ++i) {
298         target_ulong prev;
299 
300         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
301             if (i == 0) {
302                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
303             } else {
304                 prev = tcg_ctx->gen_insn_data[i - 1][j];
305             }
306             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
307         }
308         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
309         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
310 
311         /* Test for (pending) buffer overflow.  The assumption is that any
312            one row beginning below the high water mark cannot overrun
313            the buffer completely.  Thus we can test for overflow after
314            encoding a row without having to check during encoding.  */
315         if (unlikely(p > highwater)) {
316             return -1;
317         }
318     }
319 
320     return p - block;
321 }
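
/*
 * Shape of the stream produced above, sketched for a hypothetical TB of two
 * guest instructions with TARGET_INSN_START_WORDS == 1 and !TARGET_TB_PCREL:
 *
 *   row 0: sleb128(insn0_pc - tb->pc),    sleb128(end_off0 - 0)
 *   row 1: sleb128(insn1_pc - insn0_pc),  sleb128(end_off1 - end_off0)
 *
 * i.e. one row per guest instruction, every column a delta against the
 * previous row, which cpu_restore_state_from_tb() re-accumulates below.
 */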
322 
323 /* The cpu state corresponding to 'searched_pc' is restored.
324  * When reset_icount is true, the current TB will be interrupted and
325  * icount should be recalculated.
326  */
327 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
328                                      uintptr_t searched_pc, bool reset_icount)
329 {
330     target_ulong data[TARGET_INSN_START_WORDS];
331     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
332     CPUArchState *env = cpu->env_ptr;
333     const uint8_t *p = tb->tc.ptr + tb->tc.size;
334     int i, j, num_insns = tb->icount;
335 #ifdef CONFIG_PROFILER
336     TCGProfile *prof = &tcg_ctx->prof;
337     int64_t ti = profile_getclock();
338 #endif
339 
340     searched_pc -= GETPC_ADJ;
341 
342     if (searched_pc < host_pc) {
343         return -1;
344     }
345 
346     memset(data, 0, sizeof(data));
347     if (!TARGET_TB_PCREL) {
348         data[0] = tb_pc(tb);
349     }
350 
351     /* Reconstruct the stored insn data while looking for the point at
352        which the end of the insn exceeds the searched_pc.  */
353     for (i = 0; i < num_insns; ++i) {
354         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
355             data[j] += decode_sleb128(&p);
356         }
357         host_pc += decode_sleb128(&p);
358         if (host_pc > searched_pc) {
359             goto found;
360         }
361     }
362     return -1;
363 
364  found:
365     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
366         assert(icount_enabled());
367         /* Reset the cycle counter to the start of the block
368            and shift it to the number of actually executed instructions */
369         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
370     }
371     restore_state_to_opc(env, tb, data);
372 
373 #ifdef CONFIG_PROFILER
374     qatomic_set(&prof->restore_time,
375                 prof->restore_time + profile_getclock() - ti);
376     qatomic_set(&prof->restore_count, prof->restore_count + 1);
377 #endif
378     return 0;
379 }
380 
381 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
382 {
383     /*
384      * The host_pc has to be in the rx region of the code buffer.
385      * If it is not we will not be able to resolve it here.
386      * The two cases where host_pc will not be correct are:
387      *
388      *  - fault during translation (instruction fetch)
389      *  - fault from helper (not using GETPC() macro)
390      *
391      * Either way we need to return early as we can't resolve it here.
392      */
393     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
394         TranslationBlock *tb = tcg_tb_lookup(host_pc);
395         if (tb) {
396             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
397             return true;
398         }
399     }
400     return false;
401 }
402 
403 void page_init(void)
404 {
405     page_size_init();
406     page_table_config_init();
407 
408 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
409     {
410 #ifdef HAVE_KINFO_GETVMMAP
411         struct kinfo_vmentry *freep;
412         int i, cnt;
413 
414         freep = kinfo_getvmmap(getpid(), &cnt);
415         if (freep) {
416             mmap_lock();
417             for (i = 0; i < cnt; i++) {
418                 unsigned long startaddr, endaddr;
419 
420                 startaddr = freep[i].kve_start;
421                 endaddr = freep[i].kve_end;
422                 if (h2g_valid(startaddr)) {
423                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
424 
425                     if (h2g_valid(endaddr)) {
426                         endaddr = h2g(endaddr);
427                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
428                     } else {
429 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
430                         endaddr = ~0ul;
431                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
432 #endif
433                     }
434                 }
435             }
436             free(freep);
437             mmap_unlock();
438         }
439 #else
440         FILE *f;
441 
442         last_brk = (unsigned long)sbrk(0);
443 
444         f = fopen("/compat/linux/proc/self/maps", "r");
445         if (f) {
446             mmap_lock();
447 
448             do {
449                 unsigned long startaddr, endaddr;
450                 int n;
451 
452                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
453 
454                 if (n == 2 && h2g_valid(startaddr)) {
455                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
456 
457                     if (h2g_valid(endaddr)) {
458                         endaddr = h2g(endaddr);
459                     } else {
460                         endaddr = ~0ul;
461                     }
462                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
463                 }
464             } while (!feof(f));
465 
466             fclose(f);
467             mmap_unlock();
468         }
469 #endif
470     }
471 #endif
472 }
473 
474 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
475 {
476     PageDesc *pd;
477     void **lp;
478     int i;
479 
480     /* Level 1.  Always allocated.  */
481     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
482 
483     /* Level 2..N-1.  */
484     for (i = v_l2_levels; i > 0; i--) {
485         void **p = qatomic_rcu_read(lp);
486 
487         if (p == NULL) {
488             void *existing;
489 
490             if (!alloc) {
491                 return NULL;
492             }
493             p = g_new0(void *, V_L2_SIZE);
494             existing = qatomic_cmpxchg(lp, NULL, p);
495             if (unlikely(existing)) {
496                 g_free(p);
497                 p = existing;
498             }
499         }
500 
501         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
502     }
503 
504     pd = qatomic_rcu_read(lp);
505     if (pd == NULL) {
506         void *existing;
507 
508         if (!alloc) {
509             return NULL;
510         }
511         pd = g_new0(PageDesc, V_L2_SIZE);
512 #ifndef CONFIG_USER_ONLY
513         {
514             int i;
515 
516             for (i = 0; i < V_L2_SIZE; i++) {
517                 qemu_spin_init(&pd[i].lock);
518             }
519         }
520 #endif
521         existing = qatomic_cmpxchg(lp, NULL, pd);
522         if (unlikely(existing)) {
523 #ifndef CONFIG_USER_ONLY
524             {
525                 int i;
526 
527                 for (i = 0; i < V_L2_SIZE; i++) {
528                     qemu_spin_destroy(&pd[i].lock);
529                 }
530             }
531 #endif
532             g_free(pd);
533             pd = existing;
534         }
535     }
536 
537     return pd + (index & (V_L2_SIZE - 1));
538 }
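
/*
 * Illustration of the radix walk above, assuming v_l1_shift == 40 and
 * v_l2_levels == 3 (the 52-bit page-index example): bits [51:40] of the
 * page index select the l1_map slot, bits [39:30], [29:20] and [19:10]
 * select the three intermediate levels, and bits [9:0] select the PageDesc
 * within the bottom-level array.  Missing levels are allocated lazily and
 * published with qatomic_cmpxchg(), so a concurrent reader sees either
 * NULL or a fully initialised table.
 */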
539 
540 static inline PageDesc *page_find(tb_page_addr_t index)
541 {
542     return page_find_alloc(index, false);
543 }
544 
545 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
546                            PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
547 
548 /* In user-mode page locks aren't used; mmap_lock is enough */
549 #ifdef CONFIG_USER_ONLY
550 
551 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
552 
553 static inline void page_lock(PageDesc *pd)
554 { }
555 
556 static inline void page_unlock(PageDesc *pd)
557 { }
558 
559 static inline void page_lock_tb(const TranslationBlock *tb)
560 { }
561 
562 static inline void page_unlock_tb(const TranslationBlock *tb)
563 { }
564 
565 struct page_collection *
566 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
567 {
568     return NULL;
569 }
570 
571 void page_collection_unlock(struct page_collection *set)
572 { }
573 #else /* !CONFIG_USER_ONLY */
574 
575 #ifdef CONFIG_DEBUG_TCG
576 
577 static __thread GHashTable *ht_pages_locked_debug;
578 
579 static void ht_pages_locked_debug_init(void)
580 {
581     if (ht_pages_locked_debug) {
582         return;
583     }
584     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
585 }
586 
587 static bool page_is_locked(const PageDesc *pd)
588 {
589     PageDesc *found;
590 
591     ht_pages_locked_debug_init();
592     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
593     return !!found;
594 }
595 
596 static void page_lock__debug(PageDesc *pd)
597 {
598     ht_pages_locked_debug_init();
599     g_assert(!page_is_locked(pd));
600     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
601 }
602 
603 static void page_unlock__debug(const PageDesc *pd)
604 {
605     bool removed;
606 
607     ht_pages_locked_debug_init();
608     g_assert(page_is_locked(pd));
609     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
610     g_assert(removed);
611 }
612 
613 static void
614 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
615 {
616     if (unlikely(!page_is_locked(pd))) {
617         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
618                      pd, file, line);
619         abort();
620     }
621 }
622 
623 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
624 
625 void assert_no_pages_locked(void)
626 {
627     ht_pages_locked_debug_init();
628     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
629 }
630 
631 #else /* !CONFIG_DEBUG_TCG */
632 
633 #define assert_page_locked(pd)
634 
635 static inline void page_lock__debug(const PageDesc *pd)
636 {
637 }
638 
639 static inline void page_unlock__debug(const PageDesc *pd)
640 {
641 }
642 
643 #endif /* CONFIG_DEBUG_TCG */
644 
645 static inline void page_lock(PageDesc *pd)
646 {
647     page_lock__debug(pd);
648     qemu_spin_lock(&pd->lock);
649 }
650 
651 static inline void page_unlock(PageDesc *pd)
652 {
653     qemu_spin_unlock(&pd->lock);
654     page_unlock__debug(pd);
655 }
656 
657 /* lock the page(s) of a TB in the correct acquisition order */
658 static inline void page_lock_tb(const TranslationBlock *tb)
659 {
660     page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
661 }
662 
663 static inline void page_unlock_tb(const TranslationBlock *tb)
664 {
665     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
666 
667     page_unlock(p1);
668     if (unlikely(tb->page_addr[1] != -1)) {
669         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
670 
671         if (p2 != p1) {
672             page_unlock(p2);
673         }
674     }
675 }
676 
677 static inline struct page_entry *
678 page_entry_new(PageDesc *pd, tb_page_addr_t index)
679 {
680     struct page_entry *pe = g_malloc(sizeof(*pe));
681 
682     pe->index = index;
683     pe->pd = pd;
684     pe->locked = false;
685     return pe;
686 }
687 
688 static void page_entry_destroy(gpointer p)
689 {
690     struct page_entry *pe = p;
691 
692     g_assert(pe->locked);
693     page_unlock(pe->pd);
694     g_free(pe);
695 }
696 
697 /* returns false on success */
698 static bool page_entry_trylock(struct page_entry *pe)
699 {
700     bool busy;
701 
702     busy = qemu_spin_trylock(&pe->pd->lock);
703     if (!busy) {
704         g_assert(!pe->locked);
705         pe->locked = true;
706         page_lock__debug(pe->pd);
707     }
708     return busy;
709 }
710 
711 static void do_page_entry_lock(struct page_entry *pe)
712 {
713     page_lock(pe->pd);
714     g_assert(!pe->locked);
715     pe->locked = true;
716 }
717 
718 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
719 {
720     struct page_entry *pe = value;
721 
722     do_page_entry_lock(pe);
723     return FALSE;
724 }
725 
726 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
727 {
728     struct page_entry *pe = value;
729 
730     if (pe->locked) {
731         pe->locked = false;
732         page_unlock(pe->pd);
733     }
734     return FALSE;
735 }
736 
737 /*
738  * Trylock a page, and if successful, add the page to a collection.
739  * Returns true ("busy") if the page could not be locked; false otherwise.
740  */
741 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
742 {
743     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
744     struct page_entry *pe;
745     PageDesc *pd;
746 
747     pe = g_tree_lookup(set->tree, &index);
748     if (pe) {
749         return false;
750     }
751 
752     pd = page_find(index);
753     if (pd == NULL) {
754         return false;
755     }
756 
757     pe = page_entry_new(pd, index);
758     g_tree_insert(set->tree, &pe->index, pe);
759 
760     /*
761      * If this is either (1) the first insertion or (2) a page whose index
762      * is higher than any other so far, just lock the page and move on.
763      */
764     if (set->max == NULL || pe->index > set->max->index) {
765         set->max = pe;
766         do_page_entry_lock(pe);
767         return false;
768     }
769     /*
770      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
771      * locks in order.
772      */
773     return page_entry_trylock(pe);
774 }
775 
776 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
777 {
778     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
779     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
780 
781     if (a == b) {
782         return 0;
783     } else if (a < b) {
784         return -1;
785     }
786     return 1;
787 }
788 
789 /*
790  * Lock a range of pages ([@start,@end[) as well as the pages of all
791  * intersecting TBs.
792  * Locking order: acquire locks in ascending order of page index.
793  */
794 struct page_collection *
795 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
796 {
797     struct page_collection *set = g_malloc(sizeof(*set));
798     tb_page_addr_t index;
799     PageDesc *pd;
800 
801     start >>= TARGET_PAGE_BITS;
802     end   >>= TARGET_PAGE_BITS;
803     g_assert(start <= end);
804 
805     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
806                                 page_entry_destroy);
807     set->max = NULL;
808     assert_no_pages_locked();
809 
810  retry:
811     g_tree_foreach(set->tree, page_entry_lock, NULL);
812 
813     for (index = start; index <= end; index++) {
814         TranslationBlock *tb;
815         int n;
816 
817         pd = page_find(index);
818         if (pd == NULL) {
819             continue;
820         }
821         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
822             g_tree_foreach(set->tree, page_entry_unlock, NULL);
823             goto retry;
824         }
825         assert_page_locked(pd);
826         PAGE_FOR_EACH_TB(pd, tb, n) {
827             if (page_trylock_add(set, tb->page_addr[0]) ||
828                 (tb->page_addr[1] != -1 &&
829                  page_trylock_add(set, tb->page_addr[1]))) {
830                 /* drop all locks, and reacquire in order */
831                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
832                 goto retry;
833             }
834         }
835     }
836     return set;
837 }
838 
839 void page_collection_unlock(struct page_collection *set)
840 {
841     /* entries are unlocked and freed via page_entry_destroy */
842     g_tree_destroy(set->tree);
843     g_free(set);
844 }
845 
846 #endif /* !CONFIG_USER_ONLY */
847 
848 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
849                            PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
850 {
851     PageDesc *p1, *p2;
852     tb_page_addr_t page1;
853     tb_page_addr_t page2;
854 
855     assert_memory_lock();
856     g_assert(phys1 != -1);
857 
858     page1 = phys1 >> TARGET_PAGE_BITS;
859     page2 = phys2 >> TARGET_PAGE_BITS;
860 
861     p1 = page_find_alloc(page1, alloc);
862     if (ret_p1) {
863         *ret_p1 = p1;
864     }
865     if (likely(phys2 == -1)) {
866         page_lock(p1);
867         return;
868     } else if (page1 == page2) {
869         page_lock(p1);
870         if (ret_p2) {
871             *ret_p2 = p1;
872         }
873         return;
874     }
875     p2 = page_find_alloc(page2, alloc);
876     if (ret_p2) {
877         *ret_p2 = p2;
878     }
879     if (page1 < page2) {
880         page_lock(p1);
881         page_lock(p2);
882     } else {
883         page_lock(p2);
884         page_lock(p1);
885     }
886 }
887 
888 static bool tb_cmp(const void *ap, const void *bp)
889 {
890     const TranslationBlock *a = ap;
891     const TranslationBlock *b = bp;
892 
893     return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
894             a->cs_base == b->cs_base &&
895             a->flags == b->flags &&
896             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
897             a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
898             a->page_addr[0] == b->page_addr[0] &&
899             a->page_addr[1] == b->page_addr[1]);
900 }
901 
902 void tb_htable_init(void)
903 {
904     unsigned int mode = QHT_MODE_AUTO_RESIZE;
905 
906     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
907 }
908 
909 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
910 static void page_flush_tb_1(int level, void **lp)
911 {
912     int i;
913 
914     if (*lp == NULL) {
915         return;
916     }
917     if (level == 0) {
918         PageDesc *pd = *lp;
919 
920         for (i = 0; i < V_L2_SIZE; ++i) {
921             page_lock(&pd[i]);
922             pd[i].first_tb = (uintptr_t)NULL;
923             page_unlock(&pd[i]);
924         }
925     } else {
926         void **pp = *lp;
927 
928         for (i = 0; i < V_L2_SIZE; ++i) {
929             page_flush_tb_1(level - 1, pp + i);
930         }
931     }
932 }
933 
934 static void page_flush_tb(void)
935 {
936     int i, l1_sz = v_l1_size;
937 
938     for (i = 0; i < l1_sz; i++) {
939         page_flush_tb_1(v_l2_levels, l1_map + i);
940     }
941 }
942 
943 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
944 {
945     const TranslationBlock *tb = value;
946     size_t *size = data;
947 
948     *size += tb->tc.size;
949     return false;
950 }
951 
952 /* flush all the translation blocks */
953 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
954 {
955     bool did_flush = false;
956 
957     mmap_lock();
958     /* If it has already been done on request of another CPU,
959      * just retry.
960      */
961     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
962         goto done;
963     }
964     did_flush = true;
965 
966     if (DEBUG_TB_FLUSH_GATE) {
967         size_t nb_tbs = tcg_nb_tbs();
968         size_t host_size = 0;
969 
970         tcg_tb_foreach(tb_host_size_iter, &host_size);
971         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
972                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
973     }
974 
975     CPU_FOREACH(cpu) {
976         tcg_flush_jmp_cache(cpu);
977     }
978 
979     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
980     page_flush_tb();
981 
982     tcg_region_reset_all();
983     /* XXX: flush processor icache at this point if cache flush is
984        expensive */
985     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
986 
987 done:
988     mmap_unlock();
989     if (did_flush) {
990         qemu_plugin_flush_cb();
991     }
992 }
993 
994 void tb_flush(CPUState *cpu)
995 {
996     if (tcg_enabled()) {
997         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
998 
999         if (cpu_in_exclusive_context(cpu)) {
1000             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1001         } else {
1002             async_safe_run_on_cpu(cpu, do_tb_flush,
1003                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1004         }
1005     }
1006 }
1007 
1008 /*
1009  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1010  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1011  * and let the optimizer get rid of them by wrapping their user-only callers
1012  * with if (DEBUG_TB_CHECK_GATE).
1013  */
1014 #ifdef CONFIG_USER_ONLY
1015 
1016 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1017 {
1018     TranslationBlock *tb = p;
1019     target_ulong addr = *(target_ulong *)userp;
1020 
1021     if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
1022           addr >= tb_pc(tb) + tb->size)) {
1023         printf("ERROR invalidate: address=" TARGET_FMT_lx
1024                " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
1025     }
1026 }
1027 
1028 /* verify that all the pages have correct rights for code
1029  *
1030  * Called with mmap_lock held.
1031  */
1032 static void tb_invalidate_check(target_ulong address)
1033 {
1034     address &= TARGET_PAGE_MASK;
1035     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1036 }
1037 
1038 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1039 {
1040     TranslationBlock *tb = p;
1041     int flags1, flags2;
1042 
1043     flags1 = page_get_flags(tb_pc(tb));
1044     flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
1045     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1046         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1047                (long)tb_pc(tb), tb->size, flags1, flags2);
1048     }
1049 }
1050 
1051 /* verify that all the pages have correct rights for code */
1052 static void tb_page_check(void)
1053 {
1054     qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1055 }
1056 
1057 #endif /* CONFIG_USER_ONLY */
1058 
1059 /*
1060  * user-mode: call with mmap_lock held
1061  * !user-mode: call with @pd->lock held
1062  */
1063 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1064 {
1065     TranslationBlock *tb1;
1066     uintptr_t *pprev;
1067     unsigned int n1;
1068 
1069     assert_page_locked(pd);
1070     pprev = &pd->first_tb;
1071     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1072         if (tb1 == tb) {
1073             *pprev = tb1->page_next[n1];
1074             return;
1075         }
1076         pprev = &tb1->page_next[n1];
1077     }
1078     g_assert_not_reached();
1079 }
1080 
1081 /* remove @orig from its @n_orig-th jump list */
1082 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1083 {
1084     uintptr_t ptr, ptr_locked;
1085     TranslationBlock *dest;
1086     TranslationBlock *tb;
1087     uintptr_t *pprev;
1088     int n;
1089 
1090     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1091     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1092     dest = (TranslationBlock *)(ptr & ~1);
1093     if (dest == NULL) {
1094         return;
1095     }
1096 
1097     qemu_spin_lock(&dest->jmp_lock);
1098     /*
1099      * While acquiring the lock, the jump might have been removed if the
1100      * destination TB was invalidated; check again.
1101      */
1102     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1103     if (ptr_locked != ptr) {
1104         qemu_spin_unlock(&dest->jmp_lock);
1105         /*
1106          * The only possibility is that the jump was unlinked via
1107          * tb_jmp_unlink(dest). Seeing another destination here would be a bug,
1108          * because we set the LSB above.
1109          */
1110         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1111         return;
1112     }
1113     /*
1114      * We first acquired the lock, and since the destination pointer matches,
1115      * we know for sure that @orig is in the jmp list.
1116      */
1117     pprev = &dest->jmp_list_head;
1118     TB_FOR_EACH_JMP(dest, tb, n) {
1119         if (tb == orig && n == n_orig) {
1120             *pprev = tb->jmp_list_next[n];
1121             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1122             qemu_spin_unlock(&dest->jmp_lock);
1123             return;
1124         }
1125         pprev = &tb->jmp_list_next[n];
1126     }
1127     g_assert_not_reached();
1128 }
1129 
1130 /* reset the jump entry 'n' of a TB so that it is not chained to
1131    another TB */
1132 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1133 {
1134     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1135     tb_set_jmp_target(tb, n, addr);
1136 }
1137 
1138 /* remove any jumps to the TB */
1139 static inline void tb_jmp_unlink(TranslationBlock *dest)
1140 {
1141     TranslationBlock *tb;
1142     int n;
1143 
1144     qemu_spin_lock(&dest->jmp_lock);
1145 
1146     TB_FOR_EACH_JMP(dest, tb, n) {
1147         tb_reset_jump(tb, n);
1148         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1149         /* No need to clear the list entry; setting the dest ptr is enough */
1150     }
1151     dest->jmp_list_head = (uintptr_t)NULL;
1152 
1153     qemu_spin_unlock(&dest->jmp_lock);
1154 }
1155 
1156 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
1157 {
1158     CPUState *cpu;
1159 
1160     if (TARGET_TB_PCREL) {
1161         /* A TB may be at any virtual address */
1162         CPU_FOREACH(cpu) {
1163             tcg_flush_jmp_cache(cpu);
1164         }
1165     } else {
1166         uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
1167 
1168         CPU_FOREACH(cpu) {
1169             CPUJumpCache *jc = cpu->tb_jmp_cache;
1170 
1171             if (qatomic_read(&jc->array[h].tb) == tb) {
1172                 qatomic_set(&jc->array[h].tb, NULL);
1173             }
1174         }
1175     }
1176 }
1177 
1178 /*
1179  * In user-mode, call with mmap_lock held.
1180  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1181  * locks held.
1182  */
1183 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1184 {
1185     PageDesc *p;
1186     uint32_t h;
1187     tb_page_addr_t phys_pc;
1188     uint32_t orig_cflags = tb_cflags(tb);
1189 
1190     assert_memory_lock();
1191 
1192     /* make sure no further incoming jumps will be chained to this TB */
1193     qemu_spin_lock(&tb->jmp_lock);
1194     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1195     qemu_spin_unlock(&tb->jmp_lock);
1196 
1197     /* remove the TB from the hash list */
1198     phys_pc = tb->page_addr[0];
1199     h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
1200                      tb->flags, orig_cflags, tb->trace_vcpu_dstate);
1201     if (!qht_remove(&tb_ctx.htable, tb, h)) {
1202         return;
1203     }
1204 
1205     /* remove the TB from the page list */
1206     if (rm_from_page_list) {
1207         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1208         tb_page_remove(p, tb);
1209         if (tb->page_addr[1] != -1) {
1210             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1211             tb_page_remove(p, tb);
1212         }
1213     }
1214 
1215     /* remove the TB from the per-CPU jump caches */
1216     tb_jmp_cache_inval_tb(tb);
1217 
1218     /* suppress this TB from the two jump lists */
1219     tb_remove_from_jmp_list(tb, 0);
1220     tb_remove_from_jmp_list(tb, 1);
1221 
1222     /* suppress any remaining jumps to this TB */
1223     tb_jmp_unlink(tb);
1224 
1225     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
1226                 tb_ctx.tb_phys_invalidate_count + 1);
1227 }
1228 
1229 static void tb_phys_invalidate__locked(TranslationBlock *tb)
1230 {
1231     qemu_thread_jit_write();
1232     do_tb_phys_invalidate(tb, true);
1233     qemu_thread_jit_execute();
1234 }
1235 
1236 /* invalidate one TB
1237  *
1238  * Called with mmap_lock held in user-mode.
1239  */
1240 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1241 {
1242     if (page_addr == -1 && tb->page_addr[0] != -1) {
1243         page_lock_tb(tb);
1244         do_tb_phys_invalidate(tb, true);
1245         page_unlock_tb(tb);
1246     } else {
1247         do_tb_phys_invalidate(tb, false);
1248     }
1249 }
1250 
1251 /* add the tb to the target page and protect it if necessary
1252  *
1253  * Called with mmap_lock held for user-mode emulation.
1254  * Called with @p->lock held in !user-mode.
1255  */
1256 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1257                                unsigned int n, tb_page_addr_t page_addr)
1258 {
1259 #ifndef CONFIG_USER_ONLY
1260     bool page_already_protected;
1261 #endif
1262 
1263     assert_page_locked(p);
1264 
1265     tb->page_addr[n] = page_addr;
1266     tb->page_next[n] = p->first_tb;
1267 #ifndef CONFIG_USER_ONLY
1268     page_already_protected = p->first_tb != (uintptr_t)NULL;
1269 #endif
1270     p->first_tb = (uintptr_t)tb | n;
1271 
1272 #if defined(CONFIG_USER_ONLY)
1273     /* translator_loop() must have made all TB pages non-writable */
1274     assert(!(p->flags & PAGE_WRITE));
1275 #else
1276     /* if some code is already present, then the pages are already
1277        protected. So we handle the case where only the first TB is
1278        allocated in a physical page */
1279     if (!page_already_protected) {
1280         tlb_protect_code(page_addr);
1281     }
1282 #endif
1283 }
1284 
1285 /*
1286  * Add a new TB and link it to the physical page tables. phys_page2 is
1287  * (-1) to indicate that only one page contains the TB.
1288  *
1289  * Called with mmap_lock held for user-mode emulation.
1290  *
1291  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
1292  * Note that in !user-mode, another thread might have already added a TB
1293  * for the same block of guest code that @tb corresponds to. In that case,
1294  * the caller should discard the original @tb, and use instead the returned TB.
1295  */
1296 static TranslationBlock *
1297 tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1298              tb_page_addr_t phys_page2)
1299 {
1300     PageDesc *p;
1301     PageDesc *p2 = NULL;
1302     void *existing_tb = NULL;
1303     uint32_t h;
1304 
1305     assert_memory_lock();
1306     tcg_debug_assert(!(tb->cflags & CF_INVALID));
1307 
1308     /*
1309      * Add the TB to the page list, acquiring the pages' locks first.
1310      * We keep the locks held until after inserting the TB in the hash table,
1311      * so that if the insertion fails we know for sure that the TBs are still
1312      * in the page descriptors.
1313      * Note that inserting into the hash table first isn't an option, since
1314      * we can only insert TBs that are fully initialized.
1315      */
1316     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
1317     tb_page_add(p, tb, 0, phys_pc);
1318     if (p2) {
1319         tb_page_add(p2, tb, 1, phys_page2);
1320     } else {
1321         tb->page_addr[1] = -1;
1322     }
1323 
1324     /* add in the hash table */
1325     h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
1326                      tb->flags, tb->cflags, tb->trace_vcpu_dstate);
1327     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1328 
1329     /* remove TB from the page(s) if we couldn't insert it */
1330     if (unlikely(existing_tb)) {
1331         tb_page_remove(p, tb);
1332         if (p2) {
1333             tb_page_remove(p2, tb);
1334         }
1335         tb = existing_tb;
1336     }
1337 
1338     if (p2 && p2 != p) {
1339         page_unlock(p2);
1340     }
1341     page_unlock(p);
1342 
1343 #ifdef CONFIG_USER_ONLY
1344     if (DEBUG_TB_CHECK_GATE) {
1345         tb_page_check();
1346     }
1347 #endif
1348     return tb;
1349 }
1350 
1351 /* Called with mmap_lock held for user mode emulation.  */
1352 TranslationBlock *tb_gen_code(CPUState *cpu,
1353                               target_ulong pc, target_ulong cs_base,
1354                               uint32_t flags, int cflags)
1355 {
1356     CPUArchState *env = cpu->env_ptr;
1357     TranslationBlock *tb, *existing_tb;
1358     tb_page_addr_t phys_pc;
1359     tcg_insn_unit *gen_code_buf;
1360     int gen_code_size, search_size, max_insns;
1361 #ifdef CONFIG_PROFILER
1362     TCGProfile *prof = &tcg_ctx->prof;
1363     int64_t ti;
1364 #endif
1365     void *host_pc;
1366 
1367     assert_memory_lock();
1368     qemu_thread_jit_write();
1369 
1370     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
1371 
1372     if (phys_pc == -1) {
1373         /* Generate a one-shot TB with 1 insn in it */
1374         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1375     }
1376 
1377     max_insns = cflags & CF_COUNT_MASK;
1378     if (max_insns == 0) {
1379         max_insns = TCG_MAX_INSNS;
1380     }
1381     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
1382 
1383  buffer_overflow:
1384     tb = tcg_tb_alloc(tcg_ctx);
1385     if (unlikely(!tb)) {
1386         /* flush must be done */
1387         tb_flush(cpu);
1388         mmap_unlock();
1389         /* Make the execution loop process the flush as soon as possible.  */
1390         cpu->exception_index = EXCP_INTERRUPT;
1391         cpu_loop_exit(cpu);
1392     }
1393 
1394     gen_code_buf = tcg_ctx->code_gen_ptr;
1395     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1396 #if !TARGET_TB_PCREL
1397     tb->pc = pc;
1398 #endif
1399     tb->cs_base = cs_base;
1400     tb->flags = flags;
1401     tb->cflags = cflags;
1402     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1403     tb->page_addr[0] = phys_pc;
1404     tb->page_addr[1] = -1;
1405     tcg_ctx->tb_cflags = cflags;
1406  tb_overflow:
1407 
1408 #ifdef CONFIG_PROFILER
1409     /* includes aborted translations because of exceptions */
1410     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1411     ti = profile_getclock();
1412 #endif
1413 
1414     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1415     if (unlikely(gen_code_size != 0)) {
1416         goto error_return;
1417     }
1418 
1419     tcg_func_start(tcg_ctx);
1420 
1421     tcg_ctx->cpu = env_cpu(env);
1422     gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
1423     assert(tb->size != 0);
1424     tcg_ctx->cpu = NULL;
1425     max_insns = tb->icount;
1426 
1427     trace_translate_block(tb, pc, tb->tc.ptr);
1428 
1429     /* generate machine code */
1430     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1431     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1432     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1433     if (TCG_TARGET_HAS_direct_jump) {
1434         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1435         tcg_ctx->tb_jmp_target_addr = NULL;
1436     } else {
1437         tcg_ctx->tb_jmp_insn_offset = NULL;
1438         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1439     }
1440 
1441 #ifdef CONFIG_PROFILER
1442     qatomic_set(&prof->tb_count, prof->tb_count + 1);
1443     qatomic_set(&prof->interm_time,
1444                 prof->interm_time + profile_getclock() - ti);
1445     ti = profile_getclock();
1446 #endif
1447 
1448     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
1449     if (unlikely(gen_code_size < 0)) {
1450  error_return:
1451         switch (gen_code_size) {
1452         case -1:
1453             /*
1454              * Overflow of code_gen_buffer, or the current slice of it.
1455              *
1456              * TODO: We don't need to re-do gen_intermediate_code, nor
1457              * should we re-do the tcg optimization currently hidden
1458              * inside tcg_gen_code.  All that should be required is to
1459              * flush the TBs, allocate a new TB, re-initialize it per
1460              * above, and re-do the actual code generation.
1461              */
1462             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1463                           "Restarting code generation for "
1464                           "code_gen_buffer overflow\n");
1465             goto buffer_overflow;
1466 
1467         case -2:
1468             /*
1469              * The code generated for the TranslationBlock is too large.
1470              * The maximum size allowed by the unwind info is 64k.
1471              * There may be stricter constraints from relocations
1472              * in the tcg backend.
1473              *
1474              * Try again with half as many insns as we attempted this time.
1475              * If a single insn overflows, there's a bug somewhere...
1476              */
1477             assert(max_insns > 1);
1478             max_insns /= 2;
1479             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1480                           "Restarting code generation with "
1481                           "smaller translation block (max %d insns)\n",
1482                           max_insns);
1483             goto tb_overflow;
1484 
1485         default:
1486             g_assert_not_reached();
1487         }
1488     }
1489     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1490     if (unlikely(search_size < 0)) {
1491         goto buffer_overflow;
1492     }
1493     tb->tc.size = gen_code_size;
1494 
1495 #ifdef CONFIG_PROFILER
1496     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1497     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1498     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1499     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1500 #endif
1501 
1502 #ifdef DEBUG_DISAS
1503     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1504         qemu_log_in_addr_range(pc)) {
1505         FILE *logfile = qemu_log_trylock();
1506         if (logfile) {
1507             int code_size, data_size;
1508             const tcg_target_ulong *rx_data_gen_ptr;
1509             size_t chunk_start;
1510             int insn = 0;
1511 
1512             if (tcg_ctx->data_gen_ptr) {
1513                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
1514                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
1515                 data_size = gen_code_size - code_size;
1516             } else {
1517                 rx_data_gen_ptr = 0;
1518                 code_size = gen_code_size;
1519                 data_size = 0;
1520             }
1521 
1522             /* Dump header and the first instruction */
1523             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
1524             fprintf(logfile,
1525                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
1526                     tcg_ctx->gen_insn_data[insn][0]);
1527             chunk_start = tcg_ctx->gen_insn_end_off[insn];
1528             disas(logfile, tb->tc.ptr, chunk_start);
1529 
1530             /*
1531              * Dump each instruction chunk, wrapping up empty chunks into
1532              * the next instruction. The whole array is offset so the
1533              * first entry is the beginning of the 2nd instruction.
1534              */
1535             while (insn < tb->icount) {
1536                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
1537                 if (chunk_end > chunk_start) {
1538                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
1539                             tcg_ctx->gen_insn_data[insn][0]);
1540                     disas(logfile, tb->tc.ptr + chunk_start,
1541                           chunk_end - chunk_start);
1542                     chunk_start = chunk_end;
1543                 }
1544                 insn++;
1545             }
1546 
1547             if (chunk_start < code_size) {
1548                 fprintf(logfile, "  -- tb slow paths + alignment\n");
1549                 disas(logfile, tb->tc.ptr + chunk_start,
1550                       code_size - chunk_start);
1551             }
1552 
1553             /* Finally dump any data we may have after the block */
1554             if (data_size) {
1555                 int i;
1556                 fprintf(logfile, "  data: [size=%d]\n", data_size);
1557                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
1558                     if (sizeof(tcg_target_ulong) == 8) {
1559                         fprintf(logfile,
1560                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
1561                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1562                     } else if (sizeof(tcg_target_ulong) == 4) {
1563                         fprintf(logfile,
1564                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
1565                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1566                     } else {
1567                         qemu_build_not_reached();
1568                     }
1569                 }
1570             }
1571             fprintf(logfile, "\n");
1572             qemu_log_unlock(logfile);
1573         }
1574     }
1575 #endif
1576 
1577     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
1578         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1579                  CODE_GEN_ALIGN));
1580 
1581     /* init jump list */
1582     qemu_spin_init(&tb->jmp_lock);
1583     tb->jmp_list_head = (uintptr_t)NULL;
1584     tb->jmp_list_next[0] = (uintptr_t)NULL;
1585     tb->jmp_list_next[1] = (uintptr_t)NULL;
1586     tb->jmp_dest[0] = (uintptr_t)NULL;
1587     tb->jmp_dest[1] = (uintptr_t)NULL;
1588 
1589     /* init original jump addresses which have been set during tcg_gen_code() */
1590     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1591         tb_reset_jump(tb, 0);
1592     }
1593     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1594         tb_reset_jump(tb, 1);
1595     }
1596 
1597     /*
1598      * If the TB is not associated with a physical RAM page then it must be
1599      * a temporary one-insn TB, and we have nothing left to do. Return early
1600      * before attempting to link to other TBs or add to the lookup table.
1601      */
1602     if (tb->page_addr[0] == -1) {
1603         return tb;
1604     }
1605 
1606     /*
1607      * Insert TB into the corresponding region tree before publishing it
1608      * through QHT. Otherwise a rewind happening in the TB might fail to
1609      * look itself up using the host PC.
1610      */
1611     tcg_tb_insert(tb);
1612 
1613     /*
1614      * No explicit memory barrier is required -- tb_link_page() makes the
1615      * TB visible in a consistent state.
1616      */
1617     existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
1618     /* if the TB already exists, discard what we just translated */
1619     if (unlikely(existing_tb != tb)) {
1620         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1621 
1622         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1623         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1624         tcg_tb_remove(tb);
1625         return existing_tb;
1626     }
1627     return tb;
1628 }
1629 
1630 /*
1631  * @p must be non-NULL.
1632  * user-mode: call with mmap_lock held.
1633  * !user-mode: call with all @pages locked.
1634  */
1635 static void
1636 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1637                                       PageDesc *p, tb_page_addr_t start,
1638                                       tb_page_addr_t end,
1639                                       uintptr_t retaddr)
1640 {
1641     TranslationBlock *tb;
1642     tb_page_addr_t tb_start, tb_end;
1643     int n;
1644 #ifdef TARGET_HAS_PRECISE_SMC
1645     CPUState *cpu = current_cpu;
1646     CPUArchState *env = NULL;
1647     bool current_tb_not_found = retaddr != 0;
1648     bool current_tb_modified = false;
1649     TranslationBlock *current_tb = NULL;
1650     target_ulong current_pc = 0;
1651     target_ulong current_cs_base = 0;
1652     uint32_t current_flags = 0;
1653 #endif /* TARGET_HAS_PRECISE_SMC */
1654 
1655     assert_page_locked(p);
1656 
1657 #if defined(TARGET_HAS_PRECISE_SMC)
1658     if (cpu != NULL) {
1659         env = cpu->env_ptr;
1660     }
1661 #endif
1662 
1663     /* we remove all the TBs in the range [start, end[ */
1664     /* XXX: see if in some cases it could be faster to invalidate all
1665        the code */
1666     PAGE_FOR_EACH_TB(p, tb, n) {
1667         assert_page_locked(p);
1668         /* NOTE: this is subtle as a TB may span two physical pages */
1669         if (n == 0) {
1670             /* NOTE: tb_end may be after the end of the page, but
1671                it is not a problem */
1672             tb_start = tb->page_addr[0];
1673             tb_end = tb_start + tb->size;
1674         } else {
1675             tb_start = tb->page_addr[1];
1676             tb_end = tb_start + ((tb->page_addr[0] + tb->size)
1677                                  & ~TARGET_PAGE_MASK);
1678         }
1679         if (!(tb_end <= start || tb_start >= end)) {
1680 #ifdef TARGET_HAS_PRECISE_SMC
1681             if (current_tb_not_found) {
1682                 current_tb_not_found = false;
1683                 /* now we have a real cpu fault */
1684                 current_tb = tcg_tb_lookup(retaddr);
1685             }
1686             if (current_tb == tb &&
1687                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1688                 /*
1689                  * If we are modifying the current TB, we must stop
1690                  * its execution. We could be more precise by checking
1691                  * that the modification is after the current PC, but it
1692                  * would require a specialized function to partially
1693                  * restore the CPU state.
1694                  */
1695                 current_tb_modified = true;
1696                 cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
1697                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1698                                      &current_flags);
1699             }
1700 #endif /* TARGET_HAS_PRECISE_SMC */
1701             tb_phys_invalidate__locked(tb);
1702         }
1703     }
1704 #if !defined(CONFIG_USER_ONLY)
1705     /* if no code remaining, no need to continue to use slow writes */
1706     if (!p->first_tb) {
1707         tlb_unprotect_code(start);
1708     }
1709 #endif
1710 #ifdef TARGET_HAS_PRECISE_SMC
1711     if (current_tb_modified) {
1712         page_collection_unlock(pages);
1713         /* Force execution of one insn next time.  */
1714         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1715         mmap_unlock();
1716         cpu_loop_exit_noexc(cpu);
1717     }
1718 #endif
1719 }
1720 
1721 /*
1722  * Invalidate all TBs which intersect with the target physical address range
1723  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1724  * Any intersecting TB is invalidated, but because no faulting return
1725  * address is passed down (retaddr == 0) the virtual CPU is not forced to
1726  * exit a TB it is currently executing; precise SMC uses the retaddr paths.
1727  *
1728  * Called with mmap_lock held for user-mode emulation
1729  */
1730 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1731 {
1732     struct page_collection *pages;
1733     PageDesc *p;
1734 
1735     assert_memory_lock();
1736 
1737     p = page_find(start >> TARGET_PAGE_BITS);
1738     if (p == NULL) {
1739         return;
1740     }
1741     pages = page_collection_lock(start, end);
1742     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1743     page_collection_unlock(pages);
1744 }
1745 
1746 /*
1747  * Invalidate all TBs which intersect with the target physical address range
1748  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1749  * Any intersecting TB is invalidated, but because no faulting return
1750  * address is passed down (retaddr == 0) the virtual CPU is not forced to
1751  * exit a TB it is currently executing; precise SMC uses the retaddr paths.
1752  *
1753  * Called with mmap_lock held for user-mode emulation.
1754  */
1755 #ifdef CONFIG_SOFTMMU
1756 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1757 #else
1758 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1759 #endif
1760 {
1761     struct page_collection *pages;
1762     tb_page_addr_t next;
1763 
1764     assert_memory_lock();
1765 
1766     pages = page_collection_lock(start, end);
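         /*
          * The range may span multiple guest pages: walk it page by page,
          * clamping each sub-range to its page so that the PageDesc passed
          * down always matches the addresses being invalidated.
          */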
1767     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1768          start < end;
1769          start = next, next += TARGET_PAGE_SIZE) {
1770         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1771         tb_page_addr_t bound = MIN(next, end);
1772 
1773         if (pd == NULL) {
1774             continue;
1775         }
1776         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1777     }
1778     page_collection_unlock(pages);
1779 }
1780 
1781 #ifdef CONFIG_SOFTMMU
1782 /* len must be <= 8 and start must be a multiple of len.
1783  * Called via softmmu_template.h when code areas are written to, with the
1784  * iothread mutex not held.
1785  *
1786  * Call with all @pages in the range [@start, @start + len[ locked.
1787  */
1788 void tb_invalidate_phys_page_fast(struct page_collection *pages,
1789                                   tb_page_addr_t start, int len,
1790                                   uintptr_t retaddr)
1791 {
1792     PageDesc *p;
1793 
1794     assert_memory_lock();
1795 
1796     p = page_find(start >> TARGET_PAGE_BITS);
1797     if (!p) {
1798         return;
1799     }
1800 
1801     assert_page_locked(p);
1802     tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
1803                                           retaddr);
1804 }
1805 #else
1806 /* Called with mmap_lock held. If pc is not 0 then it indicates the
1807  * host PC of the faulting store instruction that caused this invalidate.
1808  * Returns true if the caller needs to abort execution of the current
1809  * TB (because it was modified by this store and the guest CPU has
1810  * precise-SMC semantics).
1811  */
1812 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
1813 {
1814     TranslationBlock *tb;
1815     PageDesc *p;
1816     int n;
1817 #ifdef TARGET_HAS_PRECISE_SMC
1818     TranslationBlock *current_tb = NULL;
1819     CPUState *cpu = current_cpu;
1820     CPUArchState *env = NULL;
1821     int current_tb_modified = 0;
1822     target_ulong current_pc = 0;
1823     target_ulong current_cs_base = 0;
1824     uint32_t current_flags = 0;
1825 #endif
1826 
1827     assert_memory_lock();
1828 
1829     addr &= TARGET_PAGE_MASK;
1830     p = page_find(addr >> TARGET_PAGE_BITS);
1831     if (!p) {
1832         return false;
1833     }
1834 
1835 #ifdef TARGET_HAS_PRECISE_SMC
1836     if (p->first_tb && pc != 0) {
1837         current_tb = tcg_tb_lookup(pc);
1838     }
1839     if (cpu != NULL) {
1840         env = cpu->env_ptr;
1841     }
1842 #endif
1843     assert_page_locked(p);
1844     PAGE_FOR_EACH_TB(p, tb, n) {
1845 #ifdef TARGET_HAS_PRECISE_SMC
1846         if (current_tb == tb &&
1847             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1848             /*
1849              * If we are modifying the current TB, we must stop its execution.
1850              * We could be more precise by checking that the modification is
1851              * after the current PC, but it would require a specialized
1852              * function to partially restore the CPU state.
1853              */
1854             current_tb_modified = 1;
1855             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
1856             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1857                                  &current_flags);
1858         }
1859 #endif /* TARGET_HAS_PRECISE_SMC */
1860         tb_phys_invalidate(tb, addr);
1861     }
1862     p->first_tb = (uintptr_t)NULL;
1863 #ifdef TARGET_HAS_PRECISE_SMC
1864     if (current_tb_modified) {
1865         /* Force execution of one insn next time.  */
1866         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1867         return true;
1868     }
1869 #endif
1870 
1871     return false;
1872 }
1873 #endif
1874 
1875 /* user-mode: call with mmap_lock held */
1876 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1877 {
1878     TranslationBlock *tb;
1879 
1880     assert_memory_lock();
1881 
1882     tb = tcg_tb_lookup(retaddr);
1883     if (tb) {
1884         /* We can use retranslation to find the PC.  */
1885         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1886         tb_phys_invalidate(tb, -1);
1887     } else {
1888         /* The exception probably happened in a helper.  The CPU state should
1889            have been saved before calling it. Fetch the PC from there.  */
1890         CPUArchState *env = cpu->env_ptr;
1891         target_ulong pc, cs_base;
1892         tb_page_addr_t addr;
1893         uint32_t flags;
1894 
1895         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1896         addr = get_page_addr_code(env, pc);
1897         if (addr != -1) {
1898             tb_invalidate_phys_range(addr, addr + 1);
1899         }
1900     }
1901 }
1902 
1903 #ifndef CONFIG_USER_ONLY
1904 /*
1905  * In deterministic execution mode, instructions doing device I/Os
1906  * must be at the end of the TB.
1907  *
1908  * Called by softmmu_template.h, with iothread mutex not held.
1909  */
1910 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1911 {
1912     TranslationBlock *tb;
1913     CPUClass *cc;
1914     uint32_t n;
1915 
1916     tb = tcg_tb_lookup(retaddr);
1917     if (!tb) {
1918         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1919                   (void *)retaddr);
1920     }
1921     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1922 
1923     /*
1924      * Some guests must re-execute the branch when re-executing a delay
1925      * slot instruction.  When this is the case, adjust icount and N
1926      * to account for the re-execution of the branch.
1927      */
1928     n = 1;
1929     cc = CPU_GET_CLASS(cpu);
1930     if (cc->tcg_ops->io_recompile_replay_branch &&
1931         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1932         cpu_neg(cpu)->icount_decr.u16.low++;
1933         n = 2;
1934     }
1935 
1936     /*
1937      * Exit the loop and potentially generate a new TB executing just
1938      * the I/O insns.  We also limit instrumentation to memory operations
1939      * only (which execute after completion) so we don't double-instrument
1940      * the instruction.
1941      */
1942     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1943 
1944     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1945         target_ulong pc = log_pc(cpu, tb);
1946         if (qemu_log_in_addr_range(pc)) {
1947             qemu_log("cpu_io_recompile: rewound execution of TB to "
1948                      TARGET_FMT_lx "\n", pc);
1949         }
1950     }
1951 
1952     cpu_loop_exit_noexc(cpu);
1953 }
1954 
1955 static void print_qht_statistics(struct qht_stats hst, GString *buf)
1956 {
1957     uint32_t hgram_opts;
1958     size_t hgram_bins;
1959     char *hgram;
1960 
1961     if (!hst.head_buckets) {
1962         return;
1963     }
1964     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1965                            "(%0.2f%% head buckets used)\n",
1966                            hst.used_head_buckets, hst.head_buckets,
1967                            (double)hst.used_head_buckets /
1968                            hst.head_buckets * 100);
1969 
1970     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1971     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1972     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1973         hgram_opts |= QDIST_PR_NODECIMAL;
1974     }
1975     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1976     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1977                            "Histogram: %s\n",
1978                            qdist_avg(&hst.occupancy) * 100, hgram);
1979     g_free(hgram);
1980 
1981     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1982     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1983     if (hgram_bins > 10) {
1984         hgram_bins = 10;
1985     } else {
1986         hgram_bins = 0;
1987         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1988     }
1989     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1990     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1991                            "Histogram: %s\n",
1992                            qdist_avg(&hst.chain), hgram);
1993     g_free(hgram);
1994 }
1995 
1996 struct tb_tree_stats {
1997     size_t nb_tbs;
1998     size_t host_size;
1999     size_t target_size;
2000     size_t max_target_size;
2001     size_t direct_jmp_count;
2002     size_t direct_jmp2_count;
2003     size_t cross_page;
2004 };
2005 
2006 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2007 {
2008     const TranslationBlock *tb = value;
2009     struct tb_tree_stats *tst = data;
2010 
2011     tst->nb_tbs++;
2012     tst->host_size += tb->tc.size;
2013     tst->target_size += tb->size;
2014     if (tb->size > tst->max_target_size) {
2015         tst->max_target_size = tb->size;
2016     }
2017     if (tb->page_addr[1] != -1) {
2018         tst->cross_page++;
2019     }
2020     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2021         tst->direct_jmp_count++;
2022         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2023             tst->direct_jmp2_count++;
2024         }
2025     }
2026     return false;
2027 }
2028 
2029 void dump_exec_info(GString *buf)
2030 {
2031     struct tb_tree_stats tst = {};
2032     struct qht_stats hst;
2033     size_t nb_tbs, flush_full, flush_part, flush_elide;
2034 
2035     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2036     nb_tbs = tst.nb_tbs;
2037     /* XXX: avoid using doubles ? */
2038     g_string_append_printf(buf, "Translation buffer state:\n");
2039     /*
2040      * Report total code size including the padding and TB structs;
2041      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2042      * For avg host size we use the precise numbers from tb_tree_stats though.
2043      */
2044     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
2045                            tcg_code_size(), tcg_code_capacity());
2046     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
2047     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
2048                            nb_tbs ? tst.target_size / nb_tbs : 0,
2049                            tst.max_target_size);
2050     g_string_append_printf(buf, "TB avg host size    %zu bytes "
2051                            "(expansion ratio: %0.1f)\n",
2052                            nb_tbs ? tst.host_size / nb_tbs : 0,
2053                            tst.target_size ?
2054                            (double)tst.host_size / tst.target_size : 0);
2055     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
2056                            tst.cross_page,
2057                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2058     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
2059                            "(2 jumps=%zu %zu%%)\n",
2060                            tst.direct_jmp_count,
2061                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2062                            tst.direct_jmp2_count,
2063                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2064 
2065     qht_statistics_init(&tb_ctx.htable, &hst);
2066     print_qht_statistics(hst, buf);
2067     qht_statistics_destroy(&hst);
2068 
2069     g_string_append_printf(buf, "\nStatistics:\n");
2070     g_string_append_printf(buf, "TB flush count      %u\n",
2071                            qatomic_read(&tb_ctx.tb_flush_count));
2072     g_string_append_printf(buf, "TB invalidate count %u\n",
2073                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
2074 
2075     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2076     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
2077     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
2078     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
2079     tcg_dump_info(buf);
2080 }
2081 
2082 #else /* CONFIG_USER_ONLY */
2083 
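     /*
      * user-mode: record the interrupt request and set the high half of
      * icount_decr to -1 so that the exit check in generated code fires
      * and the request is noticed promptly.
      */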
2084 void cpu_interrupt(CPUState *cpu, int mask)
2085 {
2086     g_assert(qemu_mutex_iothread_locked());
2087     cpu->interrupt_request |= mask;
2088     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2089 }
2090 
2091 /*
2092  * Walks guest process memory "regions" one by one
2093  * and calls callback function 'fn' for each region.
2094  */
2095 struct walk_memory_regions_data {
2096     walk_memory_regions_fn fn;
2097     void *priv;
2098     target_ulong start;
2099     int prot;
2100 };
2101 
2102 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2103                                    target_ulong end, int new_prot)
2104 {
2105     if (data->start != -1u) {
2106         int rc = data->fn(data->priv, data->start, end, data->prot);
2107         if (rc != 0) {
2108             return rc;
2109         }
2110     }
2111 
2112     data->start = (new_prot ? end : -1u);
2113     data->prot = new_prot;
2114 
2115     return 0;
2116 }
2117 
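     /*
      * Recursively walk one level of the l1/l2 page table.  A NULL entry
      * closes the region being accumulated; a level-0 entry is an array of
      * PageDesc whose flags are compared against the current region's
      * protection; intermediate levels recurse into their V_L2_SIZE children.
      */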
2118 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2119                                  target_ulong base, int level, void **lp)
2120 {
2121     target_ulong pa;
2122     int i, rc;
2123 
2124     if (*lp == NULL) {
2125         return walk_memory_regions_end(data, base, 0);
2126     }
2127 
2128     if (level == 0) {
2129         PageDesc *pd = *lp;
2130 
2131         for (i = 0; i < V_L2_SIZE; ++i) {
2132             int prot = pd[i].flags;
2133 
2134             pa = base | (i << TARGET_PAGE_BITS);
2135             if (prot != data->prot) {
2136                 rc = walk_memory_regions_end(data, pa, prot);
2137                 if (rc != 0) {
2138                     return rc;
2139                 }
2140             }
2141         }
2142     } else {
2143         void **pp = *lp;
2144 
2145         for (i = 0; i < V_L2_SIZE; ++i) {
2146             pa = base | ((target_ulong)i <<
2147                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2148             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2149             if (rc != 0) {
2150                 return rc;
2151             }
2152         }
2153     }
2154 
2155     return 0;
2156 }
2157 
2158 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2159 {
2160     struct walk_memory_regions_data data;
2161     uintptr_t i, l1_sz = v_l1_size;
2162 
2163     data.fn = fn;
2164     data.priv = priv;
2165     data.start = -1u;
2166     data.prot = 0;
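         /* data.start == -1u means no region is currently being accumulated. */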
2167 
2168     for (i = 0; i < l1_sz; i++) {
2169         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2170         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2171         if (rc != 0) {
2172             return rc;
2173         }
2174     }
2175 
2176     return walk_memory_regions_end(&data, 0, 0);
2177 }
2178 
2179 static int dump_region(void *priv, target_ulong start,
2180     target_ulong end, unsigned long prot)
2181 {
2182     FILE *f = (FILE *)priv;
2183 
2184     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2185         " "TARGET_FMT_lx" %c%c%c\n",
2186         start, end, end - start,
2187         ((prot & PAGE_READ) ? 'r' : '-'),
2188         ((prot & PAGE_WRITE) ? 'w' : '-'),
2189         ((prot & PAGE_EXEC) ? 'x' : '-'));
2190 
2191     return 0;
2192 }
2193 
2194 /* dump memory mappings */
2195 void page_dump(FILE *f)
2196 {
2197     const int length = sizeof(target_ulong) * 2;
2198     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2199             length, "start", length, "end", length, "size", "prot");
2200     walk_memory_regions(f, dump_region);
2201 }
2202 
2203 int page_get_flags(target_ulong address)
2204 {
2205     PageDesc *p;
2206 
2207     p = page_find(address >> TARGET_PAGE_BITS);
2208     if (!p) {
2209         return 0;
2210     }
2211     return p->flags;
2212 }
2213 
2214 /*
2215  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
2216  * By default, they are not kept.
2217  */
2218 #ifndef PAGE_TARGET_STICKY
2219 #define PAGE_TARGET_STICKY  0
2220 #endif
2221 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
2222 
2223 /* Modify the flags of a page and invalidate the code if necessary.
2224    The flag PAGE_WRITE_ORG is set automatically based on PAGE_WRITE.
2225    The mmap_lock should already be held.  */
2226 void page_set_flags(target_ulong start, target_ulong end, int flags)
2227 {
2228     target_ulong addr, len;
2229     bool reset_target_data;
2230 
2231     /* This function should never be called with addresses outside the
2232        guest address space.  If this assert fires, it probably indicates
2233        a missing call to h2g_valid.  */
2234     assert(end - 1 <= GUEST_ADDR_MAX);
2235     assert(start < end);
2236     /* Only set PAGE_ANON with new mappings. */
2237     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2238     assert_memory_lock();
2239 
2240     start = start & TARGET_PAGE_MASK;
2241     end = TARGET_PAGE_ALIGN(end);
2242 
2243     if (flags & PAGE_WRITE) {
2244         flags |= PAGE_WRITE_ORG;
2245     }
2246     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2247     flags &= ~PAGE_RESET;
2248 
2249     for (addr = start, len = end - start;
2250          len != 0;
2251          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2252         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
2253 
2254         /* If the page was write-protected because it contains translated
2255            code and is now being made writable, invalidate that code.  */
2256         if (!(p->flags & PAGE_WRITE) &&
2257             (flags & PAGE_WRITE) &&
2258             p->first_tb) {
2259             tb_invalidate_phys_page(addr, 0);
2260         }
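             /*
              * A fresh mapping (PAGE_RESET) or an unmapping (no PAGE_VALID)
              * discards any per-page target data and takes the new flags
              * verbatim; a plain mprotect() keeps the sticky bits already
              * accumulated on the mapping.
              */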
2261         if (reset_target_data) {
2262             g_free(p->target_data);
2263             p->target_data = NULL;
2264             p->flags = flags;
2265         } else {
2266             /* Using mprotect on a page does not change sticky bits. */
2267             p->flags = (p->flags & PAGE_STICKY) | flags;
2268         }
2269     }
2270 }
2271 
2272 void page_reset_target_data(target_ulong start, target_ulong end)
2273 {
2274     target_ulong addr, len;
2275 
2276     /*
2277      * This function should never be called with addresses outside the
2278      * guest address space.  If this assert fires, it probably indicates
2279      * a missing call to h2g_valid.
2280      */
2281     assert(end - 1 <= GUEST_ADDR_MAX);
2282     assert(start < end);
2283     assert_memory_lock();
2284 
2285     start = start & TARGET_PAGE_MASK;
2286     end = TARGET_PAGE_ALIGN(end);
2287 
2288     for (addr = start, len = end - start;
2289          len != 0;
2290          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2291         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
2292 
2293         g_free(p->target_data);
2294         p->target_data = NULL;
2295     }
2296 }
2297 
2298 void *page_get_target_data(target_ulong address)
2299 {
2300     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2301     return p ? p->target_data : NULL;
2302 }
2303 
2304 void *page_alloc_target_data(target_ulong address, size_t size)
2305 {
2306     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2307     void *ret = NULL;
2308 
2309     if (p->flags & PAGE_VALID) {
2310         ret = p->target_data;
2311         if (!ret) {
2312             p->target_data = ret = g_malloc0(size);
2313         }
2314     }
2315     return ret;
2316 }
2317 
2318 int page_check_range(target_ulong start, target_ulong len, int flags)
2319 {
2320     PageDesc *p;
2321     target_ulong end;
2322     target_ulong addr;
2323 
2324     /* This function should never be called with addresses outside the
2325        guest address space.  If this assert fires, it probably indicates
2326        a missing call to h2g_valid.  */
2327     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2328         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2329     }
2330 
2331     if (len == 0) {
2332         return 0;
2333     }
2334     if (start + len - 1 < start) {
2335         /* We've wrapped around.  */
2336         return -1;
2337     }
2338 
2339     /* must do this before we lose bits in the next step */
2340     end = TARGET_PAGE_ALIGN(start + len);
2341     start = start & TARGET_PAGE_MASK;
2342 
2343     for (addr = start, len = end - start;
2344          len != 0;
2345          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2346         p = page_find(addr >> TARGET_PAGE_BITS);
2347         if (!p) {
2348             return -1;
2349         }
2350         if (!(p->flags & PAGE_VALID)) {
2351             return -1;
2352         }
2353 
2354         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2355             return -1;
2356         }
2357         if (flags & PAGE_WRITE) {
2358             if (!(p->flags & PAGE_WRITE_ORG)) {
2359                 return -1;
2360             }
2361             /* unprotect the page if it was put read-only because it
2362                contains translated code */
2363             if (!(p->flags & PAGE_WRITE)) {
2364                 if (!page_unprotect(addr, 0)) {
2365                     return -1;
2366                 }
2367             }
2368         }
2369     }
2370     return 0;
2371 }
2372 
2373 void page_protect(tb_page_addr_t page_addr)
2374 {
2375     target_ulong addr;
2376     PageDesc *p;
2377     int prot;
2378 
2379     p = page_find(page_addr >> TARGET_PAGE_BITS);
2380     if (p && (p->flags & PAGE_WRITE)) {
2381         /*
2382          * Force the host page to be non-writable: writes will trigger a page
2383          * fault plus mprotect overhead.
2384          */
2385         page_addr &= qemu_host_page_mask;
2386         prot = 0;
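             /*
              * mprotect() works at host-page granularity, and a host page may
              * cover several target pages: accumulate the flags of every
              * target page in the host page and clear PAGE_WRITE on each
              * before dropping write permission on the host mapping.
              */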
2387         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
2388              addr += TARGET_PAGE_SIZE) {
2389 
2390             p = page_find(addr >> TARGET_PAGE_BITS);
2391             if (!p) {
2392                 continue;
2393             }
2394             prot |= p->flags;
2395             p->flags &= ~PAGE_WRITE;
2396         }
2397         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
2398                  (prot & PAGE_BITS) & ~PAGE_WRITE);
2399         if (DEBUG_TB_INVALIDATE_GATE) {
2400             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
2401         }
2402     }
2403 }
2404 
2405 /* called from signal handler: invalidate the code and unprotect the
2406  * page. Return 0 if the fault was not handled, 1 if it was handled,
2407  * and 2 if it was handled but the caller must cause the TB to be
2408  * immediately exited. (We can only return 2 if the 'pc' argument is
2409  * non-zero.)
2410  */
2411 int page_unprotect(target_ulong address, uintptr_t pc)
2412 {
2413     unsigned int prot;
2414     bool current_tb_invalidated;
2415     PageDesc *p;
2416     target_ulong host_start, host_end, addr;
2417 
2418     /* Technically this isn't safe inside a signal handler.  However we
2419        know this only ever happens in a synchronous SEGV handler, so in
2420        practice it seems to be ok.  */
2421     mmap_lock();
2422 
2423     p = page_find(address >> TARGET_PAGE_BITS);
2424     if (!p) {
2425         mmap_unlock();
2426         return 0;
2427     }
2428 
2429     /* if the page was really writable, then we change its
2430        protection back to writable */
2431     if (p->flags & PAGE_WRITE_ORG) {
2432         current_tb_invalidated = false;
2433         if (p->flags & PAGE_WRITE) {
2434             /* If the page is actually marked WRITE then assume this is because
2435              * this thread raced with another one which got here first and
2436              * set the page to PAGE_WRITE and did the TB invalidate for us.
2437              */
2438 #ifdef TARGET_HAS_PRECISE_SMC
2439             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2440             if (current_tb) {
2441                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2442             }
2443 #endif
2444         } else {
2445             host_start = address & qemu_host_page_mask;
2446             host_end = host_start + qemu_host_page_size;
2447 
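                 /*
                  * The host page may contain several target pages: mark each
                  * of them writable again and invalidate any translated code
                  * they hold before restoring write permission on the host
                  * mapping.
                  */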
2448             prot = 0;
2449             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2450                 p = page_find(addr >> TARGET_PAGE_BITS);
2451                 p->flags |= PAGE_WRITE;
2452                 prot |= p->flags;
2453 
2454                 /* and since the content will be modified, we must invalidate
2455                    the corresponding translated code. */
2456                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2457 #ifdef CONFIG_USER_ONLY
2458                 if (DEBUG_TB_CHECK_GATE) {
2459                     tb_invalidate_check(addr);
2460                 }
2461 #endif
2462             }
2463             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2464                      prot & PAGE_BITS);
2465         }
2466         mmap_unlock();
2467         /* If the current TB was invalidated, return to the main loop */
2468         return current_tb_invalidated ? 2 : 1;
2469     }
2470     mmap_unlock();
2471     return 0;
2472 }
2473 #endif /* CONFIG_USER_ONLY */
2474 
2475 /*
2476  * Called by generic code at e.g. cpu reset after cpu creation,
2477  * therefore we must be prepared to allocate the jump cache.
2478  */
2479 void tcg_flush_jmp_cache(CPUState *cpu)
2480 {
2481     CPUJumpCache *jc = cpu->tb_jmp_cache;
2482 
2483     if (likely(jc)) {
2484         for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
2485             qatomic_set(&jc->array[i].tb, NULL);
2486         }
2487     } else {
2488         /* This should happen once during realize, and thus never race. */
2489         jc = g_new0(CPUJumpCache, 1);
2490         jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
2491         assert(jc == NULL);
2492     }
2493 }
2494 
2495 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2496 void tcg_flush_softmmu_tlb(CPUState *cs)
2497 {
2498 #ifdef CONFIG_SOFTMMU
2499     tlb_flush(cs);
2500 #endif
2501 }
2502