xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 8eb806a7)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "qemu/bitmap.h"
50 #include "qemu/qemu-print.h"
51 #include "qemu/timer.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "exec/log.h"
55 #include "sysemu/cpus.h"
56 #include "sysemu/cpu-timers.h"
57 #include "sysemu/tcg.h"
58 #include "qapi/error.h"
59 #include "hw/core/tcg-cpu-ops.h"
60 #include "tb-hash.h"
61 #include "tb-context.h"
62 #include "internal.h"
63 
/*
 * Compile-time debug switches.  Uncomment one of the #defines below to
 * enable the matching diagnostics.  Each switch is mirrored by a *_GATE
 * macro that expands to 1 or 0, so that debug code can sit behind a plain
 * "if (..._GATE)" test.
 */
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
/* make various TB consistency checks */
/* #define DEBUG_TB_CHECK */

#ifdef DEBUG_TB_INVALIDATE
#define DEBUG_TB_INVALIDATE_GATE 1
#else
#define DEBUG_TB_INVALIDATE_GATE 0
#endif

#ifdef DEBUG_TB_FLUSH
#define DEBUG_TB_FLUSH_GATE 1
#else
#define DEBUG_TB_FLUSH_GATE 0
#endif

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks are only implemented for user-mode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#ifdef DEBUG_TB_CHECK
#define DEBUG_TB_CHECK_GATE 1
#else
#define DEBUG_TB_CHECK_GATE 0
#endif
91 
/*
 * Accesses to the various translation structures need to be serialised via
 * locks for consistency.
 * In user-mode emulation, accesses to the memory-related structures are
 * protected by mmap_lock.
 * In !user-mode we use per-page locks.
 *
 * assert_memory_lock() therefore expands to nothing for CONFIG_SOFTMMU and
 * to a debug assertion that mmap_lock is held otherwise.
 */
#ifdef CONFIG_SOFTMMU
#define assert_memory_lock()
#else
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#endif
103 
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably the number of writes to a code page (PageDesc.code_write_count)
 * after which a code bitmap is built -- confirm against the users.
 */
#define SMC_BITMAP_USE_THRESHOLD 10
105 
/*
 * Per-page descriptor.  Descriptors are looked up by page index through
 * l1_map (see page_find_alloc()).
 */
typedef struct PageDesc {
    /*
     * List of TBs intersecting this ram page.  This is a tagged pointer:
     * bit 0 selects which of the TB's page_next[] links continues the list,
     * the remaining bits are the TranslationBlock address
     * (see TB_FOR_EACH_TAGGED).
     */
    uintptr_t first_tb;
#ifdef CONFIG_SOFTMMU
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned long *code_bitmap;     /* freed by invalidate_page_bitmap() */
    unsigned int code_write_count;  /* zeroed by invalidate_page_bitmap() */
#else
    unsigned long flags;
    void *target_data;
#endif
#ifndef CONFIG_USER_ONLY
    /* Per-page lock, taken and released via page_lock()/page_unlock(). */
    QemuSpin lock;
#endif
} PageDesc;
122 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page; also used as the key under which the
 *          entry is stored in a &struct page_collection tree
 * @locked: whether the page lock is currently held through this entry
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
141 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index.
 *          The tree is created with page_entry_destroy() as its value
 *          destructor, so destroying the tree unlocks and frees the entries.
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};
166 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Every link is a uintptr_t: bit 0 is an index and the remaining bits are a
 * TranslationBlock pointer.  The link to the next element is read from
 * tb->field[n].  Iteration stops when the pointer part is NULL.
 *
 * @tb and @n are caller-supplied variables: inside the loop body @tb is the
 * current block and @n is the index under which it is linked.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Iterate over the TBs linked from @pagedesc->first_tb via page_next[]. */
#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)

/* Iterate over the TBs linked from @head_tb->jmp_list_head via jmp_list_next[]. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
178 
/*
 * In system mode we want L1_MAP to be based on ram offsets,
 * while in user mode we want it to be based on virtual addresses.
 *
 * In both cases the width is capped at HOST_LONG_BITS.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif
195 
/*
 * Size of the L2 (and L3, etc) page tables: each level below L1 is indexed
 * by V_L2_BITS bits of the page index and therefore has V_L2_SIZE entries.
 */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);
204 
/*
 * L1 Mapping properties, computed by page_table_config_init().
 */
static int v_l1_size;    /* number of l1_map entries actually indexed */
static int v_l1_shift;   /* right shift turning a page index into an L1 slot */
static int v_l2_levels;  /* intermediate table levels between L1 and PageDescs */

/* The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 *
 * NOTE(review): in page_table_config_init() these bounds are applied to
 * v_l1_bits, i.e. the width of the index into l1_map.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Root of the page-descriptor tree; sized for the widest possible L1 index. */
static void *l1_map[V_L1_MAX_SIZE];

/* Global TB state; this file uses its htable and tb_flush_count members. */
TBContext tb_ctx;
222 
223 static void page_table_config_init(void)
224 {
225     uint32_t v_l1_bits;
226 
227     assert(TARGET_PAGE_BITS);
228     /* The bits remaining after N lower levels of page tables.  */
229     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
230     if (v_l1_bits < V_L1_MIN_BITS) {
231         v_l1_bits += V_L2_BITS;
232     }
233 
234     v_l1_size = 1 << v_l1_bits;
235     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
236     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
237 
238     assert(v_l1_bits <= V_L1_MAX_BITS);
239     assert(v_l1_shift % V_L2_BITS == 0);
240     assert(v_l2_levels >= 0);
241 }
242 
243 /* Encode VAL as a signed leb128 sequence at P.
244    Return P incremented past the encoded value.  */
245 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
246 {
247     int more, byte;
248 
249     do {
250         byte = val & 0x7f;
251         val >>= 7;
252         more = !((val == 0 && (byte & 0x40) == 0)
253                  || (val == -1 && (byte & 0x40) != 0));
254         if (more) {
255             byte |= 0x80;
256         }
257         *p++ = byte;
258     } while (more);
259 
260     return p;
261 }
262 
263 /* Decode a signed leb128 sequence at *PP; increment *PP past the
264    decoded value.  Return the decoded value.  */
265 static target_long decode_sleb128(const uint8_t **pp)
266 {
267     const uint8_t *p = *pp;
268     target_long val = 0;
269     int byte, shift = 0;
270 
271     do {
272         byte = *p++;
273         val |= (target_ulong)(byte & 0x7f) << shift;
274         shift += 7;
275     } while (byte & 0x80);
276     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
277         val |= -(target_ulong)1 << shift;
278     }
279 
280     *pp = p;
281     return val;
282 }
283 
284 /* Encode the data collected about the instructions while compiling TB.
285    Place the data at BLOCK, and return the number of bytes consumed.
286 
287    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
288    which come from the target's insn_start data, followed by a uintptr_t
289    which comes from the host pc of the end of the code implementing the insn.
290 
291    Each line of the table is encoded as sleb128 deltas from the previous
292    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
293    That is, the first column is seeded with the guest pc, the last column
294    with the host pc, and the middle columns with zeros.  */
295 
296 static int encode_search(TranslationBlock *tb, uint8_t *block)
297 {
298     uint8_t *highwater = tcg_ctx->code_gen_highwater;
299     uint8_t *p = block;
300     int i, j, n;
301 
302     for (i = 0, n = tb->icount; i < n; ++i) {
303         target_ulong prev;
304 
305         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
306             if (i == 0) {
307                 prev = (j == 0 ? tb->pc : 0);
308             } else {
309                 prev = tcg_ctx->gen_insn_data[i - 1][j];
310             }
311             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
312         }
313         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
314         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
315 
316         /* Test for (pending) buffer overflow.  The assumption is that any
317            one row beginning below the high water mark cannot overrun
318            the buffer completely.  Thus we can test for overflow after
319            encoding a row without having to check during encoding.  */
320         if (unlikely(p > highwater)) {
321             return -1;
322         }
323     }
324 
325     return p - block;
326 }
327 
328 /* The cpu state corresponding to 'searched_pc' is restored.
329  * When reset_icount is true, current TB will be interrupted and
330  * icount should be recalculated.
331  */
332 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
333                                      uintptr_t searched_pc, bool reset_icount)
334 {
335     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
336     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
337     CPUArchState *env = cpu->env_ptr;
338     const uint8_t *p = tb->tc.ptr + tb->tc.size;
339     int i, j, num_insns = tb->icount;
340 #ifdef CONFIG_PROFILER
341     TCGProfile *prof = &tcg_ctx->prof;
342     int64_t ti = profile_getclock();
343 #endif
344 
345     searched_pc -= GETPC_ADJ;
346 
347     if (searched_pc < host_pc) {
348         return -1;
349     }
350 
351     /* Reconstruct the stored insn data while looking for the point at
352        which the end of the insn exceeds the searched_pc.  */
353     for (i = 0; i < num_insns; ++i) {
354         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
355             data[j] += decode_sleb128(&p);
356         }
357         host_pc += decode_sleb128(&p);
358         if (host_pc > searched_pc) {
359             goto found;
360         }
361     }
362     return -1;
363 
364  found:
365     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
366         assert(icount_enabled());
367         /* Reset the cycle counter to the start of the block
368            and shift if to the number of actually executed instructions */
369         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
370     }
371     restore_state_to_opc(env, tb, data);
372 
373 #ifdef CONFIG_PROFILER
374     qatomic_set(&prof->restore_time,
375                 prof->restore_time + profile_getclock() - ti);
376     qatomic_set(&prof->restore_count, prof->restore_count + 1);
377 #endif
378     return 0;
379 }
380 
381 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
382 {
383     /*
384      * The host_pc has to be in the rx region of the code buffer.
385      * If it is not we will not be able to resolve it here.
386      * The two cases where host_pc will not be correct are:
387      *
388      *  - fault during translation (instruction fetch)
389      *  - fault from helper (not using GETPC() macro)
390      *
391      * Either way we need return early as we can't resolve it here.
392      */
393     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
394         TranslationBlock *tb = tcg_tb_lookup(host_pc);
395         if (tb) {
396             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
397             return true;
398         }
399     }
400     return false;
401 }
402 
/*
 * Initialise page-size information and the page-descriptor tree layout.
 *
 * On BSD user-mode hosts, additionally mark every range already mapped in
 * the host process as PAGE_RESERVED.  The ranges come from kinfo_getvmmap()
 * when available, otherwise from the Linux-compat /proc maps file.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            for (;;) {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
                if (n == EOF) {
                    /*
                     * End of file or read error.  Testing feof() alone
                     * would spin forever on a read error.
                     */
                    break;
                }
                if (n != 2) {
                    int c;

                    /*
                     * Malformed line: fscanf() stopped at the offending
                     * character without consuming it, so discard the rest
                     * of the line to guarantee forward progress.
                     */
                    do {
                        c = fgetc(f);
                    } while (c != EOF && c != '\n');
                    if (c == EOF) {
                        break;
                    }
                    continue;
                }

                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            }

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
473 
474 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
475 {
476     PageDesc *pd;
477     void **lp;
478     int i;
479 
480     /* Level 1.  Always allocated.  */
481     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
482 
483     /* Level 2..N-1.  */
484     for (i = v_l2_levels; i > 0; i--) {
485         void **p = qatomic_rcu_read(lp);
486 
487         if (p == NULL) {
488             void *existing;
489 
490             if (!alloc) {
491                 return NULL;
492             }
493             p = g_new0(void *, V_L2_SIZE);
494             existing = qatomic_cmpxchg(lp, NULL, p);
495             if (unlikely(existing)) {
496                 g_free(p);
497                 p = existing;
498             }
499         }
500 
501         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
502     }
503 
504     pd = qatomic_rcu_read(lp);
505     if (pd == NULL) {
506         void *existing;
507 
508         if (!alloc) {
509             return NULL;
510         }
511         pd = g_new0(PageDesc, V_L2_SIZE);
512 #ifndef CONFIG_USER_ONLY
513         {
514             int i;
515 
516             for (i = 0; i < V_L2_SIZE; i++) {
517                 qemu_spin_init(&pd[i].lock);
518             }
519         }
520 #endif
521         existing = qatomic_cmpxchg(lp, NULL, pd);
522         if (unlikely(existing)) {
523 #ifndef CONFIG_USER_ONLY
524             {
525                 int i;
526 
527                 for (i = 0; i < V_L2_SIZE; i++) {
528                     qemu_spin_destroy(&pd[i].lock);
529                 }
530             }
531 #endif
532             g_free(pd);
533             pd = existing;
534         }
535     }
536 
537     return pd + (index & (V_L2_SIZE - 1));
538 }
539 
/*
 * Look up the PageDesc for page @index without allocating anything.
 * Returns NULL if no descriptor exists for that page yet.
 */
static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
544 
/* Forward declaration: used by page_lock_tb() before its definition. */
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
547 
548 /* In user-mode page locks aren't used; mmap_lock is enough */
549 #ifdef CONFIG_USER_ONLY
550 
/* User-mode: a page counts as locked whenever mmap_lock is held. */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

/* User-mode stub: no per-page lock to take. */
static inline void page_lock(PageDesc *pd)
{ }

/* User-mode stub: no per-page lock to release. */
static inline void page_unlock(PageDesc *pd)
{ }

/* User-mode stub: nothing to lock for @tb's pages. */
static inline void page_lock_tb(const TranslationBlock *tb)
{ }

/* User-mode stub: nothing to unlock for @tb's pages. */
static inline void page_unlock_tb(const TranslationBlock *tb)
{ }

/* User-mode stub: locks nothing and always returns NULL. */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

/* User-mode stub: nothing to unlock or free. */
void page_collection_unlock(struct page_collection *set)
{ }
573 #else /* !CONFIG_USER_ONLY */
574 
575 #ifdef CONFIG_DEBUG_TCG
576 
577 static __thread GHashTable *ht_pages_locked_debug;
578 
579 static void ht_pages_locked_debug_init(void)
580 {
581     if (ht_pages_locked_debug) {
582         return;
583     }
584     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
585 }
586 
587 static bool page_is_locked(const PageDesc *pd)
588 {
589     PageDesc *found;
590 
591     ht_pages_locked_debug_init();
592     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
593     return !!found;
594 }
595 
/*
 * Record @pd as locked by the calling thread.
 * Asserts that the thread did not already have it recorded.
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    /* The descriptor's address serves as both key and value. */
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
602 
/*
 * Drop @pd from the calling thread's record of locked pages.
 * Asserts that it was recorded as locked.
 */
static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    /* Keep the removal outside g_assert() so it is always executed. */
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}
612 
613 static void
614 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
615 {
616     if (unlikely(!page_is_locked(pd))) {
617         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
618                      pd, file, line);
619         abort();
620     }
621 }
622 
623 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
624 
/* Assert that the calling thread has no page recorded as locked. */
void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}
630 
631 #else /* !CONFIG_DEBUG_TCG */
632 
/* Without CONFIG_DEBUG_TCG there is no lock tracking: these do nothing. */
#define assert_page_locked(pd)

/* No-op counterpart of the debug-build lock bookkeeping. */
static inline void page_lock__debug(const PageDesc *pd)
{
}

/* No-op counterpart of the debug-build unlock bookkeeping. */
static inline void page_unlock__debug(const PageDesc *pd)
{
}
642 
643 #endif /* CONFIG_DEBUG_TCG */
644 
/*
 * Acquire @pd's lock.  The debug bookkeeping runs before the lock is
 * taken.
 */
static inline void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

/*
 * Release @pd's lock.  The debug bookkeeping runs after the lock has been
 * dropped.
 */
static inline void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}
656 
/*
 * Lock the page(s) of a TB in the correct acquisition order.
 * The descriptors are looked up without allocation and are not returned.
 */
static inline void page_lock_tb(const TranslationBlock *tb)
{
    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
}
662 
663 static inline void page_unlock_tb(const TranslationBlock *tb)
664 {
665     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
666 
667     page_unlock(p1);
668     if (unlikely(tb->page_addr[1] != -1)) {
669         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
670 
671         if (p2 != p1) {
672             page_unlock(p2);
673         }
674     }
675 }
676 
677 static inline struct page_entry *
678 page_entry_new(PageDesc *pd, tb_page_addr_t index)
679 {
680     struct page_entry *pe = g_malloc(sizeof(*pe));
681 
682     pe->index = index;
683     pe->pd = pd;
684     pe->locked = false;
685     return pe;
686 }
687 
688 static void page_entry_destroy(gpointer p)
689 {
690     struct page_entry *pe = p;
691 
692     g_assert(pe->locked);
693     page_unlock(pe->pd);
694     g_free(pe);
695 }
696 
697 /* returns false on success */
698 static bool page_entry_trylock(struct page_entry *pe)
699 {
700     bool busy;
701 
702     busy = qemu_spin_trylock(&pe->pd->lock);
703     if (!busy) {
704         g_assert(!pe->locked);
705         pe->locked = true;
706         page_lock__debug(pe->pd);
707     }
708     return busy;
709 }
710 
/*
 * Lock @pe's page, waiting if necessary, and mark the entry as locked.
 * The entry must not already be marked as locked.
 */
static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}
717 
718 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
719 {
720     struct page_entry *pe = value;
721 
722     do_page_entry_lock(pe);
723     return FALSE;
724 }
725 
726 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
727 {
728     struct page_entry *pe = value;
729 
730     if (pe->locked) {
731         pe->locked = false;
732         page_unlock(pe->pd);
733     }
734     return FALSE;
735 }
736 
737 /*
738  * Trylock a page, and if successful, add the page to a collection.
739  * Returns true ("busy") if the page could not be locked; false otherwise.
740  */
741 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
742 {
743     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
744     struct page_entry *pe;
745     PageDesc *pd;
746 
747     pe = g_tree_lookup(set->tree, &index);
748     if (pe) {
749         return false;
750     }
751 
752     pd = page_find(index);
753     if (pd == NULL) {
754         return false;
755     }
756 
757     pe = page_entry_new(pd, index);
758     g_tree_insert(set->tree, &pe->index, pe);
759 
760     /*
761      * If this is either (1) the first insertion or (2) a page whose index
762      * is higher than any other so far, just lock the page and move on.
763      */
764     if (set->max == NULL || pe->index > set->max->index) {
765         set->max = pe;
766         do_page_entry_lock(pe);
767         return false;
768     }
769     /*
770      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
771      * locks in order.
772      */
773     return page_entry_trylock(pe);
774 }
775 
776 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
777 {
778     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
779     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
780 
781     if (a == b) {
782         return 0;
783     } else if (a < b) {
784         return -1;
785     }
786     return 1;
787 }
788 
/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * Returns a newly allocated collection holding the locked pages; release it
 * with page_collection_unlock().
 *
 * NOTE(review): the loop below runs while index <= (@end >> TARGET_PAGE_BITS),
 * i.e. it also visits the page containing @end, which is wider than the
 * half-open range stated above -- confirm which one is intended.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    /* From here on @start and @end are page indexes, not addresses. */
    start >>= TARGET_PAGE_BITS;
    end   >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /* Lock every page collected so far (none on the first pass). */
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        /* Pages without a descriptor are skipped. */
        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* busy: drop all locks and start over */
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* Also collect the pages of every TB that intersects this page. */
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb->page_addr[0]) ||
                (tb->page_addr[1] != -1 &&
                 page_trylock_add(set, tb->page_addr[1]))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
838 
/*
 * Unlock every page in @set and free the collection.
 * @set must not be used afterwards.
 */
void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}
845 
846 #endif /* !CONFIG_USER_ONLY */
847 
848 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
849                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
850 {
851     PageDesc *p1, *p2;
852     tb_page_addr_t page1;
853     tb_page_addr_t page2;
854 
855     assert_memory_lock();
856     g_assert(phys1 != -1);
857 
858     page1 = phys1 >> TARGET_PAGE_BITS;
859     page2 = phys2 >> TARGET_PAGE_BITS;
860 
861     p1 = page_find_alloc(page1, alloc);
862     if (ret_p1) {
863         *ret_p1 = p1;
864     }
865     if (likely(phys2 == -1)) {
866         page_lock(p1);
867         return;
868     } else if (page1 == page2) {
869         page_lock(p1);
870         if (ret_p2) {
871             *ret_p2 = p1;
872         }
873         return;
874     }
875     p2 = page_find_alloc(page2, alloc);
876     if (ret_p2) {
877         *ret_p2 = p2;
878     }
879     if (page1 < page2) {
880         page_lock(p1);
881         page_lock(p2);
882     } else {
883         page_lock(p2);
884         page_lock(p1);
885     }
886 }
887 
888 static bool tb_cmp(const void *ap, const void *bp)
889 {
890     const TranslationBlock *a = ap;
891     const TranslationBlock *b = bp;
892 
893     return a->pc == b->pc &&
894         a->cs_base == b->cs_base &&
895         a->flags == b->flags &&
896         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
897         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
898         a->page_addr[0] == b->page_addr[0] &&
899         a->page_addr[1] == b->page_addr[1];
900 }
901 
902 void tb_htable_init(void)
903 {
904     unsigned int mode = QHT_MODE_AUTO_RESIZE;
905 
906     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
907 }
908 
/*
 * Discard @p's code bitmap and reset its write counter (CONFIG_SOFTMMU
 * only; otherwise only the lock assertion remains).
 *
 * Call with @p->lock held.
 */
static inline void invalidate_page_bitmap(PageDesc *p)
{
    assert_page_locked(p);
#ifdef CONFIG_SOFTMMU
    g_free(p->code_bitmap);
    p->code_bitmap = NULL;
    p->code_write_count = 0;
#endif
}
919 
920 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
921 static void page_flush_tb_1(int level, void **lp)
922 {
923     int i;
924 
925     if (*lp == NULL) {
926         return;
927     }
928     if (level == 0) {
929         PageDesc *pd = *lp;
930 
931         for (i = 0; i < V_L2_SIZE; ++i) {
932             page_lock(&pd[i]);
933             pd[i].first_tb = (uintptr_t)NULL;
934             invalidate_page_bitmap(pd + i);
935             page_unlock(&pd[i]);
936         }
937     } else {
938         void **pp = *lp;
939 
940         for (i = 0; i < V_L2_SIZE; ++i) {
941             page_flush_tb_1(level - 1, pp + i);
942         }
943     }
944 }
945 
946 static void page_flush_tb(void)
947 {
948     int i, l1_sz = v_l1_size;
949 
950     for (i = 0; i < l1_sz; i++) {
951         page_flush_tb_1(v_l2_levels, l1_map + i);
952     }
953 }
954 
955 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
956 {
957     const TranslationBlock *tb = value;
958     size_t *size = data;
959 
960     *size += tb->tc.size;
961     return false;
962 }
963 
964 /* flush all the translation blocks */
965 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
966 {
967     bool did_flush = false;
968 
969     mmap_lock();
970     /* If it is already been done on request of another CPU,
971      * just retry.
972      */
973     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
974         goto done;
975     }
976     did_flush = true;
977 
978     if (DEBUG_TB_FLUSH_GATE) {
979         size_t nb_tbs = tcg_nb_tbs();
980         size_t host_size = 0;
981 
982         tcg_tb_foreach(tb_host_size_iter, &host_size);
983         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
984                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
985     }
986 
987     CPU_FOREACH(cpu) {
988         cpu_tb_jmp_cache_clear(cpu);
989     }
990 
991     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
992     page_flush_tb();
993 
994     tcg_region_reset_all();
995     /* XXX: flush processor icache at this point if cache flush is
996        expensive */
997     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
998 
999 done:
1000     mmap_unlock();
1001     if (did_flush) {
1002         qemu_plugin_flush_cb();
1003     }
1004 }
1005 
1006 void tb_flush(CPUState *cpu)
1007 {
1008     if (tcg_enabled()) {
1009         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1010 
1011         if (cpu_in_exclusive_context(cpu)) {
1012             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1013         } else {
1014             async_safe_run_on_cpu(cpu, do_tb_flush,
1015                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1016         }
1017     }
1018 }
1019 
1020 /*
1021  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1022  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1023  * and let the optimizer get rid of them by wrapping their user-only callers
1024  * with if (DEBUG_TB_CHECK_GATE).
1025  */
1026 #ifdef CONFIG_USER_ONLY
1027 
1028 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1029 {
1030     TranslationBlock *tb = p;
1031     target_ulong addr = *(target_ulong *)userp;
1032 
1033     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1034         printf("ERROR invalidate: address=" TARGET_FMT_lx
1035                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1036     }
1037 }
1038 
1039 /* verify that all the pages have correct rights for code
1040  *
1041  * Called with mmap_lock held.
1042  */
1043 static void tb_invalidate_check(target_ulong address)
1044 {
1045     address &= TARGET_PAGE_MASK;
1046     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1047 }
1048 
1049 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1050 {
1051     TranslationBlock *tb = p;
1052     int flags1, flags2;
1053 
1054     flags1 = page_get_flags(tb->pc);
1055     flags2 = page_get_flags(tb->pc + tb->size - 1);
1056     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1057         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1058                (long)tb->pc, tb->size, flags1, flags2);
1059     }
1060 }
1061 
/*
 * Verify that all the pages have correct rights for code: run
 * do_tb_page_check() over every TB currently in the hash table.
 */
static void tb_page_check(void)
{
    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
}
1067 
1068 #endif /* CONFIG_USER_ONLY */
1069 
1070 /*
1071  * user-mode: call with mmap_lock held
1072  * !user-mode: call with @pd->lock held
1073  */
1074 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1075 {
1076     TranslationBlock *tb1;
1077     uintptr_t *pprev;
1078     unsigned int n1;
1079 
1080     assert_page_locked(pd);
1081     pprev = &pd->first_tb;
1082     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1083         if (tb1 == tb) {
1084             *pprev = tb1->page_next[n1];
1085             return;
1086         }
1087         pprev = &tb1->page_next[n1];
1088     }
1089     g_assert_not_reached();
1090 }
1091 
/*
 * Remove @orig from its @n_orig-th jump list, i.e. from the incoming-jump
 * list of the TB that @orig->jmp_dest[@n_orig] points to.
 *
 * The LSB of jmp_dest[@n_orig] is set first and is left set on return.
 * If there is no destination, or the destination already unlinked the jump
 * concurrently, there is nothing else to do.  The destination's jmp_lock is
 * taken while its list is modified.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    /* strip the LSB tag to recover the destination TB pointer */
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jmp_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
1140 
1141 /* reset the jump entry 'n' of a TB so that it is not chained to
1142    another TB */
1143 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1144 {
1145     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1146     tb_set_jmp_target(tb, n, addr);
1147 }
1148 
1149 /* remove any jumps to the TB */
1150 static inline void tb_jmp_unlink(TranslationBlock *dest)
1151 {
1152     TranslationBlock *tb;
1153     int n;
1154 
1155     qemu_spin_lock(&dest->jmp_lock);
1156 
1157     TB_FOR_EACH_JMP(dest, tb, n) {
1158         tb_reset_jump(tb, n);
1159         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1160         /* No need to clear the list entry; setting the dest ptr is enough */
1161     }
1162     dest->jmp_list_head = (uintptr_t)NULL;
1163 
1164     qemu_spin_unlock(&dest->jmp_lock);
1165 }
1166 
/*
 * Invalidate @tb: mark it CF_INVALID, remove it from the TB hash table,
 * optionally from its pages' TB lists, from every CPU's jump cache, and
 * unlink all jumps from and to it.
 *
 * @tb:                the TB to invalidate
 * @rm_from_page_list: if true, also remove @tb from the TB list of each
 *                     page it occupies and invalidate those pages' bitmaps
 *
 * If @tb is not found in the hash table the function returns right after
 * setting CF_INVALID, without touching anything else.
 *
 * In user-mode, call with mmap_lock held.
 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 * locks held.
 */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    CPUState *cpu;
    PageDesc *p;
    uint32_t h;
    tb_page_addr_t phys_pc;
    /* sampled before CF_INVALID is set; used for the hash lookup below */
    uint32_t orig_cflags = tb_cflags(tb);

    assert_memory_lock();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                     tb->trace_vcpu_dstate);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        /* not in the hash table: nothing further is undone */
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(p, tb);
        invalidate_page_bitmap(p);
        /* page_addr[1] == -1 means the TB occupies a single page */
        if (tb->page_addr[1] != -1) {
            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
            tb_page_remove(p, tb);
            invalidate_page_bitmap(p);
        }
    }

    /* remove the TB from each CPU's jump cache */
    h = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
        }
    }

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
                tb_ctx.tb_phys_invalidate_count + 1);
}
1225 
/*
 * Invalidate @tb, including removal from its pages' TB lists
 * (rm_from_page_list == true), so the locking requirements of
 * do_tb_phys_invalidate() for that case apply to the caller.
 *
 * The invalidation is bracketed by qemu_thread_jit_write() and
 * qemu_thread_jit_execute().
 * NOTE(review): presumably these switch the calling thread's JIT memory
 * between writable and executable -- confirm against their definitions.
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    qemu_thread_jit_write();
    do_tb_phys_invalidate(tb, true);
    qemu_thread_jit_execute();
}
1232 
1233 /* invalidate one TB
1234  *
1235  * Called with mmap_lock held in user-mode.
1236  */
1237 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1238 {
1239     if (page_addr == -1 && tb->page_addr[0] != -1) {
1240         page_lock_tb(tb);
1241         do_tb_phys_invalidate(tb, true);
1242         page_unlock_tb(tb);
1243     } else {
1244         do_tb_phys_invalidate(tb, false);
1245     }
1246 }
1247 
1248 #ifdef CONFIG_SOFTMMU
1249 /* call with @p->lock held */
1250 static void build_page_bitmap(PageDesc *p)
1251 {
1252     int n, tb_start, tb_end;
1253     TranslationBlock *tb;
1254 
1255     assert_page_locked(p);
1256     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1257 
1258     PAGE_FOR_EACH_TB(p, tb, n) {
1259         /* NOTE: this is subtle as a TB may span two physical pages */
1260         if (n == 0) {
1261             /* NOTE: tb_end may be after the end of the page, but
1262                it is not a problem */
1263             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1264             tb_end = tb_start + tb->size;
1265             if (tb_end > TARGET_PAGE_SIZE) {
1266                 tb_end = TARGET_PAGE_SIZE;
1267              }
1268         } else {
1269             tb_start = 0;
1270             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1271         }
1272         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1273     }
1274 }
1275 #endif
1276 
1277 /* add the tb in the target page and protect it if necessary
1278  *
1279  * Called with mmap_lock held for user-mode emulation.
1280  * Called with @p->lock held in !user-mode.
1281  */
1282 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1283                                unsigned int n, tb_page_addr_t page_addr)
1284 {
1285 #ifndef CONFIG_USER_ONLY
1286     bool page_already_protected;
1287 #endif
1288 
1289     assert_page_locked(p);
1290 
1291     tb->page_addr[n] = page_addr;
1292     tb->page_next[n] = p->first_tb;
1293 #ifndef CONFIG_USER_ONLY
1294     page_already_protected = p->first_tb != (uintptr_t)NULL;
1295 #endif
1296     p->first_tb = (uintptr_t)tb | n;
1297     invalidate_page_bitmap(p);
1298 
1299 #if defined(CONFIG_USER_ONLY)
1300     /* translator_loop() must have made all TB pages non-writable */
1301     assert(!(p->flags & PAGE_WRITE));
1302 #else
1303     /* if some code is already present, then the pages are already
1304        protected. So we handle the case where only the first TB is
1305        allocated in a physical page */
1306     if (!page_already_protected) {
1307         tlb_protect_code(page_addr);
1308     }
1309 #endif
1310 }
1311 
/*
 * Add a new TB and link it to the physical page tables. phys_page2 is
 * (-1) to indicate that only one page contains the TB.
 *
 * @tb:         fully initialized TB to publish; must not be CF_INVALID
 * @phys_pc:    physical address of the TB's first byte; its page is the
 *              TB's first page and it is also an input to the hash
 * @phys_page2: address of the TB's second page, or -1
 *
 * Called with mmap_lock held for user-mode emulation.
 *
 * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
 * Note that in !user-mode, another thread might have already added a TB
 * for the same block of guest code that @tb corresponds to. In that case,
 * the caller should discard the original @tb, and use instead the returned TB.
 */
static TranslationBlock *
tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
             tb_page_addr_t phys_page2)
{
    PageDesc *p;
    PageDesc *p2 = NULL;
    void *existing_tb = NULL;
    uint32_t h;

    assert_memory_lock();
    tcg_debug_assert(!(tb->cflags & CF_INVALID));

    /*
     * Add the TB to the page list, acquiring first the pages's locks.
     * We keep the locks held until after inserting the TB in the hash table,
     * so that if the insertion fails we know for sure that the TBs are still
     * in the page descriptors.
     * Note that inserting into the hash table first isn't an option, since
     * we can only insert TBs that are fully initialized.
     */
    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (p2) {
        tb_page_add(p2, tb, 1, phys_page2);
    } else {
        tb->page_addr[1] = -1;
    }

    /* add in the hash table */
    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                     tb->trace_vcpu_dstate);
    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);

    /* remove TB from the page(s) if we couldn't insert it */
    if (unlikely(existing_tb)) {
        tb_page_remove(p, tb);
        invalidate_page_bitmap(p);
        if (p2) {
            tb_page_remove(p2, tb);
            invalidate_page_bitmap(p2);
        }
        /* hand the already-present TB back to the caller */
        tb = existing_tb;
    }

    /* release the page locks; p2 is skipped when it is the same as p */
    if (p2 && p2 != p) {
        page_unlock(p2);
    }
    page_unlock(p);

#ifdef CONFIG_USER_ONLY
    if (DEBUG_TB_CHECK_GATE) {
        tb_page_check();
    }
#endif
    return tb;
}
1379 
/*
 * Translate the guest code starting at @pc into a new TranslationBlock.
 *
 * @cpu:     CPU on whose behalf the translation is performed
 * @pc:      guest PC of the first instruction; stored in tb->pc
 * @cs_base: stored in tb->cs_base
 * @flags:   stored in tb->flags
 * @cflags:  compile flags; the CF_COUNT_MASK bits give the maximum number
 *           of guest instructions (0 selects TCG_MAX_INSNS)
 *
 * Returns the new TB, or the TB already present in the lookup structures
 * when tb_link_page() finds one matching the new TB (the fresh translation
 * is then discarded).  If @pc has no physical code page (phys_pc == -1) a
 * one-instruction TB is generated and returned without being linked.
 *
 * If no TB can be allocated, a flush is requested and the function leaves
 * through cpu_loop_exit().
 * NOTE(review): cpu_loop_exit() presumably does not return -- confirm.
 *
 * Called with mmap_lock held for user mode emulation.
 */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti;
#endif

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code(env, pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

    /* restart point: allocate a fresh TB and translate from scratch */
 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible.  */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    tb->pc = pc;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tcg_ctx->tb_cflags = cflags;
    /* restart point: keep the same TB, retry with a smaller max_insns */
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    /*
     * A non-zero value here means translation jumped back through
     * tcg_ctx->jmp_trans; it is treated as an error code below.
     */
    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(gen_code_size != 0)) {
        goto error_return;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(cpu, tb, max_insns);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    /* remember how many insns were actually translated, for the -2 retry */
    max_insns = tb->icount;

    trace_translate_block(tb, tb->pc, tb->tc.ptr);

    /* generate machine code */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->tb_count, prof->tb_count + 1);
    qatomic_set(&prof->interm_time,
                prof->interm_time + profile_getclock() - ti);
    ti = profile_getclock();
#endif

    gen_code_size = tcg_gen_code(tcg_ctx, tb);
    if (unlikely(gen_code_size < 0)) {
 error_return:
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    /* the search data is written right after the generated code */
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        /* a negative result is handled like a code buffer overflow */
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(tb->pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            /* split the output into code and trailing data, if any */
            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction. The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, "  -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, "  data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_flush();
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* advance the code pointer past the code and search data, aligned */
    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then
     * it must be a temporary one-insn TB, and we have nothing to do
     * except fill in the page_addr[] fields. Return early before
     * attempting to link to other TBs or add to the lookup table.
     */
    if (phys_pc == -1) {
        tb->page_addr[0] = tb->page_addr[1] = -1;
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT. Otherwise rewinding happened in the TB might fail to
     * lookup itself using host PC.
     */
    tcg_tb_insert(tb);

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        /*
         * Rewind code_gen_ptr to gen_code_buf minus the icache-line-rounded
         * size of the TB descriptor.
         * NOTE(review): assumes tcg_tb_alloc() placed *tb immediately
         * before gen_code_buf -- confirm.
         */
        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}
1663 
/*
 * Invalidate every TB on page @p that intersects [@start, @end[.
 *
 * @pages:   the locked page collection; with TARGET_HAS_PRECISE_SMC it is
 *           unlocked here before leaving the CPU loop when the currently
 *           executing TB was modified
 * @p:       descriptor of the page being written; must be non-NULL
 * @start:   first address of the range
 * @end:     address one past the end of the range
 * @retaddr: host return address used to look up the currently executing
 *           TB (TARGET_HAS_PRECISE_SMC only); 0 disables that lookup
 *
 * In !user-mode, if the page has no TBs left afterwards, its bitmap is
 * invalidated and tlb_unprotect_code() is called on @start.
 *
 * user-mode: call with mmap_lock held.
 * !user-mode: call with all @pages locked.
 */
static void
tb_invalidate_phys_page_range__locked(struct page_collection *pages,
                                      PageDesc *p, tb_page_addr_t start,
                                      tb_page_addr_t end,
                                      uintptr_t retaddr)
{
    TranslationBlock *tb;
    tb_page_addr_t tb_start, tb_end;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    CPUState *cpu = current_cpu;
    CPUArchState *env = NULL;
    /* true while the current TB still has to be looked up from @retaddr */
    bool current_tb_not_found = retaddr != 0;
    bool current_tb_modified = false;
    TranslationBlock *current_tb = NULL;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    uint32_t current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    assert_page_locked(p);

#if defined(TARGET_HAS_PRECISE_SMC)
    if (cpu != NULL) {
        env = cpu->env_ptr;
    }
#endif

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all
       the code */
    PAGE_FOR_EACH_TB(p, tb, n) {
        assert_page_locked(p);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        /* the TB is affected only if [tb_start, tb_end[ overlaps the range */
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = false;
                /* now we have a real cpu fault */
                current_tb = tcg_tb_lookup(retaddr);
            }
            if (current_tb == tb &&
                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
                /*
                 * If we are modifying the current TB, we must stop
                 * its execution. We could be more precise by checking
                 * that the modification is after the current PC, but it
                 * would require a specialized function to partially
                 * restore the CPU state.
                 */
                current_tb_modified = true;
                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            tb_phys_invalidate__locked(tb);
        }
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        tlb_unprotect_code(start);
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* release the locks here: execution leaves through the CPU loop */
        page_collection_unlock(pages);
        /* Force execution of one insn next time.  */
        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
        mmap_unlock();
        cpu_loop_exit_noexc(cpu);
    }
#endif
}
1754 
1755 /*
1756  * Invalidate all TBs which intersect with the target physical address range
1757  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1758  * 'is_cpu_write_access' should be true if called from a real cpu write
1759  * access: the virtual CPU will exit the current TB if code is modified inside
1760  * this TB.
1761  *
1762  * Called with mmap_lock held for user-mode emulation
1763  */
1764 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1765 {
1766     struct page_collection *pages;
1767     PageDesc *p;
1768 
1769     assert_memory_lock();
1770 
1771     p = page_find(start >> TARGET_PAGE_BITS);
1772     if (p == NULL) {
1773         return;
1774     }
1775     pages = page_collection_lock(start, end);
1776     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1777     page_collection_unlock(pages);
1778 }
1779 
1780 /*
1781  * Invalidate all TBs which intersect with the target physical address range
1782  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1783  * 'is_cpu_write_access' should be true if called from a real cpu write
1784  * access: the virtual CPU will exit the current TB if code is modified inside
1785  * this TB.
1786  *
1787  * Called with mmap_lock held for user-mode emulation.
1788  */
1789 #ifdef CONFIG_SOFTMMU
1790 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1791 #else
1792 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1793 #endif
1794 {
1795     struct page_collection *pages;
1796     tb_page_addr_t next;
1797 
1798     assert_memory_lock();
1799 
1800     pages = page_collection_lock(start, end);
1801     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1802          start < end;
1803          start = next, next += TARGET_PAGE_SIZE) {
1804         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1805         tb_page_addr_t bound = MIN(next, end);
1806 
1807         if (pd == NULL) {
1808             continue;
1809         }
1810         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1811     }
1812     page_collection_unlock(pages);
1813 }
1814 
1815 #ifdef CONFIG_SOFTMMU
1816 /* len must be <= 8 and start must be a multiple of len.
1817  * Called via softmmu_template.h when code areas are written to with
1818  * iothread mutex not held.
1819  *
1820  * Call with all @pages in the range [@start, @start + len[ locked.
1821  */
1822 void tb_invalidate_phys_page_fast(struct page_collection *pages,
1823                                   tb_page_addr_t start, int len,
1824                                   uintptr_t retaddr)
1825 {
1826     PageDesc *p;
1827 
1828     assert_memory_lock();
1829 
1830     p = page_find(start >> TARGET_PAGE_BITS);
1831     if (!p) {
1832         return;
1833     }
1834 
1835     assert_page_locked(p);
1836     if (!p->code_bitmap &&
1837         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
1838         build_page_bitmap(p);
1839     }
1840     if (p->code_bitmap) {
1841         unsigned int nr;
1842         unsigned long b;
1843 
1844         nr = start & ~TARGET_PAGE_MASK;
1845         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
1846         if (b & ((1 << len) - 1)) {
1847             goto do_invalidate;
1848         }
1849     } else {
1850     do_invalidate:
1851         tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
1852                                               retaddr);
1853     }
1854 }
1855 #else
/*
 * Invalidate every TB on the page containing @addr (user-mode variant).
 *
 * @addr: address inside the page; it is rounded down to the page start
 * @pc:   if not 0, the host PC of the faulting store instruction that
 *        caused this invalidate
 *
 * Called with mmap_lock held.
 *
 * Returns true if the caller needs to abort execution of the current
 * TB (because it was modified by this store and the guest CPU has
 * precise-SMC semantics).  Returns false otherwise, including when the
 * page has no descriptor.
 */
static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUState *cpu = current_cpu;
    CPUArchState *env = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    uint32_t current_flags = 0;
#endif

    assert_memory_lock();

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        return false;
    }

#ifdef TARGET_HAS_PRECISE_SMC
    /* look up the executing TB only if the page has code and pc is known */
    if (p->first_tb && pc != 0) {
        current_tb = tcg_tb_lookup(pc);
    }
    if (cpu != NULL) {
        env = cpu->env_ptr;
    }
#endif
    assert_page_locked(p);
    PAGE_FOR_EACH_TB(p, tb, n) {
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
            /*
             * If we are modifying the current TB, we must stop
             * its execution. We could be more precise by checking
             * that the modification is after the current PC, but it
             * would require a specialized function to partially
             * restore the CPU state.
             */

            current_tb_modified = 1;
            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        /*
         * @addr is passed as page_addr; unless it equals -1 the TB is not
         * removed from the page lists here -- the whole list is dropped
         * below instead.
         */
        tb_phys_invalidate(tb, addr);
    }
    p->first_tb = (uintptr_t)NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* Force execution of one insn next time.  */
        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
        return true;
    }
#endif

    return false;
}
1923 #endif
1924 
1925 /* user-mode: call with mmap_lock held */
1926 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1927 {
1928     TranslationBlock *tb;
1929 
1930     assert_memory_lock();
1931 
1932     tb = tcg_tb_lookup(retaddr);
1933     if (tb) {
1934         /* We can use retranslation to find the PC.  */
1935         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1936         tb_phys_invalidate(tb, -1);
1937     } else {
1938         /* The exception probably happened in a helper.  The CPU state should
1939            have been saved before calling it. Fetch the PC from there.  */
1940         CPUArchState *env = cpu->env_ptr;
1941         target_ulong pc, cs_base;
1942         tb_page_addr_t addr;
1943         uint32_t flags;
1944 
1945         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1946         addr = get_page_addr_code(env, pc);
1947         if (addr != -1) {
1948             tb_invalidate_phys_range(addr, addr + 1);
1949         }
1950     }
1951 }
1952 
1953 #ifndef CONFIG_USER_ONLY
1954 /*
1955  * In deterministic execution mode, instructions doing device I/Os
1956  * must be at the end of the TB.
1957  *
1958  * Called by softmmu_template.h, with iothread mutex not held.
1959  */
1960 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1961 {
1962     TranslationBlock *tb;
1963     CPUClass *cc;
1964     uint32_t n;
1965 
1966     tb = tcg_tb_lookup(retaddr);
1967     if (!tb) {
1968         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1969                   (void *)retaddr);
1970     }
1971     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1972 
1973     /*
1974      * Some guests must re-execute the branch when re-executing a delay
1975      * slot instruction.  When this is the case, adjust icount and N
1976      * to account for the re-execution of the branch.
1977      */
1978     n = 1;
1979     cc = CPU_GET_CLASS(cpu);
1980     if (cc->tcg_ops->io_recompile_replay_branch &&
1981         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1982         cpu_neg(cpu)->icount_decr.u16.low++;
1983         n = 2;
1984     }
1985 
1986     /*
1987      * Exit the loop and potentially generate a new TB executing the
1988      * just the I/O insns. We also limit instrumentation to memory
1989      * operations only (which execute after completion) so we don't
1990      * double instrument the instruction.
1991      */
1992     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1993 
1994     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
1995                            "cpu_io_recompile: rewound execution of TB to "
1996                            TARGET_FMT_lx "\n", tb->pc);
1997 
1998     cpu_loop_exit_noexc(cpu);
1999 }
2000 
2001 static void print_qht_statistics(struct qht_stats hst, GString *buf)
2002 {
2003     uint32_t hgram_opts;
2004     size_t hgram_bins;
2005     char *hgram;
2006 
2007     if (!hst.head_buckets) {
2008         return;
2009     }
2010     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
2011                            "(%0.2f%% head buckets used)\n",
2012                            hst.used_head_buckets, hst.head_buckets,
2013                            (double)hst.used_head_buckets /
2014                            hst.head_buckets * 100);
2015 
2016     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2017     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2018     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2019         hgram_opts |= QDIST_PR_NODECIMAL;
2020     }
2021     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2022     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
2023                            "Histogram: %s\n",
2024                            qdist_avg(&hst.occupancy) * 100, hgram);
2025     g_free(hgram);
2026 
2027     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2028     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2029     if (hgram_bins > 10) {
2030         hgram_bins = 10;
2031     } else {
2032         hgram_bins = 0;
2033         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2034     }
2035     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2036     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
2037                            "Histogram: %s\n",
2038                            qdist_avg(&hst.chain), hgram);
2039     g_free(hgram);
2040 }
2041 
/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;            /* number of TBs visited */
    size_t host_size;         /* sum of tb->tc.size over all TBs */
    size_t target_size;       /* sum of tb->size over all TBs */
    size_t max_target_size;   /* largest tb->size seen */
    size_t direct_jmp_count;  /* TBs whose jmp_reset_offset[0] is valid */
    size_t direct_jmp2_count; /* TBs whose jmp_reset_offset[0] and [1] are valid */
    size_t cross_page;        /* TBs whose page_addr[1] is not -1 */
};
2051 
2052 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2053 {
2054     const TranslationBlock *tb = value;
2055     struct tb_tree_stats *tst = data;
2056 
2057     tst->nb_tbs++;
2058     tst->host_size += tb->tc.size;
2059     tst->target_size += tb->size;
2060     if (tb->size > tst->max_target_size) {
2061         tst->max_target_size = tb->size;
2062     }
2063     if (tb->page_addr[1] != -1) {
2064         tst->cross_page++;
2065     }
2066     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2067         tst->direct_jmp_count++;
2068         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2069             tst->direct_jmp2_count++;
2070         }
2071     }
2072     return false;
2073 }
2074 
2075 void dump_exec_info(GString *buf)
2076 {
2077     struct tb_tree_stats tst = {};
2078     struct qht_stats hst;
2079     size_t nb_tbs, flush_full, flush_part, flush_elide;
2080 
2081     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2082     nb_tbs = tst.nb_tbs;
2083     /* XXX: avoid using doubles ? */
2084     g_string_append_printf(buf, "Translation buffer state:\n");
2085     /*
2086      * Report total code size including the padding and TB structs;
2087      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2088      * For avg host size we use the precise numbers from tb_tree_stats though.
2089      */
2090     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
2091                            tcg_code_size(), tcg_code_capacity());
2092     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
2093     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
2094                            nb_tbs ? tst.target_size / nb_tbs : 0,
2095                            tst.max_target_size);
2096     g_string_append_printf(buf, "TB avg host size    %zu bytes "
2097                            "(expansion ratio: %0.1f)\n",
2098                            nb_tbs ? tst.host_size / nb_tbs : 0,
2099                            tst.target_size ?
2100                            (double)tst.host_size / tst.target_size : 0);
2101     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
2102                            tst.cross_page,
2103                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2104     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
2105                            "(2 jumps=%zu %zu%%)\n",
2106                            tst.direct_jmp_count,
2107                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2108                            tst.direct_jmp2_count,
2109                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2110 
2111     qht_statistics_init(&tb_ctx.htable, &hst);
2112     print_qht_statistics(hst, buf);
2113     qht_statistics_destroy(&hst);
2114 
2115     g_string_append_printf(buf, "\nStatistics:\n");
2116     g_string_append_printf(buf, "TB flush count      %u\n",
2117                            qatomic_read(&tb_ctx.tb_flush_count));
2118     g_string_append_printf(buf, "TB invalidate count %u\n",
2119                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
2120 
2121     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2122     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
2123     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
2124     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
2125     tcg_dump_info(buf);
2126 }
2127 
/* Append the TCG op count report to @buf; forwards to tcg_dump_op_count(). */
void dump_opcount_info(GString *buf)
{
    tcg_dump_op_count(buf);
}
2132 
2133 #else /* CONFIG_USER_ONLY */
2134 
/*
 * Record a pending interrupt on @cpu (user-mode build).
 *
 * The caller must hold the iothread mutex (asserted below).  The bits in
 * @mask are OR-ed into cpu->interrupt_request, and the high half of the
 * icount decrementer is then atomically set to -1.
 * NOTE(review): the -1 presumably makes running translated code notice
 * the request and leave the TB -- confirm against the icount_decr readers.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}
2141 
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 *
 * This structure carries the walker state between the recursive steps.
 */
struct walk_memory_regions_data {
    walk_memory_regions_fn fn;  /* callback invoked once per finished region */
    void *priv;                 /* opaque pointer passed as fn's first argument */
    target_ulong start;         /* start of the region being built, -1u if none */
    int prot;                   /* page flags shared by the region being built */
};
2152 
2153 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2154                                    target_ulong end, int new_prot)
2155 {
2156     if (data->start != -1u) {
2157         int rc = data->fn(data->priv, data->start, end, data->prot);
2158         if (rc != 0) {
2159             return rc;
2160         }
2161     }
2162 
2163     data->start = (new_prot ? end : -1u);
2164     data->prot = new_prot;
2165 
2166     return 0;
2167 }
2168 
2169 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2170                                  target_ulong base, int level, void **lp)
2171 {
2172     target_ulong pa;
2173     int i, rc;
2174 
2175     if (*lp == NULL) {
2176         return walk_memory_regions_end(data, base, 0);
2177     }
2178 
2179     if (level == 0) {
2180         PageDesc *pd = *lp;
2181 
2182         for (i = 0; i < V_L2_SIZE; ++i) {
2183             int prot = pd[i].flags;
2184 
2185             pa = base | (i << TARGET_PAGE_BITS);
2186             if (prot != data->prot) {
2187                 rc = walk_memory_regions_end(data, pa, prot);
2188                 if (rc != 0) {
2189                     return rc;
2190                 }
2191             }
2192         }
2193     } else {
2194         void **pp = *lp;
2195 
2196         for (i = 0; i < V_L2_SIZE; ++i) {
2197             pa = base | ((target_ulong)i <<
2198                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2199             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2200             if (rc != 0) {
2201                 return rc;
2202             }
2203         }
2204     }
2205 
2206     return 0;
2207 }
2208 
2209 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2210 {
2211     struct walk_memory_regions_data data;
2212     uintptr_t i, l1_sz = v_l1_size;
2213 
2214     data.fn = fn;
2215     data.priv = priv;
2216     data.start = -1u;
2217     data.prot = 0;
2218 
2219     for (i = 0; i < l1_sz; i++) {
2220         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2221         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2222         if (rc != 0) {
2223             return rc;
2224         }
2225     }
2226 
2227     return walk_memory_regions_end(&data, 0, 0);
2228 }
2229 
2230 static int dump_region(void *priv, target_ulong start,
2231     target_ulong end, unsigned long prot)
2232 {
2233     FILE *f = (FILE *)priv;
2234 
2235     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2236         " "TARGET_FMT_lx" %c%c%c\n",
2237         start, end, end - start,
2238         ((prot & PAGE_READ) ? 'r' : '-'),
2239         ((prot & PAGE_WRITE) ? 'w' : '-'),
2240         ((prot & PAGE_EXEC) ? 'x' : '-'));
2241 
2242     return 0;
2243 }
2244 
2245 /* dump memory mappings */
2246 void page_dump(FILE *f)
2247 {
2248     const int length = sizeof(target_ulong) * 2;
2249     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2250             length, "start", length, "end", length, "size", "prot");
2251     walk_memory_regions(f, dump_region);
2252 }
2253 
2254 int page_get_flags(target_ulong address)
2255 {
2256     PageDesc *p;
2257 
2258     p = page_find(address >> TARGET_PAGE_BITS);
2259     if (!p) {
2260         return 0;
2261     }
2262     return p->flags;
2263 }
2264 
2265 /* Modify the flags of a page and invalidate the code if necessary.
2266    The flag PAGE_WRITE_ORG is positioned automatically depending
2267    on PAGE_WRITE.  The mmap_lock should already be held.  */
2268 void page_set_flags(target_ulong start, target_ulong end, int flags)
2269 {
2270     target_ulong addr, len;
2271     bool reset_target_data;
2272 
2273     /* This function should never be called with addresses outside the
2274        guest address space.  If this assert fires, it probably indicates
2275        a missing call to h2g_valid.  */
2276     assert(end - 1 <= GUEST_ADDR_MAX);
2277     assert(start < end);
2278     /* Only set PAGE_ANON with new mappings. */
2279     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2280     assert_memory_lock();
2281 
2282     start = start & TARGET_PAGE_MASK;
2283     end = TARGET_PAGE_ALIGN(end);
2284 
2285     if (flags & PAGE_WRITE) {
2286         flags |= PAGE_WRITE_ORG;
2287     }
2288     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2289     flags &= ~PAGE_RESET;
2290 
2291     for (addr = start, len = end - start;
2292          len != 0;
2293          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2294         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2295 
2296         /* If the write protection bit is set, then we invalidate
2297            the code inside.  */
2298         if (!(p->flags & PAGE_WRITE) &&
2299             (flags & PAGE_WRITE) &&
2300             p->first_tb) {
2301             tb_invalidate_phys_page(addr, 0);
2302         }
2303         if (reset_target_data) {
2304             g_free(p->target_data);
2305             p->target_data = NULL;
2306             p->flags = flags;
2307         } else {
2308             /* Using mprotect on a page does not change MAP_ANON. */
2309             p->flags = (p->flags & PAGE_ANON) | flags;
2310         }
2311     }
2312 }
2313 
2314 void *page_get_target_data(target_ulong address)
2315 {
2316     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2317     return p ? p->target_data : NULL;
2318 }
2319 
2320 void *page_alloc_target_data(target_ulong address, size_t size)
2321 {
2322     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2323     void *ret = NULL;
2324 
2325     if (p->flags & PAGE_VALID) {
2326         ret = p->target_data;
2327         if (!ret) {
2328             p->target_data = ret = g_malloc0(size);
2329         }
2330     }
2331     return ret;
2332 }
2333 
2334 int page_check_range(target_ulong start, target_ulong len, int flags)
2335 {
2336     PageDesc *p;
2337     target_ulong end;
2338     target_ulong addr;
2339 
2340     /* This function should never be called with addresses outside the
2341        guest address space.  If this assert fires, it probably indicates
2342        a missing call to h2g_valid.  */
2343     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2344         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2345     }
2346 
2347     if (len == 0) {
2348         return 0;
2349     }
2350     if (start + len - 1 < start) {
2351         /* We've wrapped around.  */
2352         return -1;
2353     }
2354 
2355     /* must do before we loose bits in the next step */
2356     end = TARGET_PAGE_ALIGN(start + len);
2357     start = start & TARGET_PAGE_MASK;
2358 
2359     for (addr = start, len = end - start;
2360          len != 0;
2361          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2362         p = page_find(addr >> TARGET_PAGE_BITS);
2363         if (!p) {
2364             return -1;
2365         }
2366         if (!(p->flags & PAGE_VALID)) {
2367             return -1;
2368         }
2369 
2370         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2371             return -1;
2372         }
2373         if (flags & PAGE_WRITE) {
2374             if (!(p->flags & PAGE_WRITE_ORG)) {
2375                 return -1;
2376             }
2377             /* unprotect the page if it was put read-only because it
2378                contains translated code */
2379             if (!(p->flags & PAGE_WRITE)) {
2380                 if (!page_unprotect(addr, 0)) {
2381                     return -1;
2382                 }
2383             }
2384         }
2385     }
2386     return 0;
2387 }
2388 
2389 void page_protect(tb_page_addr_t page_addr)
2390 {
2391     target_ulong addr;
2392     PageDesc *p;
2393     int prot;
2394 
2395     p = page_find(page_addr >> TARGET_PAGE_BITS);
2396     if (p && (p->flags & PAGE_WRITE)) {
2397         /*
2398          * Force the host page as non writable (writes will have a page fault +
2399          * mprotect overhead).
2400          */
2401         page_addr &= qemu_host_page_mask;
2402         prot = 0;
2403         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
2404              addr += TARGET_PAGE_SIZE) {
2405 
2406             p = page_find(addr >> TARGET_PAGE_BITS);
2407             if (!p) {
2408                 continue;
2409             }
2410             prot |= p->flags;
2411             p->flags &= ~PAGE_WRITE;
2412         }
2413         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
2414                  (prot & PAGE_BITS) & ~PAGE_WRITE);
2415         if (DEBUG_TB_INVALIDATE_GATE) {
2416             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
2417         }
2418     }
2419 }
2420 
2421 /* called from signal handler: invalidate the code and unprotect the
2422  * page. Return 0 if the fault was not handled, 1 if it was handled,
2423  * and 2 if it was handled but the caller must cause the TB to be
2424  * immediately exited. (We can only return 2 if the 'pc' argument is
2425  * non-zero.)
2426  */
2427 int page_unprotect(target_ulong address, uintptr_t pc)
2428 {
2429     unsigned int prot;
2430     bool current_tb_invalidated;
2431     PageDesc *p;
2432     target_ulong host_start, host_end, addr;
2433 
2434     /* Technically this isn't safe inside a signal handler.  However we
2435        know this only ever happens in a synchronous SEGV handler, so in
2436        practice it seems to be ok.  */
2437     mmap_lock();
2438 
2439     p = page_find(address >> TARGET_PAGE_BITS);
2440     if (!p) {
2441         mmap_unlock();
2442         return 0;
2443     }
2444 
2445     /* if the page was really writable, then we change its
2446        protection back to writable */
2447     if (p->flags & PAGE_WRITE_ORG) {
2448         current_tb_invalidated = false;
2449         if (p->flags & PAGE_WRITE) {
2450             /* If the page is actually marked WRITE then assume this is because
2451              * this thread raced with another one which got here first and
2452              * set the page to PAGE_WRITE and did the TB invalidate for us.
2453              */
2454 #ifdef TARGET_HAS_PRECISE_SMC
2455             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2456             if (current_tb) {
2457                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2458             }
2459 #endif
2460         } else {
2461             host_start = address & qemu_host_page_mask;
2462             host_end = host_start + qemu_host_page_size;
2463 
2464             prot = 0;
2465             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2466                 p = page_find(addr >> TARGET_PAGE_BITS);
2467                 p->flags |= PAGE_WRITE;
2468                 prot |= p->flags;
2469 
2470                 /* and since the content will be modified, we must invalidate
2471                    the corresponding translated code. */
2472                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2473 #ifdef CONFIG_USER_ONLY
2474                 if (DEBUG_TB_CHECK_GATE) {
2475                     tb_invalidate_check(addr);
2476                 }
2477 #endif
2478             }
2479             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2480                      prot & PAGE_BITS);
2481         }
2482         mmap_unlock();
2483         /* If current TB was invalidated return to main loop */
2484         return current_tb_invalidated ? 2 : 1;
2485     }
2486     mmap_unlock();
2487     return 0;
2488 }
2489 #endif /* CONFIG_USER_ONLY */
2490 
/*
 * This is a wrapper for common code that can not use CONFIG_SOFTMMU.
 *
 * Calls tlb_flush() on @cs when built with CONFIG_SOFTMMU; otherwise it
 * does nothing.
 */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}
2498