xref: /openbmc/qemu/accel/tcg/translate-all.c (revision c53cd04e)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/units.h"
22 #include "qemu-common.h"
23 
24 #define NO_CPU_IO_DEFS
25 #include "trace.h"
26 #include "disas/disas.h"
27 #include "exec/exec-all.h"
28 #include "tcg/tcg.h"
29 #if defined(CONFIG_USER_ONLY)
30 #include "qemu.h"
31 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
32 #include <sys/param.h>
33 #if __FreeBSD_version >= 700104
34 #define HAVE_KINFO_GETVMMAP
35 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
36 #include <sys/proc.h>
37 #include <machine/profile.h>
38 #define _KERNEL
39 #include <sys/user.h>
40 #undef _KERNEL
41 #undef sigqueue
42 #include <libutil.h>
43 #endif
44 #endif
45 #else
46 #include "exec/ram_addr.h"
47 #endif
48 
49 #include "exec/cputlb.h"
50 #include "exec/tb-hash.h"
51 #include "exec/translate-all.h"
52 #include "qemu/bitmap.h"
53 #include "qemu/error-report.h"
54 #include "qemu/qemu-print.h"
55 #include "qemu/timer.h"
56 #include "qemu/main-loop.h"
57 #include "exec/log.h"
58 #include "sysemu/cpus.h"
59 #include "sysemu/cpu-timers.h"
60 #include "sysemu/tcg.h"
61 #include "qapi/error.h"
62 #include "hw/core/tcg-cpu-ops.h"
63 #include "internal.h"
64 
65 /* #define DEBUG_TB_INVALIDATE */
66 /* #define DEBUG_TB_FLUSH */
67 /* make various TB consistency checks */
68 /* #define DEBUG_TB_CHECK */
69 
70 #ifdef DEBUG_TB_INVALIDATE
71 #define DEBUG_TB_INVALIDATE_GATE 1
72 #else
73 #define DEBUG_TB_INVALIDATE_GATE 0
74 #endif
75 
76 #ifdef DEBUG_TB_FLUSH
77 #define DEBUG_TB_FLUSH_GATE 1
78 #else
79 #define DEBUG_TB_FLUSH_GATE 0
80 #endif
81 
82 #if !defined(CONFIG_USER_ONLY)
83 /* TB consistency checks only implemented for usermode emulation.  */
84 #undef DEBUG_TB_CHECK
85 #endif
86 
87 #ifdef DEBUG_TB_CHECK
88 #define DEBUG_TB_CHECK_GATE 1
89 #else
90 #define DEBUG_TB_CHECK_GATE 0
91 #endif
92 
93 /* Accesses to the various translation structures need to be serialised
94  * via locks for consistency.
95  * In user-mode emulation, accesses to the memory-related structures are
96  * protected by mmap_lock.
97  * In !user-mode we use per-page locks.
98  */
99 #ifdef CONFIG_SOFTMMU
100 #define assert_memory_lock()
101 #else
102 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
103 #endif
104 
105 #define SMC_BITMAP_USE_THRESHOLD 10
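
/*
 * For illustration: code_write_count (below) is compared against this
 * threshold on the write-invalidation path; once a page has been written
 * often enough, a per-page code bitmap is built so that subsequent writes
 * only invalidate the TBs that actually overlap the written bytes, roughly:
 *
 *     if (!p->code_bitmap &&
 *         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
 *         build_page_bitmap(p);
 *     }
 */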
106 
107 typedef struct PageDesc {
108     /* list of TBs intersecting this ram page */
109     uintptr_t first_tb;
110 #ifdef CONFIG_SOFTMMU
111     /* in order to optimize self-modifying code, we count the number of
112        code-invalidating writes to a page; past a threshold we use a bitmap */
113     unsigned long *code_bitmap;
114     unsigned int code_write_count;
115 #else
116     unsigned long flags;
117     void *target_data;
118 #endif
119 #ifndef CONFIG_USER_ONLY
120     QemuSpin lock;
121 #endif
122 } PageDesc;
123 
124 /**
125  * struct page_entry - page descriptor entry
126  * @pd:     pointer to the &struct PageDesc of the page this entry represents
127  * @index:  page index of the page
128  * @locked: whether the page is locked
129  *
130  * This struct helps us keep track of the locked state of a page, without
131  * bloating &struct PageDesc.
132  *
133  * A page lock protects accesses to all fields of &struct PageDesc.
134  *
135  * See also: &struct page_collection.
136  */
137 struct page_entry {
138     PageDesc *pd;
139     tb_page_addr_t index;
140     bool locked;
141 };
142 
143 /**
144  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
145  * @tree:   Binary search tree (BST) of the pages, with key == page index
146  * @max:    Pointer to the page in @tree with the highest page index
147  *
148  * To avoid deadlock we lock pages in ascending order of page index.
149  * When operating on a set of pages, we need to keep track of them so that
150  * we can lock them in order and also unlock them later. For this we collect
151  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
152  * @tree implementation we use does not provide an O(1) operation to obtain the
153  * highest-ranked element, we use @max to keep track of the inserted page
154  * with the highest index. This is valuable because if a page is not in
155  * the tree and its index is higher than @max's, then we can lock it
156  * without breaking the locking order rule.
157  *
158  * Note on naming: 'struct page_set' would be shorter, but we already have a few
159  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
160  *
161  * See also: page_collection_lock().
162  */
163 struct page_collection {
164     GTree *tree;
165     struct page_entry *max;
166 };
167 
168 /* list iterators for lists of tagged pointers in TranslationBlock */
169 #define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
170     for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
171          tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
172              tb = (TranslationBlock *)((uintptr_t)tb & ~1))
173 
174 #define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
175     TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
176 
177 #define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
178     TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
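
/*
 * For illustration: these lists store a tag in bit 0 of each pointer.  A TB
 * can sit on up to two page lists (one per physical page it covers), so the
 * tag records which of the TB's two page_next[]/jmp_list_next[] slots links
 * to the next element.  For example, a PageDesc whose first_tb equals
 * ((uintptr_t)tb | 1) means "tb covers this page as its second page", and
 * the chain continues at tb->page_next[1].
 */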
179 
180 /*
181  * In system mode we want L1_MAP to be based on ram offsets,
182  * while in user mode we want it to be based on virtual addresses.
183  *
184  * TODO: For user mode, see the caveat re host vs guest virtual
185  * address spaces near GUEST_ADDR_MAX.
186  */
187 #if !defined(CONFIG_USER_ONLY)
188 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
189 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
190 #else
191 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
192 #endif
193 #else
194 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
195 #endif
196 
197 /* Size of the L2 (and L3, etc) page tables.  */
198 #define V_L2_BITS 10
199 #define V_L2_SIZE (1 << V_L2_BITS)
200 
201 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
202 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
203                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
204                   * BITS_PER_BYTE);
205 
206 /*
207  * L1 Mapping properties
208  */
209 static int v_l1_size;
210 static int v_l1_shift;
211 static int v_l2_levels;
212 
213 /* The bottom level has pointers to PageDesc, and is indexed by
214  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
215  */
216 #define V_L1_MIN_BITS 4
217 #define V_L1_MAX_BITS (V_L2_BITS + 3)
218 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
219 
220 static void *l1_map[V_L1_MAX_SIZE];
221 
222 /* code generation context */
223 TCGContext tcg_init_ctx;
224 __thread TCGContext *tcg_ctx;
225 TBContext tb_ctx;
226 
227 static void page_table_config_init(void)
228 {
229     uint32_t v_l1_bits;
230 
231     assert(TARGET_PAGE_BITS);
232     /* The bits remaining after N lower levels of page tables.  */
233     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
234     if (v_l1_bits < V_L1_MIN_BITS) {
235         v_l1_bits += V_L2_BITS;
236     }
237 
238     v_l1_size = 1 << v_l1_bits;
239     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
240     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
241 
242     assert(v_l1_bits <= V_L1_MAX_BITS);
243     assert(v_l1_shift % V_L2_BITS == 0);
244     assert(v_l2_levels >= 0);
245 }
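
/*
 * Worked example (illustrative): with L1_MAP_ADDR_SPACE_BITS == 42 and
 * TARGET_PAGE_BITS == 12 there are 30 bits of page index to map.
 * 30 % V_L2_BITS == 0, which is below V_L1_MIN_BITS, so v_l1_bits becomes 10:
 *
 *     v_l1_size   = 1 << 10 = 1024 entries in l1_map
 *     v_l1_shift  = 42 - 12 - 10 = 20
 *     v_l2_levels = 20 / 10 - 1 = 1
 *
 * i.e. a three-level radix tree: a 10-bit L1 table, one 10-bit intermediate
 * level of pointers, and a bottom 10-bit level of PageDesc arrays.
 */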
246 
247 static void cpu_gen_init(void)
248 {
249     tcg_context_init(&tcg_init_ctx);
250 }
251 
252 /* Encode VAL as a signed leb128 sequence at P.
253    Return P incremented past the encoded value.  */
254 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
255 {
256     int more, byte;
257 
258     do {
259         byte = val & 0x7f;
260         val >>= 7;
261         more = !((val == 0 && (byte & 0x40) == 0)
262                  || (val == -1 && (byte & 0x40) != 0));
263         if (more) {
264             byte |= 0x80;
265         }
266         *p++ = byte;
267     } while (more);
268 
269     return p;
270 }
271 
272 /* Decode a signed leb128 sequence at *PP; increment *PP past the
273    decoded value.  Return the decoded value.  */
274 static target_long decode_sleb128(const uint8_t **pp)
275 {
276     const uint8_t *p = *pp;
277     target_long val = 0;
278     int byte, shift = 0;
279 
280     do {
281         byte = *p++;
282         val |= (target_ulong)(byte & 0x7f) << shift;
283         shift += 7;
284     } while (byte & 0x80);
285     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
286         val |= -(target_ulong)1 << shift;
287     }
288 
289     *pp = p;
290     return val;
291 }
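
/*
 * Worked example (illustrative): encode_sleb128(p, 200) emits 0xc8 0x01
 * (200 == 0b11001000: the low seven bits 0x48 with the continuation bit set,
 * then 0x01), while encode_sleb128(p, -5) emits the single byte 0x7b, since
 * after the arithmetic shift val == -1 and bit 6 of the byte is already set.
 * decode_sleb128() reverses this, sign-extending from bit 6 of the last byte.
 */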
292 
293 /* Encode the data collected about the instructions while compiling TB.
294    Place the data at BLOCK, and return the number of bytes consumed.
295 
296    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
297    which come from the target's insn_start data, followed by a uintptr_t
298    which comes from the host pc of the end of the code implementing the insn.
299 
300    Each line of the table is encoded as sleb128 deltas from the previous
301    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
302    That is, the first column is seeded with the guest pc, the last column
303    with the host pc, and the middle columns with zeros.  */
304 
305 static int encode_search(TranslationBlock *tb, uint8_t *block)
306 {
307     uint8_t *highwater = tcg_ctx->code_gen_highwater;
308     uint8_t *p = block;
309     int i, j, n;
310 
311     for (i = 0, n = tb->icount; i < n; ++i) {
312         target_ulong prev;
313 
314         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
315             if (i == 0) {
316                 prev = (j == 0 ? tb->pc : 0);
317             } else {
318                 prev = tcg_ctx->gen_insn_data[i - 1][j];
319             }
320             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
321         }
322         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
323         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
324 
325         /* Test for (pending) buffer overflow.  The assumption is that any
326            one row beginning below the high water mark cannot overrun
327            the buffer completely.  Thus we can test for overflow after
328            encoding a row without having to check during encoding.  */
329         if (unlikely(p > highwater)) {
330             return -1;
331         }
332     }
333 
334     return p - block;
335 }
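
/*
 * For example (illustrative), with TARGET_INSN_START_WORDS == 1, a TB at
 * guest pc 0x1000 containing two insns at 0x1000 and 0x1004 whose generated
 * code ends at host offsets 0x20 and 0x38 would be encoded as the deltas
 *
 *     { 0x00, 0x20 }      row 0: pc - tb->pc, end_off - 0
 *     { 0x04, 0x18 }      row 1: deltas from row 0
 *
 * each value being stored as an sleb128 sequence.
 */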
336 
337 /* The cpu state corresponding to 'searched_pc' is restored.
338  * When reset_icount is true, the current TB will be interrupted and
339  * icount should be recalculated.
340  */
341 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
342                                      uintptr_t searched_pc, bool reset_icount)
343 {
344     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
345     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
346     CPUArchState *env = cpu->env_ptr;
347     const uint8_t *p = tb->tc.ptr + tb->tc.size;
348     int i, j, num_insns = tb->icount;
349 #ifdef CONFIG_PROFILER
350     TCGProfile *prof = &tcg_ctx->prof;
351     int64_t ti = profile_getclock();
352 #endif
353 
354     searched_pc -= GETPC_ADJ;
355 
356     if (searched_pc < host_pc) {
357         return -1;
358     }
359 
360     /* Reconstruct the stored insn data while looking for the point at
361        which the end of the insn exceeds the searched_pc.  */
362     for (i = 0; i < num_insns; ++i) {
363         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
364             data[j] += decode_sleb128(&p);
365         }
366         host_pc += decode_sleb128(&p);
367         if (host_pc > searched_pc) {
368             goto found;
369         }
370     }
371     return -1;
372 
373  found:
374     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
375         assert(icount_enabled());
376         /* Reset the cycle counter to the start of the block
377            and shift it by the number of actually executed instructions */
378         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
379     }
380     restore_state_to_opc(env, tb, data);
381 
382 #ifdef CONFIG_PROFILER
383     qatomic_set(&prof->restore_time,
384                 prof->restore_time + profile_getclock() - ti);
385     qatomic_set(&prof->restore_count, prof->restore_count + 1);
386 #endif
387     return 0;
388 }
389 
390 void tb_destroy(TranslationBlock *tb)
391 {
392     qemu_spin_destroy(&tb->jmp_lock);
393 }
394 
395 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
396 {
397     /*
398      * The host_pc has to be in the rx region of the code buffer.
399      * If it is not we will not be able to resolve it here.
400      * The two cases where host_pc will not be correct are:
401      *
402      *  - fault during translation (instruction fetch)
403      *  - fault from helper (not using GETPC() macro)
404      *
405      * Either way we need to return early as we can't resolve it here.
406      */
407     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
408         TranslationBlock *tb = tcg_tb_lookup(host_pc);
409         if (tb) {
410             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
411             return true;
412         }
413     }
414     return false;
415 }
416 
417 static void page_init(void)
418 {
419     page_size_init();
420     page_table_config_init();
421 
422 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
423     {
424 #ifdef HAVE_KINFO_GETVMMAP
425         struct kinfo_vmentry *freep;
426         int i, cnt;
427 
428         freep = kinfo_getvmmap(getpid(), &cnt);
429         if (freep) {
430             mmap_lock();
431             for (i = 0; i < cnt; i++) {
432                 unsigned long startaddr, endaddr;
433 
434                 startaddr = freep[i].kve_start;
435                 endaddr = freep[i].kve_end;
436                 if (h2g_valid(startaddr)) {
437                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
438 
439                     if (h2g_valid(endaddr)) {
440                         endaddr = h2g(endaddr);
441                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
442                     } else {
443 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
444                         endaddr = ~0ul;
445                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
446 #endif
447                     }
448                 }
449             }
450             free(freep);
451             mmap_unlock();
452         }
453 #else
454         FILE *f;
455 
456         last_brk = (unsigned long)sbrk(0);
457 
458         f = fopen("/compat/linux/proc/self/maps", "r");
459         if (f) {
460             mmap_lock();
461 
462             do {
463                 unsigned long startaddr, endaddr;
464                 int n;
465 
466                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
467 
468                 if (n == 2 && h2g_valid(startaddr)) {
469                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
470 
471                     if (h2g_valid(endaddr)) {
472                         endaddr = h2g(endaddr);
473                     } else {
474                         endaddr = ~0ul;
475                     }
476                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
477                 }
478             } while (!feof(f));
479 
480             fclose(f);
481             mmap_unlock();
482         }
483 #endif
484     }
485 #endif
486 }
487 
488 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
489 {
490     PageDesc *pd;
491     void **lp;
492     int i;
493 
494     /* Level 1.  Always allocated.  */
495     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
496 
497     /* Level 2..N-1.  */
498     for (i = v_l2_levels; i > 0; i--) {
499         void **p = qatomic_rcu_read(lp);
500 
501         if (p == NULL) {
502             void *existing;
503 
504             if (!alloc) {
505                 return NULL;
506             }
507             p = g_new0(void *, V_L2_SIZE);
508             existing = qatomic_cmpxchg(lp, NULL, p);
509             if (unlikely(existing)) {
510                 g_free(p);
511                 p = existing;
512             }
513         }
514 
515         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
516     }
517 
518     pd = qatomic_rcu_read(lp);
519     if (pd == NULL) {
520         void *existing;
521 
522         if (!alloc) {
523             return NULL;
524         }
525         pd = g_new0(PageDesc, V_L2_SIZE);
526 #ifndef CONFIG_USER_ONLY
527         {
528             int i;
529 
530             for (i = 0; i < V_L2_SIZE; i++) {
531                 qemu_spin_init(&pd[i].lock);
532             }
533         }
534 #endif
535         existing = qatomic_cmpxchg(lp, NULL, pd);
536         if (unlikely(existing)) {
537 #ifndef CONFIG_USER_ONLY
538             {
539                 int i;
540 
541                 for (i = 0; i < V_L2_SIZE; i++) {
542                     qemu_spin_destroy(&pd[i].lock);
543                 }
544             }
545 #endif
546             g_free(pd);
547             pd = existing;
548         }
549     }
550 
551     return pd + (index & (V_L2_SIZE - 1));
552 }
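
/*
 * Note (illustrative): the radix tree above is populated locklessly; each
 * level is claimed with qatomic_cmpxchg(), and the loser of a race simply
 * frees its freshly allocated node and continues with the winner's.  Readers
 * only ever observe NULL or a fully initialised node via qatomic_rcu_read().
 */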
553 
554 static inline PageDesc *page_find(tb_page_addr_t index)
555 {
556     return page_find_alloc(index, 0);
557 }
558 
559 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
560                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
561 
562 /* In user-mode page locks aren't used; mmap_lock is enough */
563 #ifdef CONFIG_USER_ONLY
564 
565 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
566 
567 static inline void page_lock(PageDesc *pd)
568 { }
569 
570 static inline void page_unlock(PageDesc *pd)
571 { }
572 
573 static inline void page_lock_tb(const TranslationBlock *tb)
574 { }
575 
576 static inline void page_unlock_tb(const TranslationBlock *tb)
577 { }
578 
579 struct page_collection *
580 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
581 {
582     return NULL;
583 }
584 
585 void page_collection_unlock(struct page_collection *set)
586 { }
587 #else /* !CONFIG_USER_ONLY */
588 
589 #ifdef CONFIG_DEBUG_TCG
590 
591 static __thread GHashTable *ht_pages_locked_debug;
592 
593 static void ht_pages_locked_debug_init(void)
594 {
595     if (ht_pages_locked_debug) {
596         return;
597     }
598     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
599 }
600 
601 static bool page_is_locked(const PageDesc *pd)
602 {
603     PageDesc *found;
604 
605     ht_pages_locked_debug_init();
606     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
607     return !!found;
608 }
609 
610 static void page_lock__debug(PageDesc *pd)
611 {
612     ht_pages_locked_debug_init();
613     g_assert(!page_is_locked(pd));
614     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
615 }
616 
617 static void page_unlock__debug(const PageDesc *pd)
618 {
619     bool removed;
620 
621     ht_pages_locked_debug_init();
622     g_assert(page_is_locked(pd));
623     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
624     g_assert(removed);
625 }
626 
627 static void
628 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
629 {
630     if (unlikely(!page_is_locked(pd))) {
631         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
632                      pd, file, line);
633         abort();
634     }
635 }
636 
637 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
638 
639 void assert_no_pages_locked(void)
640 {
641     ht_pages_locked_debug_init();
642     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
643 }
644 
645 #else /* !CONFIG_DEBUG_TCG */
646 
647 #define assert_page_locked(pd)
648 
649 static inline void page_lock__debug(const PageDesc *pd)
650 {
651 }
652 
653 static inline void page_unlock__debug(const PageDesc *pd)
654 {
655 }
656 
657 #endif /* CONFIG_DEBUG_TCG */
658 
659 static inline void page_lock(PageDesc *pd)
660 {
661     page_lock__debug(pd);
662     qemu_spin_lock(&pd->lock);
663 }
664 
665 static inline void page_unlock(PageDesc *pd)
666 {
667     qemu_spin_unlock(&pd->lock);
668     page_unlock__debug(pd);
669 }
670 
671 /* lock the page(s) of a TB in the correct acquisition order */
672 static inline void page_lock_tb(const TranslationBlock *tb)
673 {
674     page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
675 }
676 
677 static inline void page_unlock_tb(const TranslationBlock *tb)
678 {
679     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
680 
681     page_unlock(p1);
682     if (unlikely(tb->page_addr[1] != -1)) {
683         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
684 
685         if (p2 != p1) {
686             page_unlock(p2);
687         }
688     }
689 }
690 
691 static inline struct page_entry *
692 page_entry_new(PageDesc *pd, tb_page_addr_t index)
693 {
694     struct page_entry *pe = g_malloc(sizeof(*pe));
695 
696     pe->index = index;
697     pe->pd = pd;
698     pe->locked = false;
699     return pe;
700 }
701 
702 static void page_entry_destroy(gpointer p)
703 {
704     struct page_entry *pe = p;
705 
706     g_assert(pe->locked);
707     page_unlock(pe->pd);
708     g_free(pe);
709 }
710 
711 /* returns false on success */
712 static bool page_entry_trylock(struct page_entry *pe)
713 {
714     bool busy;
715 
716     busy = qemu_spin_trylock(&pe->pd->lock);
717     if (!busy) {
718         g_assert(!pe->locked);
719         pe->locked = true;
720         page_lock__debug(pe->pd);
721     }
722     return busy;
723 }
724 
725 static void do_page_entry_lock(struct page_entry *pe)
726 {
727     page_lock(pe->pd);
728     g_assert(!pe->locked);
729     pe->locked = true;
730 }
731 
732 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
733 {
734     struct page_entry *pe = value;
735 
736     do_page_entry_lock(pe);
737     return FALSE;
738 }
739 
740 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
741 {
742     struct page_entry *pe = value;
743 
744     if (pe->locked) {
745         pe->locked = false;
746         page_unlock(pe->pd);
747     }
748     return FALSE;
749 }
750 
751 /*
752  * Trylock a page, and if successful, add the page to a collection.
753  * Returns true ("busy") if the page could not be locked; false otherwise.
754  */
755 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
756 {
757     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
758     struct page_entry *pe;
759     PageDesc *pd;
760 
761     pe = g_tree_lookup(set->tree, &index);
762     if (pe) {
763         return false;
764     }
765 
766     pd = page_find(index);
767     if (pd == NULL) {
768         return false;
769     }
770 
771     pe = page_entry_new(pd, index);
772     g_tree_insert(set->tree, &pe->index, pe);
773 
774     /*
775      * If this is either (1) the first insertion or (2) a page whose index
776      * is higher than any other so far, just lock the page and move on.
777      */
778     if (set->max == NULL || pe->index > set->max->index) {
779         set->max = pe;
780         do_page_entry_lock(pe);
781         return false;
782     }
783     /*
784      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
785      * locks in order.
786      */
787     return page_entry_trylock(pe);
788 }
789 
790 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
791 {
792     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
793     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
794 
795     if (a == b) {
796         return 0;
797     } else if (a < b) {
798         return -1;
799     }
800     return 1;
801 }
802 
803 /*
804  * Lock a range of pages ([@start,@end[) as well as the pages of all
805  * intersecting TBs.
806  * Locking order: acquire locks in ascending order of page index.
807  */
808 struct page_collection *
809 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
810 {
811     struct page_collection *set = g_malloc(sizeof(*set));
812     tb_page_addr_t index;
813     PageDesc *pd;
814 
815     start >>= TARGET_PAGE_BITS;
816     end   >>= TARGET_PAGE_BITS;
817     g_assert(start <= end);
818 
819     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
820                                 page_entry_destroy);
821     set->max = NULL;
822     assert_no_pages_locked();
823 
824  retry:
825     g_tree_foreach(set->tree, page_entry_lock, NULL);
826 
827     for (index = start; index <= end; index++) {
828         TranslationBlock *tb;
829         int n;
830 
831         pd = page_find(index);
832         if (pd == NULL) {
833             continue;
834         }
835         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
836             g_tree_foreach(set->tree, page_entry_unlock, NULL);
837             goto retry;
838         }
839         assert_page_locked(pd);
840         PAGE_FOR_EACH_TB(pd, tb, n) {
841             if (page_trylock_add(set, tb->page_addr[0]) ||
842                 (tb->page_addr[1] != -1 &&
843                  page_trylock_add(set, tb->page_addr[1]))) {
844                 /* drop all locks, and reacquire in order */
845                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
846                 goto retry;
847             }
848         }
849     }
850     return set;
851 }
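
/*
 * Example scenario (illustrative): suppose pages 5 and 9 are already in the
 * set and a TB found on page 9 also spans page 7.  page_trylock_add(7) must
 * not block, or it could deadlock against a thread locking in ascending
 * order; if the trylock fails, every page in the set is unlocked and the
 * walk restarts, with page_entry_lock() re-taking 5, 7 and 9 in ascending
 * index order before the pages are scanned again.
 */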
852 
853 void page_collection_unlock(struct page_collection *set)
854 {
855     /* entries are unlocked and freed via page_entry_destroy */
856     g_tree_destroy(set->tree);
857     g_free(set);
858 }
859 
860 #endif /* !CONFIG_USER_ONLY */
861 
862 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
863                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
864 {
865     PageDesc *p1, *p2;
866     tb_page_addr_t page1;
867     tb_page_addr_t page2;
868 
869     assert_memory_lock();
870     g_assert(phys1 != -1);
871 
872     page1 = phys1 >> TARGET_PAGE_BITS;
873     page2 = phys2 >> TARGET_PAGE_BITS;
874 
875     p1 = page_find_alloc(page1, alloc);
876     if (ret_p1) {
877         *ret_p1 = p1;
878     }
879     if (likely(phys2 == -1)) {
880         page_lock(p1);
881         return;
882     } else if (page1 == page2) {
883         page_lock(p1);
884         if (ret_p2) {
885             *ret_p2 = p1;
886         }
887         return;
888     }
889     p2 = page_find_alloc(page2, alloc);
890     if (ret_p2) {
891         *ret_p2 = p2;
892     }
893     if (page1 < page2) {
894         page_lock(p1);
895         page_lock(p2);
896     } else {
897         page_lock(p2);
898         page_lock(p1);
899     }
900 }
901 
902 /* Minimum size of the code gen buffer.  This number is arbitrarily chosen,
903    but not so small that we can't have a fair number of TBs live.  */
904 #define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
905 
906 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
907    indicated, this is constrained by the range of direct branches on the
908    host cpu, as used by the TCG implementation of goto_tb.  */
909 #if defined(__x86_64__)
910 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
911 #elif defined(__sparc__)
912 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
913 #elif defined(__powerpc64__)
914 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
915 #elif defined(__powerpc__)
916 # define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
917 #elif defined(__aarch64__)
918 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
919 #elif defined(__s390x__)
920   /* We have a +- 4GB range on the branches; leave some slop.  */
921 # define MAX_CODE_GEN_BUFFER_SIZE  (3 * GiB)
922 #elif defined(__mips__)
923   /* We have a 256MB branch region, but leave room to make sure the
924      main executable is also within that region.  */
925 # define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
926 #else
927 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
928 #endif
929 
930 #if TCG_TARGET_REG_BITS == 32
931 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
932 #ifdef CONFIG_USER_ONLY
933 /*
934  * For user mode on smaller 32-bit systems we may run into trouble
935  * allocating big chunks of data in the right place. On these systems
936  * we utilise a static code generation buffer directly in the binary.
937  */
938 #define USE_STATIC_CODE_GEN_BUFFER
939 #endif
940 #else /* TCG_TARGET_REG_BITS == 64 */
941 #ifdef CONFIG_USER_ONLY
942 /*
943  * As user-mode emulation typically means running multiple instances
944  * of the translator, don't go too nuts with our default code gen
945  * buffer lest we make things too hard for the OS.
946  */
947 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
948 #else
949 /*
950  * We expect most system emulation to run one or two guests per host.
951  * Users running large scale system emulation may want to tweak their
952  * runtime setup via the tb-size control on the command line.
953  */
954 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
955 #endif
956 #endif
957 
958 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
959   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
960    ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
961 
962 static size_t size_code_gen_buffer(size_t tb_size)
963 {
964     /* Size the buffer.  */
965     if (tb_size == 0) {
966         size_t phys_mem = qemu_get_host_physmem();
967         if (phys_mem == 0) {
968             tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
969         } else {
970             tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
971         }
972     }
973     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
974         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
975     }
976     if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
977         tb_size = MAX_CODE_GEN_BUFFER_SIZE;
978     }
979     return tb_size;
980 }
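
/*
 * For example (illustrative): on an x86_64 softmmu build with tb_size == 0
 * and a host reporting 4 GiB of RAM, phys_mem / 8 == 512 MiB, which is below
 * the 1 GiB DEFAULT_CODE_GEN_BUFFER_SIZE, so the buffer ends up at 512 MiB;
 * a 32 GiB host would instead be clamped to the 1 GiB default.
 */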
981 
982 #ifdef __mips__
983 /* In order to use J and JAL within the code_gen_buffer, we require
984    that the buffer not cross a 256MB boundary.  */
985 static inline bool cross_256mb(void *addr, size_t size)
986 {
987     return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
988 }
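
/*
 * For example (illustrative): cross_256mb((void *)0x0ff00000, 32 * MiB) is
 * true, since the start lies in the [0x00000000, 0x10000000) region while
 * start + size == 0x11f00000 lies in the next one; XOR-ing the two addresses
 * leaves bit 28 set, which the ~0x0ffffffful mask exposes.
 */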
989 
990 /* We weren't able to allocate a buffer without crossing that boundary,
991    so make do with the larger portion of the buffer that doesn't cross.
992    Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
993 static inline void *split_cross_256mb(void *buf1, size_t size1)
994 {
995     void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
996     size_t size2 = buf1 + size1 - buf2;
997 
998     size1 = buf2 - buf1;
999     if (size1 < size2) {
1000         size1 = size2;
1001         buf1 = buf2;
1002     }
1003 
1004     tcg_ctx->code_gen_buffer_size = size1;
1005     return buf1;
1006 }
1007 #endif
1008 
1009 #ifdef USE_STATIC_CODE_GEN_BUFFER
1010 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
1011     __attribute__((aligned(CODE_GEN_ALIGN)));
1012 
1013 static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
1014 {
1015     void *buf, *end;
1016     size_t size;
1017 
1018     if (splitwx > 0) {
1019         error_setg(errp, "jit split-wx not supported");
1020         return false;
1021     }
1022 
1023     /* page-align the beginning and end of the buffer */
1024     buf = static_code_gen_buffer;
1025     end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
1026     buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
1027     end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
1028 
1029     size = end - buf;
1030 
1031     /* Honor a command-line option limiting the size of the buffer.  */
1032     if (size > tb_size) {
1033         size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
1034     }
1035     tcg_ctx->code_gen_buffer_size = size;
1036 
1037 #ifdef __mips__
1038     if (cross_256mb(buf, size)) {
1039         buf = split_cross_256mb(buf, size);
1040         size = tcg_ctx->code_gen_buffer_size;
1041     }
1042 #endif
1043 
1044     if (qemu_mprotect_rwx(buf, size)) {
1045         error_setg_errno(errp, errno, "mprotect of jit buffer");
1046         return false;
1047     }
1048     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1049 
1050     tcg_ctx->code_gen_buffer = buf;
1051     return true;
1052 }
1053 #elif defined(_WIN32)
1054 static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
1055 {
1056     void *buf;
1057 
1058     if (splitwx > 0) {
1059         error_setg(errp, "jit split-wx not supported");
1060         return false;
1061     }
1062 
1063     buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
1064                              PAGE_EXECUTE_READWRITE);
1065     if (buf == NULL) {
1066         error_setg_win32(errp, GetLastError(),
1067                          "allocate %zu bytes for jit buffer", size);
1068         return false;
1069     }
1070 
1071     tcg_ctx->code_gen_buffer = buf;
1072     tcg_ctx->code_gen_buffer_size = size;
1073     return true;
1074 }
1075 #else
1076 static bool alloc_code_gen_buffer_anon(size_t size, int prot,
1077                                        int flags, Error **errp)
1078 {
1079     void *buf;
1080 
1081     buf = mmap(NULL, size, prot, flags, -1, 0);
1082     if (buf == MAP_FAILED) {
1083         error_setg_errno(errp, errno,
1084                          "allocate %zu bytes for jit buffer", size);
1085         return false;
1086     }
1087     tcg_ctx->code_gen_buffer_size = size;
1088 
1089 #ifdef __mips__
1090     if (cross_256mb(buf, size)) {
1091         /*
1092          * Try again, with the original still mapped, to avoid re-acquiring
1093          * the same 256mb crossing.
1094          */
1095         size_t size2;
1096         void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
1097         switch ((int)(buf2 != MAP_FAILED)) {
1098         case 1:
1099             if (!cross_256mb(buf2, size)) {
1100                 /* Success!  Use the new buffer.  */
1101                 munmap(buf, size);
1102                 break;
1103             }
1104             /* Failure.  Work with what we had.  */
1105             munmap(buf2, size);
1106             /* fallthru */
1107         default:
1108             /* Split the original buffer.  Free the smaller half.  */
1109             buf2 = split_cross_256mb(buf, size);
1110             size2 = tcg_ctx->code_gen_buffer_size;
1111             if (buf == buf2) {
1112                 munmap(buf + size2, size - size2);
1113             } else {
1114                 munmap(buf, size - size2);
1115             }
1116             size = size2;
1117             break;
1118         }
1119         buf = buf2;
1120     }
1121 #endif
1122 
1123     /* Request large pages for the buffer.  */
1124     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1125 
1126     tcg_ctx->code_gen_buffer = buf;
1127     return true;
1128 }
1129 
1130 #ifndef CONFIG_TCG_INTERPRETER
1131 #ifdef CONFIG_POSIX
1132 #include "qemu/memfd.h"
1133 
1134 static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
1135 {
1136     void *buf_rw = NULL, *buf_rx = MAP_FAILED;
1137     int fd = -1;
1138 
1139 #ifdef __mips__
1140     /* Find space for the RX mapping, vs the 256MiB regions. */
1141     if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
1142                                     MAP_PRIVATE | MAP_ANONYMOUS |
1143                                     MAP_NORESERVE, errp)) {
1144         return false;
1145     }
1146     /* The size of the mapping may have been adjusted. */
1147     size = tcg_ctx->code_gen_buffer_size;
1148     buf_rx = tcg_ctx->code_gen_buffer;
1149 #endif
1150 
1151     buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
1152     if (buf_rw == NULL) {
1153         goto fail;
1154     }
1155 
1156 #ifdef __mips__
1157     void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
1158                      MAP_SHARED | MAP_FIXED, fd, 0);
1159     if (tmp != buf_rx) {
1160         goto fail_rx;
1161     }
1162 #else
1163     buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
1164     if (buf_rx == MAP_FAILED) {
1165         goto fail_rx;
1166     }
1167 #endif
1168 
1169     close(fd);
1170     tcg_ctx->code_gen_buffer = buf_rw;
1171     tcg_ctx->code_gen_buffer_size = size;
1172     tcg_splitwx_diff = buf_rx - buf_rw;
1173 
1174     /* Request large pages for the buffer and the splitwx.  */
1175     qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
1176     qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
1177     return true;
1178 
1179  fail_rx:
1180     error_setg_errno(errp, errno, "failed to map shared memory for execute");
1181  fail:
1182     if (buf_rx != MAP_FAILED) {
1183         munmap(buf_rx, size);
1184     }
1185     if (buf_rw) {
1186         munmap(buf_rw, size);
1187     }
1188     if (fd >= 0) {
1189         close(fd);
1190     }
1191     return false;
1192 }
1193 #endif /* CONFIG_POSIX */
1194 
1195 #ifdef CONFIG_DARWIN
1196 #include <mach/mach.h>
1197 
1198 extern kern_return_t mach_vm_remap(vm_map_t target_task,
1199                                    mach_vm_address_t *target_address,
1200                                    mach_vm_size_t size,
1201                                    mach_vm_offset_t mask,
1202                                    int flags,
1203                                    vm_map_t src_task,
1204                                    mach_vm_address_t src_address,
1205                                    boolean_t copy,
1206                                    vm_prot_t *cur_protection,
1207                                    vm_prot_t *max_protection,
1208                                    vm_inherit_t inheritance);
1209 
1210 static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
1211 {
1212     kern_return_t ret;
1213     mach_vm_address_t buf_rw, buf_rx;
1214     vm_prot_t cur_prot, max_prot;
1215 
1216     /* Map the read-write portion via normal anon memory. */
1217     if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
1218                                     MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
1219         return false;
1220     }
1221 
1222     buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
1223     buf_rx = 0;
1224     ret = mach_vm_remap(mach_task_self(),
1225                         &buf_rx,
1226                         size,
1227                         0,
1228                         VM_FLAGS_ANYWHERE,
1229                         mach_task_self(),
1230                         buf_rw,
1231                         false,
1232                         &cur_prot,
1233                         &max_prot,
1234                         VM_INHERIT_NONE);
1235     if (ret != KERN_SUCCESS) {
1236         /* TODO: Convert "ret" to a human readable error message. */
1237         error_setg(errp, "vm_remap for jit splitwx failed");
1238         munmap((void *)buf_rw, size);
1239         return false;
1240     }
1241 
1242     if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
1243         error_setg_errno(errp, errno, "mprotect for jit splitwx");
1244         munmap((void *)buf_rx, size);
1245         munmap((void *)buf_rw, size);
1246         return false;
1247     }
1248 
1249     tcg_splitwx_diff = buf_rx - buf_rw;
1250     return true;
1251 }
1252 #endif /* CONFIG_DARWIN */
1253 #endif /* CONFIG_TCG_INTERPRETER */
1254 
1255 static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
1256 {
1257 #ifndef CONFIG_TCG_INTERPRETER
1258 # ifdef CONFIG_DARWIN
1259     return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
1260 # endif
1261 # ifdef CONFIG_POSIX
1262     return alloc_code_gen_buffer_splitwx_memfd(size, errp);
1263 # endif
1264 #endif
1265     error_setg(errp, "jit split-wx not supported");
1266     return false;
1267 }
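
/*
 * For illustration: with split-wx the same buffer is mapped twice, once
 * read-write for the translator and once read-execute for execution.
 * tcg_splitwx_diff records the constant offset between the two views, so
 * code holding only the RW pointer can derive the executable address (and
 * vice versa) with a simple addition, roughly:
 *
 *     const void *rx = (const char *)rw_ptr + tcg_splitwx_diff;
 */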
1268 
1269 static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
1270 {
1271     ERRP_GUARD();
1272     int prot, flags;
1273 
1274     if (splitwx) {
1275         if (alloc_code_gen_buffer_splitwx(size, errp)) {
1276             return true;
1277         }
1278         /*
1279          * If splitwx force-on (1), fail;
1280          * if splitwx default-on (-1), fall through to splitwx off.
1281          */
1282         if (splitwx > 0) {
1283             return false;
1284         }
1285         error_free_or_abort(errp);
1286     }
1287 
1288     prot = PROT_READ | PROT_WRITE | PROT_EXEC;
1289     flags = MAP_PRIVATE | MAP_ANONYMOUS;
1290 #ifdef CONFIG_TCG_INTERPRETER
1291     /* The tcg interpreter does not need execute permission. */
1292     prot = PROT_READ | PROT_WRITE;
1293 #elif defined(CONFIG_DARWIN)
1294     /* Applicable to both iOS and macOS (Apple Silicon). */
1295     if (!splitwx) {
1296         flags |= MAP_JIT;
1297     }
1298 #endif
1299 
1300     return alloc_code_gen_buffer_anon(size, prot, flags, errp);
1301 }
1302 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
1303 
1304 static bool tb_cmp(const void *ap, const void *bp)
1305 {
1306     const TranslationBlock *a = ap;
1307     const TranslationBlock *b = bp;
1308 
1309     return a->pc == b->pc &&
1310         a->cs_base == b->cs_base &&
1311         a->flags == b->flags &&
1312         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
1313         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
1314         a->page_addr[0] == b->page_addr[0] &&
1315         a->page_addr[1] == b->page_addr[1];
1316 }
1317 
1318 static void tb_htable_init(void)
1319 {
1320     unsigned int mode = QHT_MODE_AUTO_RESIZE;
1321 
1322     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
1323 }
1324 
1325 /* Must be called before using the QEMU cpus. 'tb_size' is the size
1326    (in bytes) allocated to the translation buffer. Zero means default
1327    size. */
1328 void tcg_exec_init(unsigned long tb_size, int splitwx)
1329 {
1330     bool ok;
1331 
1332     tcg_allowed = true;
1333     cpu_gen_init();
1334     page_init();
1335     tb_htable_init();
1336 
1337     ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
1338                                splitwx, &error_fatal);
1339     assert(ok);
1340 
1341 #if defined(CONFIG_SOFTMMU)
1342     /* There's no guest base to take into account, so go ahead and
1343        initialize the prologue now.  */
1344     tcg_prologue_init(tcg_ctx);
1345 #endif
1346 }
1347 
1348 /* call with @p->lock held */
1349 static inline void invalidate_page_bitmap(PageDesc *p)
1350 {
1351     assert_page_locked(p);
1352 #ifdef CONFIG_SOFTMMU
1353     g_free(p->code_bitmap);
1354     p->code_bitmap = NULL;
1355     p->code_write_count = 0;
1356 #endif
1357 }
1358 
1359 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
1360 static void page_flush_tb_1(int level, void **lp)
1361 {
1362     int i;
1363 
1364     if (*lp == NULL) {
1365         return;
1366     }
1367     if (level == 0) {
1368         PageDesc *pd = *lp;
1369 
1370         for (i = 0; i < V_L2_SIZE; ++i) {
1371             page_lock(&pd[i]);
1372             pd[i].first_tb = (uintptr_t)NULL;
1373             invalidate_page_bitmap(pd + i);
1374             page_unlock(&pd[i]);
1375         }
1376     } else {
1377         void **pp = *lp;
1378 
1379         for (i = 0; i < V_L2_SIZE; ++i) {
1380             page_flush_tb_1(level - 1, pp + i);
1381         }
1382     }
1383 }
1384 
1385 static void page_flush_tb(void)
1386 {
1387     int i, l1_sz = v_l1_size;
1388 
1389     for (i = 0; i < l1_sz; i++) {
1390         page_flush_tb_1(v_l2_levels, l1_map + i);
1391     }
1392 }
1393 
1394 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
1395 {
1396     const TranslationBlock *tb = value;
1397     size_t *size = data;
1398 
1399     *size += tb->tc.size;
1400     return false;
1401 }
1402 
1403 /* flush all the translation blocks */
1404 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
1405 {
1406     bool did_flush = false;
1407 
1408     mmap_lock();
1409     /* If it has already been done at the request of another CPU,
1410      * just retry.
1411      */
1412     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
1413         goto done;
1414     }
1415     did_flush = true;
1416 
1417     if (DEBUG_TB_FLUSH_GATE) {
1418         size_t nb_tbs = tcg_nb_tbs();
1419         size_t host_size = 0;
1420 
1421         tcg_tb_foreach(tb_host_size_iter, &host_size);
1422         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
1423                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
1424     }
1425 
1426     CPU_FOREACH(cpu) {
1427         cpu_tb_jmp_cache_clear(cpu);
1428     }
1429 
1430     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
1431     page_flush_tb();
1432 
1433     tcg_region_reset_all();
1434     /* XXX: flush processor icache at this point if cache flush is
1435        expensive */
1436     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1437 
1438 done:
1439     mmap_unlock();
1440     if (did_flush) {
1441         qemu_plugin_flush_cb();
1442     }
1443 }
1444 
1445 void tb_flush(CPUState *cpu)
1446 {
1447     if (tcg_enabled()) {
1448         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1449 
1450         if (cpu_in_exclusive_context(cpu)) {
1451             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1452         } else {
1453             async_safe_run_on_cpu(cpu, do_tb_flush,
1454                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1455         }
1456     }
1457 }
1458 
1459 /*
1460  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1461  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1462  * and let the optimizer get rid of them by wrapping their user-only callers
1463  * with if (DEBUG_TB_CHECK_GATE).
1464  */
1465 #ifdef CONFIG_USER_ONLY
1466 
1467 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1468 {
1469     TranslationBlock *tb = p;
1470     target_ulong addr = *(target_ulong *)userp;
1471 
1472     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1473         printf("ERROR invalidate: address=" TARGET_FMT_lx
1474                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1475     }
1476 }
1477 
1478 /* verify that all the pages have correct rights for code
1479  *
1480  * Called with mmap_lock held.
1481  */
1482 static void tb_invalidate_check(target_ulong address)
1483 {
1484     address &= TARGET_PAGE_MASK;
1485     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1486 }
1487 
1488 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1489 {
1490     TranslationBlock *tb = p;
1491     int flags1, flags2;
1492 
1493     flags1 = page_get_flags(tb->pc);
1494     flags2 = page_get_flags(tb->pc + tb->size - 1);
1495     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1496         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1497                (long)tb->pc, tb->size, flags1, flags2);
1498     }
1499 }
1500 
1501 /* verify that all the pages have correct rights for code */
1502 static void tb_page_check(void)
1503 {
1504     qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1505 }
1506 
1507 #endif /* CONFIG_USER_ONLY */
1508 
1509 /*
1510  * user-mode: call with mmap_lock held
1511  * !user-mode: call with @pd->lock held
1512  */
1513 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1514 {
1515     TranslationBlock *tb1;
1516     uintptr_t *pprev;
1517     unsigned int n1;
1518 
1519     assert_page_locked(pd);
1520     pprev = &pd->first_tb;
1521     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1522         if (tb1 == tb) {
1523             *pprev = tb1->page_next[n1];
1524             return;
1525         }
1526         pprev = &tb1->page_next[n1];
1527     }
1528     g_assert_not_reached();
1529 }
1530 
1531 /* remove @orig from its @n_orig-th jump list */
1532 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1533 {
1534     uintptr_t ptr, ptr_locked;
1535     TranslationBlock *dest;
1536     TranslationBlock *tb;
1537     uintptr_t *pprev;
1538     int n;
1539 
1540     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1541     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1542     dest = (TranslationBlock *)(ptr & ~1);
1543     if (dest == NULL) {
1544         return;
1545     }
1546 
1547     qemu_spin_lock(&dest->jmp_lock);
1548     /*
1549      * While acquiring the lock, the jump might have been removed if the
1550      * destination TB was invalidated; check again.
1551      */
1552     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1553     if (ptr_locked != ptr) {
1554         qemu_spin_unlock(&dest->jmp_lock);
1555         /*
1556          * The only possibility is that the jump was unlinked via
1557          * tb_jump_unlink(dest). Seeing another destination here would be a bug,
1558          * because we set the LSB above.
1559          */
1560         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1561         return;
1562     }
1563     /*
1564      * We first acquired the lock, and since the destination pointer matches,
1565      * we know for sure that @orig is in the jmp list.
1566      */
1567     pprev = &dest->jmp_list_head;
1568     TB_FOR_EACH_JMP(dest, tb, n) {
1569         if (tb == orig && n == n_orig) {
1570             *pprev = tb->jmp_list_next[n];
1571             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1572             qemu_spin_unlock(&dest->jmp_lock);
1573             return;
1574         }
1575         pprev = &tb->jmp_list_next[n];
1576     }
1577     g_assert_not_reached();
1578 }
1579 
1580 /* reset the jump entry 'n' of a TB so that it is not chained to
1581    another TB */
1582 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1583 {
1584     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1585     tb_set_jmp_target(tb, n, addr);
1586 }
1587 
1588 /* remove any jumps to the TB */
1589 static inline void tb_jmp_unlink(TranslationBlock *dest)
1590 {
1591     TranslationBlock *tb;
1592     int n;
1593 
1594     qemu_spin_lock(&dest->jmp_lock);
1595 
1596     TB_FOR_EACH_JMP(dest, tb, n) {
1597         tb_reset_jump(tb, n);
1598         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1599         /* No need to clear the list entry; setting the dest ptr is enough */
1600     }
1601     dest->jmp_list_head = (uintptr_t)NULL;
1602 
1603     qemu_spin_unlock(&dest->jmp_lock);
1604 }
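
/*
 * For illustration: a direct jump from TB A to TB B is "chained" by patching
 * the goto_tb slot in A's generated code with B's entry point.
 * tb_reset_jump() undoes this by pointing the slot back at the code that
 * follows it (tb->jmp_reset_offset[n]), so A exits to the main loop and looks
 * the destination up again; tb_jmp_unlink() applies this to every TB still
 * chained to @dest before @dest is invalidated.
 */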
1605 
1606 /*
1607  * In user-mode, call with mmap_lock held.
1608  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1609  * locks held.
1610  */
1611 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1612 {
1613     CPUState *cpu;
1614     PageDesc *p;
1615     uint32_t h;
1616     tb_page_addr_t phys_pc;
1617     uint32_t orig_cflags = tb_cflags(tb);
1618 
1619     assert_memory_lock();
1620 
1621     /* make sure no further incoming jumps will be chained to this TB */
1622     qemu_spin_lock(&tb->jmp_lock);
1623     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1624     qemu_spin_unlock(&tb->jmp_lock);
1625 
1626     /* remove the TB from the hash list */
1627     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1628     h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
1629                      tb->trace_vcpu_dstate);
1630     if (!qht_remove(&tb_ctx.htable, tb, h)) {
1631         return;
1632     }
1633 
1634     /* remove the TB from the page list */
1635     if (rm_from_page_list) {
1636         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1637         tb_page_remove(p, tb);
1638         invalidate_page_bitmap(p);
1639         if (tb->page_addr[1] != -1) {
1640             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1641             tb_page_remove(p, tb);
1642             invalidate_page_bitmap(p);
1643         }
1644     }
1645 
1646     /* remove the TB from each CPU's tb_jmp_cache */
1647     h = tb_jmp_cache_hash_func(tb->pc);
1648     CPU_FOREACH(cpu) {
1649         if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1650             qatomic_set(&cpu->tb_jmp_cache[h], NULL);
1651         }
1652     }
1653 
1654     /* suppress this TB from the two jump lists */
1655     tb_remove_from_jmp_list(tb, 0);
1656     tb_remove_from_jmp_list(tb, 1);
1657 
1658     /* suppress any remaining jumps to this TB */
1659     tb_jmp_unlink(tb);
1660 
1661     qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
1662                tcg_ctx->tb_phys_invalidate_count + 1);
1663 }
1664 
1665 static void tb_phys_invalidate__locked(TranslationBlock *tb)
1666 {
1667     qemu_thread_jit_write();
1668     do_tb_phys_invalidate(tb, true);
1669     qemu_thread_jit_execute();
1670 }
1671 
1672 /* invalidate one TB
1673  *
1674  * Called with mmap_lock held in user-mode.
1675  */
1676 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1677 {
1678     if (page_addr == -1 && tb->page_addr[0] != -1) {
1679         page_lock_tb(tb);
1680         do_tb_phys_invalidate(tb, true);
1681         page_unlock_tb(tb);
1682     } else {
1683         do_tb_phys_invalidate(tb, false);
1684     }
1685 }
1686 
1687 #ifdef CONFIG_SOFTMMU
1688 /* call with @p->lock held */
1689 static void build_page_bitmap(PageDesc *p)
1690 {
1691     int n, tb_start, tb_end;
1692     TranslationBlock *tb;
1693 
1694     assert_page_locked(p);
1695     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1696 
1697     PAGE_FOR_EACH_TB(p, tb, n) {
1698         /* NOTE: this is subtle as a TB may span two physical pages */
1699         if (n == 0) {
1700             /* NOTE: tb_end may be after the end of the page, but
1701                it is not a problem */
1702             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1703             tb_end = tb_start + tb->size;
1704             if (tb_end > TARGET_PAGE_SIZE) {
1705                 tb_end = TARGET_PAGE_SIZE;
1706             }
1707         } else {
1708             tb_start = 0;
1709             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1710         }
1711         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1712     }
1713 }
1714 #endif
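
/*
 * For illustration: the bitmap built above marks which byte offsets of the
 * target page are covered by translated code.  The fast write-invalidate
 * path can then test the written range against the bitmap and skip the
 * expensive TB walk entirely when no translated code overlaps the store.
 */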
1715 
1716 /* add the tb in the target page and protect it if necessary
1717  *
1718  * Called with mmap_lock held for user-mode emulation.
1719  * Called with @p->lock held in !user-mode.
1720  */
1721 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1722                                unsigned int n, tb_page_addr_t page_addr)
1723 {
1724 #ifndef CONFIG_USER_ONLY
1725     bool page_already_protected;
1726 #endif
1727 
1728     assert_page_locked(p);
1729 
1730     tb->page_addr[n] = page_addr;
1731     tb->page_next[n] = p->first_tb;
1732 #ifndef CONFIG_USER_ONLY
1733     page_already_protected = p->first_tb != (uintptr_t)NULL;
1734 #endif
1735     p->first_tb = (uintptr_t)tb | n;
1736     invalidate_page_bitmap(p);
1737 
1738 #if defined(CONFIG_USER_ONLY)
1739     if (p->flags & PAGE_WRITE) {
1740         target_ulong addr;
1741         PageDesc *p2;
1742         int prot;
1743 
1744         /* force the host page to be non-writable (writes will have a
1745            page fault + mprotect overhead) */
1746         page_addr &= qemu_host_page_mask;
1747         prot = 0;
1748         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1749             addr += TARGET_PAGE_SIZE) {
1750 
1751             p2 = page_find(addr >> TARGET_PAGE_BITS);
1752             if (!p2) {
1753                 continue;
1754             }
1755             prot |= p2->flags;
1756             p2->flags &= ~PAGE_WRITE;
1757         }
1758         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1759                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1760         if (DEBUG_TB_INVALIDATE_GATE) {
1761             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1762         }
1763     }
1764 #else
1765     /* If some code is already present in this page, the page is already
1766        write-protected, so protection only needs to be installed when the
1767        first TB for the page is added. */
1768     if (!page_already_protected) {
1769         tlb_protect_code(page_addr);
1770     }
1771 #endif
1772 }
1773 
1774 /*
1775  * Add a new TB and link it to the physical page tables. phys_page2 is
1776  * (-1) to indicate that only one page contains the TB.
1777  *
1778  * Called with mmap_lock held for user-mode emulation.
1779  *
1780  * Returns @tb, or a pointer to an existing TB that matches @tb.
1781  * Note that in !user-mode, another thread might have already added a TB
1782  * for the same block of guest code that @tb corresponds to. In that case,
1783  * the caller should discard the original @tb, and use instead the returned TB.
1784  */
1785 static TranslationBlock *
1786 tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1787              tb_page_addr_t phys_page2)
1788 {
1789     PageDesc *p;
1790     PageDesc *p2 = NULL;
1791     void *existing_tb = NULL;
1792     uint32_t h;
1793 
1794     assert_memory_lock();
1795     tcg_debug_assert(!(tb->cflags & CF_INVALID));
1796 
1797     /*
1798      * Add the TB to the page list, first acquiring the pages' locks.
1799      * We keep the locks held until after inserting the TB in the hash table,
1800      * so that if the insertion fails we know for sure that the TBs are still
1801      * in the page descriptors.
1802      * Note that inserting into the hash table first isn't an option, since
1803      * we can only insert TBs that are fully initialized.
1804      */
1805     page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1806     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1807     if (p2) {
1808         tb_page_add(p2, tb, 1, phys_page2);
1809     } else {
1810         tb->page_addr[1] = -1;
1811     }
1812 
1813     /* add in the hash table */
1814     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
1815                      tb->trace_vcpu_dstate);
1816     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1817 
1818     /* remove TB from the page(s) if we couldn't insert it */
1819     if (unlikely(existing_tb)) {
1820         tb_page_remove(p, tb);
1821         invalidate_page_bitmap(p);
1822         if (p2) {
1823             tb_page_remove(p2, tb);
1824             invalidate_page_bitmap(p2);
1825         }
1826         tb = existing_tb;
1827     }
1828 
1829     if (p2 && p2 != p) {
1830         page_unlock(p2);
1831     }
1832     page_unlock(p);
1833 
1834 #ifdef CONFIG_USER_ONLY
1835     if (DEBUG_TB_CHECK_GATE) {
1836         tb_page_check();
1837     }
1838 #endif
1839     return tb;
1840 }
1841 
1842 /* Called with mmap_lock held for user mode emulation.  */
1843 TranslationBlock *tb_gen_code(CPUState *cpu,
1844                               target_ulong pc, target_ulong cs_base,
1845                               uint32_t flags, int cflags)
1846 {
1847     CPUArchState *env = cpu->env_ptr;
1848     TranslationBlock *tb, *existing_tb;
1849     tb_page_addr_t phys_pc, phys_page2;
1850     target_ulong virt_page2;
1851     tcg_insn_unit *gen_code_buf;
1852     int gen_code_size, search_size, max_insns;
1853 #ifdef CONFIG_PROFILER
1854     TCGProfile *prof = &tcg_ctx->prof;
1855     int64_t ti;
1856 #endif
1857 
1858     assert_memory_lock();
1859     qemu_thread_jit_write();
1860 
1861     phys_pc = get_page_addr_code(env, pc);
1862 
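    /*
     * get_page_addr_code() returns -1 when the PC is not backed by guest
     * RAM (typically when executing out of an MMIO region under softmmu).
     * Such code cannot be tracked by the physical page machinery, so it is
     * run as an uncached, single-instruction TB; see the phys_pc == -1
     * check further down, which skips tb_link_page() entirely.
     */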
1863     if (phys_pc == -1) {
1864         /* Generate a one-shot TB with 1 insn in it */
1865         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1866     }
1867 
1868     max_insns = cflags & CF_COUNT_MASK;
1869     if (max_insns == 0) {
1870         max_insns = CF_COUNT_MASK;
1871     }
1872     if (max_insns > TCG_MAX_INSNS) {
1873         max_insns = TCG_MAX_INSNS;
1874     }
1875     if (cpu->singlestep_enabled || singlestep) {
1876         max_insns = 1;
1877     }
1878 
1879  buffer_overflow:
1880     tb = tcg_tb_alloc(tcg_ctx);
1881     if (unlikely(!tb)) {
1882         /* flush must be done */
1883         tb_flush(cpu);
1884         mmap_unlock();
1885         /* Make the execution loop process the flush as soon as possible.  */
1886         cpu->exception_index = EXCP_INTERRUPT;
1887         cpu_loop_exit(cpu);
1888     }
1889 
1890     gen_code_buf = tcg_ctx->code_gen_ptr;
1891     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1892     tb->pc = pc;
1893     tb->cs_base = cs_base;
1894     tb->flags = flags;
1895     tb->cflags = cflags;
1896     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1897     tcg_ctx->tb_cflags = cflags;
1898  tb_overflow:
1899 
1900 #ifdef CONFIG_PROFILER
1901     /* includes aborted translations because of exceptions */
1902     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1903     ti = profile_getclock();
1904 #endif
1905 
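    /*
     * Code generation can be aborted from deeper inside TCG by a
     * siglongjmp back to jmp_trans; the nonzero value delivered here is
     * then handled through the same error path as a negative return from
     * tcg_gen_code() below.
     */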
1906     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1907     if (unlikely(gen_code_size != 0)) {
1908         goto error_return;
1909     }
1910 
1911     tcg_func_start(tcg_ctx);
1912 
1913     tcg_ctx->cpu = env_cpu(env);
1914     gen_intermediate_code(cpu, tb, max_insns);
1915     assert(tb->size != 0);
1916     tcg_ctx->cpu = NULL;
1917     max_insns = tb->icount;
1918 
1919     trace_translate_block(tb, tb->pc, tb->tc.ptr);
1920 
1921     /* generate machine code */
1922     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1923     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1924     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
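    /*
     * Hosts with direct jump support record the offsets of the branch
     * instructions to be patched (tb_jmp_insn_offset); other hosts record
     * where the indirect jump target addresses are stored
     * (tb_jmp_target_addr).  Either way the data lives in
     * tb->jmp_target_arg[].
     */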
1925     if (TCG_TARGET_HAS_direct_jump) {
1926         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1927         tcg_ctx->tb_jmp_target_addr = NULL;
1928     } else {
1929         tcg_ctx->tb_jmp_insn_offset = NULL;
1930         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1931     }
1932 
1933 #ifdef CONFIG_PROFILER
1934     qatomic_set(&prof->tb_count, prof->tb_count + 1);
1935     qatomic_set(&prof->interm_time,
1936                 prof->interm_time + profile_getclock() - ti);
1937     ti = profile_getclock();
1938 #endif
1939 
1940     gen_code_size = tcg_gen_code(tcg_ctx, tb);
1941     if (unlikely(gen_code_size < 0)) {
1942  error_return:
1943         switch (gen_code_size) {
1944         case -1:
1945             /*
1946              * Overflow of code_gen_buffer, or the current slice of it.
1947              *
1948              * TODO: We don't need to re-do gen_intermediate_code, nor
1949              * should we re-do the tcg optimization currently hidden
1950              * inside tcg_gen_code.  All that should be required is to
1951              * flush the TBs, allocate a new TB, re-initialize it per
1952              * above, and re-do the actual code generation.
1953              */
1954             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1955                           "Restarting code generation for "
1956                           "code_gen_buffer overflow\n");
1957             goto buffer_overflow;
1958 
1959         case -2:
1960             /*
1961              * The code generated for the TranslationBlock is too large.
1962              * The maximum size allowed by the unwind info is 64k.
1963              * There may be stricter constraints from relocations
1964              * in the tcg backend.
1965              *
1966              * Try again with half as many insns as we attempted this time.
1967              * If a single insn overflows, there's a bug somewhere...
1968              */
1969             assert(max_insns > 1);
1970             max_insns /= 2;
1971             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1972                           "Restarting code generation with "
1973                           "smaller translation block (max %d insns)\n",
1974                           max_insns);
1975             goto tb_overflow;
1976 
1977         default:
1978             g_assert_not_reached();
1979         }
1980     }
1981     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1982     if (unlikely(search_size < 0)) {
1983         goto buffer_overflow;
1984     }
1985     tb->tc.size = gen_code_size;
1986 
1987 #ifdef CONFIG_PROFILER
1988     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1989     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1990     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1991     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1992 #endif
1993 
1994 #ifdef DEBUG_DISAS
1995     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1996         qemu_log_in_addr_range(tb->pc)) {
1997         FILE *logfile = qemu_log_lock();
1998         int code_size, data_size;
1999         const tcg_target_ulong *rx_data_gen_ptr;
2000         size_t chunk_start;
2001         int insn = 0;
2002 
2003         if (tcg_ctx->data_gen_ptr) {
2004             rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
2005             code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
2006             data_size = gen_code_size - code_size;
2007         } else {
2008             rx_data_gen_ptr = 0;
2009             code_size = gen_code_size;
2010             data_size = 0;
2011         }
2012 
2013         /* Dump header and the first instruction */
2014         qemu_log("OUT: [size=%d]\n", gen_code_size);
2015         qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
2016                  tcg_ctx->gen_insn_data[insn][0]);
2017         chunk_start = tcg_ctx->gen_insn_end_off[insn];
2018         log_disas(tb->tc.ptr, chunk_start);
2019 
2020         /*
2021          * Dump each instruction chunk, wrapping up empty chunks into
2022          * the next instruction. The whole array is offset so the
2023          * first entry is the beginning of the 2nd instruction.
2024          */
2025         while (insn < tb->icount) {
2026             size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
2027             if (chunk_end > chunk_start) {
2028                 qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
2029                          tcg_ctx->gen_insn_data[insn][0]);
2030                 log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
2031                 chunk_start = chunk_end;
2032             }
2033             insn++;
2034         }
2035 
2036         if (chunk_start < code_size) {
2037             qemu_log("  -- tb slow paths + alignment\n");
2038             log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
2039         }
2040 
2041         /* Finally dump any data we may have after the block */
2042         if (data_size) {
2043             int i;
2044             qemu_log("  data: [size=%d]\n", data_size);
2045             for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
2046                 if (sizeof(tcg_target_ulong) == 8) {
2047                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
2048                              (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
2049                 } else if (sizeof(tcg_target_ulong) == 4) {
2050                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
2051                              (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
2052                 } else {
2053                     qemu_build_not_reached();
2054                 }
2055             }
2056         }
2057         qemu_log("\n");
2058         qemu_log_flush();
2059         qemu_log_unlock(logfile);
2060     }
2061 #endif
2062 
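    /*
     * Both the generated host code and the search metadata appended to it
     * were emitted into code_gen_buffer; bump the allocation pointer past
     * both, keeping the next TB aligned to CODE_GEN_ALIGN.
     */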
2063     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
2064         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
2065                  CODE_GEN_ALIGN));
2066 
2067     /* init jump list */
2068     qemu_spin_init(&tb->jmp_lock);
2069     tb->jmp_list_head = (uintptr_t)NULL;
2070     tb->jmp_list_next[0] = (uintptr_t)NULL;
2071     tb->jmp_list_next[1] = (uintptr_t)NULL;
2072     tb->jmp_dest[0] = (uintptr_t)NULL;
2073     tb->jmp_dest[1] = (uintptr_t)NULL;
2074 
2075     /* init original jump addresses which have been set during tcg_gen_code() */
2076     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2077         tb_reset_jump(tb, 0);
2078     }
2079     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2080         tb_reset_jump(tb, 1);
2081     }
2082 
2083     /*
2084      * If the TB is not associated with a physical RAM page then
2085      * it must be a temporary one-insn TB, and we have nothing to do
2086      * except fill in the page_addr[] fields. Return early before
2087      * attempting to link to other TBs or add to the lookup table.
2088      */
2089     if (phys_pc == -1) {
2090         tb->page_addr[0] = tb->page_addr[1] = -1;
2091         return tb;
2092     }
2093 
2094     /* check next page if needed */
2095     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
2096     phys_page2 = -1;
2097     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
2098         phys_page2 = get_page_addr_code(env, virt_page2);
2099     }
2100     /*
2101      * No explicit memory barrier is required -- tb_link_page() makes the
2102      * TB visible in a consistent state.
2103      */
2104     existing_tb = tb_link_page(tb, phys_pc, phys_page2);
2105     /* if the TB already exists, discard what we just translated */
2106     if (unlikely(existing_tb != tb)) {
2107         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
2108 
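        /*
         * Roll back the code_gen_buffer allocation: step back over the
         * cache-line-aligned space reserved for the TB structure just
         * before the generated code and release everything we emitted.
         */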
2109         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
2110         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
2111         tb_destroy(tb);
2112         return existing_tb;
2113     }
2114     tcg_tb_insert(tb);
2115     return tb;
2116 }
2117 
2118 /*
2119  * @p must be non-NULL.
2120  * user-mode: call with mmap_lock held.
2121  * !user-mode: call with all @pages locked.
2122  */
2123 static void
2124 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
2125                                       PageDesc *p, tb_page_addr_t start,
2126                                       tb_page_addr_t end,
2127                                       uintptr_t retaddr)
2128 {
2129     TranslationBlock *tb;
2130     tb_page_addr_t tb_start, tb_end;
2131     int n;
2132 #ifdef TARGET_HAS_PRECISE_SMC
2133     CPUState *cpu = current_cpu;
2134     CPUArchState *env = NULL;
2135     bool current_tb_not_found = retaddr != 0;
2136     bool current_tb_modified = false;
2137     TranslationBlock *current_tb = NULL;
2138     target_ulong current_pc = 0;
2139     target_ulong current_cs_base = 0;
2140     uint32_t current_flags = 0;
2141 #endif /* TARGET_HAS_PRECISE_SMC */
2142 
2143     assert_page_locked(p);
2144 
2145 #if defined(TARGET_HAS_PRECISE_SMC)
2146     if (cpu != NULL) {
2147         env = cpu->env_ptr;
2148     }
2149 #endif
2150 
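    /*
     * A nonzero retaddr means we were called because of a real cpu write
     * fault.  The TB currently being executed is then looked up lazily,
     * only once a TB overlapping the written range is actually found, so
     * that precise SMC handling can restart it if it modified itself.
     */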
2151     /* we remove all the TBs in the range [start, end[ */
2152     /* XXX: see if in some cases it could be faster to invalidate all
2153        the code */
2154     PAGE_FOR_EACH_TB(p, tb, n) {
2155         assert_page_locked(p);
2156         /* NOTE: this is subtle as a TB may span two physical pages */
2157         if (n == 0) {
2158             /* NOTE: tb_end may be after the end of the page, but
2159                it is not a problem */
2160             tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
2161             tb_end = tb_start + tb->size;
2162         } else {
2163             tb_start = tb->page_addr[1];
2164             tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
2165         }
2166         if (!(tb_end <= start || tb_start >= end)) {
2167 #ifdef TARGET_HAS_PRECISE_SMC
2168             if (current_tb_not_found) {
2169                 current_tb_not_found = false;
2170                 /* now we have a real cpu fault */
2171                 current_tb = tcg_tb_lookup(retaddr);
2172             }
2173             if (current_tb == tb &&
2174                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2175                 /*
2176                  * If we are modifying the current TB, we must stop
2177                  * its execution. We could be more precise by checking
2178                  * that the modification is after the current PC, but it
2179                  * would require a specialized function to partially
2180                  * restore the CPU state.
2181                  */
2182                 current_tb_modified = true;
2183                 cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
2184                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2185                                      &current_flags);
2186             }
2187 #endif /* TARGET_HAS_PRECISE_SMC */
2188             tb_phys_invalidate__locked(tb);
2189         }
2190     }
2191 #if !defined(CONFIG_USER_ONLY)
2192     /* if no code remaining, no need to continue to use slow writes */
2193     if (!p->first_tb) {
2194         invalidate_page_bitmap(p);
2195         tlb_unprotect_code(start);
2196     }
2197 #endif
2198 #ifdef TARGET_HAS_PRECISE_SMC
2199     if (current_tb_modified) {
2200         page_collection_unlock(pages);
2201         /* Force execution of one insn next time.  */
2202         cpu->cflags_next_tb = 1 | curr_cflags(cpu);
2203         mmap_unlock();
2204         cpu_loop_exit_noexc(cpu);
2205     }
2206 #endif
2207 }
2208 
2209 /*
2210  * Invalidate all TBs which intersect with the target physical address range
2211  * [start;end[. NOTE: start and end must refer to the *same* physical page.
2212  * When the invalidation is triggered by a real cpu write access, the
2213  * virtual CPU will exit the current TB if code is modified inside
2214  * this TB.
2215  *
2216  * Called with mmap_lock held for user-mode emulation.
2217  */
2218 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
2219 {
2220     struct page_collection *pages;
2221     PageDesc *p;
2222 
2223     assert_memory_lock();
2224 
2225     p = page_find(start >> TARGET_PAGE_BITS);
2226     if (p == NULL) {
2227         return;
2228     }
2229     pages = page_collection_lock(start, end);
2230     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
2231     page_collection_unlock(pages);
2232 }
2233 
2234 /*
2235  * Invalidate all TBs which intersect with the target physical address range
2236  * [start;end[. NOTE: start and end may refer to *different* physical pages.
2237  * When the invalidation is triggered by a real cpu write access, the
2238  * virtual CPU will exit the current TB if code is modified inside
2239  * this TB.
2240  *
2241  * Called with mmap_lock held for user-mode emulation.
2242  */
2243 #ifdef CONFIG_SOFTMMU
2244 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
2245 #else
2246 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
2247 #endif
2248 {
2249     struct page_collection *pages;
2250     tb_page_addr_t next;
2251 
2252     assert_memory_lock();
2253 
2254     pages = page_collection_lock(start, end);
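    /*
     * Walk the range one target page at a time: 'bound' clamps each
     * sub-range to the page boundary (or to 'end' for the last page),
     * since the per-page helper requires both bounds to lie within the
     * same page.
     */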
2255     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
2256          start < end;
2257          start = next, next += TARGET_PAGE_SIZE) {
2258         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
2259         tb_page_addr_t bound = MIN(next, end);
2260 
2261         if (pd == NULL) {
2262             continue;
2263         }
2264         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
2265     }
2266     page_collection_unlock(pages);
2267 }
2268 
2269 #ifdef CONFIG_SOFTMMU
2270 /* len must be <= 8 and start must be a multiple of len.
2271  * Called via softmmu_template.h when code areas are written to with
2272  * iothread mutex not held.
2273  *
2274  * Call with all @pages in the range [@start, @start + len[ locked.
2275  */
2276 void tb_invalidate_phys_page_fast(struct page_collection *pages,
2277                                   tb_page_addr_t start, int len,
2278                                   uintptr_t retaddr)
2279 {
2280     PageDesc *p;
2281 
2282     assert_memory_lock();
2283 
2284     p = page_find(start >> TARGET_PAGE_BITS);
2285     if (!p) {
2286         return;
2287     }
2288 
2289     assert_page_locked(p);
2290     if (!p->code_bitmap &&
2291         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
2292         build_page_bitmap(p);
2293     }
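    /*
     * Fast check: 'nr' is the offset of the write within the page.  The
     * bitmap word containing bit 'nr' is shifted down so that bit 0
     * corresponds to the first written byte, and the low 'len' bits are
     * tested.  Only when a written byte overlaps translated code do we
     * fall through to the range invalidation.
     */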
2294     if (p->code_bitmap) {
2295         unsigned int nr;
2296         unsigned long b;
2297 
2298         nr = start & ~TARGET_PAGE_MASK;
2299         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
2300         if (b & ((1 << len) - 1)) {
2301             goto do_invalidate;
2302         }
2303     } else {
2304     do_invalidate:
2305         tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
2306                                               retaddr);
2307     }
2308 }
2309 #else
2310 /* Called with mmap_lock held. If pc is not 0 then it indicates the
2311  * host PC of the faulting store instruction that caused this invalidate.
2312  * Returns true if the caller needs to abort execution of the current
2313  * TB (because it was modified by this store and the guest CPU has
2314  * precise-SMC semantics).
2315  */
2316 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
2317 {
2318     TranslationBlock *tb;
2319     PageDesc *p;
2320     int n;
2321 #ifdef TARGET_HAS_PRECISE_SMC
2322     TranslationBlock *current_tb = NULL;
2323     CPUState *cpu = current_cpu;
2324     CPUArchState *env = NULL;
2325     int current_tb_modified = 0;
2326     target_ulong current_pc = 0;
2327     target_ulong current_cs_base = 0;
2328     uint32_t current_flags = 0;
2329 #endif
2330 
2331     assert_memory_lock();
2332 
2333     addr &= TARGET_PAGE_MASK;
2334     p = page_find(addr >> TARGET_PAGE_BITS);
2335     if (!p) {
2336         return false;
2337     }
2338 
2339 #ifdef TARGET_HAS_PRECISE_SMC
2340     if (p->first_tb && pc != 0) {
2341         current_tb = tcg_tb_lookup(pc);
2342     }
2343     if (cpu != NULL) {
2344         env = cpu->env_ptr;
2345     }
2346 #endif
2347     assert_page_locked(p);
2348     PAGE_FOR_EACH_TB(p, tb, n) {
2349 #ifdef TARGET_HAS_PRECISE_SMC
2350         if (current_tb == tb &&
2351             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2352             /* If we are modifying the current TB, we must stop
2353                its execution. We could be more precise by checking
2354                that the modification is after the current PC, but it
2355                would require a specialized function to partially
2356                restore the CPU state */
2357 
2358             current_tb_modified = 1;
2359             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
2360             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2361                                  &current_flags);
2362         }
2363 #endif /* TARGET_HAS_PRECISE_SMC */
2364         tb_phys_invalidate(tb, addr);
2365     }
2366     p->first_tb = (uintptr_t)NULL;
2367 #ifdef TARGET_HAS_PRECISE_SMC
2368     if (current_tb_modified) {
2369         /* Force execution of one insn next time.  */
2370         cpu->cflags_next_tb = 1 | curr_cflags(cpu);
2371         return true;
2372     }
2373 #endif
2374 
2375     return false;
2376 }
2377 #endif
2378 
2379 /* user-mode: call with mmap_lock held */
2380 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
2381 {
2382     TranslationBlock *tb;
2383 
2384     assert_memory_lock();
2385 
2386     tb = tcg_tb_lookup(retaddr);
2387     if (tb) {
2388         /* We can use retranslation to find the PC.  */
2389         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2390         tb_phys_invalidate(tb, -1);
2391     } else {
2392         /* The exception probably happened in a helper.  The CPU state should
2393            have been saved before calling it. Fetch the PC from there.  */
2394         CPUArchState *env = cpu->env_ptr;
2395         target_ulong pc, cs_base;
2396         tb_page_addr_t addr;
2397         uint32_t flags;
2398 
2399         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2400         addr = get_page_addr_code(env, pc);
2401         if (addr != -1) {
2402             tb_invalidate_phys_range(addr, addr + 1);
2403         }
2404     }
2405 }
2406 
2407 #ifndef CONFIG_USER_ONLY
2408 /*
2409  * In deterministic execution mode, instructions doing device I/Os
2410  * must be at the end of the TB.
2411  *
2412  * Called by softmmu_template.h, with iothread mutex not held.
2413  */
2414 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2415 {
2416     TranslationBlock *tb;
2417     CPUClass *cc;
2418     uint32_t n;
2419 
2420     tb = tcg_tb_lookup(retaddr);
2421     if (!tb) {
2422         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2423                   (void *)retaddr);
2424     }
2425     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2426 
2427     /*
2428      * Some guests must re-execute the branch when re-executing a delay
2429      * slot instruction.  When this is the case, adjust icount and N
2430      * to account for the re-execution of the branch.
2431      */
2432     n = 1;
2433     cc = CPU_GET_CLASS(cpu);
2434     if (cc->tcg_ops->io_recompile_replay_branch &&
2435         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
2436         cpu_neg(cpu)->icount_decr.u16.low++;
2437         n = 2;
2438     }
2439 
2440     /*
2441      * Exit the loop and potentially generate a new TB executing just
2442      * the I/O insns. We also limit instrumentation to memory
2443      * operations only (which execute after completion) so we don't
2444      * double instrument the instruction.
2445      */
2446     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
2447 
2448     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
2449                            "cpu_io_recompile: rewound execution of TB to "
2450                            TARGET_FMT_lx "\n", tb->pc);
2451 
2452     cpu_loop_exit_noexc(cpu);
2453 }
2454 
2455 static void print_qht_statistics(struct qht_stats hst)
2456 {
2457     uint32_t hgram_opts;
2458     size_t hgram_bins;
2459     char *hgram;
2460 
2461     if (!hst.head_buckets) {
2462         return;
2463     }
2464     qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2465                 hst.used_head_buckets, hst.head_buckets,
2466                 (double)hst.used_head_buckets / hst.head_buckets * 100);
2467 
2468     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2469     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2470     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2471         hgram_opts |= QDIST_PR_NODECIMAL;
2472     }
2473     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2474     qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2475                 qdist_avg(&hst.occupancy) * 100, hgram);
2476     g_free(hgram);
2477 
2478     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2479     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2480     if (hgram_bins > 10) {
2481         hgram_bins = 10;
2482     } else {
2483         hgram_bins = 0;
2484         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2485     }
2486     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2487     qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2488                 qdist_avg(&hst.chain), hgram);
2489     g_free(hgram);
2490 }
2491 
2492 struct tb_tree_stats {
2493     size_t nb_tbs;
2494     size_t host_size;
2495     size_t target_size;
2496     size_t max_target_size;
2497     size_t direct_jmp_count;
2498     size_t direct_jmp2_count;
2499     size_t cross_page;
2500 };
2501 
2502 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2503 {
2504     const TranslationBlock *tb = value;
2505     struct tb_tree_stats *tst = data;
2506 
2507     tst->nb_tbs++;
2508     tst->host_size += tb->tc.size;
2509     tst->target_size += tb->size;
2510     if (tb->size > tst->max_target_size) {
2511         tst->max_target_size = tb->size;
2512     }
2513     if (tb->page_addr[1] != -1) {
2514         tst->cross_page++;
2515     }
2516     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2517         tst->direct_jmp_count++;
2518         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2519             tst->direct_jmp2_count++;
2520         }
2521     }
2522     return false;
2523 }
2524 
2525 void dump_exec_info(void)
2526 {
2527     struct tb_tree_stats tst = {};
2528     struct qht_stats hst;
2529     size_t nb_tbs, flush_full, flush_part, flush_elide;
2530 
2531     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2532     nb_tbs = tst.nb_tbs;
2533     /* XXX: avoid using doubles? */
2534     qemu_printf("Translation buffer state:\n");
2535     /*
2536      * Report total code size including the padding and TB structs;
2537      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2538      * For avg host size we use the precise numbers from tb_tree_stats though.
2539      */
2540     qemu_printf("gen code size       %zu/%zu\n",
2541                 tcg_code_size(), tcg_code_capacity());
2542     qemu_printf("TB count            %zu\n", nb_tbs);
2543     qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2544                 nb_tbs ? tst.target_size / nb_tbs : 0,
2545                 tst.max_target_size);
2546     qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2547                 nb_tbs ? tst.host_size / nb_tbs : 0,
2548                 tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2549     qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2550                 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2551     qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2552                 tst.direct_jmp_count,
2553                 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2554                 tst.direct_jmp2_count,
2555                 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2556 
2557     qht_statistics_init(&tb_ctx.htable, &hst);
2558     print_qht_statistics(hst);
2559     qht_statistics_destroy(&hst);
2560 
2561     qemu_printf("\nStatistics:\n");
2562     qemu_printf("TB flush count      %u\n",
2563                 qatomic_read(&tb_ctx.tb_flush_count));
2564     qemu_printf("TB invalidate count %zu\n",
2565                 tcg_tb_phys_invalidate_count());
2566 
2567     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2568     qemu_printf("TLB full flushes    %zu\n", flush_full);
2569     qemu_printf("TLB partial flushes %zu\n", flush_part);
2570     qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2571     tcg_dump_info();
2572 }
2573 
2574 void dump_opcount_info(void)
2575 {
2576     tcg_dump_op_count();
2577 }
2578 
2579 #else /* CONFIG_USER_ONLY */
2580 
2581 void cpu_interrupt(CPUState *cpu, int mask)
2582 {
2583     g_assert(qemu_mutex_iothread_locked());
2584     cpu->interrupt_request |= mask;
2585     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2586 }
2587 
2588 /*
2589  * Walks guest process memory "regions" one by one
2590  * and calls callback function 'fn' for each region.
2591  */
2592 struct walk_memory_regions_data {
2593     walk_memory_regions_fn fn;
2594     void *priv;
2595     target_ulong start;
2596     int prot;
2597 };
2598 
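/*
 * Flush the region accumulated so far (if any) to the callback, then start
 * a new region at 'end' with protection 'new_prot'.  data->start == -1u
 * means no region is currently open; new_prot == 0 closes the current
 * region without opening a new one.
 */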
2599 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2600                                    target_ulong end, int new_prot)
2601 {
2602     if (data->start != -1u) {
2603         int rc = data->fn(data->priv, data->start, end, data->prot);
2604         if (rc != 0) {
2605             return rc;
2606         }
2607     }
2608 
2609     data->start = (new_prot ? end : -1u);
2610     data->prot = new_prot;
2611 
2612     return 0;
2613 }
2614 
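/*
 * Recursively walk one level of the l1_map radix tree, reporting a region
 * boundary whenever the protection bits change from one page to the next.
 * A NULL entry stands for a run of unmapped pages (prot == 0).
 */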
2615 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2616                                  target_ulong base, int level, void **lp)
2617 {
2618     target_ulong pa;
2619     int i, rc;
2620 
2621     if (*lp == NULL) {
2622         return walk_memory_regions_end(data, base, 0);
2623     }
2624 
2625     if (level == 0) {
2626         PageDesc *pd = *lp;
2627 
2628         for (i = 0; i < V_L2_SIZE; ++i) {
2629             int prot = pd[i].flags;
2630 
2631             pa = base | (i << TARGET_PAGE_BITS);
2632             if (prot != data->prot) {
2633                 rc = walk_memory_regions_end(data, pa, prot);
2634                 if (rc != 0) {
2635                     return rc;
2636                 }
2637             }
2638         }
2639     } else {
2640         void **pp = *lp;
2641 
2642         for (i = 0; i < V_L2_SIZE; ++i) {
2643             pa = base | ((target_ulong)i <<
2644                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2645             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2646             if (rc != 0) {
2647                 return rc;
2648             }
2649         }
2650     }
2651 
2652     return 0;
2653 }
2654 
2655 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2656 {
2657     struct walk_memory_regions_data data;
2658     uintptr_t i, l1_sz = v_l1_size;
2659 
2660     data.fn = fn;
2661     data.priv = priv;
2662     data.start = -1u;
2663     data.prot = 0;
2664 
2665     for (i = 0; i < l1_sz; i++) {
2666         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2667         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2668         if (rc != 0) {
2669             return rc;
2670         }
2671     }
2672 
2673     return walk_memory_regions_end(&data, 0, 0);
2674 }
2675 
2676 static int dump_region(void *priv, target_ulong start,
2677     target_ulong end, unsigned long prot)
2678 {
2679     FILE *f = (FILE *)priv;
2680 
2681     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2682         " "TARGET_FMT_lx" %c%c%c\n",
2683         start, end, end - start,
2684         ((prot & PAGE_READ) ? 'r' : '-'),
2685         ((prot & PAGE_WRITE) ? 'w' : '-'),
2686         ((prot & PAGE_EXEC) ? 'x' : '-'));
2687 
2688     return 0;
2689 }
2690 
2691 /* dump memory mappings */
2692 void page_dump(FILE *f)
2693 {
2694     const int length = sizeof(target_ulong) * 2;
2695     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2696             length, "start", length, "end", length, "size", "prot");
2697     walk_memory_regions(f, dump_region);
2698 }
2699 
2700 int page_get_flags(target_ulong address)
2701 {
2702     PageDesc *p;
2703 
2704     p = page_find(address >> TARGET_PAGE_BITS);
2705     if (!p) {
2706         return 0;
2707     }
2708     return p->flags;
2709 }
2710 
2711 /* Modify the flags of a page and invalidate the code if necessary.
2712    The flag PAGE_WRITE_ORG is positioned automatically depending
2713    on PAGE_WRITE.  The mmap_lock should already be held.  */
2714 void page_set_flags(target_ulong start, target_ulong end, int flags)
2715 {
2716     target_ulong addr, len;
2717     bool reset_target_data;
2718 
2719     /* This function should never be called with addresses outside the
2720        guest address space.  If this assert fires, it probably indicates
2721        a missing call to h2g_valid.  */
2722     assert(end - 1 <= GUEST_ADDR_MAX);
2723     assert(start < end);
2724     /* Only set PAGE_ANON with new mappings. */
2725     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2726     assert_memory_lock();
2727 
2728     start = start & TARGET_PAGE_MASK;
2729     end = TARGET_PAGE_ALIGN(end);
2730 
2731     if (flags & PAGE_WRITE) {
2732         flags |= PAGE_WRITE_ORG;
2733     }
2734     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2735     flags &= ~PAGE_RESET;
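    /*
     * Page-private target_data is discarded when the page is being
     * unmapped (the new flags lack PAGE_VALID) or freshly (re)mapped
     * (PAGE_RESET); a plain protection change keeps both the data and
     * the PAGE_ANON bit intact.
     */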
2736 
2737     for (addr = start, len = end - start;
2738          len != 0;
2739          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2740         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2741 
2742         /* If the write protection bit is set, then we invalidate
2743            the code inside.  */
2744         if (!(p->flags & PAGE_WRITE) &&
2745             (flags & PAGE_WRITE) &&
2746             p->first_tb) {
2747             tb_invalidate_phys_page(addr, 0);
2748         }
2749         if (reset_target_data) {
2750             g_free(p->target_data);
2751             p->target_data = NULL;
2752             p->flags = flags;
2753         } else {
2754             /* Using mprotect on a page does not change MAP_ANON. */
2755             p->flags = (p->flags & PAGE_ANON) | flags;
2756         }
2757     }
2758 }
2759 
2760 void *page_get_target_data(target_ulong address)
2761 {
2762     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2763     return p ? p->target_data : NULL;
2764 }
2765 
2766 void *page_alloc_target_data(target_ulong address, size_t size)
2767 {
2768     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2769     void *ret = NULL;
2770 
2771     if (p->flags & PAGE_VALID) {
2772         ret = p->target_data;
2773         if (!ret) {
2774             p->target_data = ret = g_malloc0(size);
2775         }
2776     }
2777     return ret;
2778 }
2779 
2780 int page_check_range(target_ulong start, target_ulong len, int flags)
2781 {
2782     PageDesc *p;
2783     target_ulong end;
2784     target_ulong addr;
2785 
2786     /* This function should never be called with addresses outside the
2787        guest address space.  If this assert fires, it probably indicates
2788        a missing call to h2g_valid.  */
2789     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2790         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2791     }
2792 
2793     if (len == 0) {
2794         return 0;
2795     }
2796     if (start + len - 1 < start) {
2797         /* We've wrapped around.  */
2798         return -1;
2799     }
2800 
2801     /* must do this before we lose bits in the next step */
2802     end = TARGET_PAGE_ALIGN(start + len);
2803     start = start & TARGET_PAGE_MASK;
2804 
2805     for (addr = start, len = end - start;
2806          len != 0;
2807          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2808         p = page_find(addr >> TARGET_PAGE_BITS);
2809         if (!p) {
2810             return -1;
2811         }
2812         if (!(p->flags & PAGE_VALID)) {
2813             return -1;
2814         }
2815 
2816         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2817             return -1;
2818         }
2819         if (flags & PAGE_WRITE) {
2820             if (!(p->flags & PAGE_WRITE_ORG)) {
2821                 return -1;
2822             }
2823             /* unprotect the page if it was put read-only because it
2824                contains translated code */
2825             if (!(p->flags & PAGE_WRITE)) {
2826                 if (!page_unprotect(addr, 0)) {
2827                     return -1;
2828                 }
2829             }
2830         }
2831     }
2832     return 0;
2833 }
2834 
2835 /* called from signal handler: invalidate the code and unprotect the
2836  * page. Return 0 if the fault was not handled, 1 if it was handled,
2837  * and 2 if it was handled but the caller must cause the TB to be
2838  * immediately exited. (We can only return 2 if the 'pc' argument is
2839  * non-zero.)
2840  */
2841 int page_unprotect(target_ulong address, uintptr_t pc)
2842 {
2843     unsigned int prot;
2844     bool current_tb_invalidated;
2845     PageDesc *p;
2846     target_ulong host_start, host_end, addr;
2847 
2848     /* Technically this isn't safe inside a signal handler.  However we
2849        know this only ever happens in a synchronous SEGV handler, so in
2850        practice it seems to be ok.  */
2851     mmap_lock();
2852 
2853     p = page_find(address >> TARGET_PAGE_BITS);
2854     if (!p) {
2855         mmap_unlock();
2856         return 0;
2857     }
2858 
2859     /* if the page was really writable, then we change its
2860        protection back to writable */
2861     if (p->flags & PAGE_WRITE_ORG) {
2862         current_tb_invalidated = false;
2863         if (p->flags & PAGE_WRITE) {
2864             /* If the page is actually marked WRITE then assume this is because
2865              * this thread raced with another one which got here first and
2866              * set the page to PAGE_WRITE and did the TB invalidate for us.
2867              */
2868 #ifdef TARGET_HAS_PRECISE_SMC
2869             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2870             if (current_tb) {
2871                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2872             }
2873 #endif
2874         } else {
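            /*
             * mprotect() works at host-page granularity, which may span
             * several target pages.  Mark every target page within the host
             * page writable again, accumulate their protection bits, and
             * restore protection for the whole host page with a single
             * mprotect() call below.
             */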
2875             host_start = address & qemu_host_page_mask;
2876             host_end = host_start + qemu_host_page_size;
2877 
2878             prot = 0;
2879             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2880                 p = page_find(addr >> TARGET_PAGE_BITS);
2881                 p->flags |= PAGE_WRITE;
2882                 prot |= p->flags;
2883 
2884                 /* and since the content will be modified, we must invalidate
2885                    the corresponding translated code. */
2886                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2887 #ifdef CONFIG_USER_ONLY
2888                 if (DEBUG_TB_CHECK_GATE) {
2889                     tb_invalidate_check(addr);
2890                 }
2891 #endif
2892             }
2893             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2894                      prot & PAGE_BITS);
2895         }
2896         mmap_unlock();
2897         /* If current TB was invalidated return to main loop */
2898         return current_tb_invalidated ? 2 : 1;
2899     }
2900     mmap_unlock();
2901     return 0;
2902 }
2903 #endif /* CONFIG_USER_ONLY */
2904 
2905 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
2906 void tcg_flush_softmmu_tlb(CPUState *cs)
2907 {
2908 #ifdef CONFIG_SOFTMMU
2909     tlb_flush(cs);
2910 #endif
2911 }
2912