xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 4d87fcdd)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/units.h"
22 #include "qemu-common.h"
23 
24 #define NO_CPU_IO_DEFS
25 #include "trace.h"
26 #include "disas/disas.h"
27 #include "exec/exec-all.h"
28 #include "tcg/tcg.h"
29 #if defined(CONFIG_USER_ONLY)
30 #include "qemu.h"
31 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
32 #include <sys/param.h>
33 #if __FreeBSD_version >= 700104
34 #define HAVE_KINFO_GETVMMAP
35 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
36 #include <sys/proc.h>
37 #include <machine/profile.h>
38 #define _KERNEL
39 #include <sys/user.h>
40 #undef _KERNEL
41 #undef sigqueue
42 #include <libutil.h>
43 #endif
44 #endif
45 #else
46 #include "exec/ram_addr.h"
47 #endif
48 
49 #include "exec/cputlb.h"
50 #include "exec/tb-hash.h"
51 #include "exec/translate-all.h"
52 #include "qemu/bitmap.h"
53 #include "qemu/error-report.h"
54 #include "qemu/qemu-print.h"
55 #include "qemu/timer.h"
56 #include "qemu/main-loop.h"
57 #include "exec/log.h"
58 #include "sysemu/cpus.h"
59 #include "sysemu/cpu-timers.h"
60 #include "sysemu/tcg.h"
61 #include "qapi/error.h"
62 #include "hw/core/tcg-cpu-ops.h"
63 #include "internal.h"
64 
65 /* #define DEBUG_TB_INVALIDATE */
66 /* #define DEBUG_TB_FLUSH */
67 /* make various TB consistency checks */
68 /* #define DEBUG_TB_CHECK */
69 
70 #ifdef DEBUG_TB_INVALIDATE
71 #define DEBUG_TB_INVALIDATE_GATE 1
72 #else
73 #define DEBUG_TB_INVALIDATE_GATE 0
74 #endif
75 
76 #ifdef DEBUG_TB_FLUSH
77 #define DEBUG_TB_FLUSH_GATE 1
78 #else
79 #define DEBUG_TB_FLUSH_GATE 0
80 #endif
81 
82 #if !defined(CONFIG_USER_ONLY)
83 /* TB consistency checks only implemented for usermode emulation.  */
84 #undef DEBUG_TB_CHECK
85 #endif
86 
87 #ifdef DEBUG_TB_CHECK
88 #define DEBUG_TB_CHECK_GATE 1
89 #else
90 #define DEBUG_TB_CHECK_GATE 0
91 #endif
92 
93 /* Access to the various translation structures needs to be serialised via locks
94  * for consistency.
95  * In user-mode emulation, access to the memory-related structures is protected
96  * by mmap_lock.
97  * In !user-mode we use per-page locks.
98  */
99 #ifdef CONFIG_SOFTMMU
100 #define assert_memory_lock()
101 #else
102 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
103 #endif
104 
105 #define SMC_BITMAP_USE_THRESHOLD 10
106 
107 typedef struct PageDesc {
108     /* list of TBs intersecting this ram page */
109     uintptr_t first_tb;
110 #ifdef CONFIG_SOFTMMU
111     /* in order to optimize self-modifying code, we count the number of
112        code writes to a given page so we can switch to using a bitmap */
113     unsigned long *code_bitmap;
114     unsigned int code_write_count;
115 #else
116     unsigned long flags;
117     void *target_data;
118 #endif
119 #ifndef CONFIG_USER_ONLY
120     QemuSpin lock;
121 #endif
122 } PageDesc;
123 
124 /**
125  * struct page_entry - page descriptor entry
126  * @pd:     pointer to the &struct PageDesc of the page this entry represents
127  * @index:  page index of the page
128  * @locked: whether the page is locked
129  *
130  * This struct helps us keep track of the locked state of a page, without
131  * bloating &struct PageDesc.
132  *
133  * A page lock protects accesses to all fields of &struct PageDesc.
134  *
135  * See also: &struct page_collection.
136  */
137 struct page_entry {
138     PageDesc *pd;
139     tb_page_addr_t index;
140     bool locked;
141 };
142 
143 /**
144  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
145  * @tree:   Binary search tree (BST) of the pages, with key == page index
146  * @max:    Pointer to the page in @tree with the highest page index
147  *
148  * To avoid deadlock we lock pages in ascending order of page index.
149  * When operating on a set of pages, we need to keep track of them so that
150  * we can lock them in order and also unlock them later. For this we collect
151  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
152  * @tree implementation we use does not provide an O(1) operation to obtain the
153  * highest-ranked element, we use @max to keep track of the inserted page
154  * with the highest index. This is valuable because if a page is not in
155  * the tree and its index is higher than @max's, then we can lock it
156  * without breaking the locking order rule.
157  *
158  * Note on naming: 'struct page_set' would be shorter, but we already have a few
159  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
160  *
161  * See also: page_collection_lock().
162  */
163 struct page_collection {
164     GTree *tree;
165     struct page_entry *max;
166 };
167 
168 /* list iterators for lists of tagged pointers in TranslationBlock */
169 #define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
170     for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
171          tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
172              tb = (TranslationBlock *)((uintptr_t)tb & ~1))
173 
174 #define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
175     TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
176 
177 #define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
178     TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
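/*
 * In these lists the low bit of each link is a tag holding n, i.e. which of
 * the TB's two slots (page_next[0]/[1] or jmp_list_next[0]/[1]) the link
 * occupies.  The iterators above mask the tag off to recover the
 * TranslationBlock pointer and re-derive n from the next link on every step,
 * so a single word encodes both the successor and the slot to follow in it.
 */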
179 
180 /*
181  * In system mode we want L1_MAP to be based on ram offsets,
182  * while in user mode we want it to be based on virtual addresses.
183  *
184  * TODO: For user mode, see the caveat re host vs guest virtual
185  * address spaces near GUEST_ADDR_MAX.
186  */
187 #if !defined(CONFIG_USER_ONLY)
188 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
189 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
190 #else
191 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
192 #endif
193 #else
194 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
195 #endif
196 
197 /* Size of the L2 (and L3, etc) page tables.  */
198 #define V_L2_BITS 10
199 #define V_L2_SIZE (1 << V_L2_BITS)
200 
201 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
202 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
203                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
204                   * BITS_PER_BYTE);
205 
206 /*
207  * L1 Mapping properties
208  */
209 static int v_l1_size;
210 static int v_l1_shift;
211 static int v_l2_levels;
212 
213 /* The bottom level has pointers to PageDesc, and is indexed by
214  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
215  */
216 #define V_L1_MIN_BITS 4
217 #define V_L1_MAX_BITS (V_L2_BITS + 3)
218 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
219 
220 static void *l1_map[V_L1_MAX_SIZE];
221 
222 /* code generation context */
223 TCGContext tcg_init_ctx;
224 __thread TCGContext *tcg_ctx;
225 TBContext tb_ctx;
226 
227 static void page_table_config_init(void)
228 {
229     uint32_t v_l1_bits;
230 
231     assert(TARGET_PAGE_BITS);
232     /* The bits remaining after N lower levels of page tables.  */
233     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
234     if (v_l1_bits < V_L1_MIN_BITS) {
235         v_l1_bits += V_L2_BITS;
236     }
237 
238     v_l1_size = 1 << v_l1_bits;
239     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
240     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
241 
242     assert(v_l1_bits <= V_L1_MAX_BITS);
243     assert(v_l1_shift % V_L2_BITS == 0);
244     assert(v_l2_levels >= 0);
245 }
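/*
 * Worked example (illustrative, not a build-time guarantee): with
 * L1_MAP_ADDR_SPACE_BITS == 40 and TARGET_PAGE_BITS == 12 there are 28 bits
 * left to map.  28 % V_L2_BITS == 8, which is >= V_L1_MIN_BITS, so
 * v_l1_bits = 8, v_l1_size = 256, v_l1_shift = 20 and v_l2_levels = 1:
 * a 256-entry L1 table, one 1024-entry intermediate level of pointers,
 * then a 1024-entry leaf level of PageDescs (8 + 10 + 10 = 28 bits).
 */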
246 
247 static void cpu_gen_init(void)
248 {
249     tcg_context_init(&tcg_init_ctx);
250 }
251 
252 /* Encode VAL as a signed leb128 sequence at P.
253    Return P incremented past the encoded value.  */
254 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
255 {
256     int more, byte;
257 
258     do {
259         byte = val & 0x7f;
260         val >>= 7;
261         more = !((val == 0 && (byte & 0x40) == 0)
262                  || (val == -1 && (byte & 0x40) != 0));
263         if (more) {
264             byte |= 0x80;
265         }
266         *p++ = byte;
267     } while (more);
268 
269     return p;
270 }
271 
272 /* Decode a signed leb128 sequence at *PP; increment *PP past the
273    decoded value.  Return the decoded value.  */
274 static target_long decode_sleb128(const uint8_t **pp)
275 {
276     const uint8_t *p = *pp;
277     target_long val = 0;
278     int byte, shift = 0;
279 
280     do {
281         byte = *p++;
282         val |= (target_ulong)(byte & 0x7f) << shift;
283         shift += 7;
284     } while (byte & 0x80);
285     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
286         val |= -(target_ulong)1 << shift;
287     }
288 
289     *pp = p;
290     return val;
291 }
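/*
 * Illustrative byte sequences for the two helpers above:
 *   300 (0b1_0010_1100) encodes as { 0xac, 0x02 }: the low seven bits 0x2c
 *   with the continuation bit set, then 0x02 terminating the sequence.
 *    -3 encodes as the single byte { 0x7d }: bit 6 is set, so the decoder
 *   sign-extends and recovers -3.
 * decode_sleb128() applied to either sequence returns the original value.
 */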
292 
293 /* Encode the data collected about the instructions while compiling the TB.
294    Place the data at BLOCK, and return the number of bytes consumed.
295 
296    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
297    which come from the target's insn_start data, followed by a uintptr_t
298    which comes from the host pc of the end of the code implementing the insn.
299 
300    Each line of the table is encoded as sleb128 deltas from the previous
301    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
302    That is, the first column is seeded with the guest pc, the last column
303    with the host pc, and the middle columns with zeros.  */
304 
305 static int encode_search(TranslationBlock *tb, uint8_t *block)
306 {
307     uint8_t *highwater = tcg_ctx->code_gen_highwater;
308     uint8_t *p = block;
309     int i, j, n;
310 
311     for (i = 0, n = tb->icount; i < n; ++i) {
312         target_ulong prev;
313 
314         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
315             if (i == 0) {
316                 prev = (j == 0 ? tb->pc : 0);
317             } else {
318                 prev = tcg_ctx->gen_insn_data[i - 1][j];
319             }
320             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
321         }
322         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
323         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
324 
325         /* Test for (pending) buffer overflow.  The assumption is that any
326            one row beginning below the high water mark cannot overrun
327            the buffer completely.  Thus we can test for overflow after
328            encoding a row without having to check during encoding.  */
329         if (unlikely(p > highwater)) {
330             return -1;
331         }
332     }
333 
334     return p - block;
335 }
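/*
 * For example (illustrative values): a TB for two guest insns at pc 0x1000
 * and 0x1004, whose generated code ends at host offsets 0x40 and 0x7c from
 * tb->tc.ptr, is stored as two rows of deltas:
 *   row 0: { 0x1000 - 0x1000, 0 - 0, ..., 0x40 - 0    }
 *   row 1: { 0x1004 - 0x1000, ...,        0x7c - 0x40 }
 * i.e. each column is encoded relative to the previous row, with the first
 * row relative to the seed described above.
 */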
336 
337 /* The cpu state corresponding to 'searched_pc' is restored.
338  * When reset_icount is true, the current TB will be interrupted and
339  * icount should be recalculated.
340  */
341 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
342                                      uintptr_t searched_pc, bool reset_icount)
343 {
344     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
345     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
346     CPUArchState *env = cpu->env_ptr;
347     const uint8_t *p = tb->tc.ptr + tb->tc.size;
348     int i, j, num_insns = tb->icount;
349 #ifdef CONFIG_PROFILER
350     TCGProfile *prof = &tcg_ctx->prof;
351     int64_t ti = profile_getclock();
352 #endif
353 
354     searched_pc -= GETPC_ADJ;
355 
356     if (searched_pc < host_pc) {
357         return -1;
358     }
359 
360     /* Reconstruct the stored insn data while looking for the point at
361        which the end of the insn exceeds the searched_pc.  */
362     for (i = 0; i < num_insns; ++i) {
363         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
364             data[j] += decode_sleb128(&p);
365         }
366         host_pc += decode_sleb128(&p);
367         if (host_pc > searched_pc) {
368             goto found;
369         }
370     }
371     return -1;
372 
373  found:
374     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
375         assert(icount_enabled());
376         /* Reset the cycle counter to the start of the block
377            and shift it by the number of actually executed instructions */
378         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
379     }
380     restore_state_to_opc(env, tb, data);
381 
382 #ifdef CONFIG_PROFILER
383     qatomic_set(&prof->restore_time,
384                 prof->restore_time + profile_getclock() - ti);
385     qatomic_set(&prof->restore_count, prof->restore_count + 1);
386 #endif
387     return 0;
388 }
389 
390 void tb_destroy(TranslationBlock *tb)
391 {
392     qemu_spin_destroy(&tb->jmp_lock);
393 }
394 
395 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
396 {
397     /*
398      * The host_pc has to be in the rx region of the code buffer.
399      * If it is not, we will not be able to resolve it here.
400      * The two cases where host_pc will not be correct are:
401      *
402      *  - fault during translation (instruction fetch)
403      *  - fault from helper (not using GETPC() macro)
404      *
405      * Either way we need to return early as we can't resolve it here.
406      */
407     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
408         TranslationBlock *tb = tcg_tb_lookup(host_pc);
409         if (tb) {
410             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
411             return true;
412         }
413     }
414     return false;
415 }
416 
417 static void page_init(void)
418 {
419     page_size_init();
420     page_table_config_init();
421 
422 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
423     {
424 #ifdef HAVE_KINFO_GETVMMAP
425         struct kinfo_vmentry *freep;
426         int i, cnt;
427 
428         freep = kinfo_getvmmap(getpid(), &cnt);
429         if (freep) {
430             mmap_lock();
431             for (i = 0; i < cnt; i++) {
432                 unsigned long startaddr, endaddr;
433 
434                 startaddr = freep[i].kve_start;
435                 endaddr = freep[i].kve_end;
436                 if (h2g_valid(startaddr)) {
437                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
438 
439                     if (h2g_valid(endaddr)) {
440                         endaddr = h2g(endaddr);
441                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
442                     } else {
443 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
444                         endaddr = ~0ul;
445                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
446 #endif
447                     }
448                 }
449             }
450             free(freep);
451             mmap_unlock();
452         }
453 #else
454         FILE *f;
455 
456         last_brk = (unsigned long)sbrk(0);
457 
458         f = fopen("/compat/linux/proc/self/maps", "r");
459         if (f) {
460             mmap_lock();
461 
462             do {
463                 unsigned long startaddr, endaddr;
464                 int n;
465 
466                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
467 
468                 if (n == 2 && h2g_valid(startaddr)) {
469                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
470 
471                     if (h2g_valid(endaddr)) {
472                         endaddr = h2g(endaddr);
473                     } else {
474                         endaddr = ~0ul;
475                     }
476                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
477                 }
478             } while (!feof(f));
479 
480             fclose(f);
481             mmap_unlock();
482         }
483 #endif
484     }
485 #endif
486 }
487 
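/*
 * Look up (and, with alloc != 0, create) the PageDesc for a page index.
 * The l1_map radix tree is walked from the statically-sized L1 table down
 * through v_l2_levels intermediate levels to a leaf array of PageDescs.
 * Missing levels are populated lock-free: a candidate table is allocated and
 * installed with qatomic_cmpxchg(); if another thread won the race, the
 * candidate is freed and the existing table is used instead.
 */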
488 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
489 {
490     PageDesc *pd;
491     void **lp;
492     int i;
493 
494     /* Level 1.  Always allocated.  */
495     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
496 
497     /* Level 2..N-1.  */
498     for (i = v_l2_levels; i > 0; i--) {
499         void **p = qatomic_rcu_read(lp);
500 
501         if (p == NULL) {
502             void *existing;
503 
504             if (!alloc) {
505                 return NULL;
506             }
507             p = g_new0(void *, V_L2_SIZE);
508             existing = qatomic_cmpxchg(lp, NULL, p);
509             if (unlikely(existing)) {
510                 g_free(p);
511                 p = existing;
512             }
513         }
514 
515         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
516     }
517 
518     pd = qatomic_rcu_read(lp);
519     if (pd == NULL) {
520         void *existing;
521 
522         if (!alloc) {
523             return NULL;
524         }
525         pd = g_new0(PageDesc, V_L2_SIZE);
526 #ifndef CONFIG_USER_ONLY
527         {
528             int i;
529 
530             for (i = 0; i < V_L2_SIZE; i++) {
531                 qemu_spin_init(&pd[i].lock);
532             }
533         }
534 #endif
535         existing = qatomic_cmpxchg(lp, NULL, pd);
536         if (unlikely(existing)) {
537 #ifndef CONFIG_USER_ONLY
538             {
539                 int i;
540 
541                 for (i = 0; i < V_L2_SIZE; i++) {
542                     qemu_spin_destroy(&pd[i].lock);
543                 }
544             }
545 #endif
546             g_free(pd);
547             pd = existing;
548         }
549     }
550 
551     return pd + (index & (V_L2_SIZE - 1));
552 }
553 
554 static inline PageDesc *page_find(tb_page_addr_t index)
555 {
556     return page_find_alloc(index, 0);
557 }
558 
559 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
560                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
561 
562 /* In user-mode page locks aren't used; mmap_lock is enough */
563 #ifdef CONFIG_USER_ONLY
564 
565 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
566 
567 static inline void page_lock(PageDesc *pd)
568 { }
569 
570 static inline void page_unlock(PageDesc *pd)
571 { }
572 
573 static inline void page_lock_tb(const TranslationBlock *tb)
574 { }
575 
576 static inline void page_unlock_tb(const TranslationBlock *tb)
577 { }
578 
579 struct page_collection *
580 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
581 {
582     return NULL;
583 }
584 
585 void page_collection_unlock(struct page_collection *set)
586 { }
587 #else /* !CONFIG_USER_ONLY */
588 
589 #ifdef CONFIG_DEBUG_TCG
590 
591 static __thread GHashTable *ht_pages_locked_debug;
592 
593 static void ht_pages_locked_debug_init(void)
594 {
595     if (ht_pages_locked_debug) {
596         return;
597     }
598     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
599 }
600 
601 static bool page_is_locked(const PageDesc *pd)
602 {
603     PageDesc *found;
604 
605     ht_pages_locked_debug_init();
606     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
607     return !!found;
608 }
609 
610 static void page_lock__debug(PageDesc *pd)
611 {
612     ht_pages_locked_debug_init();
613     g_assert(!page_is_locked(pd));
614     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
615 }
616 
617 static void page_unlock__debug(const PageDesc *pd)
618 {
619     bool removed;
620 
621     ht_pages_locked_debug_init();
622     g_assert(page_is_locked(pd));
623     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
624     g_assert(removed);
625 }
626 
627 static void
628 do_assert_page_locked(const PageDesc *pd, const char *file, int line)
629 {
630     if (unlikely(!page_is_locked(pd))) {
631         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
632                      pd, file, line);
633         abort();
634     }
635 }
636 
637 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
638 
639 void assert_no_pages_locked(void)
640 {
641     ht_pages_locked_debug_init();
642     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
643 }
644 
645 #else /* !CONFIG_DEBUG_TCG */
646 
647 #define assert_page_locked(pd)
648 
649 static inline void page_lock__debug(const PageDesc *pd)
650 {
651 }
652 
653 static inline void page_unlock__debug(const PageDesc *pd)
654 {
655 }
656 
657 #endif /* CONFIG_DEBUG_TCG */
658 
659 static inline void page_lock(PageDesc *pd)
660 {
661     page_lock__debug(pd);
662     qemu_spin_lock(&pd->lock);
663 }
664 
665 static inline void page_unlock(PageDesc *pd)
666 {
667     qemu_spin_unlock(&pd->lock);
668     page_unlock__debug(pd);
669 }
670 
671 /* lock the page(s) of a TB in the correct acquisition order */
672 static inline void page_lock_tb(const TranslationBlock *tb)
673 {
674     page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
675 }
676 
677 static inline void page_unlock_tb(const TranslationBlock *tb)
678 {
679     PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
680 
681     page_unlock(p1);
682     if (unlikely(tb->page_addr[1] != -1)) {
683         PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
684 
685         if (p2 != p1) {
686             page_unlock(p2);
687         }
688     }
689 }
690 
691 static inline struct page_entry *
692 page_entry_new(PageDesc *pd, tb_page_addr_t index)
693 {
694     struct page_entry *pe = g_malloc(sizeof(*pe));
695 
696     pe->index = index;
697     pe->pd = pd;
698     pe->locked = false;
699     return pe;
700 }
701 
702 static void page_entry_destroy(gpointer p)
703 {
704     struct page_entry *pe = p;
705 
706     g_assert(pe->locked);
707     page_unlock(pe->pd);
708     g_free(pe);
709 }
710 
711 /* returns false on success */
712 static bool page_entry_trylock(struct page_entry *pe)
713 {
714     bool busy;
715 
716     busy = qemu_spin_trylock(&pe->pd->lock);
717     if (!busy) {
718         g_assert(!pe->locked);
719         pe->locked = true;
720         page_lock__debug(pe->pd);
721     }
722     return busy;
723 }
724 
725 static void do_page_entry_lock(struct page_entry *pe)
726 {
727     page_lock(pe->pd);
728     g_assert(!pe->locked);
729     pe->locked = true;
730 }
731 
732 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
733 {
734     struct page_entry *pe = value;
735 
736     do_page_entry_lock(pe);
737     return FALSE;
738 }
739 
740 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
741 {
742     struct page_entry *pe = value;
743 
744     if (pe->locked) {
745         pe->locked = false;
746         page_unlock(pe->pd);
747     }
748     return FALSE;
749 }
750 
751 /*
752  * Trylock a page, and if successful, add the page to a collection.
753  * Returns true ("busy") if the page could not be locked; false otherwise.
754  */
755 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
756 {
757     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
758     struct page_entry *pe;
759     PageDesc *pd;
760 
761     pe = g_tree_lookup(set->tree, &index);
762     if (pe) {
763         return false;
764     }
765 
766     pd = page_find(index);
767     if (pd == NULL) {
768         return false;
769     }
770 
771     pe = page_entry_new(pd, index);
772     g_tree_insert(set->tree, &pe->index, pe);
773 
774     /*
775      * If this is either (1) the first insertion or (2) a page whose index
776      * is higher than any other so far, just lock the page and move on.
777      */
778     if (set->max == NULL || pe->index > set->max->index) {
779         set->max = pe;
780         do_page_entry_lock(pe);
781         return false;
782     }
783     /*
784      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
785      * locks in order.
786      */
787     return page_entry_trylock(pe);
788 }
789 
790 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
791 {
792     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
793     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
794 
795     if (a == b) {
796         return 0;
797     } else if (a < b) {
798         return -1;
799     }
800     return 1;
801 }
802 
803 /*
804  * Lock a range of pages ([@start,@end[) as well as the pages of all
805  * intersecting TBs.
806  * Locking order: acquire locks in ascending order of page index.
807  */
808 struct page_collection *
809 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
810 {
811     struct page_collection *set = g_malloc(sizeof(*set));
812     tb_page_addr_t index;
813     PageDesc *pd;
814 
815     start >>= TARGET_PAGE_BITS;
816     end   >>= TARGET_PAGE_BITS;
817     g_assert(start <= end);
818 
819     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
820                                 page_entry_destroy);
821     set->max = NULL;
822     assert_no_pages_locked();
823 
824  retry:
825     g_tree_foreach(set->tree, page_entry_lock, NULL);
826 
827     for (index = start; index <= end; index++) {
828         TranslationBlock *tb;
829         int n;
830 
831         pd = page_find(index);
832         if (pd == NULL) {
833             continue;
834         }
835         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
836             g_tree_foreach(set->tree, page_entry_unlock, NULL);
837             goto retry;
838         }
839         assert_page_locked(pd);
840         PAGE_FOR_EACH_TB(pd, tb, n) {
841             if (page_trylock_add(set, tb->page_addr[0]) ||
842                 (tb->page_addr[1] != -1 &&
843                  page_trylock_add(set, tb->page_addr[1]))) {
844                 /* drop all locks, and reacquire in order */
845                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
846                 goto retry;
847             }
848         }
849     }
850     return set;
851 }
852 
853 void page_collection_unlock(struct page_collection *set)
854 {
855     /* entries are unlocked and freed via page_entry_destroy */
856     g_tree_destroy(set->tree);
857     g_free(set);
858 }
859 
860 #endif /* !CONFIG_USER_ONLY */
861 
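/*
 * Lock the PageDescs covering physical addresses @phys1 and (if not -1)
 * @phys2, looking them up (and optionally allocating them) first.  To respect
 * the locking-order rule, the lower-indexed page is always locked before the
 * higher-indexed one; a single lock is taken when both addresses fall in the
 * same page.
 */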
862 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
863                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
864 {
865     PageDesc *p1, *p2;
866     tb_page_addr_t page1;
867     tb_page_addr_t page2;
868 
869     assert_memory_lock();
870     g_assert(phys1 != -1);
871 
872     page1 = phys1 >> TARGET_PAGE_BITS;
873     page2 = phys2 >> TARGET_PAGE_BITS;
874 
875     p1 = page_find_alloc(page1, alloc);
876     if (ret_p1) {
877         *ret_p1 = p1;
878     }
879     if (likely(phys2 == -1)) {
880         page_lock(p1);
881         return;
882     } else if (page1 == page2) {
883         page_lock(p1);
884         if (ret_p2) {
885             *ret_p2 = p1;
886         }
887         return;
888     }
889     p2 = page_find_alloc(page2, alloc);
890     if (ret_p2) {
891         *ret_p2 = p2;
892     }
893     if (page1 < page2) {
894         page_lock(p1);
895         page_lock(p2);
896     } else {
897         page_lock(p2);
898         page_lock(p1);
899     }
900 }
901 
902 /* Minimum size of the code gen buffer.  This number is arbitrarily chosen,
903    but not so small that we can't have a fair number of TBs live.  */
904 #define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
905 
906 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
907    indicated, this is constrained by the range of direct branches on the
908    host cpu, as used by the TCG implementation of goto_tb.  */
909 #if defined(__x86_64__)
910 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
911 #elif defined(__sparc__)
912 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
913 #elif defined(__powerpc64__)
914 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
915 #elif defined(__powerpc__)
916 # define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
917 #elif defined(__aarch64__)
918 # define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
919 #elif defined(__s390x__)
920   /* We have a +- 4GB range on the branches; leave some slop.  */
921 # define MAX_CODE_GEN_BUFFER_SIZE  (3 * GiB)
922 #elif defined(__mips__)
923   /* We have a 256MB branch region, but leave room to make sure the
924      main executable is also within that region.  */
925 # define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
926 #else
927 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
928 #endif
929 
930 #if TCG_TARGET_REG_BITS == 32
931 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
932 #ifdef CONFIG_USER_ONLY
933 /*
934  * For user mode on smaller 32 bit systems we may run into trouble
935  * allocating big chunks of data in the right place. On these systems
936  * we utilise a static code generation buffer directly in the binary.
937  */
938 #define USE_STATIC_CODE_GEN_BUFFER
939 #endif
940 #else /* TCG_TARGET_REG_BITS == 64 */
941 #ifdef CONFIG_USER_ONLY
942 /*
943  * As user-mode emulation typically means running multiple instances
944  * of the translator, don't go too nuts with our default code gen
945  * buffer lest we make things too hard for the OS.
946  */
947 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
948 #else
949 /*
950  * We expect most system emulation to run one or two guests per host.
951  * Users running large scale system emulation may want to tweak their
952  * runtime setup via the tb-size control on the command line.
953  */
954 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
955 #endif
956 #endif
957 
958 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
959   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
960    ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
961 
962 static size_t size_code_gen_buffer(size_t tb_size)
963 {
964     /* Size the buffer.  */
965     if (tb_size == 0) {
966         size_t phys_mem = qemu_get_host_physmem();
967         if (phys_mem == 0) {
968             tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
969         } else {
970             tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
971         }
972     }
973     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
974         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
975     }
976     if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
977         tb_size = MAX_CODE_GEN_BUFFER_SIZE;
978     }
979     return tb_size;
980 }
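/*
 * For instance (illustrative): on an x86_64 system-mode build with 16 GiB of
 * host RAM and no explicit tb-size, the result is
 * MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, 16 GiB / 8) = MIN(1 GiB, 2 GiB) = 1 GiB,
 * which already lies within the MIN/MAX_CODE_GEN_BUFFER_SIZE clamp above.
 */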
981 
982 #ifdef __mips__
983 /* In order to use J and JAL within the code_gen_buffer, we require
984    that the buffer not cross a 256MB boundary.  */
985 static inline bool cross_256mb(void *addr, size_t size)
986 {
987     return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
988 }
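/*
 * Example (illustrative): for addr = 0x0ff00000 and size = 0x00200000,
 * addr ^ (addr + size) == 0x0ff00000 ^ 0x10100000 == 0x1fe00000; masking with
 * ~0x0fffffff leaves 0x10000000, i.e. non-zero, so the range crosses a 256MB
 * boundary.  Any range contained within one aligned 256MB region XORs to zero
 * in those upper bits instead.
 */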
989 
990 /* We weren't able to allocate a buffer without crossing that boundary,
991    so make do with the larger portion of the buffer that doesn't cross.
992    Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
993 static inline void *split_cross_256mb(void *buf1, size_t size1)
994 {
995     void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
996     size_t size2 = buf1 + size1 - buf2;
997 
998     size1 = buf2 - buf1;
999     if (size1 < size2) {
1000         size1 = size2;
1001         buf1 = buf2;
1002     }
1003 
1004     tcg_ctx->code_gen_buffer_size = size1;
1005     return buf1;
1006 }
1007 #endif
1008 
1009 #ifdef USE_STATIC_CODE_GEN_BUFFER
1010 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
1011     __attribute__((aligned(CODE_GEN_ALIGN)));
1012 
1013 static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
1014 {
1015     void *buf, *end;
1016     size_t size;
1017 
1018     if (splitwx > 0) {
1019         error_setg(errp, "jit split-wx not supported");
1020         return false;
1021     }
1022 
1023     /* page-align the beginning and end of the buffer */
1024     buf = static_code_gen_buffer;
1025     end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
1026     buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
1027     end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
1028 
1029     size = end - buf;
1030 
1031     /* Honor a command-line option limiting the size of the buffer.  */
1032     if (size > tb_size) {
1033         size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
1034     }
1035     tcg_ctx->code_gen_buffer_size = size;
1036 
1037 #ifdef __mips__
1038     if (cross_256mb(buf, size)) {
1039         buf = split_cross_256mb(buf, size);
1040         size = tcg_ctx->code_gen_buffer_size;
1041     }
1042 #endif
1043 
1044     if (qemu_mprotect_rwx(buf, size)) {
1045         error_setg_errno(errp, errno, "mprotect of jit buffer");
1046         return false;
1047     }
1048     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1049 
1050     tcg_ctx->code_gen_buffer = buf;
1051     return true;
1052 }
1053 #elif defined(_WIN32)
1054 static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
1055 {
1056     void *buf;
1057 
1058     if (splitwx > 0) {
1059         error_setg(errp, "jit split-wx not supported");
1060         return false;
1061     }
1062 
1063     buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
1064                              PAGE_EXECUTE_READWRITE);
1065     if (buf == NULL) {
1066         error_setg_win32(errp, GetLastError(),
1067                          "allocate %zu bytes for jit buffer", size);
1068         return false;
1069     }
1070 
1071     tcg_ctx->code_gen_buffer = buf;
1072     tcg_ctx->code_gen_buffer_size = size;
1073     return true;
1074 }
1075 #else
1076 static bool alloc_code_gen_buffer_anon(size_t size, int prot,
1077                                        int flags, Error **errp)
1078 {
1079     void *buf;
1080 
1081     buf = mmap(NULL, size, prot, flags, -1, 0);
1082     if (buf == MAP_FAILED) {
1083         error_setg_errno(errp, errno,
1084                          "allocate %zu bytes for jit buffer", size);
1085         return false;
1086     }
1087     tcg_ctx->code_gen_buffer_size = size;
1088 
1089 #ifdef __mips__
1090     if (cross_256mb(buf, size)) {
1091         /*
1092          * Try again, with the original still mapped, to avoid re-acquiring
1093          * the same 256mb crossing.
1094          */
1095         size_t size2;
1096         void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
1097         switch ((int)(buf2 != MAP_FAILED)) {
1098         case 1:
1099             if (!cross_256mb(buf2, size)) {
1100                 /* Success!  Use the new buffer.  */
1101                 munmap(buf, size);
1102                 break;
1103             }
1104             /* Failure.  Work with what we had.  */
1105             munmap(buf2, size);
1106             /* fallthru */
1107         default:
1108             /* Split the original buffer.  Free the smaller half.  */
1109             buf2 = split_cross_256mb(buf, size);
1110             size2 = tcg_ctx->code_gen_buffer_size;
1111             if (buf == buf2) {
1112                 munmap(buf + size2, size - size2);
1113             } else {
1114                 munmap(buf, size - size2);
1115             }
1116             size = size2;
1117             break;
1118         }
1119         buf = buf2;
1120     }
1121 #endif
1122 
1123     /* Request large pages for the buffer.  */
1124     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1125 
1126     tcg_ctx->code_gen_buffer = buf;
1127     return true;
1128 }
1129 
1130 #ifndef CONFIG_TCG_INTERPRETER
1131 #ifdef CONFIG_POSIX
1132 #include "qemu/memfd.h"
1133 
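/*
 * Split-wx allocation via memfd: the same physical pages are mapped twice,
 * once read-write (returned in tcg_ctx->code_gen_buffer, where the JIT
 * writes) and once read-execute (where the generated code runs).
 * tcg_splitwx_diff records the constant offset from the RW view to the RX
 * view so addresses can be converted between the two.
 */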
1134 static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
1135 {
1136     void *buf_rw = NULL, *buf_rx = MAP_FAILED;
1137     int fd = -1;
1138 
1139 #ifdef __mips__
1140     /* Find space for the RX mapping, vs the 256MiB regions. */
1141     if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
1142                                     MAP_PRIVATE | MAP_ANONYMOUS |
1143                                     MAP_NORESERVE, errp)) {
1144         return false;
1145     }
1146     /* The size of the mapping may have been adjusted. */
1147     size = tcg_ctx->code_gen_buffer_size;
1148     buf_rx = tcg_ctx->code_gen_buffer;
1149 #endif
1150 
1151     buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
1152     if (buf_rw == NULL) {
1153         goto fail;
1154     }
1155 
1156 #ifdef __mips__
1157     void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
1158                      MAP_SHARED | MAP_FIXED, fd, 0);
1159     if (tmp != buf_rx) {
1160         goto fail_rx;
1161     }
1162 #else
1163     buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
1164     if (buf_rx == MAP_FAILED) {
1165         goto fail_rx;
1166     }
1167 #endif
1168 
1169     close(fd);
1170     tcg_ctx->code_gen_buffer = buf_rw;
1171     tcg_ctx->code_gen_buffer_size = size;
1172     tcg_splitwx_diff = buf_rx - buf_rw;
1173 
1174     /* Request large pages for the buffer and the splitwx.  */
1175     qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
1176     qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
1177     return true;
1178 
1179  fail_rx:
1180     error_setg_errno(errp, errno, "failed to map shared memory for execute");
1181  fail:
1182     if (buf_rx != MAP_FAILED) {
1183         munmap(buf_rx, size);
1184     }
1185     if (buf_rw) {
1186         munmap(buf_rw, size);
1187     }
1188     if (fd >= 0) {
1189         close(fd);
1190     }
1191     return false;
1192 }
1193 #endif /* CONFIG_POSIX */
1194 
1195 #ifdef CONFIG_DARWIN
1196 #include <mach/mach.h>
1197 
1198 extern kern_return_t mach_vm_remap(vm_map_t target_task,
1199                                    mach_vm_address_t *target_address,
1200                                    mach_vm_size_t size,
1201                                    mach_vm_offset_t mask,
1202                                    int flags,
1203                                    vm_map_t src_task,
1204                                    mach_vm_address_t src_address,
1205                                    boolean_t copy,
1206                                    vm_prot_t *cur_protection,
1207                                    vm_prot_t *max_protection,
1208                                    vm_inherit_t inheritance);
1209 
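/*
 * Split-wx allocation on Darwin: the buffer is first mapped read-write as
 * anonymous memory, then mach_vm_remap() creates a second mapping of the same
 * pages at a kernel-chosen address, which is switched to read-execute with
 * mprotect().  As above, tcg_splitwx_diff holds the RW-to-RX offset.
 */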
1210 static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
1211 {
1212     kern_return_t ret;
1213     mach_vm_address_t buf_rw, buf_rx;
1214     vm_prot_t cur_prot, max_prot;
1215 
1216     /* Map the read-write portion via normal anon memory. */
1217     if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
1218                                     MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
1219         return false;
1220     }
1221 
1222     buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
1223     buf_rx = 0;
1224     ret = mach_vm_remap(mach_task_self(),
1225                         &buf_rx,
1226                         size,
1227                         0,
1228                         VM_FLAGS_ANYWHERE,
1229                         mach_task_self(),
1230                         buf_rw,
1231                         false,
1232                         &cur_prot,
1233                         &max_prot,
1234                         VM_INHERIT_NONE);
1235     if (ret != KERN_SUCCESS) {
1236         /* TODO: Convert "ret" to a human readable error message. */
1237         error_setg(errp, "vm_remap for jit splitwx failed");
1238         munmap((void *)buf_rw, size);
1239         return false;
1240     }
1241 
1242     if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
1243         error_setg_errno(errp, errno, "mprotect for jit splitwx");
1244         munmap((void *)buf_rx, size);
1245         munmap((void *)buf_rw, size);
1246         return false;
1247     }
1248 
1249     tcg_splitwx_diff = buf_rx - buf_rw;
1250     return true;
1251 }
1252 #endif /* CONFIG_DARWIN */
1253 #endif /* CONFIG_TCG_INTERPRETER */
1254 
1255 static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
1256 {
1257 #ifndef CONFIG_TCG_INTERPRETER
1258 # ifdef CONFIG_DARWIN
1259     return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
1260 # endif
1261 # ifdef CONFIG_POSIX
1262     return alloc_code_gen_buffer_splitwx_memfd(size, errp);
1263 # endif
1264 #endif
1265     error_setg(errp, "jit split-wx not supported");
1266     return false;
1267 }
1268 
1269 static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
1270 {
1271     ERRP_GUARD();
1272     int prot, flags;
1273 
1274     if (splitwx) {
1275         if (alloc_code_gen_buffer_splitwx(size, errp)) {
1276             return true;
1277         }
1278         /*
1279          * If splitwx force-on (1), fail;
1280          * if splitwx default-on (-1), fall through to splitwx off.
1281          */
1282         if (splitwx > 0) {
1283             return false;
1284         }
1285         error_free_or_abort(errp);
1286     }
1287 
1288     prot = PROT_READ | PROT_WRITE | PROT_EXEC;
1289     flags = MAP_PRIVATE | MAP_ANONYMOUS;
1290 #ifdef CONFIG_TCG_INTERPRETER
1291     /* The tcg interpreter does not need execute permission. */
1292     prot = PROT_READ | PROT_WRITE;
1293 #elif defined(CONFIG_DARWIN)
1294     /* Applicable to both iOS and macOS (Apple Silicon). */
1295     if (!splitwx) {
1296         flags |= MAP_JIT;
1297     }
1298 #endif
1299 
1300     return alloc_code_gen_buffer_anon(size, prot, flags, errp);
1301 }
1302 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
1303 
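/* QHT comparison callback: two TBs are considered equal when every field that
 * contributes to TB identity matches -- guest pc, cs_base, flags, cflags
 * (ignoring CF_INVALID), trace state, and both physical page addresses. */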
1304 static bool tb_cmp(const void *ap, const void *bp)
1305 {
1306     const TranslationBlock *a = ap;
1307     const TranslationBlock *b = bp;
1308 
1309     return a->pc == b->pc &&
1310         a->cs_base == b->cs_base &&
1311         a->flags == b->flags &&
1312         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
1313         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
1314         a->page_addr[0] == b->page_addr[0] &&
1315         a->page_addr[1] == b->page_addr[1];
1316 }
1317 
1318 static void tb_htable_init(void)
1319 {
1320     unsigned int mode = QHT_MODE_AUTO_RESIZE;
1321 
1322     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
1323 }
1324 
1325 /* Must be called before using the QEMU cpus. 'tb_size' is the size
1326    (in bytes) allocated to the translation buffer. Zero means default
1327    size. */
1328 void tcg_exec_init(unsigned long tb_size, int splitwx)
1329 {
1330     bool ok;
1331 
1332     tcg_allowed = true;
1333     cpu_gen_init();
1334     page_init();
1335     tb_htable_init();
1336 
1337     ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
1338                                splitwx, &error_fatal);
1339     assert(ok);
1340 
1341 #if defined(CONFIG_SOFTMMU)
1342     /* There's no guest base to take into account, so go ahead and
1343        initialize the prologue now.  */
1344     tcg_prologue_init(tcg_ctx);
1345 #endif
1346 }
1347 
1348 /* call with @p->lock held */
1349 static inline void invalidate_page_bitmap(PageDesc *p)
1350 {
1351     assert_page_locked(p);
1352 #ifdef CONFIG_SOFTMMU
1353     g_free(p->code_bitmap);
1354     p->code_bitmap = NULL;
1355     p->code_write_count = 0;
1356 #endif
1357 }
1358 
1359 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
1360 static void page_flush_tb_1(int level, void **lp)
1361 {
1362     int i;
1363 
1364     if (*lp == NULL) {
1365         return;
1366     }
1367     if (level == 0) {
1368         PageDesc *pd = *lp;
1369 
1370         for (i = 0; i < V_L2_SIZE; ++i) {
1371             page_lock(&pd[i]);
1372             pd[i].first_tb = (uintptr_t)NULL;
1373             invalidate_page_bitmap(pd + i);
1374             page_unlock(&pd[i]);
1375         }
1376     } else {
1377         void **pp = *lp;
1378 
1379         for (i = 0; i < V_L2_SIZE; ++i) {
1380             page_flush_tb_1(level - 1, pp + i);
1381         }
1382     }
1383 }
1384 
1385 static void page_flush_tb(void)
1386 {
1387     int i, l1_sz = v_l1_size;
1388 
1389     for (i = 0; i < l1_sz; i++) {
1390         page_flush_tb_1(v_l2_levels, l1_map + i);
1391     }
1392 }
1393 
1394 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
1395 {
1396     const TranslationBlock *tb = value;
1397     size_t *size = data;
1398 
1399     *size += tb->tc.size;
1400     return false;
1401 }
1402 
1403 /* flush all the translation blocks */
1404 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
1405 {
1406     bool did_flush = false;
1407 
1408     mmap_lock();
1409     /* If it has already been done at the request of another CPU,
1410      * just retry.
1411      */
1412     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
1413         goto done;
1414     }
1415     did_flush = true;
1416 
1417     if (DEBUG_TB_FLUSH_GATE) {
1418         size_t nb_tbs = tcg_nb_tbs();
1419         size_t host_size = 0;
1420 
1421         tcg_tb_foreach(tb_host_size_iter, &host_size);
1422         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
1423                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
1424     }
1425 
1426     CPU_FOREACH(cpu) {
1427         cpu_tb_jmp_cache_clear(cpu);
1428     }
1429 
1430     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
1431     page_flush_tb();
1432 
1433     tcg_region_reset_all();
1434     /* XXX: flush processor icache at this point if cache flush is
1435        expensive */
1436     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1437 
1438 done:
1439     mmap_unlock();
1440     if (did_flush) {
1441         qemu_plugin_flush_cb();
1442     }
1443 }
1444 
1445 void tb_flush(CPUState *cpu)
1446 {
1447     if (tcg_enabled()) {
1448         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1449 
1450         if (cpu_in_exclusive_context(cpu)) {
1451             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1452         } else {
1453             async_safe_run_on_cpu(cpu, do_tb_flush,
1454                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
1455         }
1456     }
1457 }
1458 
1459 /*
1460  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1461  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1462  * and let the optimizer get rid of them by wrapping their user-only callers
1463  * with if (DEBUG_TB_CHECK_GATE).
1464  */
1465 #ifdef CONFIG_USER_ONLY
1466 
1467 static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1468 {
1469     TranslationBlock *tb = p;
1470     target_ulong addr = *(target_ulong *)userp;
1471 
1472     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1473         printf("ERROR invalidate: address=" TARGET_FMT_lx
1474                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1475     }
1476 }
1477 
1478 /* verify that all the pages have correct rights for code
1479  *
1480  * Called with mmap_lock held.
1481  */
1482 static void tb_invalidate_check(target_ulong address)
1483 {
1484     address &= TARGET_PAGE_MASK;
1485     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1486 }
1487 
1488 static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1489 {
1490     TranslationBlock *tb = p;
1491     int flags1, flags2;
1492 
1493     flags1 = page_get_flags(tb->pc);
1494     flags2 = page_get_flags(tb->pc + tb->size - 1);
1495     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1496         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1497                (long)tb->pc, tb->size, flags1, flags2);
1498     }
1499 }
1500 
1501 /* verify that all the pages have correct rights for code */
1502 static void tb_page_check(void)
1503 {
1504     qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1505 }
1506 
1507 #endif /* CONFIG_USER_ONLY */
1508 
1509 /*
1510  * user-mode: call with mmap_lock held
1511  * !user-mode: call with @pd->lock held
1512  */
1513 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1514 {
1515     TranslationBlock *tb1;
1516     uintptr_t *pprev;
1517     unsigned int n1;
1518 
1519     assert_page_locked(pd);
1520     pprev = &pd->first_tb;
1521     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1522         if (tb1 == tb) {
1523             *pprev = tb1->page_next[n1];
1524             return;
1525         }
1526         pprev = &tb1->page_next[n1];
1527     }
1528     g_assert_not_reached();
1529 }
1530 
1531 /* remove @orig from its @n_orig-th jump list */
1532 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1533 {
1534     uintptr_t ptr, ptr_locked;
1535     TranslationBlock *dest;
1536     TranslationBlock *tb;
1537     uintptr_t *pprev;
1538     int n;
1539 
1540     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1541     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1542     dest = (TranslationBlock *)(ptr & ~1);
1543     if (dest == NULL) {
1544         return;
1545     }
1546 
1547     qemu_spin_lock(&dest->jmp_lock);
1548     /*
1549      * While acquiring the lock, the jump might have been removed if the
1550      * destination TB was invalidated; check again.
1551      */
1552     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1553     if (ptr_locked != ptr) {
1554         qemu_spin_unlock(&dest->jmp_lock);
1555         /*
1556          * The only possibility is that the jump was unlinked via
1557          * tb_jump_unlink(dest). Seeing another destination here would be a bug,
1558          * because we set the LSB above.
1559          */
1560         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1561         return;
1562     }
1563     /*
1564      * We first acquired the lock, and since the destination pointer matches,
1565      * we know for sure that @orig is in the jmp list.
1566      */
1567     pprev = &dest->jmp_list_head;
1568     TB_FOR_EACH_JMP(dest, tb, n) {
1569         if (tb == orig && n == n_orig) {
1570             *pprev = tb->jmp_list_next[n];
1571             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1572             qemu_spin_unlock(&dest->jmp_lock);
1573             return;
1574         }
1575         pprev = &tb->jmp_list_next[n];
1576     }
1577     g_assert_not_reached();
1578 }
1579 
1580 /* reset the jump entry 'n' of a TB so that it is not chained to
1581    another TB */
1582 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1583 {
1584     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1585     tb_set_jmp_target(tb, n, addr);
1586 }
1587 
1588 /* remove any jumps to the TB */
1589 static inline void tb_jmp_unlink(TranslationBlock *dest)
1590 {
1591     TranslationBlock *tb;
1592     int n;
1593 
1594     qemu_spin_lock(&dest->jmp_lock);
1595 
1596     TB_FOR_EACH_JMP(dest, tb, n) {
1597         tb_reset_jump(tb, n);
1598         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1599         /* No need to clear the list entry; setting the dest ptr is enough */
1600     }
1601     dest->jmp_list_head = (uintptr_t)NULL;
1602 
1603     qemu_spin_unlock(&dest->jmp_lock);
1604 }
1605 
1606 /*
1607  * In user-mode, call with mmap_lock held.
1608  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1609  * locks held.
1610  */
1611 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1612 {
1613     CPUState *cpu;
1614     PageDesc *p;
1615     uint32_t h;
1616     tb_page_addr_t phys_pc;
1617     uint32_t orig_cflags = tb_cflags(tb);
1618 
1619     assert_memory_lock();
1620 
1621     /* make sure no further incoming jumps will be chained to this TB */
1622     qemu_spin_lock(&tb->jmp_lock);
1623     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1624     qemu_spin_unlock(&tb->jmp_lock);
1625 
1626     /* remove the TB from the hash list */
1627     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1628     h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
1629                      tb->trace_vcpu_dstate);
1630     if (!qht_remove(&tb_ctx.htable, tb, h)) {
1631         return;
1632     }
1633 
1634     /* remove the TB from the page list */
1635     if (rm_from_page_list) {
1636         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1637         tb_page_remove(p, tb);
1638         invalidate_page_bitmap(p);
1639         if (tb->page_addr[1] != -1) {
1640             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1641             tb_page_remove(p, tb);
1642             invalidate_page_bitmap(p);
1643         }
1644     }
1645 
1646     /* remove the TB from the per-CPU tb_jmp_cache */
1647     h = tb_jmp_cache_hash_func(tb->pc);
1648     CPU_FOREACH(cpu) {
1649         if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1650             qatomic_set(&cpu->tb_jmp_cache[h], NULL);
1651         }
1652     }
1653 
1654     /* suppress this TB from the two jump lists */
1655     tb_remove_from_jmp_list(tb, 0);
1656     tb_remove_from_jmp_list(tb, 1);
1657 
1658     /* suppress any remaining jumps to this TB */
1659     tb_jmp_unlink(tb);
1660 
1661     qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
1662                tcg_ctx->tb_phys_invalidate_count + 1);
1663 }
1664 
1665 static void tb_phys_invalidate__locked(TranslationBlock *tb)
1666 {
1667     qemu_thread_jit_write();
1668     do_tb_phys_invalidate(tb, true);
1669     qemu_thread_jit_execute();
1670 }
1671 
1672 /* invalidate one TB
1673  *
1674  * Called with mmap_lock held in user-mode.
1675  */
1676 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1677 {
1678     if (page_addr == -1 && tb->page_addr[0] != -1) {
1679         page_lock_tb(tb);
1680         do_tb_phys_invalidate(tb, true);
1681         page_unlock_tb(tb);
1682     } else {
1683         do_tb_phys_invalidate(tb, false);
1684     }
1685 }
1686 
1687 #ifdef CONFIG_SOFTMMU
1688 /* call with @p->lock held */
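/* Build p->code_bitmap, marking which bytes of this guest page are covered by
 * translated code, by walking every TB that intersects the page.  A TB whose
 * second page this is (n == 1) contributes only its tail here. */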
1689 static void build_page_bitmap(PageDesc *p)
1690 {
1691     int n, tb_start, tb_end;
1692     TranslationBlock *tb;
1693 
1694     assert_page_locked(p);
1695     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1696 
1697     PAGE_FOR_EACH_TB(p, tb, n) {
1698         /* NOTE: this is subtle as a TB may span two physical pages */
1699         if (n == 0) {
1700             /* NOTE: tb_end may be after the end of the page, but
1701                it is not a problem */
1702             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1703             tb_end = tb_start + tb->size;
1704             if (tb_end > TARGET_PAGE_SIZE) {
1705                 tb_end = TARGET_PAGE_SIZE;
1706             }
1707         } else {
1708             tb_start = 0;
1709             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1710         }
1711         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1712     }
1713 }
1714 #endif
1715 
1716 /* add the tb to the target page and protect it if necessary
1717  *
1718  * Called with mmap_lock held for user-mode emulation.
1719  * Called with @p->lock held in !user-mode.
1720  */
1721 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1722                                unsigned int n, tb_page_addr_t page_addr)
1723 {
1724 #ifndef CONFIG_USER_ONLY
1725     bool page_already_protected;
1726 #endif
1727 
1728     assert_page_locked(p);
1729 
1730     tb->page_addr[n] = page_addr;
1731     tb->page_next[n] = p->first_tb;
1732 #ifndef CONFIG_USER_ONLY
1733     page_already_protected = p->first_tb != (uintptr_t)NULL;
1734 #endif
1735     p->first_tb = (uintptr_t)tb | n;
1736     invalidate_page_bitmap(p);
1737 
1738 #if defined(CONFIG_USER_ONLY)
1739     if (p->flags & PAGE_WRITE) {
1740         target_ulong addr;
1741         PageDesc *p2;
1742         int prot;
1743 
1744         /* force the host page to be non-writable (writes will incur a
1745            page fault + mprotect overhead) */
1746         page_addr &= qemu_host_page_mask;
1747         prot = 0;
1748         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1749             addr += TARGET_PAGE_SIZE) {
1750 
1751             p2 = page_find(addr >> TARGET_PAGE_BITS);
1752             if (!p2) {
1753                 continue;
1754             }
1755             prot |= p2->flags;
1756             p2->flags &= ~PAGE_WRITE;
1757         }
1758         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1759                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1760         if (DEBUG_TB_INVALIDATE_GATE) {
1761             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1762         }
1763     }
1764 #else
1765     /* If some code is already present, then the page is already
1766        protected, so we only need to protect it when the first TB is
1767        added to this physical page. */
1768     if (!page_already_protected) {
1769         tlb_protect_code(page_addr);
1770     }
1771 #endif
1772 }
1773 
1774 /*
1775  * Add a new TB and link it to the physical page tables. phys_page2 is
1776  * (-1) to indicate that only one page contains the TB.
1777  *
1778  * Called with mmap_lock held for user-mode emulation.
1779  *
1780  * Returns a pointer to @tb, or a pointer to an existing TB that matches @tb.
1781  * Note that in !user-mode, another thread might have already added a TB
1782  * for the same block of guest code that @tb corresponds to. In that case,
1783  * the caller should discard the original @tb, and use instead the returned TB.
1784  */
1785 static TranslationBlock *
1786 tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1787              tb_page_addr_t phys_page2)
1788 {
1789     PageDesc *p;
1790     PageDesc *p2 = NULL;
1791     void *existing_tb = NULL;
1792     uint32_t h;
1793 
1794     assert_memory_lock();
1795     tcg_debug_assert(!(tb->cflags & CF_INVALID));
1796 
1797     /*
1798      * Add the TB to the page list, acquiring the pages' locks first.
1799      * We keep the locks held until after inserting the TB in the hash table,
1800      * so that if the insertion fails we know for sure that the TBs are still
1801      * in the page descriptors.
1802      * Note that inserting into the hash table first isn't an option, since
1803      * we can only insert TBs that are fully initialized.
1804      */
1805     page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1806     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1807     if (p2) {
1808         tb_page_add(p2, tb, 1, phys_page2);
1809     } else {
1810         tb->page_addr[1] = -1;
1811     }
1812 
1813     /* add in the hash table */
1814     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
1815                      tb->trace_vcpu_dstate);
1816     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1817 
1818     /* remove TB from the page(s) if we couldn't insert it */
1819     if (unlikely(existing_tb)) {
1820         tb_page_remove(p, tb);
1821         invalidate_page_bitmap(p);
1822         if (p2) {
1823             tb_page_remove(p2, tb);
1824             invalidate_page_bitmap(p2);
1825         }
1826         tb = existing_tb;
1827     }
1828 
1829     if (p2 && p2 != p) {
1830         page_unlock(p2);
1831     }
1832     page_unlock(p);
1833 
1834 #ifdef CONFIG_USER_ONLY
1835     if (DEBUG_TB_CHECK_GATE) {
1836         tb_page_check();
1837     }
1838 #endif
1839     return tb;
1840 }
1841 
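/*
 * Translate a block of guest code starting at @pc and register the
 * resulting TranslationBlock.  Returns the new TB, or, if another thread
 * raced us and already registered an equivalent TB, that existing TB
 * (in which case the speculative translation is discarded).
 */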
1842 /* Called with mmap_lock held for user-mode emulation.  */
1843 TranslationBlock *tb_gen_code(CPUState *cpu,
1844                               target_ulong pc, target_ulong cs_base,
1845                               uint32_t flags, int cflags)
1846 {
1847     CPUArchState *env = cpu->env_ptr;
1848     TranslationBlock *tb, *existing_tb;
1849     tb_page_addr_t phys_pc, phys_page2;
1850     target_ulong virt_page2;
1851     tcg_insn_unit *gen_code_buf;
1852     int gen_code_size, search_size, max_insns;
1853 #ifdef CONFIG_PROFILER
1854     TCGProfile *prof = &tcg_ctx->prof;
1855     int64_t ti;
1856 #endif
1857 
1858     assert_memory_lock();
1859     qemu_thread_jit_write();
1860 
1861     phys_pc = get_page_addr_code(env, pc);
1862 
1863     if (phys_pc == -1) {
1864         /* Generate a one-shot TB with 1 insn in it */
1865         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1866     }
1867 
1868     max_insns = cflags & CF_COUNT_MASK;
1869     if (max_insns == 0) {
1870         max_insns = CF_COUNT_MASK;
1871     }
1872     if (max_insns > TCG_MAX_INSNS) {
1873         max_insns = TCG_MAX_INSNS;
1874     }
1875     if (cpu->singlestep_enabled || singlestep) {
1876         max_insns = 1;
1877     }
1878 
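    /*
     * Retry point: the overflow paths below jump back here when the
     * current region of code_gen_buffer fills up, so that a fresh TB can
     * be allocated and code generation restarted.
     */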
1879  buffer_overflow:
1880     tb = tcg_tb_alloc(tcg_ctx);
1881     if (unlikely(!tb)) {
1882         /* flush must be done */
1883         tb_flush(cpu);
1884         mmap_unlock();
1885         /* Make the execution loop process the flush as soon as possible.  */
1886         cpu->exception_index = EXCP_INTERRUPT;
1887         cpu_loop_exit(cpu);
1888     }
1889 
1890     gen_code_buf = tcg_ctx->code_gen_ptr;
1891     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1892     tb->pc = pc;
1893     tb->cs_base = cs_base;
1894     tb->flags = flags;
1895     tb->cflags = cflags;
1896     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1897     tcg_ctx->tb_cflags = cflags;
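    /*
     * Retry point for a TB whose generated code turned out to be too
     * large; the -2 error path below jumps back here after halving
     * max_insns.
     */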
1898  tb_overflow:
1899 
1900 #ifdef CONFIG_PROFILER
1901     /* includes aborted translations because of exceptions */
1902     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1903     ti = profile_getclock();
1904 #endif
1905 
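    /*
     * Code generation may bail out early by siglongjmp-ing back here with
     * a negative size instead of returning, so a non-zero sigsetjmp()
     * return value is routed to the same error handling as a negative
     * return from tcg_gen_code() below.
     */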
1906     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1907     if (unlikely(gen_code_size != 0)) {
1908         goto error_return;
1909     }
1910 
1911     tcg_func_start(tcg_ctx);
1912 
1913     tcg_ctx->cpu = env_cpu(env);
1914     gen_intermediate_code(cpu, tb, max_insns);
1915     tcg_ctx->cpu = NULL;
1916     max_insns = tb->icount;
1917 
1918     trace_translate_block(tb, tb->pc, tb->tc.ptr);
1919 
1920     /* generate machine code */
1921     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1922     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1923     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1924     if (TCG_TARGET_HAS_direct_jump) {
1925         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1926         tcg_ctx->tb_jmp_target_addr = NULL;
1927     } else {
1928         tcg_ctx->tb_jmp_insn_offset = NULL;
1929         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1930     }
1931 
1932 #ifdef CONFIG_PROFILER
1933     qatomic_set(&prof->tb_count, prof->tb_count + 1);
1934     qatomic_set(&prof->interm_time,
1935                 prof->interm_time + profile_getclock() - ti);
1936     ti = profile_getclock();
1937 #endif
1938 
1939     gen_code_size = tcg_gen_code(tcg_ctx, tb);
1940     if (unlikely(gen_code_size < 0)) {
1941  error_return:
1942         switch (gen_code_size) {
1943         case -1:
1944             /*
1945              * Overflow of code_gen_buffer, or the current slice of it.
1946              *
1947              * TODO: We don't need to re-do gen_intermediate_code, nor
1948              * should we re-do the tcg optimization currently hidden
1949              * inside tcg_gen_code.  All that should be required is to
1950              * flush the TBs, allocate a new TB, re-initialize it per
1951              * above, and re-do the actual code generation.
1952              */
1953             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1954                           "Restarting code generation for "
1955                           "code_gen_buffer overflow\n");
1956             goto buffer_overflow;
1957 
1958         case -2:
1959             /*
1960              * The code generated for the TranslationBlock is too large.
1961              * The maximum size allowed by the unwind info is 64k.
1962              * There may be stricter constraints from relocations
1963              * in the tcg backend.
1964              *
1965              * Try again with half as many insns as we attempted this time.
1966              * If a single insn overflows, there's a bug somewhere...
1967              */
1968             assert(max_insns > 1);
1969             max_insns /= 2;
1970             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1971                           "Restarting code generation with "
1972                           "smaller translation block (max %d insns)\n",
1973                           max_insns);
1974             goto tb_overflow;
1975 
1976         default:
1977             g_assert_not_reached();
1978         }
1979     }
1980     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1981     if (unlikely(search_size < 0)) {
1982         goto buffer_overflow;
1983     }
1984     tb->tc.size = gen_code_size;
1985 
1986 #ifdef CONFIG_PROFILER
1987     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1988     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1989     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1990     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1991 #endif
1992 
1993 #ifdef DEBUG_DISAS
1994     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1995         qemu_log_in_addr_range(tb->pc)) {
1996         FILE *logfile = qemu_log_lock();
1997         int code_size, data_size;
1998         const tcg_target_ulong *rx_data_gen_ptr;
1999         size_t chunk_start;
2000         int insn = 0;
2001 
2002         if (tcg_ctx->data_gen_ptr) {
2003             rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
2004             code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
2005             data_size = gen_code_size - code_size;
2006         } else {
2007             rx_data_gen_ptr = 0;
2008             code_size = gen_code_size;
2009             data_size = 0;
2010         }
2011 
2012         /* Dump header and the first instruction */
2013         qemu_log("OUT: [size=%d]\n", gen_code_size);
2014         qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
2015                  tcg_ctx->gen_insn_data[insn][0]);
2016         chunk_start = tcg_ctx->gen_insn_end_off[insn];
2017         log_disas(tb->tc.ptr, chunk_start);
2018 
2019         /*
2020          * Dump each instruction chunk, wrapping up empty chunks into
2021          * the next instruction. The whole array is offset so the
2022          * first entry is the beginning of the 2nd instruction.
2023          */
2024         while (insn < tb->icount) {
2025             size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
2026             if (chunk_end > chunk_start) {
2027                 qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
2028                          tcg_ctx->gen_insn_data[insn][0]);
2029                 log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
2030                 chunk_start = chunk_end;
2031             }
2032             insn++;
2033         }
2034 
2035         if (chunk_start < code_size) {
2036             qemu_log("  -- tb slow paths + alignment\n");
2037             log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
2038         }
2039 
2040         /* Finally dump any data we may have after the block */
2041         if (data_size) {
2042             int i;
2043             qemu_log("  data: [size=%d]\n", data_size);
2044             for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
2045                 qemu_log("0x%08" PRIxPTR ":  .quad  0x%" TCG_PRIlx "\n",
2046                          (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
2047             }
2048         }
2049         qemu_log("\n");
2050         qemu_log_flush();
2051         qemu_log_unlock(logfile);
2052     }
2053 #endif
2054 
2055     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
2056         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
2057                  CODE_GEN_ALIGN));
2058 
2059     /* init jump list */
2060     qemu_spin_init(&tb->jmp_lock);
2061     tb->jmp_list_head = (uintptr_t)NULL;
2062     tb->jmp_list_next[0] = (uintptr_t)NULL;
2063     tb->jmp_list_next[1] = (uintptr_t)NULL;
2064     tb->jmp_dest[0] = (uintptr_t)NULL;
2065     tb->jmp_dest[1] = (uintptr_t)NULL;
2066 
2067     /* init original jump addresses which have been set during tcg_gen_code() */
2068     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2069         tb_reset_jump(tb, 0);
2070     }
2071     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2072         tb_reset_jump(tb, 1);
2073     }
2074 
2075     /*
2076      * If the TB is not associated with a physical RAM page then
2077      * it must be a temporary one-insn TB, and we have nothing to do
2078      * except fill in the page_addr[] fields. Return early before
2079      * attempting to link to other TBs or add to the lookup table.
2080      */
2081     if (phys_pc == -1) {
2082         tb->page_addr[0] = tb->page_addr[1] = -1;
2083         return tb;
2084     }
2085 
2086     /* check next page if needed */
2087     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
2088     phys_page2 = -1;
2089     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
2090         phys_page2 = get_page_addr_code(env, virt_page2);
2091     }
2092     /*
2093      * No explicit memory barrier is required -- tb_link_page() makes the
2094      * TB visible in a consistent state.
2095      */
2096     existing_tb = tb_link_page(tb, phys_pc, phys_page2);
2097     /* if the TB already exists, discard what we just translated */
2098     if (unlikely(existing_tb != tb)) {
2099         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
2100 
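        /*
         * Roll code_gen_ptr back over both the generated code and the
         * cache-line-aligned TB structure that tcg_tb_alloc() placed just
         * before it, so that the space can be reused by the next TB.
         */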
2101         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
2102         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
2103         tb_destroy(tb);
2104         return existing_tb;
2105     }
2106     tcg_tb_insert(tb);
2107     return tb;
2108 }
2109 
2110 /*
2111  * @p must be non-NULL.
2112  * user-mode: call with mmap_lock held.
2113  * !user-mode: call with all @pages locked.
2114  */
2115 static void
2116 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
2117                                       PageDesc *p, tb_page_addr_t start,
2118                                       tb_page_addr_t end,
2119                                       uintptr_t retaddr)
2120 {
2121     TranslationBlock *tb;
2122     tb_page_addr_t tb_start, tb_end;
2123     int n;
2124 #ifdef TARGET_HAS_PRECISE_SMC
2125     CPUState *cpu = current_cpu;
2126     CPUArchState *env = NULL;
2127     bool current_tb_not_found = retaddr != 0;
2128     bool current_tb_modified = false;
2129     TranslationBlock *current_tb = NULL;
2130     target_ulong current_pc = 0;
2131     target_ulong current_cs_base = 0;
2132     uint32_t current_flags = 0;
2133 #endif /* TARGET_HAS_PRECISE_SMC */
2134 
2135     assert_page_locked(p);
2136 
2137 #if defined(TARGET_HAS_PRECISE_SMC)
2138     if (cpu != NULL) {
2139         env = cpu->env_ptr;
2140     }
2141 #endif
2142 
2143     /* we remove all the TBs in the range [start, end[ */
2144     /* XXX: see if in some cases it could be faster to invalidate all
2145        the code */
2146     PAGE_FOR_EACH_TB(p, tb, n) {
2147         assert_page_locked(p);
2148         /* NOTE: this is subtle as a TB may span two physical pages */
2149         if (n == 0) {
2150             /* NOTE: tb_end may be after the end of the page, but
2151                it is not a problem */
2152             tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
2153             tb_end = tb_start + tb->size;
2154         } else {
2155             tb_start = tb->page_addr[1];
2156             tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
2157         }
2158         if (!(tb_end <= start || tb_start >= end)) {
2159 #ifdef TARGET_HAS_PRECISE_SMC
2160             if (current_tb_not_found) {
2161                 current_tb_not_found = false;
2162                 /* now we have a real cpu fault */
2163                 current_tb = tcg_tb_lookup(retaddr);
2164             }
2165             if (current_tb == tb &&
2166                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2167                 /*
2168                  * If we are modifying the current TB, we must stop
2169                  * its execution. We could be more precise by checking
2170                  * that the modification is after the current PC, but it
2171                  * would require a specialized function to partially
2172                  * restore the CPU state.
2173                  */
2174                 current_tb_modified = true;
2175                 cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
2176                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2177                                      &current_flags);
2178             }
2179 #endif /* TARGET_HAS_PRECISE_SMC */
2180             tb_phys_invalidate__locked(tb);
2181         }
2182     }
2183 #if !defined(CONFIG_USER_ONLY)
2184     /* if no code remains, there is no need to keep using slow writes */
2185     if (!p->first_tb) {
2186         invalidate_page_bitmap(p);
2187         tlb_unprotect_code(start);
2188     }
2189 #endif
2190 #ifdef TARGET_HAS_PRECISE_SMC
2191     if (current_tb_modified) {
2192         page_collection_unlock(pages);
2193         /* Force execution of one insn next time.  */
2194         cpu->cflags_next_tb = 1 | curr_cflags(cpu);
2195         mmap_unlock();
2196         cpu_loop_exit_noexc(cpu);
2197     }
2198 #endif
2199 }
2200 
2201 /*
2202  * Invalidate all TBs which intersect with the target physical address range
2203  * [start;end[. NOTE: start and end must refer to the *same* physical page.
2207  *
2208  * Called with mmap_lock held for user-mode emulation.
2209  */
2210 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
2211 {
2212     struct page_collection *pages;
2213     PageDesc *p;
2214 
2215     assert_memory_lock();
2216 
2217     p = page_find(start >> TARGET_PAGE_BITS);
2218     if (p == NULL) {
2219         return;
2220     }
2221     pages = page_collection_lock(start, end);
2222     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
2223     page_collection_unlock(pages);
2224 }
2225 
2226 /*
2227  * Invalidate all TBs which intersect with the target physical address range
2228  * [start;end[. NOTE: start and end may refer to *different* physical pages.
2232  *
2233  * Called with mmap_lock held for user-mode emulation.
2234  */
2235 #ifdef CONFIG_SOFTMMU
2236 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
2237 #else
2238 void tb_invalidate_phys_range(target_ulong start, target_ulong end)
2239 #endif
2240 {
2241     struct page_collection *pages;
2242     tb_page_addr_t next;
2243 
2244     assert_memory_lock();
2245 
2246     pages = page_collection_lock(start, end);
2247     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
2248          start < end;
2249          start = next, next += TARGET_PAGE_SIZE) {
2250         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
2251         tb_page_addr_t bound = MIN(next, end);
2252 
2253         if (pd == NULL) {
2254             continue;
2255         }
2256         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
2257     }
2258     page_collection_unlock(pages);
2259 }
2260 
2261 #ifdef CONFIG_SOFTMMU
2262 /* len must be <= 8 and start must be a multiple of len.
2263  * Called via softmmu_template.h when code areas are written to with
2264  * iothread mutex not held.
2265  *
2266  * Call with all @pages in the range [@start, @start + len[ locked.
2267  */
2268 void tb_invalidate_phys_page_fast(struct page_collection *pages,
2269                                   tb_page_addr_t start, int len,
2270                                   uintptr_t retaddr)
2271 {
2272     PageDesc *p;
2273 
2274     assert_memory_lock();
2275 
2276     p = page_find(start >> TARGET_PAGE_BITS);
2277     if (!p) {
2278         return;
2279     }
2280 
2281     assert_page_locked(p);
2282     if (!p->code_bitmap &&
2283         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
2284         build_page_bitmap(p);
2285     }
2286     if (p->code_bitmap) {
2287         unsigned int nr;
2288         unsigned long b;
2289 
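        /*
         * Check whether any of the @len bytes written at @start overlap a
         * byte that the bitmap marks as translated code; if none do, the
         * write cannot affect any TB and the range invalidation below can
         * be skipped entirely.
         */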
2290         nr = start & ~TARGET_PAGE_MASK;
2291         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
2292         if (b & ((1 << len) - 1)) {
2293             goto do_invalidate;
2294         }
2295     } else {
2296     do_invalidate:
2297         tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
2298                                               retaddr);
2299     }
2300 }
2301 #else
2302 /* Called with mmap_lock held. If pc is not 0 then it indicates the
2303  * host PC of the faulting store instruction that caused this invalidate.
2304  * Returns true if the caller needs to abort execution of the current
2305  * TB (because it was modified by this store and the guest CPU has
2306  * precise-SMC semantics).
2307  */
2308 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
2309 {
2310     TranslationBlock *tb;
2311     PageDesc *p;
2312     int n;
2313 #ifdef TARGET_HAS_PRECISE_SMC
2314     TranslationBlock *current_tb = NULL;
2315     CPUState *cpu = current_cpu;
2316     CPUArchState *env = NULL;
2317     int current_tb_modified = 0;
2318     target_ulong current_pc = 0;
2319     target_ulong current_cs_base = 0;
2320     uint32_t current_flags = 0;
2321 #endif
2322 
2323     assert_memory_lock();
2324 
2325     addr &= TARGET_PAGE_MASK;
2326     p = page_find(addr >> TARGET_PAGE_BITS);
2327     if (!p) {
2328         return false;
2329     }
2330 
2331 #ifdef TARGET_HAS_PRECISE_SMC
2332     if (p->first_tb && pc != 0) {
2333         current_tb = tcg_tb_lookup(pc);
2334     }
2335     if (cpu != NULL) {
2336         env = cpu->env_ptr;
2337     }
2338 #endif
2339     assert_page_locked(p);
2340     PAGE_FOR_EACH_TB(p, tb, n) {
2341 #ifdef TARGET_HAS_PRECISE_SMC
2342         if (current_tb == tb &&
2343             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2344             /* If we are modifying the current TB, we must stop
2345                its execution. We could be more precise by checking
2346                that the modification is after the current PC, but it
2347                would require a specialized function to partially
2348                restore the CPU state. */
2350             current_tb_modified = 1;
2351             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
2352             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2353                                  &current_flags);
2354         }
2355 #endif /* TARGET_HAS_PRECISE_SMC */
2356         tb_phys_invalidate(tb, addr);
2357     }
2358     p->first_tb = (uintptr_t)NULL;
2359 #ifdef TARGET_HAS_PRECISE_SMC
2360     if (current_tb_modified) {
2361         /* Force execution of one insn next time.  */
2362         cpu->cflags_next_tb = 1 | curr_cflags(cpu);
2363         return true;
2364     }
2365 #endif
2366 
2367     return false;
2368 }
2369 #endif
2370 
2371 /* user-mode: call with mmap_lock held */
2372 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
2373 {
2374     TranslationBlock *tb;
2375 
2376     assert_memory_lock();
2377 
2378     tb = tcg_tb_lookup(retaddr);
2379     if (tb) {
2380         /* We can use retranslation to find the PC.  */
2381         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2382         tb_phys_invalidate(tb, -1);
2383     } else {
2384         /* The exception probably happened in a helper.  The CPU state should
2385            have been saved before calling it. Fetch the PC from there.  */
2386         CPUArchState *env = cpu->env_ptr;
2387         target_ulong pc, cs_base;
2388         tb_page_addr_t addr;
2389         uint32_t flags;
2390 
2391         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2392         addr = get_page_addr_code(env, pc);
2393         if (addr != -1) {
2394             tb_invalidate_phys_range(addr, addr + 1);
2395         }
2396     }
2397 }
2398 
2399 #ifndef CONFIG_USER_ONLY
2400 /*
2401  * In deterministic execution mode, instructions doing device I/Os
2402  * must be at the end of the TB.
2403  *
2404  * Called by softmmu_template.h, with iothread mutex not held.
2405  */
2406 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2407 {
2408     TranslationBlock *tb;
2409     CPUClass *cc;
2410     uint32_t n;
2411 
2412     tb = tcg_tb_lookup(retaddr);
2413     if (!tb) {
2414         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2415                   (void *)retaddr);
2416     }
2417     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2418 
2419     /*
2420      * Some guests must re-execute the branch when re-executing a delay
2421      * slot instruction.  When this is the case, adjust icount and N
2422      * to account for the re-execution of the branch.
2423      */
2424     n = 1;
2425     cc = CPU_GET_CLASS(cpu);
2426     if (cc->tcg_ops->io_recompile_replay_branch &&
2427         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
2428         cpu_neg(cpu)->icount_decr.u16.low++;
2429         n = 2;
2430     }
2431 
2432     /*
2433      * Exit the loop and potentially generate a new TB executing
2434      * just the I/O insns. We also limit instrumentation to memory
2435      * operations only (which execute after completion) so we don't
2436      * double-instrument the instruction.
2437      */
2438     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
2439 
2440     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
2441                            "cpu_io_recompile: rewound execution of TB to "
2442                            TARGET_FMT_lx "\n", tb->pc);
2443 
2444     cpu_loop_exit_noexc(cpu);
2445 }
2446 
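/*
 * Pretty-print the TB hash table statistics gathered by
 * qht_statistics_init(): head bucket usage, average chain occupancy and
 * their histograms.
 */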
2447 static void print_qht_statistics(struct qht_stats hst)
2448 {
2449     uint32_t hgram_opts;
2450     size_t hgram_bins;
2451     char *hgram;
2452 
2453     if (!hst.head_buckets) {
2454         return;
2455     }
2456     qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2457                 hst.used_head_buckets, hst.head_buckets,
2458                 (double)hst.used_head_buckets / hst.head_buckets * 100);
2459 
2460     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2461     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2462     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2463         hgram_opts |= QDIST_PR_NODECIMAL;
2464     }
2465     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2466     qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2467                 qdist_avg(&hst.occupancy) * 100, hgram);
2468     g_free(hgram);
2469 
2470     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2471     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2472     if (hgram_bins > 10) {
2473         hgram_bins = 10;
2474     } else {
2475         hgram_bins = 0;
2476         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2477     }
2478     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2479     qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2480                 qdist_avg(&hst.chain), hgram);
2481     g_free(hgram);
2482 }
2483 
2484 struct tb_tree_stats {
2485     size_t nb_tbs;
2486     size_t host_size;
2487     size_t target_size;
2488     size_t max_target_size;
2489     size_t direct_jmp_count;
2490     size_t direct_jmp2_count;
2491     size_t cross_page;
2492 };
2493 
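/*
 * Per-TB callback for tcg_tb_foreach(): accumulate size and jump
 * statistics into the tb_tree_stats pointed to by @data.  Returning
 * false keeps the iteration going over all TBs.
 */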
2494 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2495 {
2496     const TranslationBlock *tb = value;
2497     struct tb_tree_stats *tst = data;
2498 
2499     tst->nb_tbs++;
2500     tst->host_size += tb->tc.size;
2501     tst->target_size += tb->size;
2502     if (tb->size > tst->max_target_size) {
2503         tst->max_target_size = tb->size;
2504     }
2505     if (tb->page_addr[1] != -1) {
2506         tst->cross_page++;
2507     }
2508     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2509         tst->direct_jmp_count++;
2510         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2511             tst->direct_jmp2_count++;
2512         }
2513     }
2514     return false;
2515 }
2516 
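/*
 * Print a summary of the translation cache state: TB counts and sizes,
 * hash table statistics and TLB flush counts (e.g. for the "info jit"
 * monitor command).
 */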
2517 void dump_exec_info(void)
2518 {
2519     struct tb_tree_stats tst = {};
2520     struct qht_stats hst;
2521     size_t nb_tbs, flush_full, flush_part, flush_elide;
2522 
2523     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2524     nb_tbs = tst.nb_tbs;
2525     /* XXX: avoid using doubles ? */
2526     qemu_printf("Translation buffer state:\n");
2527     /*
2528      * Report total code size including the padding and TB structs;
2529      * otherwise users might think "-accel tcg,tb-size" is not honoured.
2530      * For avg host size we use the precise numbers from tb_tree_stats though.
2531      */
2532     qemu_printf("gen code size       %zu/%zu\n",
2533                 tcg_code_size(), tcg_code_capacity());
2534     qemu_printf("TB count            %zu\n", nb_tbs);
2535     qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2536                 nb_tbs ? tst.target_size / nb_tbs : 0,
2537                 tst.max_target_size);
2538     qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2539                 nb_tbs ? tst.host_size / nb_tbs : 0,
2540                 tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2541     qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2542                 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2543     qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2544                 tst.direct_jmp_count,
2545                 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2546                 tst.direct_jmp2_count,
2547                 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2548 
2549     qht_statistics_init(&tb_ctx.htable, &hst);
2550     print_qht_statistics(hst);
2551     qht_statistics_destroy(&hst);
2552 
2553     qemu_printf("\nStatistics:\n");
2554     qemu_printf("TB flush count      %u\n",
2555                 qatomic_read(&tb_ctx.tb_flush_count));
2556     qemu_printf("TB invalidate count %zu\n",
2557                 tcg_tb_phys_invalidate_count());
2558 
2559     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2560     qemu_printf("TLB full flushes    %zu\n", flush_full);
2561     qemu_printf("TLB partial flushes %zu\n", flush_part);
2562     qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2563     tcg_dump_info();
2564 }
2565 
2566 void dump_opcount_info(void)
2567 {
2568     tcg_dump_op_count();
2569 }
2570 
2571 #else /* CONFIG_USER_ONLY */
2572 
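/*
 * user-mode implementation of cpu_interrupt(): record the request and
 * set the high half of icount_decr so that the vCPU leaves the TB
 * execution loop at the next interrupt check.
 */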
2573 void cpu_interrupt(CPUState *cpu, int mask)
2574 {
2575     g_assert(qemu_mutex_iothread_locked());
2576     cpu->interrupt_request |= mask;
2577     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2578 }
2579 
2580 /*
2581  * Walks guest process memory "regions" one by one
2582  * and calls callback function 'fn' for each region.
2583  */
2584 struct walk_memory_regions_data {
2585     walk_memory_regions_fn fn;
2586     void *priv;
2587     target_ulong start;
2588     int prot;
2589 };
2590 
2591 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2592                                    target_ulong end, int new_prot)
2593 {
2594     if (data->start != -1u) {
2595         int rc = data->fn(data->priv, data->start, end, data->prot);
2596         if (rc != 0) {
2597             return rc;
2598         }
2599     }
2600 
2601     data->start = (new_prot ? end : -1u);
2602     data->prot = new_prot;
2603 
2604     return 0;
2605 }
2606 
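/*
 * Recursive helper for walk_memory_regions(): walk one level of the
 * l1_map radix tree.  At level 0 the entries are PageDesc arrays; a
 * change in page protection ends the current region and starts a new
 * one via walk_memory_regions_end().
 */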
2607 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2608                                  target_ulong base, int level, void **lp)
2609 {
2610     target_ulong pa;
2611     int i, rc;
2612 
2613     if (*lp == NULL) {
2614         return walk_memory_regions_end(data, base, 0);
2615     }
2616 
2617     if (level == 0) {
2618         PageDesc *pd = *lp;
2619 
2620         for (i = 0; i < V_L2_SIZE; ++i) {
2621             int prot = pd[i].flags;
2622 
2623             pa = base | (i << TARGET_PAGE_BITS);
2624             if (prot != data->prot) {
2625                 rc = walk_memory_regions_end(data, pa, prot);
2626                 if (rc != 0) {
2627                     return rc;
2628                 }
2629             }
2630         }
2631     } else {
2632         void **pp = *lp;
2633 
2634         for (i = 0; i < V_L2_SIZE; ++i) {
2635             pa = base | ((target_ulong)i <<
2636                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2637             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2638             if (rc != 0) {
2639                 return rc;
2640             }
2641         }
2642     }
2643 
2644     return 0;
2645 }
2646 
2647 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2648 {
2649     struct walk_memory_regions_data data;
2650     uintptr_t i, l1_sz = v_l1_size;
2651 
2652     data.fn = fn;
2653     data.priv = priv;
2654     data.start = -1u;
2655     data.prot = 0;
2656 
2657     for (i = 0; i < l1_sz; i++) {
2658         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2659         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2660         if (rc != 0) {
2661             return rc;
2662         }
2663     }
2664 
2665     return walk_memory_regions_end(&data, 0, 0);
2666 }
2667 
2668 static int dump_region(void *priv, target_ulong start,
2669     target_ulong end, unsigned long prot)
2670 {
2671     FILE *f = (FILE *)priv;
2672 
2673     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2674         " "TARGET_FMT_lx" %c%c%c\n",
2675         start, end, end - start,
2676         ((prot & PAGE_READ) ? 'r' : '-'),
2677         ((prot & PAGE_WRITE) ? 'w' : '-'),
2678         ((prot & PAGE_EXEC) ? 'x' : '-'));
2679 
2680     return 0;
2681 }
2682 
2683 /* dump memory mappings */
2684 void page_dump(FILE *f)
2685 {
2686     const int length = sizeof(target_ulong) * 2;
2687     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2688             length, "start", length, "end", length, "size", "prot");
2689     walk_memory_regions(f, dump_region);
2690 }
2691 
2692 int page_get_flags(target_ulong address)
2693 {
2694     PageDesc *p;
2695 
2696     p = page_find(address >> TARGET_PAGE_BITS);
2697     if (!p) {
2698         return 0;
2699     }
2700     return p->flags;
2701 }
2702 
2703 /* Modify the flags of a page and invalidate the code if necessary.
2704    The flag PAGE_WRITE_ORG is set automatically depending
2705    on PAGE_WRITE.  The mmap_lock must already be held.  */
2706 void page_set_flags(target_ulong start, target_ulong end, int flags)
2707 {
2708     target_ulong addr, len;
2709     bool reset_target_data;
2710 
2711     /* This function should never be called with addresses outside the
2712        guest address space.  If this assert fires, it probably indicates
2713        a missing call to h2g_valid.  */
2714     assert(end - 1 <= GUEST_ADDR_MAX);
2715     assert(start < end);
2716     /* Only set PAGE_ANON with new mappings. */
2717     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2718     assert_memory_lock();
2719 
2720     start = start & TARGET_PAGE_MASK;
2721     end = TARGET_PAGE_ALIGN(end);
2722 
2723     if (flags & PAGE_WRITE) {
2724         flags |= PAGE_WRITE_ORG;
2725     }
2726     reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2727     flags &= ~PAGE_RESET;
2728 
2729     for (addr = start, len = end - start;
2730          len != 0;
2731          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2732         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2733 
2734         /* If a page that contains translated code (and is therefore
2735            write-protected) is being made writable, invalidate that code. */
2736         if (!(p->flags & PAGE_WRITE) &&
2737             (flags & PAGE_WRITE) &&
2738             p->first_tb) {
2739             tb_invalidate_phys_page(addr, 0);
2740         }
2741         if (reset_target_data) {
2742             g_free(p->target_data);
2743             p->target_data = NULL;
2744             p->flags = flags;
2745         } else {
2746             /* Using mprotect on a page does not change MAP_ANON. */
2747             p->flags = (p->flags & PAGE_ANON) | flags;
2748         }
2749     }
2750 }
2751 
2752 void *page_get_target_data(target_ulong address)
2753 {
2754     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2755     return p ? p->target_data : NULL;
2756 }
2757 
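/*
 * Return the per-page target_data for @address, allocating @size zeroed
 * bytes on first use.  Only valid (mapped) pages get data; NULL is
 * returned otherwise.  The data is freed by page_set_flags() when the
 * page is unmapped or remapped with PAGE_RESET.
 */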
2758 void *page_alloc_target_data(target_ulong address, size_t size)
2759 {
2760     PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2761     void *ret = NULL;
2762 
2763     if (p->flags & PAGE_VALID) {
2764         ret = p->target_data;
2765         if (!ret) {
2766             p->target_data = ret = g_malloc0(size);
2767         }
2768     }
2769     return ret;
2770 }
2771 
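/*
 * Check that the guest range [start, start + len) is mapped with at
 * least the permissions given in @flags.  Returns 0 on success and -1 on
 * failure.  Pages that were made read-only only because they contain
 * translated code are unprotected on demand, so a PAGE_WRITE_ORG mapping
 * still counts as writable.
 */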
2772 int page_check_range(target_ulong start, target_ulong len, int flags)
2773 {
2774     PageDesc *p;
2775     target_ulong end;
2776     target_ulong addr;
2777 
2778     /* This function should never be called with addresses outside the
2779        guest address space.  If this assert fires, it probably indicates
2780        a missing call to h2g_valid.  */
2781     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2782         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2783     }
2784 
2785     if (len == 0) {
2786         return 0;
2787     }
2788     if (start + len - 1 < start) {
2789         /* We've wrapped around.  */
2790         return -1;
2791     }
2792 
2793     /* must do this before we lose bits in the next step */
2794     end = TARGET_PAGE_ALIGN(start + len);
2795     start = start & TARGET_PAGE_MASK;
2796 
2797     for (addr = start, len = end - start;
2798          len != 0;
2799          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2800         p = page_find(addr >> TARGET_PAGE_BITS);
2801         if (!p) {
2802             return -1;
2803         }
2804         if (!(p->flags & PAGE_VALID)) {
2805             return -1;
2806         }
2807 
2808         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2809             return -1;
2810         }
2811         if (flags & PAGE_WRITE) {
2812             if (!(p->flags & PAGE_WRITE_ORG)) {
2813                 return -1;
2814             }
2815             /* unprotect the page if it was made read-only because it
2816                contains translated code */
2817             if (!(p->flags & PAGE_WRITE)) {
2818                 if (!page_unprotect(addr, 0)) {
2819                     return -1;
2820                 }
2821             }
2822         }
2823     }
2824     return 0;
2825 }
2826 
2827 /* called from signal handler: invalidate the code and unprotect the
2828  * page. Return 0 if the fault was not handled, 1 if it was handled,
2829  * and 2 if it was handled but the caller must cause the TB to be
2830  * immediately exited. (We can only return 2 if the 'pc' argument is
2831  * non-zero.)
2832  */
2833 int page_unprotect(target_ulong address, uintptr_t pc)
2834 {
2835     unsigned int prot;
2836     bool current_tb_invalidated;
2837     PageDesc *p;
2838     target_ulong host_start, host_end, addr;
2839 
2840     /* Technically this isn't safe inside a signal handler.  However, we
2841        know this only ever happens in a synchronous SEGV handler, so in
2842        practice it seems to be OK.  */
2843     mmap_lock();
2844 
2845     p = page_find(address >> TARGET_PAGE_BITS);
2846     if (!p) {
2847         mmap_unlock();
2848         return 0;
2849     }
2850 
2851     /* if the page was really writable, then we change its
2852        protection back to writable */
2853     if (p->flags & PAGE_WRITE_ORG) {
2854         current_tb_invalidated = false;
2855         if (p->flags & PAGE_WRITE) {
2856             /* If the page is actually marked WRITE then assume this is because
2857              * this thread raced with another one which got here first and
2858              * set the page to PAGE_WRITE and did the TB invalidate for us.
2859              */
2860 #ifdef TARGET_HAS_PRECISE_SMC
2861             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2862             if (current_tb) {
2863                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2864             }
2865 #endif
2866         } else {
2867             host_start = address & qemu_host_page_mask;
2868             host_end = host_start + qemu_host_page_size;
2869 
2870             prot = 0;
2871             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2872                 p = page_find(addr >> TARGET_PAGE_BITS);
2873                 p->flags |= PAGE_WRITE;
2874                 prot |= p->flags;
2875 
2876                 /* and since the content will be modified, we must invalidate
2877                    the corresponding translated code. */
2878                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2879 #ifdef CONFIG_USER_ONLY
2880                 if (DEBUG_TB_CHECK_GATE) {
2881                     tb_invalidate_check(addr);
2882                 }
2883 #endif
2884             }
2885             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2886                      prot & PAGE_BITS);
2887         }
2888         mmap_unlock();
2889         /* If current TB was invalidated return to main loop */
2890         return current_tb_invalidated ? 2 : 1;
2891     }
2892     mmap_unlock();
2893     return 0;
2894 }
2895 #endif /* CONFIG_USER_ONLY */
2896 
2897 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2898 void tcg_flush_softmmu_tlb(CPUState *cs)
2899 {
2900 #ifdef CONFIG_SOFTMMU
2901     tlb_flush(cs);
2902 #endif
2903 }
2904