1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 
66 /* make various TB consistency checks */
67 
68 /**
69  * struct page_entry - page descriptor entry
70  * @pd:     pointer to the &struct PageDesc of the page this entry represents
71  * @index:  page index of the page
72  * @locked: whether the page is locked
73  *
74  * This struct helps us keep track of the locked state of a page, without
75  * bloating &struct PageDesc.
76  *
77  * A page lock protects accesses to all fields of &struct PageDesc.
78  *
79  * See also: &struct page_collection.
80  */
81 struct page_entry {
82     PageDesc *pd;
83     tb_page_addr_t index;
84     bool locked;
85 };
86 
87 /**
88  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
89  * @tree:   Binary search tree (BST) of the pages, with key == page index
90  * @max:    Pointer to the page in @tree with the highest page index
91  *
92  * To avoid deadlock we lock pages in ascending order of page index.
93  * When operating on a set of pages, we need to keep track of them so that
94  * we can lock them in order and also unlock them later. For this we collect
95  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
96  * @tree implementation we use does not provide an O(1) operation to obtain the
97  * highest-ranked element, we use @max to keep track of the inserted page
98  * with the highest index. This is valuable because if a page is not in
99  * the tree and its index is higher than @max's, then we can lock it
100  * without breaking the locking order rule.
101  *
102  * Note on naming: 'struct page_set' would be shorter, but we already have a few
103  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
104  *
105  * See also: page_collection_lock().
106  */
107 struct page_collection {
108     GTree *tree;
109     struct page_entry *max;
110 };
111 
112 /*
113  * In system mode we want L1_MAP to be based on ram offsets,
114  * while in user mode we want it to be based on virtual addresses.
115  *
116  * TODO: For user mode, see the caveat re host vs guest virtual
117  * address spaces near GUEST_ADDR_MAX.
118  */
119 #if !defined(CONFIG_USER_ONLY)
120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
121 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
122 #else
123 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
124 #endif
125 #else
126 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
127 #endif
128 
129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
131                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
132                   * BITS_PER_BYTE);
133 
134 /*
135  * L1 Mapping properties
136  */
137 int v_l1_size;
138 int v_l1_shift;
139 int v_l2_levels;
140 
141 void *l1_map[V_L1_MAX_SIZE];
142 
143 TBContext tb_ctx;
144 
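/*
 * Derive the geometry of the l1_map radix tree from L1_MAP_ADDR_SPACE_BITS,
 * TARGET_PAGE_BITS and V_L2_BITS: the number of entries in the top level
 * (v_l1_size), how far a page index must be shifted to select a top-level
 * entry (v_l1_shift), and how many V_L2_BITS-wide levels sit below the top
 * level (v_l2_levels).
 */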
145 static void page_table_config_init(void)
146 {
147     uint32_t v_l1_bits;
148 
149     assert(TARGET_PAGE_BITS);
150     /* The bits remaining after N lower levels of page tables.  */
151     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
152     if (v_l1_bits < V_L1_MIN_BITS) {
153         v_l1_bits += V_L2_BITS;
154     }
155 
156     v_l1_size = 1 << v_l1_bits;
157     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
158     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
159 
160     assert(v_l1_bits <= V_L1_MAX_BITS);
161     assert(v_l1_shift % V_L2_BITS == 0);
162     assert(v_l2_levels >= 0);
163 }
164 
165 /* Encode VAL as a signed leb128 sequence at P.
166    Return P incremented past the encoded value.  */
167 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
168 {
169     int more, byte;
170 
171     do {
172         byte = val & 0x7f;
173         val >>= 7;
174         more = !((val == 0 && (byte & 0x40) == 0)
175                  || (val == -1 && (byte & 0x40) != 0));
176         if (more) {
177             byte |= 0x80;
178         }
179         *p++ = byte;
180     } while (more);
181 
182     return p;
183 }
184 
185 /* Decode a signed leb128 sequence at *PP; increment *PP past the
186    decoded value.  Return the decoded value.  */
187 static target_long decode_sleb128(const uint8_t **pp)
188 {
189     const uint8_t *p = *pp;
190     target_long val = 0;
191     int byte, shift = 0;
192 
193     do {
194         byte = *p++;
195         val |= (target_ulong)(byte & 0x7f) << shift;
196         shift += 7;
197     } while (byte & 0x80);
198     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
199         val |= -(target_ulong)1 << shift;
200     }
201 
202     *pp = p;
203     return val;
204 }
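
/*
 * Illustrative example of the encoding above: a delta of 100 becomes the
 * two bytes { 0xe4, 0x00 } (0x64 with the 0x80 continuation bit, then 0x00),
 * while a delta of -2 fits in the single byte 0x7e.  Small deltas, which
 * dominate in practice, therefore usually occupy one byte each.
 */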
205 
/* Encode the data collected about the instructions while compiling the TB.
207    Place the data at BLOCK, and return the number of bytes consumed.
208 
209    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
210    which come from the target's insn_start data, followed by a uintptr_t
211    which comes from the host pc of the end of the code implementing the insn.
212 
213    Each line of the table is encoded as sleb128 deltas from the previous
214    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
215    That is, the first column is seeded with the guest pc, the last column
216    with the host pc, and the middle columns with zeros.  */
217 
218 static int encode_search(TranslationBlock *tb, uint8_t *block)
219 {
220     uint8_t *highwater = tcg_ctx->code_gen_highwater;
221     uint8_t *p = block;
222     int i, j, n;
223 
224     for (i = 0, n = tb->icount; i < n; ++i) {
225         target_ulong prev;
226 
227         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
228             if (i == 0) {
229                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
230             } else {
231                 prev = tcg_ctx->gen_insn_data[i - 1][j];
232             }
233             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
234         }
235         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
236         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
237 
238         /* Test for (pending) buffer overflow.  The assumption is that any
239            one row beginning below the high water mark cannot overrun
240            the buffer completely.  Thus we can test for overflow after
241            encoding a row without having to check during encoding.  */
242         if (unlikely(p > highwater)) {
243             return -1;
244         }
245     }
246 
247     return p - block;
248 }
249 
/* Restore the CPU state corresponding to 'searched_pc'.
 * When 'reset_icount' is true, the current TB is being interrupted and
 * the icount should be recalculated.
 */
254 int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
255                               uintptr_t searched_pc, bool reset_icount)
256 {
257     target_ulong data[TARGET_INSN_START_WORDS];
258     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
259     const uint8_t *p = tb->tc.ptr + tb->tc.size;
260     int i, j, num_insns = tb->icount;
261 #ifdef CONFIG_PROFILER
262     TCGProfile *prof = &tcg_ctx->prof;
263     int64_t ti = profile_getclock();
264 #endif
265 
266     searched_pc -= GETPC_ADJ;
267 
268     if (searched_pc < host_pc) {
269         return -1;
270     }
271 
272     memset(data, 0, sizeof(data));
273     if (!TARGET_TB_PCREL) {
274         data[0] = tb_pc(tb);
275     }
276 
277     /* Reconstruct the stored insn data while looking for the point at
278        which the end of the insn exceeds the searched_pc.  */
279     for (i = 0; i < num_insns; ++i) {
280         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
281             data[j] += decode_sleb128(&p);
282         }
283         host_pc += decode_sleb128(&p);
284         if (host_pc > searched_pc) {
285             goto found;
286         }
287     }
288     return -1;
289 
290  found:
291     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
292         assert(icount_enabled());
        /* Reset the cycle counter to the start of the block
           and shift it to the number of actually executed instructions */
295         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
296     }
297 
298     {
299         const struct TCGCPUOps *ops = cpu->cc->tcg_ops;
300         __typeof(ops->restore_state_to_opc) restore = ops->restore_state_to_opc;
301         if (restore) {
302             uint64_t d64[TARGET_INSN_START_WORDS];
303             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
304                 d64[i] = data[i];
305             }
306             restore(cpu, tb, d64);
307         } else {
308             restore_state_to_opc(cpu->env_ptr, tb, data);
309         }
310     }
311 
312 #ifdef CONFIG_PROFILER
313     qatomic_set(&prof->restore_time,
314                 prof->restore_time + profile_getclock() - ti);
315     qatomic_set(&prof->restore_count, prof->restore_count + 1);
316 #endif
317     return 0;
318 }
319 
320 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
321 {
322     /*
323      * The pc update associated with restore without exit will
324      * break the relative pc adjustments performed by TARGET_TB_PCREL.
325      */
326     if (TARGET_TB_PCREL) {
327         assert(will_exit);
328     }
329 
330     /*
331      * The host_pc has to be in the rx region of the code buffer.
332      * If it is not we will not be able to resolve it here.
333      * The two cases where host_pc will not be correct are:
334      *
335      *  - fault during translation (instruction fetch)
336      *  - fault from helper (not using GETPC() macro)
337      *
     * Either way we need to return early as we can't resolve it here.
339      */
340     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
341         TranslationBlock *tb = tcg_tb_lookup(host_pc);
342         if (tb) {
343             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
344             return true;
345         }
346     }
347     return false;
348 }
349 
350 void page_init(void)
351 {
352     page_size_init();
353     page_table_config_init();
354 
355 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
356     {
357 #ifdef HAVE_KINFO_GETVMMAP
358         struct kinfo_vmentry *freep;
359         int i, cnt;
360 
361         freep = kinfo_getvmmap(getpid(), &cnt);
362         if (freep) {
363             mmap_lock();
364             for (i = 0; i < cnt; i++) {
365                 unsigned long startaddr, endaddr;
366 
367                 startaddr = freep[i].kve_start;
368                 endaddr = freep[i].kve_end;
369                 if (h2g_valid(startaddr)) {
370                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
371 
372                     if (h2g_valid(endaddr)) {
373                         endaddr = h2g(endaddr);
374                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
375                     } else {
376 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
377                         endaddr = ~0ul;
378                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
379 #endif
380                     }
381                 }
382             }
383             free(freep);
384             mmap_unlock();
385         }
386 #else
387         FILE *f;
388 
389         last_brk = (unsigned long)sbrk(0);
390 
391         f = fopen("/compat/linux/proc/self/maps", "r");
392         if (f) {
393             mmap_lock();
394 
395             do {
396                 unsigned long startaddr, endaddr;
397                 int n;
398 
399                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
400 
401                 if (n == 2 && h2g_valid(startaddr)) {
402                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
403 
404                     if (h2g_valid(endaddr)) {
405                         endaddr = h2g(endaddr);
406                     } else {
407                         endaddr = ~0ul;
408                     }
409                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
410                 }
411             } while (!feof(f));
412 
413             fclose(f);
414             mmap_unlock();
415         }
416 #endif
417     }
418 #endif
419 }
420 
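/*
 * Return the PageDesc for page @index, walking the l1_map radix tree.
 * If @alloc is true, missing intermediate tables and PageDesc arrays are
 * allocated on demand; concurrent allocators are reconciled with a cmpxchg
 * so that exactly one allocation wins and the losers are freed.  If @alloc
 * is false and the page has no descriptor yet, return NULL.
 */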
421 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
422 {
423     PageDesc *pd;
424     void **lp;
425     int i;
426 
427     /* Level 1.  Always allocated.  */
428     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
429 
430     /* Level 2..N-1.  */
431     for (i = v_l2_levels; i > 0; i--) {
432         void **p = qatomic_rcu_read(lp);
433 
434         if (p == NULL) {
435             void *existing;
436 
437             if (!alloc) {
438                 return NULL;
439             }
440             p = g_new0(void *, V_L2_SIZE);
441             existing = qatomic_cmpxchg(lp, NULL, p);
442             if (unlikely(existing)) {
443                 g_free(p);
444                 p = existing;
445             }
446         }
447 
448         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
449     }
450 
451     pd = qatomic_rcu_read(lp);
452     if (pd == NULL) {
453         void *existing;
454 
455         if (!alloc) {
456             return NULL;
457         }
458         pd = g_new0(PageDesc, V_L2_SIZE);
459 #ifndef CONFIG_USER_ONLY
460         {
461             int i;
462 
463             for (i = 0; i < V_L2_SIZE; i++) {
464                 qemu_spin_init(&pd[i].lock);
465             }
466         }
467 #endif
468         existing = qatomic_cmpxchg(lp, NULL, pd);
469         if (unlikely(existing)) {
470 #ifndef CONFIG_USER_ONLY
471             {
472                 int i;
473 
474                 for (i = 0; i < V_L2_SIZE; i++) {
475                     qemu_spin_destroy(&pd[i].lock);
476                 }
477             }
478 #endif
479             g_free(pd);
480             pd = existing;
481         }
482     }
483 
484     return pd + (index & (V_L2_SIZE - 1));
485 }
486 
/* In user mode, page locks aren't used; the mmap_lock is enough. */
488 #ifdef CONFIG_USER_ONLY
489 struct page_collection *
490 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
491 {
492     return NULL;
493 }
494 
495 void page_collection_unlock(struct page_collection *set)
496 { }
497 #else /* !CONFIG_USER_ONLY */
498 
499 #ifdef CONFIG_DEBUG_TCG
500 
501 static __thread GHashTable *ht_pages_locked_debug;
502 
503 static void ht_pages_locked_debug_init(void)
504 {
505     if (ht_pages_locked_debug) {
506         return;
507     }
508     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
509 }
510 
511 static bool page_is_locked(const PageDesc *pd)
512 {
513     PageDesc *found;
514 
515     ht_pages_locked_debug_init();
516     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
517     return !!found;
518 }
519 
520 static void page_lock__debug(PageDesc *pd)
521 {
522     ht_pages_locked_debug_init();
523     g_assert(!page_is_locked(pd));
524     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
525 }
526 
527 static void page_unlock__debug(const PageDesc *pd)
528 {
529     bool removed;
530 
531     ht_pages_locked_debug_init();
532     g_assert(page_is_locked(pd));
533     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
534     g_assert(removed);
535 }
536 
537 void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
538 {
539     if (unlikely(!page_is_locked(pd))) {
540         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
541                      pd, file, line);
542         abort();
543     }
544 }
545 
546 void assert_no_pages_locked(void)
547 {
548     ht_pages_locked_debug_init();
549     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
550 }
551 
552 #else /* !CONFIG_DEBUG_TCG */
553 
554 static inline void page_lock__debug(const PageDesc *pd) { }
555 static inline void page_unlock__debug(const PageDesc *pd) { }
556 
557 #endif /* CONFIG_DEBUG_TCG */
558 
559 void page_lock(PageDesc *pd)
560 {
561     page_lock__debug(pd);
562     qemu_spin_lock(&pd->lock);
563 }
564 
565 void page_unlock(PageDesc *pd)
566 {
567     qemu_spin_unlock(&pd->lock);
568     page_unlock__debug(pd);
569 }
570 
571 static inline struct page_entry *
572 page_entry_new(PageDesc *pd, tb_page_addr_t index)
573 {
574     struct page_entry *pe = g_malloc(sizeof(*pe));
575 
576     pe->index = index;
577     pe->pd = pd;
578     pe->locked = false;
579     return pe;
580 }
581 
582 static void page_entry_destroy(gpointer p)
583 {
584     struct page_entry *pe = p;
585 
586     g_assert(pe->locked);
587     page_unlock(pe->pd);
588     g_free(pe);
589 }
590 
/* Returns false on success, i.e. the page lock was acquired. */
592 static bool page_entry_trylock(struct page_entry *pe)
593 {
594     bool busy;
595 
596     busy = qemu_spin_trylock(&pe->pd->lock);
597     if (!busy) {
598         g_assert(!pe->locked);
599         pe->locked = true;
600         page_lock__debug(pe->pd);
601     }
602     return busy;
603 }
604 
605 static void do_page_entry_lock(struct page_entry *pe)
606 {
607     page_lock(pe->pd);
608     g_assert(!pe->locked);
609     pe->locked = true;
610 }
611 
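/*
 * GTraverseFunc callbacks for g_tree_foreach(): lock or unlock every page
 * in a page_collection.  Both return FALSE so that traversal keeps going.
 */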
612 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
613 {
614     struct page_entry *pe = value;
615 
616     do_page_entry_lock(pe);
617     return FALSE;
618 }
619 
620 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
621 {
622     struct page_entry *pe = value;
623 
624     if (pe->locked) {
625         pe->locked = false;
626         page_unlock(pe->pd);
627     }
628     return FALSE;
629 }
630 
631 /*
632  * Trylock a page, and if successful, add the page to a collection.
633  * Returns true ("busy") if the page could not be locked; false otherwise.
634  */
635 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
636 {
637     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
638     struct page_entry *pe;
639     PageDesc *pd;
640 
641     pe = g_tree_lookup(set->tree, &index);
642     if (pe) {
643         return false;
644     }
645 
646     pd = page_find(index);
647     if (pd == NULL) {
648         return false;
649     }
650 
651     pe = page_entry_new(pd, index);
652     g_tree_insert(set->tree, &pe->index, pe);
653 
654     /*
655      * If this is either (1) the first insertion or (2) a page whose index
656      * is higher than any other so far, just lock the page and move on.
657      */
658     if (set->max == NULL || pe->index > set->max->index) {
659         set->max = pe;
660         do_page_entry_lock(pe);
661         return false;
662     }
663     /*
     * Try to acquire the lock out of order; if busy, report busy so that the
     * caller can drop all locks and reacquire them in ascending order.
666      */
667     return page_entry_trylock(pe);
668 }
669 
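/* GCompareDataFunc for the page_collection tree: order entries by page index. */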
670 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
671 {
672     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
673     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
674 
675     if (a == b) {
676         return 0;
677     } else if (a < b) {
678         return -1;
679     }
680     return 1;
681 }
682 
683 /*
684  * Lock a range of pages ([@start,@end[) as well as the pages of all
685  * intersecting TBs.
686  * Locking order: acquire locks in ascending order of page index.
687  */
688 struct page_collection *
689 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
690 {
691     struct page_collection *set = g_malloc(sizeof(*set));
692     tb_page_addr_t index;
693     PageDesc *pd;
694 
695     start >>= TARGET_PAGE_BITS;
696     end   >>= TARGET_PAGE_BITS;
697     g_assert(start <= end);
698 
699     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
700                                 page_entry_destroy);
701     set->max = NULL;
702     assert_no_pages_locked();
703 
704  retry:
705     g_tree_foreach(set->tree, page_entry_lock, NULL);
706 
707     for (index = start; index <= end; index++) {
708         TranslationBlock *tb;
709         int n;
710 
711         pd = page_find(index);
712         if (pd == NULL) {
713             continue;
714         }
715         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
716             g_tree_foreach(set->tree, page_entry_unlock, NULL);
717             goto retry;
718         }
719         assert_page_locked(pd);
720         PAGE_FOR_EACH_TB(pd, tb, n) {
721             if (page_trylock_add(set, tb_page_addr0(tb)) ||
722                 (tb_page_addr1(tb) != -1 &&
723                  page_trylock_add(set, tb_page_addr1(tb)))) {
724                 /* drop all locks, and reacquire in order */
725                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
726                 goto retry;
727             }
728         }
729     }
730     return set;
731 }
732 
733 void page_collection_unlock(struct page_collection *set)
734 {
735     /* entries are unlocked and freed via page_entry_destroy */
736     g_tree_destroy(set->tree);
737     g_free(set);
738 }
739 
740 #endif /* !CONFIG_USER_ONLY */
741 
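/*
 * Translate a block of guest code: allocate a TranslationBlock, run the
 * target frontend (gen_intermediate_code) to build TCG ops, emit host code
 * with tcg_gen_code, append the pc-search data produced by encode_search,
 * and finally publish the TB via tcg_tb_insert() and tb_link_page().  If
 * the code buffer overflows, translation is restarted, flushing the TB
 * cache if necessary.
 */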
742 /* Called with mmap_lock held for user mode emulation.  */
743 TranslationBlock *tb_gen_code(CPUState *cpu,
744                               target_ulong pc, target_ulong cs_base,
745                               uint32_t flags, int cflags)
746 {
747     CPUArchState *env = cpu->env_ptr;
748     TranslationBlock *tb, *existing_tb;
749     tb_page_addr_t phys_pc;
750     tcg_insn_unit *gen_code_buf;
751     int gen_code_size, search_size, max_insns;
752 #ifdef CONFIG_PROFILER
753     TCGProfile *prof = &tcg_ctx->prof;
754     int64_t ti;
755 #endif
756     void *host_pc;
757 
758     assert_memory_lock();
759     qemu_thread_jit_write();
760 
761     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
762 
763     if (phys_pc == -1) {
764         /* Generate a one-shot TB with 1 insn in it */
765         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
766     }
767 
768     max_insns = cflags & CF_COUNT_MASK;
769     if (max_insns == 0) {
770         max_insns = TCG_MAX_INSNS;
771     }
772     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
773 
774  buffer_overflow:
775     tb = tcg_tb_alloc(tcg_ctx);
776     if (unlikely(!tb)) {
777         /* flush must be done */
778         tb_flush(cpu);
779         mmap_unlock();
780         /* Make the execution loop process the flush as soon as possible.  */
781         cpu->exception_index = EXCP_INTERRUPT;
782         cpu_loop_exit(cpu);
783     }
784 
785     gen_code_buf = tcg_ctx->code_gen_ptr;
786     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
787 #if !TARGET_TB_PCREL
788     tb->pc = pc;
789 #endif
790     tb->cs_base = cs_base;
791     tb->flags = flags;
792     tb->cflags = cflags;
793     tb->trace_vcpu_dstate = *cpu->trace_dstate;
794     tb_set_page_addr0(tb, phys_pc);
795     tb_set_page_addr1(tb, -1);
796     tcg_ctx->tb_cflags = cflags;
797  tb_overflow:
798 
799 #ifdef CONFIG_PROFILER
800     /* includes aborted translations because of exceptions */
801     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
802     ti = profile_getclock();
803 #endif
804 
805     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
806     if (unlikely(gen_code_size != 0)) {
807         goto error_return;
808     }
809 
810     tcg_func_start(tcg_ctx);
811 
812     tcg_ctx->cpu = env_cpu(env);
813     gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
814     assert(tb->size != 0);
815     tcg_ctx->cpu = NULL;
816     max_insns = tb->icount;
817 
818     trace_translate_block(tb, pc, tb->tc.ptr);
819 
820     /* generate machine code */
821     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
822     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
823     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
824     if (TCG_TARGET_HAS_direct_jump) {
825         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
826         tcg_ctx->tb_jmp_target_addr = NULL;
827     } else {
828         tcg_ctx->tb_jmp_insn_offset = NULL;
829         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
830     }
831 
832 #ifdef CONFIG_PROFILER
833     qatomic_set(&prof->tb_count, prof->tb_count + 1);
834     qatomic_set(&prof->interm_time,
835                 prof->interm_time + profile_getclock() - ti);
836     ti = profile_getclock();
837 #endif
838 
839     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
840     if (unlikely(gen_code_size < 0)) {
841  error_return:
842         switch (gen_code_size) {
843         case -1:
844             /*
845              * Overflow of code_gen_buffer, or the current slice of it.
846              *
847              * TODO: We don't need to re-do gen_intermediate_code, nor
848              * should we re-do the tcg optimization currently hidden
849              * inside tcg_gen_code.  All that should be required is to
850              * flush the TBs, allocate a new TB, re-initialize it per
851              * above, and re-do the actual code generation.
852              */
853             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
854                           "Restarting code generation for "
855                           "code_gen_buffer overflow\n");
856             goto buffer_overflow;
857 
858         case -2:
859             /*
860              * The code generated for the TranslationBlock is too large.
861              * The maximum size allowed by the unwind info is 64k.
862              * There may be stricter constraints from relocations
863              * in the tcg backend.
864              *
865              * Try again with half as many insns as we attempted this time.
866              * If a single insn overflows, there's a bug somewhere...
867              */
868             assert(max_insns > 1);
869             max_insns /= 2;
870             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
871                           "Restarting code generation with "
872                           "smaller translation block (max %d insns)\n",
873                           max_insns);
874             goto tb_overflow;
875 
876         default:
877             g_assert_not_reached();
878         }
879     }
880     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
881     if (unlikely(search_size < 0)) {
882         goto buffer_overflow;
883     }
884     tb->tc.size = gen_code_size;
885 
886 #ifdef CONFIG_PROFILER
887     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
888     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
889     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
890     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
891 #endif
892 
893 #ifdef DEBUG_DISAS
894     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
895         qemu_log_in_addr_range(pc)) {
896         FILE *logfile = qemu_log_trylock();
897         if (logfile) {
898             int code_size, data_size;
899             const tcg_target_ulong *rx_data_gen_ptr;
900             size_t chunk_start;
901             int insn = 0;
902 
903             if (tcg_ctx->data_gen_ptr) {
904                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
905                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
906                 data_size = gen_code_size - code_size;
907             } else {
908                 rx_data_gen_ptr = 0;
909                 code_size = gen_code_size;
910                 data_size = 0;
911             }
912 
913             /* Dump header and the first instruction */
914             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
915             fprintf(logfile,
916                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
917                     tcg_ctx->gen_insn_data[insn][0]);
918             chunk_start = tcg_ctx->gen_insn_end_off[insn];
919             disas(logfile, tb->tc.ptr, chunk_start);
920 
921             /*
922              * Dump each instruction chunk, wrapping up empty chunks into
923              * the next instruction. The whole array is offset so the
924              * first entry is the beginning of the 2nd instruction.
925              */
926             while (insn < tb->icount) {
927                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
928                 if (chunk_end > chunk_start) {
929                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
930                             tcg_ctx->gen_insn_data[insn][0]);
931                     disas(logfile, tb->tc.ptr + chunk_start,
932                           chunk_end - chunk_start);
933                     chunk_start = chunk_end;
934                 }
935                 insn++;
936             }
937 
938             if (chunk_start < code_size) {
939                 fprintf(logfile, "  -- tb slow paths + alignment\n");
940                 disas(logfile, tb->tc.ptr + chunk_start,
941                       code_size - chunk_start);
942             }
943 
944             /* Finally dump any data we may have after the block */
945             if (data_size) {
946                 int i;
947                 fprintf(logfile, "  data: [size=%d]\n", data_size);
948                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
949                     if (sizeof(tcg_target_ulong) == 8) {
950                         fprintf(logfile,
951                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
952                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
953                     } else if (sizeof(tcg_target_ulong) == 4) {
954                         fprintf(logfile,
955                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
956                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
957                     } else {
958                         qemu_build_not_reached();
959                     }
960                 }
961             }
962             fprintf(logfile, "\n");
963             qemu_log_unlock(logfile);
964         }
965     }
966 #endif
967 
968     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
969         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
970                  CODE_GEN_ALIGN));
971 
972     /* init jump list */
973     qemu_spin_init(&tb->jmp_lock);
974     tb->jmp_list_head = (uintptr_t)NULL;
975     tb->jmp_list_next[0] = (uintptr_t)NULL;
976     tb->jmp_list_next[1] = (uintptr_t)NULL;
977     tb->jmp_dest[0] = (uintptr_t)NULL;
978     tb->jmp_dest[1] = (uintptr_t)NULL;
979 
980     /* init original jump addresses which have been set during tcg_gen_code() */
981     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
982         tb_reset_jump(tb, 0);
983     }
984     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
985         tb_reset_jump(tb, 1);
986     }
987 
988     /*
989      * If the TB is not associated with a physical RAM page then it must be
990      * a temporary one-insn TB, and we have nothing left to do. Return early
991      * before attempting to link to other TBs or add to the lookup table.
992      */
993     if (tb_page_addr0(tb) == -1) {
994         return tb;
995     }
996 
997     /*
     * Insert the TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise, an unwind happening inside the TB might fail
     * to look the TB up by its host PC.
1001      */
1002     tcg_tb_insert(tb);
1003 
1004     /*
1005      * No explicit memory barrier is required -- tb_link_page() makes the
1006      * TB visible in a consistent state.
1007      */
1008     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
1009     /* if the TB already exists, discard what we just translated */
1010     if (unlikely(existing_tb != tb)) {
1011         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1012 
1013         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1014         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1015         tcg_tb_remove(tb);
1016         return existing_tb;
1017     }
1018     return tb;
1019 }
1020 
1021 /* user-mode: call with mmap_lock held */
1022 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1023 {
1024     TranslationBlock *tb;
1025 
1026     assert_memory_lock();
1027 
1028     tb = tcg_tb_lookup(retaddr);
1029     if (tb) {
1030         /* We can use retranslation to find the PC.  */
1031         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1032         tb_phys_invalidate(tb, -1);
1033     } else {
1034         /* The exception probably happened in a helper.  The CPU state should
1035            have been saved before calling it. Fetch the PC from there.  */
1036         CPUArchState *env = cpu->env_ptr;
1037         target_ulong pc, cs_base;
1038         tb_page_addr_t addr;
1039         uint32_t flags;
1040 
1041         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1042         addr = get_page_addr_code(env, pc);
1043         if (addr != -1) {
1044             tb_invalidate_phys_range(addr, addr + 1);
1045         }
1046     }
1047 }
1048 
1049 #ifndef CONFIG_USER_ONLY
1050 /*
1051  * In deterministic execution mode, instructions doing device I/Os
1052  * must be at the end of the TB.
1053  *
1054  * Called by softmmu_template.h, with iothread mutex not held.
1055  */
1056 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1057 {
1058     TranslationBlock *tb;
1059     CPUClass *cc;
1060     uint32_t n;
1061 
1062     tb = tcg_tb_lookup(retaddr);
1063     if (!tb) {
1064         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1065                   (void *)retaddr);
1066     }
1067     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1068 
1069     /*
1070      * Some guests must re-execute the branch when re-executing a delay
1071      * slot instruction.  When this is the case, adjust icount and N
1072      * to account for the re-execution of the branch.
1073      */
1074     n = 1;
1075     cc = CPU_GET_CLASS(cpu);
1076     if (cc->tcg_ops->io_recompile_replay_branch &&
1077         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1078         cpu_neg(cpu)->icount_decr.u16.low++;
1079         n = 2;
1080     }
1081 
1082     /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
1085      * operations only (which execute after completion) so we don't
1086      * double instrument the instruction.
1087      */
1088     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1089 
1090     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1091         target_ulong pc = log_pc(cpu, tb);
1092         if (qemu_log_in_addr_range(pc)) {
1093             qemu_log("cpu_io_recompile: rewound execution of TB to "
1094                      TARGET_FMT_lx "\n", pc);
1095         }
1096     }
1097 
1098     cpu_loop_exit_noexc(cpu);
1099 }
1100 
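/*
 * Format the QHT statistics for the TB hash table into @buf: head bucket
 * usage plus histograms of bucket occupancy and chain length.
 */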
1101 static void print_qht_statistics(struct qht_stats hst, GString *buf)
1102 {
1103     uint32_t hgram_opts;
1104     size_t hgram_bins;
1105     char *hgram;
1106 
1107     if (!hst.head_buckets) {
1108         return;
1109     }
1110     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1111                            "(%0.2f%% head buckets used)\n",
1112                            hst.used_head_buckets, hst.head_buckets,
1113                            (double)hst.used_head_buckets /
1114                            hst.head_buckets * 100);
1115 
1116     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1117     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1118     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1119         hgram_opts |= QDIST_PR_NODECIMAL;
1120     }
1121     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1122     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1123                            "Histogram: %s\n",
1124                            qdist_avg(&hst.occupancy) * 100, hgram);
1125     g_free(hgram);
1126 
1127     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1128     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1129     if (hgram_bins > 10) {
1130         hgram_bins = 10;
1131     } else {
1132         hgram_bins = 0;
1133         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1134     }
1135     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1136     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1137                            "Histogram: %s\n",
1138                            qdist_avg(&hst.chain), hgram);
1139     g_free(hgram);
1140 }
1141 
1142 struct tb_tree_stats {
1143     size_t nb_tbs;
1144     size_t host_size;
1145     size_t target_size;
1146     size_t max_target_size;
1147     size_t direct_jmp_count;
1148     size_t direct_jmp2_count;
1149     size_t cross_page;
1150 };
1151 
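/* tcg_tb_foreach() callback: accumulate per-TB statistics into @data. */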
1152 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
1153 {
1154     const TranslationBlock *tb = value;
1155     struct tb_tree_stats *tst = data;
1156 
1157     tst->nb_tbs++;
1158     tst->host_size += tb->tc.size;
1159     tst->target_size += tb->size;
1160     if (tb->size > tst->max_target_size) {
1161         tst->max_target_size = tb->size;
1162     }
1163     if (tb_page_addr1(tb) != -1) {
1164         tst->cross_page++;
1165     }
1166     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1167         tst->direct_jmp_count++;
1168         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1169             tst->direct_jmp2_count++;
1170         }
1171     }
1172     return false;
1173 }
1174 
1175 void dump_exec_info(GString *buf)
1176 {
1177     struct tb_tree_stats tst = {};
1178     struct qht_stats hst;
1179     size_t nb_tbs, flush_full, flush_part, flush_elide;
1180 
1181     tcg_tb_foreach(tb_tree_stats_iter, &tst);
1182     nb_tbs = tst.nb_tbs;
1183     /* XXX: avoid using doubles ? */
1184     g_string_append_printf(buf, "Translation buffer state:\n");
1185     /*
1186      * Report total code size including the padding and TB structs;
1187      * otherwise users might think "-accel tcg,tb-size" is not honoured.
1188      * For avg host size we use the precise numbers from tb_tree_stats though.
1189      */
1190     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
1191                            tcg_code_size(), tcg_code_capacity());
1192     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
1193     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
1194                            nb_tbs ? tst.target_size / nb_tbs : 0,
1195                            tst.max_target_size);
1196     g_string_append_printf(buf, "TB avg host size    %zu bytes "
1197                            "(expansion ratio: %0.1f)\n",
1198                            nb_tbs ? tst.host_size / nb_tbs : 0,
1199                            tst.target_size ?
1200                            (double)tst.host_size / tst.target_size : 0);
1201     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
1202                            tst.cross_page,
1203                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
1204     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
1205                            "(2 jumps=%zu %zu%%)\n",
1206                            tst.direct_jmp_count,
1207                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
1208                            tst.direct_jmp2_count,
1209                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
1210 
1211     qht_statistics_init(&tb_ctx.htable, &hst);
1212     print_qht_statistics(hst, buf);
1213     qht_statistics_destroy(&hst);
1214 
1215     g_string_append_printf(buf, "\nStatistics:\n");
1216     g_string_append_printf(buf, "TB flush count      %u\n",
1217                            qatomic_read(&tb_ctx.tb_flush_count));
1218     g_string_append_printf(buf, "TB invalidate count %u\n",
1219                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
1220 
1221     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
1222     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
1223     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
1224     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
1225     tcg_dump_info(buf);
1226 }
1227 
1228 #else /* CONFIG_USER_ONLY */
1229 
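/*
 * User-mode variant of cpu_interrupt: record the pending interrupt and make
 * icount_decr negative so that the vCPU notices the request and leaves the
 * translated-code loop at the next TB boundary.
 */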
1230 void cpu_interrupt(CPUState *cpu, int mask)
1231 {
1232     g_assert(qemu_mutex_iothread_locked());
1233     cpu->interrupt_request |= mask;
1234     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
1235 }
1236 
1237 /*
1238  * Walks guest process memory "regions" one by one
1239  * and calls callback function 'fn' for each region.
1240  */
1241 struct walk_memory_regions_data {
1242     walk_memory_regions_fn fn;
1243     void *priv;
1244     target_ulong start;
1245     int prot;
1246 };
1247 
1248 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1249                                    target_ulong end, int new_prot)
1250 {
1251     if (data->start != -1u) {
1252         int rc = data->fn(data->priv, data->start, end, data->prot);
1253         if (rc != 0) {
1254             return rc;
1255         }
1256     }
1257 
1258     data->start = (new_prot ? end : -1u);
1259     data->prot = new_prot;
1260 
1261     return 0;
1262 }
1263 
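/*
 * Recursively walk one level of the l1_map radix tree under @base, calling
 * walk_memory_regions_end() whenever the protection flags change so that
 * contiguous pages with identical flags are reported as a single region.
 */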
1264 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1265                                  target_ulong base, int level, void **lp)
1266 {
1267     target_ulong pa;
1268     int i, rc;
1269 
1270     if (*lp == NULL) {
1271         return walk_memory_regions_end(data, base, 0);
1272     }
1273 
1274     if (level == 0) {
1275         PageDesc *pd = *lp;
1276 
1277         for (i = 0; i < V_L2_SIZE; ++i) {
1278             int prot = pd[i].flags;
1279 
1280             pa = base | (i << TARGET_PAGE_BITS);
1281             if (prot != data->prot) {
1282                 rc = walk_memory_regions_end(data, pa, prot);
1283                 if (rc != 0) {
1284                     return rc;
1285                 }
1286             }
1287         }
1288     } else {
1289         void **pp = *lp;
1290 
1291         for (i = 0; i < V_L2_SIZE; ++i) {
1292             pa = base | ((target_ulong)i <<
1293                 (TARGET_PAGE_BITS + V_L2_BITS * level));
1294             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1295             if (rc != 0) {
1296                 return rc;
1297             }
1298         }
1299     }
1300 
1301     return 0;
1302 }
1303 
1304 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1305 {
1306     struct walk_memory_regions_data data;
1307     uintptr_t i, l1_sz = v_l1_size;
1308 
1309     data.fn = fn;
1310     data.priv = priv;
1311     data.start = -1u;
1312     data.prot = 0;
1313 
1314     for (i = 0; i < l1_sz; i++) {
1315         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
1316         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
1317         if (rc != 0) {
1318             return rc;
1319         }
1320     }
1321 
1322     return walk_memory_regions_end(&data, 0, 0);
1323 }
1324 
1325 static int dump_region(void *priv, target_ulong start,
1326     target_ulong end, unsigned long prot)
1327 {
1328     FILE *f = (FILE *)priv;
1329 
1330     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
1331         " "TARGET_FMT_lx" %c%c%c\n",
1332         start, end, end - start,
1333         ((prot & PAGE_READ) ? 'r' : '-'),
1334         ((prot & PAGE_WRITE) ? 'w' : '-'),
1335         ((prot & PAGE_EXEC) ? 'x' : '-'));
1336 
1337     return 0;
1338 }
1339 
1340 /* dump memory mappings */
1341 void page_dump(FILE *f)
1342 {
1343     const int length = sizeof(target_ulong) * 2;
1344     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
1345             length, "start", length, "end", length, "size", "prot");
1346     walk_memory_regions(f, dump_region);
1347 }
1348 
1349 int page_get_flags(target_ulong address)
1350 {
1351     PageDesc *p;
1352 
1353     p = page_find(address >> TARGET_PAGE_BITS);
1354     if (!p) {
1355         return 0;
1356     }
1357     return p->flags;
1358 }
1359 
1360 /*
1361  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
1362  * By default, they are not kept.
1363  */
1364 #ifndef PAGE_TARGET_STICKY
1365 #define PAGE_TARGET_STICKY  0
1366 #endif
1367 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
1368 
1369 /* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is set automatically when PAGE_WRITE is set.
   The mmap_lock should already be held.  */
1372 void page_set_flags(target_ulong start, target_ulong end, int flags)
1373 {
1374     target_ulong addr, len;
1375     bool reset, inval_tb = false;
1376 
1377     /* This function should never be called with addresses outside the
1378        guest address space.  If this assert fires, it probably indicates
1379        a missing call to h2g_valid.  */
1380     assert(end - 1 <= GUEST_ADDR_MAX);
1381     assert(start < end);
1382     /* Only set PAGE_ANON with new mappings. */
1383     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
1384     assert_memory_lock();
1385 
1386     start = start & TARGET_PAGE_MASK;
1387     end = TARGET_PAGE_ALIGN(end);
1388 
1389     if (flags & PAGE_WRITE) {
1390         flags |= PAGE_WRITE_ORG;
1391     }
1392     reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
1393     if (reset) {
1394         page_reset_target_data(start, end);
1395     }
1396     flags &= ~PAGE_RESET;
1397 
1398     for (addr = start, len = end - start;
1399          len != 0;
1400          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1401         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
1402 
1403         /*
1404          * If the page was executable, but is reset, or is no longer
1405          * executable, or has become writable, then invalidate any code.
1406          */
1407         if ((p->flags & PAGE_EXEC)
1408             && (reset ||
1409                 !(flags & PAGE_EXEC) ||
1410                 (flags & ~p->flags & PAGE_WRITE))) {
1411             inval_tb = true;
1412         }
1413         /* Using mprotect on a page does not change sticky bits. */
1414         p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
1415     }
1416 
1417     if (inval_tb) {
1418         tb_invalidate_phys_range(start, end);
1419     }
1420 }
1421 
1422 int page_check_range(target_ulong start, target_ulong len, int flags)
1423 {
1424     PageDesc *p;
1425     target_ulong end;
1426     target_ulong addr;
1427 
1428     /* This function should never be called with addresses outside the
1429        guest address space.  If this assert fires, it probably indicates
1430        a missing call to h2g_valid.  */
1431     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
1432         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
1433     }
1434 
1435     if (len == 0) {
1436         return 0;
1437     }
1438     if (start + len - 1 < start) {
1439         /* We've wrapped around.  */
1440         return -1;
1441     }
1442 
    /* Must do this before we lose bits in the next step. */
1444     end = TARGET_PAGE_ALIGN(start + len);
1445     start = start & TARGET_PAGE_MASK;
1446 
1447     for (addr = start, len = end - start;
1448          len != 0;
1449          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1450         p = page_find(addr >> TARGET_PAGE_BITS);
1451         if (!p) {
1452             return -1;
1453         }
1454         if (!(p->flags & PAGE_VALID)) {
1455             return -1;
1456         }
1457 
1458         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
1459             return -1;
1460         }
1461         if (flags & PAGE_WRITE) {
1462             if (!(p->flags & PAGE_WRITE_ORG)) {
1463                 return -1;
1464             }
1465             /* unprotect the page if it was put read-only because it
1466                contains translated code */
1467             if (!(p->flags & PAGE_WRITE)) {
1468                 if (!page_unprotect(addr, 0)) {
1469                     return -1;
1470                 }
1471             }
1472         }
1473     }
1474     return 0;
1475 }
1476 
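/*
 * Write-protect the host page backing @page_addr once it contains translated
 * code, so that guest writes to it fault and can be caught by page_unprotect()
 * to invalidate the affected TBs.
 */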
1477 void page_protect(tb_page_addr_t page_addr)
1478 {
1479     target_ulong addr;
1480     PageDesc *p;
1481     int prot;
1482 
1483     p = page_find(page_addr >> TARGET_PAGE_BITS);
1484     if (p && (p->flags & PAGE_WRITE)) {
1485         /*
1486          * Force the host page as non writable (writes will have a page fault +
1487          * mprotect overhead).
1488          */
1489         page_addr &= qemu_host_page_mask;
1490         prot = 0;
1491         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1492              addr += TARGET_PAGE_SIZE) {
1493 
1494             p = page_find(addr >> TARGET_PAGE_BITS);
1495             if (!p) {
1496                 continue;
1497             }
1498             prot |= p->flags;
1499             p->flags &= ~PAGE_WRITE;
1500         }
1501         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1502                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1503     }
1504 }
1505 
1506 /* called from signal handler: invalidate the code and unprotect the
1507  * page. Return 0 if the fault was not handled, 1 if it was handled,
1508  * and 2 if it was handled but the caller must cause the TB to be
1509  * immediately exited. (We can only return 2 if the 'pc' argument is
1510  * non-zero.)
1511  */
1512 int page_unprotect(target_ulong address, uintptr_t pc)
1513 {
1514     unsigned int prot;
1515     bool current_tb_invalidated;
1516     PageDesc *p;
1517     target_ulong host_start, host_end, addr;
1518 
1519     /* Technically this isn't safe inside a signal handler.  However we
1520        know this only ever happens in a synchronous SEGV handler, so in
1521        practice it seems to be ok.  */
1522     mmap_lock();
1523 
1524     p = page_find(address >> TARGET_PAGE_BITS);
1525     if (!p) {
1526         mmap_unlock();
1527         return 0;
1528     }
1529 
1530     /* if the page was really writable, then we change its
1531        protection back to writable */
1532     if (p->flags & PAGE_WRITE_ORG) {
1533         current_tb_invalidated = false;
1534         if (p->flags & PAGE_WRITE) {
1535             /* If the page is actually marked WRITE then assume this is because
1536              * this thread raced with another one which got here first and
1537              * set the page to PAGE_WRITE and did the TB invalidate for us.
1538              */
1539 #ifdef TARGET_HAS_PRECISE_SMC
1540             TranslationBlock *current_tb = tcg_tb_lookup(pc);
1541             if (current_tb) {
1542                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
1543             }
1544 #endif
1545         } else {
1546             host_start = address & qemu_host_page_mask;
1547             host_end = host_start + qemu_host_page_size;
1548 
1549             prot = 0;
1550             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
1551                 p = page_find(addr >> TARGET_PAGE_BITS);
1552                 p->flags |= PAGE_WRITE;
1553                 prot |= p->flags;
1554 
1555                 /* and since the content will be modified, we must invalidate
1556                    the corresponding translated code. */
1557                 current_tb_invalidated |=
1558                     tb_invalidate_phys_page_unwind(addr, pc);
1559             }
1560             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
1561                      prot & PAGE_BITS);
1562         }
1563         mmap_unlock();
1564         /* If current TB was invalidated return to main loop */
1565         return current_tb_invalidated ? 2 : 1;
1566     }
1567     mmap_unlock();
1568     return 0;
1569 }
1570 #endif /* CONFIG_USER_ONLY */
1571 
1572 /*
1573  * Called by generic code at e.g. cpu reset after cpu creation,
1574  * therefore we must be prepared to allocate the jump cache.
1575  */
1576 void tcg_flush_jmp_cache(CPUState *cpu)
1577 {
1578     CPUJumpCache *jc = cpu->tb_jmp_cache;
1579 
1580     if (likely(jc)) {
1581         for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
1582             qatomic_set(&jc->array[i].tb, NULL);
1583         }
1584     } else {
1585         /* This should happen once during realize, and thus never race. */
1586         jc = g_new0(CPUJumpCache, 1);
1587         jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
1588         assert(jc == NULL);
1589     }
1590 }
1591 
/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
1593 void tcg_flush_softmmu_tlb(CPUState *cs)
1594 {
1595 #ifdef CONFIG_SOFTMMU
1596     tlb_flush(cs);
1597 #endif
1598 }
1599