xref: /openbmc/qemu/accel/tcg/translate-all.c (revision be5df2edb5d69ff3107c5616aa035a9ba8d0422e)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 
66 /* make various TB consistency checks */
67 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page; its address is also used as the key when
 *          the entry is inserted into a &struct page_collection tree
 * @locked: whether the page is currently locked through this entry
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
86 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index;
 *          NULL until the first page has been inserted
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};
111 
/*
 * In system mode we want L1_MAP to be based on ram offsets,
 * while in user mode we want it to be based on virtual addresses.
 *
 * In both modes the width is capped at HOST_LONG_BITS.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else /* CONFIG_USER_ONLY */
# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif /* !CONFIG_USER_ONLY */
128 
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * L1 Mapping properties
 *
 * All three values are assigned by page_table_config_init().
 */
/* Number of l1_map entries in use (1 << number of level-1 index bits). */
int v_l1_size;
/* Right shift applied to a page index to obtain its level-1 slot. */
int v_l1_shift;
/* Number of pointer-table levels walked between l1_map and the leaf array. */
int v_l2_levels;

/* Top level of the page table; indexed by page_find_alloc(). */
void *l1_map[V_L1_MAX_SIZE];

TBContext tb_ctx;
144 
145 static void page_table_config_init(void)
146 {
147     uint32_t v_l1_bits;
148 
149     assert(TARGET_PAGE_BITS);
150     /* The bits remaining after N lower levels of page tables.  */
151     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
152     if (v_l1_bits < V_L1_MIN_BITS) {
153         v_l1_bits += V_L2_BITS;
154     }
155 
156     v_l1_size = 1 << v_l1_bits;
157     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
158     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
159 
160     assert(v_l1_bits <= V_L1_MAX_BITS);
161     assert(v_l1_shift % V_L2_BITS == 0);
162     assert(v_l2_levels >= 0);
163 }
164 
165 /* Encode VAL as a signed leb128 sequence at P.
166    Return P incremented past the encoded value.  */
167 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
168 {
169     int more, byte;
170 
171     do {
172         byte = val & 0x7f;
173         val >>= 7;
174         more = !((val == 0 && (byte & 0x40) == 0)
175                  || (val == -1 && (byte & 0x40) != 0));
176         if (more) {
177             byte |= 0x80;
178         }
179         *p++ = byte;
180     } while (more);
181 
182     return p;
183 }
184 
185 /* Decode a signed leb128 sequence at *PP; increment *PP past the
186    decoded value.  Return the decoded value.  */
187 static target_long decode_sleb128(const uint8_t **pp)
188 {
189     const uint8_t *p = *pp;
190     target_long val = 0;
191     int byte, shift = 0;
192 
193     do {
194         byte = *p++;
195         val |= (target_ulong)(byte & 0x7f) << shift;
196         shift += 7;
197     } while (byte & 0x80);
198     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
199         val |= -(target_ulong)1 << shift;
200     }
201 
202     *pp = p;
203     return val;
204 }
205 
206 /* Encode the data collected about the instructions while compiling TB.
207    Place the data at BLOCK, and return the number of bytes consumed.
208 
209    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
210    which come from the target's insn_start data, followed by a uintptr_t
211    which comes from the host pc of the end of the code implementing the insn.
212 
213    Each line of the table is encoded as sleb128 deltas from the previous
214    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
215    That is, the first column is seeded with the guest pc, the last column
216    with the host pc, and the middle columns with zeros.  */
217 
218 static int encode_search(TranslationBlock *tb, uint8_t *block)
219 {
220     uint8_t *highwater = tcg_ctx->code_gen_highwater;
221     uint8_t *p = block;
222     int i, j, n;
223 
224     for (i = 0, n = tb->icount; i < n; ++i) {
225         target_ulong prev;
226 
227         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
228             if (i == 0) {
229                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
230             } else {
231                 prev = tcg_ctx->gen_insn_data[i - 1][j];
232             }
233             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
234         }
235         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
236         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
237 
238         /* Test for (pending) buffer overflow.  The assumption is that any
239            one row beginning below the high water mark cannot overrun
240            the buffer completely.  Thus we can test for overflow after
241            encoding a row without having to check during encoding.  */
242         if (unlikely(p > highwater)) {
243             return -1;
244         }
245     }
246 
247     return p - block;
248 }
249 
250 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
251                                    uint64_t *data)
252 {
253     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
254     const uint8_t *p = tb->tc.ptr + tb->tc.size;
255     int i, j, num_insns = tb->icount;
256 
257     host_pc -= GETPC_ADJ;
258 
259     if (host_pc < iter_pc) {
260         return -1;
261     }
262 
263     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
264     if (!TARGET_TB_PCREL) {
265         data[0] = tb_pc(tb);
266     }
267 
268     /*
269      * Reconstruct the stored insn data while looking for the point
270      * at which the end of the insn exceeds host_pc.
271      */
272     for (i = 0; i < num_insns; ++i) {
273         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
274             data[j] += decode_sleb128(&p);
275         }
276         iter_pc += decode_sleb128(&p);
277         if (iter_pc > host_pc) {
278             return num_insns - i;
279         }
280     }
281     return -1;
282 }
283 
284 /*
285  * The cpu state corresponding to 'host_pc' is restored in
286  * preparation for exiting the TB.
287  */
288 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
289                                uintptr_t host_pc)
290 {
291     uint64_t data[TARGET_INSN_START_WORDS];
292 #ifdef CONFIG_PROFILER
293     TCGProfile *prof = &tcg_ctx->prof;
294     int64_t ti = profile_getclock();
295 #endif
296     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
297 
298     if (insns_left < 0) {
299         return;
300     }
301 
302     if (tb_cflags(tb) & CF_USE_ICOUNT) {
303         assert(icount_enabled());
304         /*
305          * Reset the cycle counter to the start of the block and
306          * shift if to the number of actually executed instructions.
307          */
308         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
309     }
310 
311     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
312 
313 #ifdef CONFIG_PROFILER
314     qatomic_set(&prof->restore_time,
315                 prof->restore_time + profile_getclock() - ti);
316     qatomic_set(&prof->restore_count, prof->restore_count + 1);
317 #endif
318 }
319 
320 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
321 {
322     /*
323      * The host_pc has to be in the rx region of the code buffer.
324      * If it is not we will not be able to resolve it here.
325      * The two cases where host_pc will not be correct are:
326      *
327      *  - fault during translation (instruction fetch)
328      *  - fault from helper (not using GETPC() macro)
329      *
330      * Either way we need return early as we can't resolve it here.
331      */
332     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
333         TranslationBlock *tb = tcg_tb_lookup(host_pc);
334         if (tb) {
335             cpu_restore_state_from_tb(cpu, tb, host_pc);
336             return true;
337         }
338     }
339     return false;
340 }
341 
342 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
343 {
344     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
345         TranslationBlock *tb = tcg_tb_lookup(host_pc);
346         if (tb) {
347             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
348         }
349     }
350     return false;
351 }
352 
/*
 * Initialise page-size globals and the page-table geometry.
 *
 * On BSD user-mode builds, additionally walk the host process's current
 * mappings and flag every range whose start is a valid guest address as
 * PAGE_RESERVED.  The mappings come from kinfo_getvmmap() when available,
 * otherwise from the Linux-compat /proc maps file.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        /* End is outside the guest range: clamp it. */
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            /*
             * Loop on fscanf()'s result rather than on feof(): a matching
             * failure consumes no input and a read error never sets the EOF
             * flag, so a feof()-driven loop could spin forever while
             * holding mmap_lock.
             */
            for (;;) {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
                if (n == EOF) {
                    break;
                }
                if (n != 2) {
                    int c;

                    /* Malformed line: discard it so the scan advances. */
                    do {
                        c = fgetc(f);
                    } while (c != '\n' && c != EOF);
                    if (c == EOF) {
                        break;
                    }
                    continue;
                }

                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            }

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
423 
424 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
425 {
426     PageDesc *pd;
427     void **lp;
428     int i;
429 
430     /* Level 1.  Always allocated.  */
431     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
432 
433     /* Level 2..N-1.  */
434     for (i = v_l2_levels; i > 0; i--) {
435         void **p = qatomic_rcu_read(lp);
436 
437         if (p == NULL) {
438             void *existing;
439 
440             if (!alloc) {
441                 return NULL;
442             }
443             p = g_new0(void *, V_L2_SIZE);
444             existing = qatomic_cmpxchg(lp, NULL, p);
445             if (unlikely(existing)) {
446                 g_free(p);
447                 p = existing;
448             }
449         }
450 
451         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
452     }
453 
454     pd = qatomic_rcu_read(lp);
455     if (pd == NULL) {
456         void *existing;
457 
458         if (!alloc) {
459             return NULL;
460         }
461         pd = g_new0(PageDesc, V_L2_SIZE);
462 #ifndef CONFIG_USER_ONLY
463         {
464             int i;
465 
466             for (i = 0; i < V_L2_SIZE; i++) {
467                 qemu_spin_init(&pd[i].lock);
468             }
469         }
470 #endif
471         existing = qatomic_cmpxchg(lp, NULL, pd);
472         if (unlikely(existing)) {
473 #ifndef CONFIG_USER_ONLY
474             {
475                 int i;
476 
477                 for (i = 0; i < V_L2_SIZE; i++) {
478                     qemu_spin_destroy(&pd[i].lock);
479                 }
480             }
481 #endif
482             g_free(pd);
483             pd = existing;
484         }
485     }
486 
487     return pd + (index & (V_L2_SIZE - 1));
488 }
489 
490 /* In user-mode page locks aren't used; mmap_lock is enough */
491 #ifdef CONFIG_USER_ONLY
/*
 * User-mode stand-in: no per-page locks are taken and no collection is
 * created, so NULL is always returned.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}
497 
/* User-mode stand-in: nothing was locked, so there is nothing to release. */
void page_collection_unlock(struct page_collection *set)
{ }
500 #else /* !CONFIG_USER_ONLY */
501 
502 #ifdef CONFIG_DEBUG_TCG
503 
504 static __thread GHashTable *ht_pages_locked_debug;
505 
506 static void ht_pages_locked_debug_init(void)
507 {
508     if (ht_pages_locked_debug) {
509         return;
510     }
511     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
512 }
513 
514 static bool page_is_locked(const PageDesc *pd)
515 {
516     PageDesc *found;
517 
518     ht_pages_locked_debug_init();
519     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
520     return !!found;
521 }
522 
/*
 * Record @pd in this thread's table of locked pages.
 * Asserts that @pd was not already recorded.
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
529 
530 static void page_unlock__debug(const PageDesc *pd)
531 {
532     bool removed;
533 
534     ht_pages_locked_debug_init();
535     g_assert(page_is_locked(pd));
536     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
537     g_assert(removed);
538 }
539 
540 void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
541 {
542     if (unlikely(!page_is_locked(pd))) {
543         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
544                      pd, file, line);
545         abort();
546     }
547 }
548 
549 void assert_no_pages_locked(void)
550 {
551     ht_pages_locked_debug_init();
552     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
553 }
554 
555 #else /* !CONFIG_DEBUG_TCG */
556 
/* No-op variants used when CONFIG_DEBUG_TCG is not defined. */
static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }
559 
560 #endif /* CONFIG_DEBUG_TCG */
561 
/*
 * Take the spinlock of @pd.  The debug hook runs first, before the lock is
 * acquired.
 */
void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}
567 
/*
 * Release the spinlock of @pd.  The debug hook runs after the lock has been
 * dropped.
 */
void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}
573 
574 static inline struct page_entry *
575 page_entry_new(PageDesc *pd, tb_page_addr_t index)
576 {
577     struct page_entry *pe = g_malloc(sizeof(*pe));
578 
579     pe->index = index;
580     pe->pd = pd;
581     pe->locked = false;
582     return pe;
583 }
584 
585 static void page_entry_destroy(gpointer p)
586 {
587     struct page_entry *pe = p;
588 
589     g_assert(pe->locked);
590     page_unlock(pe->pd);
591     g_free(pe);
592 }
593 
594 /* returns false on success */
595 static bool page_entry_trylock(struct page_entry *pe)
596 {
597     bool busy;
598 
599     busy = qemu_spin_trylock(&pe->pd->lock);
600     if (!busy) {
601         g_assert(!pe->locked);
602         pe->locked = true;
603         page_lock__debug(pe->pd);
604     }
605     return busy;
606 }
607 
/*
 * Lock the page of @pe (blocking) and mark the entry as locked.
 * Asserts that the entry was not already marked as locked.
 */
static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}
614 
615 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
616 {
617     struct page_entry *pe = value;
618 
619     do_page_entry_lock(pe);
620     return FALSE;
621 }
622 
623 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
624 {
625     struct page_entry *pe = value;
626 
627     if (pe->locked) {
628         pe->locked = false;
629         page_unlock(pe->pd);
630     }
631     return FALSE;
632 }
633 
634 /*
635  * Trylock a page, and if successful, add the page to a collection.
636  * Returns true ("busy") if the page could not be locked; false otherwise.
637  */
638 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
639 {
640     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
641     struct page_entry *pe;
642     PageDesc *pd;
643 
644     pe = g_tree_lookup(set->tree, &index);
645     if (pe) {
646         return false;
647     }
648 
649     pd = page_find(index);
650     if (pd == NULL) {
651         return false;
652     }
653 
654     pe = page_entry_new(pd, index);
655     g_tree_insert(set->tree, &pe->index, pe);
656 
657     /*
658      * If this is either (1) the first insertion or (2) a page whose index
659      * is higher than any other so far, just lock the page and move on.
660      */
661     if (set->max == NULL || pe->index > set->max->index) {
662         set->max = pe;
663         do_page_entry_lock(pe);
664         return false;
665     }
666     /*
667      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
668      * locks in order.
669      */
670     return page_entry_trylock(pe);
671 }
672 
673 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
674 {
675     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
676     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
677 
678     if (a == b) {
679         return 0;
680     } else if (a < b) {
681         return -1;
682     }
683     return 1;
684 }
685 
/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * Returns a newly allocated collection holding every page that was locked;
 * release it with page_collection_unlock().
 *
 * NOTE(review): the loop below also visits the page that contains @end
 * (index <= end), which is wider than the half-open range stated above.
 * Confirm against callers whether this is intentional.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    /* From here on, start and end are page indexes, not addresses. */
    start >>= TARGET_PAGE_BITS;
    end   >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    /* Entries are unlocked and freed by page_entry_destroy on removal. */
    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /* (Re)lock every page collected so far; a no-op on the first pass. */
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            /* No descriptor for this page: nothing to lock. */
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* Out-of-order page was busy: drop all locks and start over. */
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* Also collect the page(s) of every TB found on this page. */
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
735 
/*
 * Unlock every page held in @set and free the collection, including @set
 * itself.
 */
void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}
742 
743 #endif /* !CONFIG_USER_ONLY */
744 
/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * @max_insns: in: instruction limit handed to gen_intermediate_code();
 *             out: overwritten with tb->icount after translation.
 * @ti:        only used with CONFIG_PROFILER: start time for the
 *             intermediate-code interval on entry, reset to the current
 *             profile clock before host code generation.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    /* A non-zero value means we got here via a jump to jmp_trans. */
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    /* tcg_ctx->cpu is only set for the duration of the translation. */
    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}
775 
776 /* Called with mmap_lock held for user mode emulation.  */
777 TranslationBlock *tb_gen_code(CPUState *cpu,
778                               target_ulong pc, target_ulong cs_base,
779                               uint32_t flags, int cflags)
780 {
781     CPUArchState *env = cpu->env_ptr;
782     TranslationBlock *tb, *existing_tb;
783     tb_page_addr_t phys_pc;
784     tcg_insn_unit *gen_code_buf;
785     int gen_code_size, search_size, max_insns;
786 #ifdef CONFIG_PROFILER
787     TCGProfile *prof = &tcg_ctx->prof;
788 #endif
789     int64_t ti;
790     void *host_pc;
791 
792     assert_memory_lock();
793     qemu_thread_jit_write();
794 
795     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
796 
797     if (phys_pc == -1) {
798         /* Generate a one-shot TB with 1 insn in it */
799         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
800     }
801 
802     max_insns = cflags & CF_COUNT_MASK;
803     if (max_insns == 0) {
804         max_insns = TCG_MAX_INSNS;
805     }
806     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
807 
808  buffer_overflow:
809     tb = tcg_tb_alloc(tcg_ctx);
810     if (unlikely(!tb)) {
811         /* flush must be done */
812         tb_flush(cpu);
813         mmap_unlock();
814         /* Make the execution loop process the flush as soon as possible.  */
815         cpu->exception_index = EXCP_INTERRUPT;
816         cpu_loop_exit(cpu);
817     }
818 
819     gen_code_buf = tcg_ctx->code_gen_ptr;
820     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
821 #if !TARGET_TB_PCREL
822     tb->pc = pc;
823 #endif
824     tb->cs_base = cs_base;
825     tb->flags = flags;
826     tb->cflags = cflags;
827     tb->trace_vcpu_dstate = *cpu->trace_dstate;
828     tb_set_page_addr0(tb, phys_pc);
829     tb_set_page_addr1(tb, -1);
830     tcg_ctx->tb_cflags = cflags;
831  tb_overflow:
832 
833 #ifdef CONFIG_PROFILER
834     /* includes aborted translations because of exceptions */
835     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
836     ti = profile_getclock();
837 #endif
838 
839     trace_translate_block(tb, pc, tb->tc.ptr);
840 
841     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
842     if (unlikely(gen_code_size < 0)) {
843         switch (gen_code_size) {
844         case -1:
845             /*
846              * Overflow of code_gen_buffer, or the current slice of it.
847              *
848              * TODO: We don't need to re-do gen_intermediate_code, nor
849              * should we re-do the tcg optimization currently hidden
850              * inside tcg_gen_code.  All that should be required is to
851              * flush the TBs, allocate a new TB, re-initialize it per
852              * above, and re-do the actual code generation.
853              */
854             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
855                           "Restarting code generation for "
856                           "code_gen_buffer overflow\n");
857             goto buffer_overflow;
858 
859         case -2:
860             /*
861              * The code generated for the TranslationBlock is too large.
862              * The maximum size allowed by the unwind info is 64k.
863              * There may be stricter constraints from relocations
864              * in the tcg backend.
865              *
866              * Try again with half as many insns as we attempted this time.
867              * If a single insn overflows, there's a bug somewhere...
868              */
869             assert(max_insns > 1);
870             max_insns /= 2;
871             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
872                           "Restarting code generation with "
873                           "smaller translation block (max %d insns)\n",
874                           max_insns);
875             goto tb_overflow;
876 
877         default:
878             g_assert_not_reached();
879         }
880     }
881     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
882     if (unlikely(search_size < 0)) {
883         goto buffer_overflow;
884     }
885     tb->tc.size = gen_code_size;
886 
887 #ifdef CONFIG_PROFILER
888     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
889     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
890     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
891     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
892 #endif
893 
894 #ifdef DEBUG_DISAS
895     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
896         qemu_log_in_addr_range(pc)) {
897         FILE *logfile = qemu_log_trylock();
898         if (logfile) {
899             int code_size, data_size;
900             const tcg_target_ulong *rx_data_gen_ptr;
901             size_t chunk_start;
902             int insn = 0;
903 
904             if (tcg_ctx->data_gen_ptr) {
905                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
906                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
907                 data_size = gen_code_size - code_size;
908             } else {
909                 rx_data_gen_ptr = 0;
910                 code_size = gen_code_size;
911                 data_size = 0;
912             }
913 
914             /* Dump header and the first instruction */
915             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
916             fprintf(logfile,
917                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
918                     tcg_ctx->gen_insn_data[insn][0]);
919             chunk_start = tcg_ctx->gen_insn_end_off[insn];
920             disas(logfile, tb->tc.ptr, chunk_start);
921 
922             /*
923              * Dump each instruction chunk, wrapping up empty chunks into
924              * the next instruction. The whole array is offset so the
925              * first entry is the beginning of the 2nd instruction.
926              */
927             while (insn < tb->icount) {
928                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
929                 if (chunk_end > chunk_start) {
930                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
931                             tcg_ctx->gen_insn_data[insn][0]);
932                     disas(logfile, tb->tc.ptr + chunk_start,
933                           chunk_end - chunk_start);
934                     chunk_start = chunk_end;
935                 }
936                 insn++;
937             }
938 
939             if (chunk_start < code_size) {
940                 fprintf(logfile, "  -- tb slow paths + alignment\n");
941                 disas(logfile, tb->tc.ptr + chunk_start,
942                       code_size - chunk_start);
943             }
944 
945             /* Finally dump any data we may have after the block */
946             if (data_size) {
947                 int i;
948                 fprintf(logfile, "  data: [size=%d]\n", data_size);
949                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
950                     if (sizeof(tcg_target_ulong) == 8) {
951                         fprintf(logfile,
952                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
953                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
954                     } else if (sizeof(tcg_target_ulong) == 4) {
955                         fprintf(logfile,
956                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
957                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
958                     } else {
959                         qemu_build_not_reached();
960                     }
961                 }
962             }
963             fprintf(logfile, "\n");
964             qemu_log_unlock(logfile);
965         }
966     }
967 #endif
968 
969     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
970         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
971                  CODE_GEN_ALIGN));
972 
973     /* init jump list */
974     qemu_spin_init(&tb->jmp_lock);
975     tb->jmp_list_head = (uintptr_t)NULL;
976     tb->jmp_list_next[0] = (uintptr_t)NULL;
977     tb->jmp_list_next[1] = (uintptr_t)NULL;
978     tb->jmp_dest[0] = (uintptr_t)NULL;
979     tb->jmp_dest[1] = (uintptr_t)NULL;
980 
981     /* init original jump addresses which have been set during tcg_gen_code() */
982     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
983         tb_reset_jump(tb, 0);
984     }
985     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
986         tb_reset_jump(tb, 1);
987     }
988 
989     /*
990      * If the TB is not associated with a physical RAM page then it must be
991      * a temporary one-insn TB, and we have nothing left to do. Return early
992      * before attempting to link to other TBs or add to the lookup table.
993      */
994     if (tb_page_addr0(tb) == -1) {
995         return tb;
996     }
997 
998     /*
999      * Insert TB into the corresponding region tree before publishing it
1000      * through QHT. Otherwise rewinding happened in the TB might fail to
1001      * lookup itself using host PC.
1002      */
1003     tcg_tb_insert(tb);
1004 
1005     /*
1006      * No explicit memory barrier is required -- tb_link_page() makes the
1007      * TB visible in a consistent state.
1008      */
1009     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
1010     /* if the TB already exists, discard what we just translated */
1011     if (unlikely(existing_tb != tb)) {
1012         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1013 
1014         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1015         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1016         tcg_tb_remove(tb);
1017         return existing_tb;
1018     }
1019     return tb;
1020 }
1021 
1022 /* user-mode: call with mmap_lock held */
1023 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1024 {
1025     TranslationBlock *tb;
1026 
1027     assert_memory_lock();
1028 
1029     tb = tcg_tb_lookup(retaddr);
1030     if (tb) {
1031         /* We can use retranslation to find the PC.  */
1032         cpu_restore_state_from_tb(cpu, tb, retaddr);
1033         tb_phys_invalidate(tb, -1);
1034     } else {
1035         /* The exception probably happened in a helper.  The CPU state should
1036            have been saved before calling it. Fetch the PC from there.  */
1037         CPUArchState *env = cpu->env_ptr;
1038         target_ulong pc, cs_base;
1039         tb_page_addr_t addr;
1040         uint32_t flags;
1041 
1042         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1043         addr = get_page_addr_code(env, pc);
1044         if (addr != -1) {
1045             tb_invalidate_phys_range(addr, addr + 1);
1046         }
1047     }
1048 }
1049 
1050 #ifndef CONFIG_USER_ONLY
1051 /*
1052  * In deterministic execution mode, instructions doing device I/Os
1053  * must be at the end of the TB.
1054  *
1055  * Called by softmmu_template.h, with iothread mutex not held.
1056  */
1057 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1058 {
1059     TranslationBlock *tb;
1060     CPUClass *cc;
1061     uint32_t n;
1062 
1063     tb = tcg_tb_lookup(retaddr);
1064     if (!tb) {
1065         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1066                   (void *)retaddr);
1067     }
1068     cpu_restore_state_from_tb(cpu, tb, retaddr);
1069 
1070     /*
1071      * Some guests must re-execute the branch when re-executing a delay
1072      * slot instruction.  When this is the case, adjust icount and N
1073      * to account for the re-execution of the branch.
1074      */
1075     n = 1;
1076     cc = CPU_GET_CLASS(cpu);
1077     if (cc->tcg_ops->io_recompile_replay_branch &&
1078         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1079         cpu_neg(cpu)->icount_decr.u16.low++;
1080         n = 2;
1081     }
1082 
1083     /*
1084      * Exit the loop and potentially generate a new TB executing the
1085      * just the I/O insns. We also limit instrumentation to memory
1086      * operations only (which execute after completion) so we don't
1087      * double instrument the instruction.
1088      */
1089     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1090 
1091     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1092         target_ulong pc = log_pc(cpu, tb);
1093         if (qemu_log_in_addr_range(pc)) {
1094             qemu_log("cpu_io_recompile: rewound execution of TB to "
1095                      TARGET_FMT_lx "\n", pc);
1096         }
1097     }
1098 
1099     cpu_loop_exit_noexc(cpu);
1100 }
1101 
1102 static void print_qht_statistics(struct qht_stats hst, GString *buf)
1103 {
1104     uint32_t hgram_opts;
1105     size_t hgram_bins;
1106     char *hgram;
1107 
1108     if (!hst.head_buckets) {
1109         return;
1110     }
1111     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1112                            "(%0.2f%% head buckets used)\n",
1113                            hst.used_head_buckets, hst.head_buckets,
1114                            (double)hst.used_head_buckets /
1115                            hst.head_buckets * 100);
1116 
1117     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1118     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1119     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1120         hgram_opts |= QDIST_PR_NODECIMAL;
1121     }
1122     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1123     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1124                            "Histogram: %s\n",
1125                            qdist_avg(&hst.occupancy) * 100, hgram);
1126     g_free(hgram);
1127 
1128     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1129     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1130     if (hgram_bins > 10) {
1131         hgram_bins = 10;
1132     } else {
1133         hgram_bins = 0;
1134         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1135     }
1136     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1137     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1138                            "Histogram: %s\n",
1139                            qdist_avg(&hst.chain), hgram);
1140     g_free(hgram);
1141 }
1142 
/* Running totals filled in by tb_tree_stats_iter(), one call per TB. */
struct tb_tree_stats {
    size_t nb_tbs;              /* number of TBs visited */
    size_t host_size;           /* sum of tb->tc.size over all TBs */
    size_t target_size;         /* sum of tb->size over all TBs */
    size_t max_target_size;     /* largest tb->size seen */
    size_t direct_jmp_count;    /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count;   /* ...that also have a valid [1] */
    size_t cross_page;          /* TBs whose tb_page_addr1() is not -1 */
};
1152 
1153 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
1154 {
1155     const TranslationBlock *tb = value;
1156     struct tb_tree_stats *tst = data;
1157 
1158     tst->nb_tbs++;
1159     tst->host_size += tb->tc.size;
1160     tst->target_size += tb->size;
1161     if (tb->size > tst->max_target_size) {
1162         tst->max_target_size = tb->size;
1163     }
1164     if (tb_page_addr1(tb) != -1) {
1165         tst->cross_page++;
1166     }
1167     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1168         tst->direct_jmp_count++;
1169         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1170             tst->direct_jmp2_count++;
1171         }
1172     }
1173     return false;
1174 }
1175 
1176 void dump_exec_info(GString *buf)
1177 {
1178     struct tb_tree_stats tst = {};
1179     struct qht_stats hst;
1180     size_t nb_tbs, flush_full, flush_part, flush_elide;
1181 
1182     tcg_tb_foreach(tb_tree_stats_iter, &tst);
1183     nb_tbs = tst.nb_tbs;
1184     /* XXX: avoid using doubles ? */
1185     g_string_append_printf(buf, "Translation buffer state:\n");
1186     /*
1187      * Report total code size including the padding and TB structs;
1188      * otherwise users might think "-accel tcg,tb-size" is not honoured.
1189      * For avg host size we use the precise numbers from tb_tree_stats though.
1190      */
1191     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
1192                            tcg_code_size(), tcg_code_capacity());
1193     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
1194     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
1195                            nb_tbs ? tst.target_size / nb_tbs : 0,
1196                            tst.max_target_size);
1197     g_string_append_printf(buf, "TB avg host size    %zu bytes "
1198                            "(expansion ratio: %0.1f)\n",
1199                            nb_tbs ? tst.host_size / nb_tbs : 0,
1200                            tst.target_size ?
1201                            (double)tst.host_size / tst.target_size : 0);
1202     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
1203                            tst.cross_page,
1204                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
1205     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
1206                            "(2 jumps=%zu %zu%%)\n",
1207                            tst.direct_jmp_count,
1208                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
1209                            tst.direct_jmp2_count,
1210                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
1211 
1212     qht_statistics_init(&tb_ctx.htable, &hst);
1213     print_qht_statistics(hst, buf);
1214     qht_statistics_destroy(&hst);
1215 
1216     g_string_append_printf(buf, "\nStatistics:\n");
1217     g_string_append_printf(buf, "TB flush count      %u\n",
1218                            qatomic_read(&tb_ctx.tb_flush_count));
1219     g_string_append_printf(buf, "TB invalidate count %u\n",
1220                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
1221 
1222     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
1223     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
1224     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
1225     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
1226     tcg_dump_info(buf);
1227 }
1228 
1229 #else /* CONFIG_USER_ONLY */
1230 
1231 void cpu_interrupt(CPUState *cpu, int mask)
1232 {
1233     g_assert(qemu_mutex_iothread_locked());
1234     cpu->interrupt_request |= mask;
1235     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
1236 }
1237 
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 *
 * This structure carries the walk state between the helper functions.
 */
struct walk_memory_regions_data {
    walk_memory_regions_fn fn;  /* callback invoked once per region */
    void *priv;                 /* opaque pointer passed through to fn */
    target_ulong start;         /* start of the open region, -1u if none */
    int prot;                   /* page flags of the open region */
};
1248 
1249 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1250                                    target_ulong end, int new_prot)
1251 {
1252     if (data->start != -1u) {
1253         int rc = data->fn(data->priv, data->start, end, data->prot);
1254         if (rc != 0) {
1255             return rc;
1256         }
1257     }
1258 
1259     data->start = (new_prot ? end : -1u);
1260     data->prot = new_prot;
1261 
1262     return 0;
1263 }
1264 
1265 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1266                                  target_ulong base, int level, void **lp)
1267 {
1268     target_ulong pa;
1269     int i, rc;
1270 
1271     if (*lp == NULL) {
1272         return walk_memory_regions_end(data, base, 0);
1273     }
1274 
1275     if (level == 0) {
1276         PageDesc *pd = *lp;
1277 
1278         for (i = 0; i < V_L2_SIZE; ++i) {
1279             int prot = pd[i].flags;
1280 
1281             pa = base | (i << TARGET_PAGE_BITS);
1282             if (prot != data->prot) {
1283                 rc = walk_memory_regions_end(data, pa, prot);
1284                 if (rc != 0) {
1285                     return rc;
1286                 }
1287             }
1288         }
1289     } else {
1290         void **pp = *lp;
1291 
1292         for (i = 0; i < V_L2_SIZE; ++i) {
1293             pa = base | ((target_ulong)i <<
1294                 (TARGET_PAGE_BITS + V_L2_BITS * level));
1295             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1296             if (rc != 0) {
1297                 return rc;
1298             }
1299         }
1300     }
1301 
1302     return 0;
1303 }
1304 
1305 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1306 {
1307     struct walk_memory_regions_data data;
1308     uintptr_t i, l1_sz = v_l1_size;
1309 
1310     data.fn = fn;
1311     data.priv = priv;
1312     data.start = -1u;
1313     data.prot = 0;
1314 
1315     for (i = 0; i < l1_sz; i++) {
1316         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
1317         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
1318         if (rc != 0) {
1319             return rc;
1320         }
1321     }
1322 
1323     return walk_memory_regions_end(&data, 0, 0);
1324 }
1325 
1326 static int dump_region(void *priv, target_ulong start,
1327     target_ulong end, unsigned long prot)
1328 {
1329     FILE *f = (FILE *)priv;
1330 
1331     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
1332         " "TARGET_FMT_lx" %c%c%c\n",
1333         start, end, end - start,
1334         ((prot & PAGE_READ) ? 'r' : '-'),
1335         ((prot & PAGE_WRITE) ? 'w' : '-'),
1336         ((prot & PAGE_EXEC) ? 'x' : '-'));
1337 
1338     return 0;
1339 }
1340 
1341 /* dump memory mappings */
1342 void page_dump(FILE *f)
1343 {
1344     const int length = sizeof(target_ulong) * 2;
1345     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
1346             length, "start", length, "end", length, "size", "prot");
1347     walk_memory_regions(f, dump_region);
1348 }
1349 
1350 int page_get_flags(target_ulong address)
1351 {
1352     PageDesc *p;
1353 
1354     p = page_find(address >> TARGET_PAGE_BITS);
1355     if (!p) {
1356         return 0;
1357     }
1358     return p->flags;
1359 }
1360 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() carries over from a page's previous
 * flags when it is not performing a reset.
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
1369 
1370 /* Modify the flags of a page and invalidate the code if necessary.
1371    The flag PAGE_WRITE_ORG is positioned automatically depending
1372    on PAGE_WRITE.  The mmap_lock should already be held.  */
1373 void page_set_flags(target_ulong start, target_ulong end, int flags)
1374 {
1375     target_ulong addr, len;
1376     bool reset, inval_tb = false;
1377 
1378     /* This function should never be called with addresses outside the
1379        guest address space.  If this assert fires, it probably indicates
1380        a missing call to h2g_valid.  */
1381     assert(end - 1 <= GUEST_ADDR_MAX);
1382     assert(start < end);
1383     /* Only set PAGE_ANON with new mappings. */
1384     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
1385     assert_memory_lock();
1386 
1387     start = start & TARGET_PAGE_MASK;
1388     end = TARGET_PAGE_ALIGN(end);
1389 
1390     if (flags & PAGE_WRITE) {
1391         flags |= PAGE_WRITE_ORG;
1392     }
1393     reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
1394     if (reset) {
1395         page_reset_target_data(start, end);
1396     }
1397     flags &= ~PAGE_RESET;
1398 
1399     for (addr = start, len = end - start;
1400          len != 0;
1401          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1402         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
1403 
1404         /*
1405          * If the page was executable, but is reset, or is no longer
1406          * executable, or has become writable, then invalidate any code.
1407          */
1408         if ((p->flags & PAGE_EXEC)
1409             && (reset ||
1410                 !(flags & PAGE_EXEC) ||
1411                 (flags & ~p->flags & PAGE_WRITE))) {
1412             inval_tb = true;
1413         }
1414         /* Using mprotect on a page does not change sticky bits. */
1415         p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
1416     }
1417 
1418     if (inval_tb) {
1419         tb_invalidate_phys_range(start, end);
1420     }
1421 }
1422 
1423 int page_check_range(target_ulong start, target_ulong len, int flags)
1424 {
1425     PageDesc *p;
1426     target_ulong end;
1427     target_ulong addr;
1428 
1429     /* This function should never be called with addresses outside the
1430        guest address space.  If this assert fires, it probably indicates
1431        a missing call to h2g_valid.  */
1432     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
1433         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
1434     }
1435 
1436     if (len == 0) {
1437         return 0;
1438     }
1439     if (start + len - 1 < start) {
1440         /* We've wrapped around.  */
1441         return -1;
1442     }
1443 
1444     /* must do before we loose bits in the next step */
1445     end = TARGET_PAGE_ALIGN(start + len);
1446     start = start & TARGET_PAGE_MASK;
1447 
1448     for (addr = start, len = end - start;
1449          len != 0;
1450          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1451         p = page_find(addr >> TARGET_PAGE_BITS);
1452         if (!p) {
1453             return -1;
1454         }
1455         if (!(p->flags & PAGE_VALID)) {
1456             return -1;
1457         }
1458 
1459         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
1460             return -1;
1461         }
1462         if (flags & PAGE_WRITE) {
1463             if (!(p->flags & PAGE_WRITE_ORG)) {
1464                 return -1;
1465             }
1466             /* unprotect the page if it was put read-only because it
1467                contains translated code */
1468             if (!(p->flags & PAGE_WRITE)) {
1469                 if (!page_unprotect(addr, 0)) {
1470                     return -1;
1471                 }
1472             }
1473         }
1474     }
1475     return 0;
1476 }
1477 
1478 void page_protect(tb_page_addr_t page_addr)
1479 {
1480     target_ulong addr;
1481     PageDesc *p;
1482     int prot;
1483 
1484     p = page_find(page_addr >> TARGET_PAGE_BITS);
1485     if (p && (p->flags & PAGE_WRITE)) {
1486         /*
1487          * Force the host page as non writable (writes will have a page fault +
1488          * mprotect overhead).
1489          */
1490         page_addr &= qemu_host_page_mask;
1491         prot = 0;
1492         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1493              addr += TARGET_PAGE_SIZE) {
1494 
1495             p = page_find(addr >> TARGET_PAGE_BITS);
1496             if (!p) {
1497                 continue;
1498             }
1499             prot |= p->flags;
1500             p->flags &= ~PAGE_WRITE;
1501         }
1502         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1503                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1504     }
1505 }
1506 
1507 /* called from signal handler: invalidate the code and unprotect the
1508  * page. Return 0 if the fault was not handled, 1 if it was handled,
1509  * and 2 if it was handled but the caller must cause the TB to be
1510  * immediately exited. (We can only return 2 if the 'pc' argument is
1511  * non-zero.)
1512  */
1513 int page_unprotect(target_ulong address, uintptr_t pc)
1514 {
1515     unsigned int prot;
1516     bool current_tb_invalidated;
1517     PageDesc *p;
1518     target_ulong host_start, host_end, addr;
1519 
1520     /* Technically this isn't safe inside a signal handler.  However we
1521        know this only ever happens in a synchronous SEGV handler, so in
1522        practice it seems to be ok.  */
1523     mmap_lock();
1524 
1525     p = page_find(address >> TARGET_PAGE_BITS);
1526     if (!p) {
1527         mmap_unlock();
1528         return 0;
1529     }
1530 
1531     /* if the page was really writable, then we change its
1532        protection back to writable */
1533     if (p->flags & PAGE_WRITE_ORG) {
1534         current_tb_invalidated = false;
1535         if (p->flags & PAGE_WRITE) {
1536             /* If the page is actually marked WRITE then assume this is because
1537              * this thread raced with another one which got here first and
1538              * set the page to PAGE_WRITE and did the TB invalidate for us.
1539              */
1540 #ifdef TARGET_HAS_PRECISE_SMC
1541             TranslationBlock *current_tb = tcg_tb_lookup(pc);
1542             if (current_tb) {
1543                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
1544             }
1545 #endif
1546         } else {
1547             host_start = address & qemu_host_page_mask;
1548             host_end = host_start + qemu_host_page_size;
1549 
1550             prot = 0;
1551             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
1552                 p = page_find(addr >> TARGET_PAGE_BITS);
1553                 p->flags |= PAGE_WRITE;
1554                 prot |= p->flags;
1555 
1556                 /* and since the content will be modified, we must invalidate
1557                    the corresponding translated code. */
1558                 current_tb_invalidated |=
1559                     tb_invalidate_phys_page_unwind(addr, pc);
1560             }
1561             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
1562                      prot & PAGE_BITS);
1563         }
1564         mmap_unlock();
1565         /* If current TB was invalidated return to main loop */
1566         return current_tb_invalidated ? 2 : 1;
1567     }
1568     mmap_unlock();
1569     return 0;
1570 }
1571 #endif /* CONFIG_USER_ONLY */
1572 
1573 /*
1574  * Called by generic code at e.g. cpu reset after cpu creation,
1575  * therefore we must be prepared to allocate the jump cache.
1576  */
1577 void tcg_flush_jmp_cache(CPUState *cpu)
1578 {
1579     CPUJumpCache *jc = cpu->tb_jmp_cache;
1580 
1581     /* During early initialization, the cache may not yet be allocated. */
1582     if (unlikely(jc == NULL)) {
1583         return;
1584     }
1585 
1586     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
1587         qatomic_set(&jc->array[i].tb, NULL);
1588     }
1589 }
1590 
1591 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
1592 void tcg_flush_softmmu_tlb(CPUState *cs)
1593 {
1594 #ifdef CONFIG_SOFTMMU
1595     tlb_flush(cs);
1596 #endif
1597 }
1598