xref: /openbmc/qemu/accel/tcg/translate-all.c (revision c9923550b446e54413024117c0ed978a08e3ab1a)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 
66 /* make various TB consistency checks */
67 
68 /**
69  * struct page_entry - page descriptor entry
70  * @pd:     pointer to the &struct PageDesc of the page this entry represents
71  * @index:  page index of the page
72  * @locked: whether the page is locked
73  *
74  * This struct helps us keep track of the locked state of a page, without
75  * bloating &struct PageDesc.
76  *
77  * A page lock protects accesses to all fields of &struct PageDesc.
78  *
79  * See also: &struct page_collection.
80  */
81 struct page_entry {
82     PageDesc *pd;
83     tb_page_addr_t index;
84     bool locked;
85 };
86 
87 /**
88  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
89  * @tree:   Binary search tree (BST) of the pages, with key == page index
90  * @max:    Pointer to the page in @tree with the highest page index
91  *
92  * To avoid deadlock we lock pages in ascending order of page index.
93  * When operating on a set of pages, we need to keep track of them so that
94  * we can lock them in order and also unlock them later. For this we collect
95  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
96  * @tree implementation we use does not provide an O(1) operation to obtain the
97  * highest-ranked element, we use @max to keep track of the inserted page
98  * with the highest index. This is valuable because if a page is not in
99  * the tree and its index is higher than @max's, then we can lock it
100  * without breaking the locking order rule.
101  *
102  * Note on naming: 'struct page_set' would be shorter, but we already have a few
103  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
104  *
105  * See also: page_collection_lock().
106  */
107 struct page_collection {
108     GTree *tree;
109     struct page_entry *max;
110 };
111 
112 /*
113  * In system mode we want L1_MAP to be based on ram offsets,
114  * while in user mode we want it to be based on virtual addresses.
115  *
116  * TODO: For user mode, see the caveat re host vs guest virtual
117  * address spaces near GUEST_ADDR_MAX.
118  */
119 #if !defined(CONFIG_USER_ONLY)
120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
121 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
122 #else
123 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
124 #endif
125 #else
126 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
127 #endif
128 
129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
131                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
132                   * BITS_PER_BYTE);
133 
134 /*
135  * L1 Mapping properties
136  */
137 int v_l1_size;
138 int v_l1_shift;
139 int v_l2_levels;
140 
141 void *l1_map[V_L1_MAX_SIZE];
142 
143 TBContext tb_ctx;
144 
145 static void page_table_config_init(void)
146 {
147     uint32_t v_l1_bits;
148 
149     assert(TARGET_PAGE_BITS);
150     /* The bits remaining after N lower levels of page tables.  */
151     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
152     if (v_l1_bits < V_L1_MIN_BITS) {
153         v_l1_bits += V_L2_BITS;
154     }
155 
156     v_l1_size = 1 << v_l1_bits;
157     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
158     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
159 
160     assert(v_l1_bits <= V_L1_MAX_BITS);
161     assert(v_l1_shift % V_L2_BITS == 0);
162     assert(v_l2_levels >= 0);
163 }
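
/*
 * Illustrative only (not part of the original source): a worked example of
 * the values computed above, assuming V_L2_BITS == 10 and V_L1_MIN_BITS == 4
 * (as in internal.h) with L1_MAP_ADDR_SPACE_BITS == 48 and
 * TARGET_PAGE_BITS == 12:
 *
 *   bits above the page offset: 48 - 12 = 36
 *   v_l1_bits   = 36 % 10 = 6      (>= V_L1_MIN_BITS, no adjustment needed)
 *   v_l1_size   = 1 << 6 = 64 entries in l1_map
 *   v_l1_shift  = 36 - 6 = 30
 *   v_l2_levels = 30 / 10 - 1 = 2 intermediate levels
 *
 * i.e. a page index is consumed as 6 L1 bits, two 10-bit intermediate
 * levels, and a final 10 bits indexing a leaf array of PageDesc.
 */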
164 
165 /* Encode VAL as a signed leb128 sequence at P.
166    Return P incremented past the encoded value.  */
167 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
168 {
169     int more, byte;
170 
171     do {
172         byte = val & 0x7f;
173         val >>= 7;
174         more = !((val == 0 && (byte & 0x40) == 0)
175                  || (val == -1 && (byte & 0x40) != 0));
176         if (more) {
177             byte |= 0x80;
178         }
179         *p++ = byte;
180     } while (more);
181 
182     return p;
183 }
184 
185 /* Decode a signed leb128 sequence at *PP; increment *PP past the
186    decoded value.  Return the decoded value.  */
187 static target_long decode_sleb128(const uint8_t **pp)
188 {
189     const uint8_t *p = *pp;
190     target_long val = 0;
191     int byte, shift = 0;
192 
193     do {
194         byte = *p++;
195         val |= (target_ulong)(byte & 0x7f) << shift;
196         shift += 7;
197     } while (byte & 0x80);
198     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
199         val |= -(target_ulong)1 << shift;
200     }
201 
202     *pp = p;
203     return val;
204 }
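
/*
 * Illustrative only: a few signed LEB128 encodings as produced and consumed
 * by the two helpers above.  Each byte carries 7 payload bits, the top bit
 * marks continuation, and bit 6 of the final byte supplies the sign:
 *
 *      0  ->  0x00
 *     63  ->  0x3f
 *     64  ->  0xc0 0x00
 *    -64  ->  0x40
 *     -2  ->  0x7e
 *    128  ->  0x80 0x01
 */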
205 
206 /* Encode the data collected about the instructions while compiling TB.
207    Place the data at BLOCK, and return the number of bytes consumed.
208 
209    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
210    which come from the target's insn_start data, followed by a uintptr_t
211    which comes from the host pc of the end of the code implementing the insn.
212 
213    Each line of the table is encoded as sleb128 deltas from the previous
214    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
215    That is, the first column is seeded with the guest pc, the last column
216    with the host pc, and the middle columns with zeros.  */
217 
218 static int encode_search(TranslationBlock *tb, uint8_t *block)
219 {
220     uint8_t *highwater = tcg_ctx->code_gen_highwater;
221     uint8_t *p = block;
222     int i, j, n;
223 
224     for (i = 0, n = tb->icount; i < n; ++i) {
225         target_ulong prev;
226 
227         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
228             if (i == 0) {
229                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
230             } else {
231                 prev = tcg_ctx->gen_insn_data[i - 1][j];
232             }
233             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
234         }
235         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
236         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
237 
238         /* Test for (pending) buffer overflow.  The assumption is that any
239            one row beginning below the high water mark cannot overrun
240            the buffer completely.  Thus we can test for overflow after
241            encoding a row without having to check during encoding.  */
242         if (unlikely(p > highwater)) {
243             return -1;
244         }
245     }
246 
247     return p - block;
248 }
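
/*
 * Illustrative only (hypothetical numbers): with TARGET_INSN_START_WORDS == 1
 * and !TARGET_TB_PCREL, the logical table for a 3-insn TB starting at guest
 * pc 0x1000 might be
 *
 *     insn 0:  pc 0x1000   host end offset 0x20
 *     insn 1:  pc 0x1004   host end offset 0x34
 *     insn 2:  pc 0x1008   host end offset 0x48
 *
 * and what encode_search() actually stores are per-column sleb128 deltas
 * against the seed { tb->pc, tb->tc.ptr }:
 *
 *     row 0:  +0x00, +0x20
 *     row 1:  +0x04, +0x14
 *     row 2:  +0x04, +0x14
 *
 * cpu_restore_state_from_tb() below walks these rows forward, re-adding the
 * deltas until the accumulated host pc passes the searched address.
 */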
249 
250 /* The cpu state corresponding to 'searched_pc' is restored.
251  * When reset_icount is true, the current TB will be interrupted and
252  * the icount should be recalculated.
253  */
254 int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
255                               uintptr_t searched_pc, bool reset_icount)
256 {
257     uint64_t data[TARGET_INSN_START_WORDS];
258     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
259     const uint8_t *p = tb->tc.ptr + tb->tc.size;
260     int i, j, num_insns = tb->icount;
261 #ifdef CONFIG_PROFILER
262     TCGProfile *prof = &tcg_ctx->prof;
263     int64_t ti = profile_getclock();
264 #endif
265 
266     searched_pc -= GETPC_ADJ;
267 
268     if (searched_pc < host_pc) {
269         return -1;
270     }
271 
272     memset(data, 0, sizeof(data));
273     if (!TARGET_TB_PCREL) {
274         data[0] = tb_pc(tb);
275     }
276 
277     /* Reconstruct the stored insn data while looking for the point at
278        which the end of the insn exceeds the searched_pc.  */
279     for (i = 0; i < num_insns; ++i) {
280         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
281             data[j] += decode_sleb128(&p);
282         }
283         host_pc += decode_sleb128(&p);
284         if (host_pc > searched_pc) {
285             goto found;
286         }
287     }
288     return -1;
289 
290  found:
291     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
292         assert(icount_enabled());
293         /* Reset the cycle counter to the start of the block
294            and shift it to the number of actually executed instructions */
295         cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
296     }
297 
298     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
299 
300 #ifdef CONFIG_PROFILER
301     qatomic_set(&prof->restore_time,
302                 prof->restore_time + profile_getclock() - ti);
303     qatomic_set(&prof->restore_count, prof->restore_count + 1);
304 #endif
305     return 0;
306 }
307 
308 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
309 {
310     /*
311      * The pc update associated with restore without exit will
312      * break the relative pc adjustments performed by TARGET_TB_PCREL.
313      */
314     if (TARGET_TB_PCREL) {
315         assert(will_exit);
316     }
317 
318     /*
319      * The host_pc has to be in the rx region of the code buffer.
320      * If it is not, we will not be able to resolve it here.
321      * The two cases where host_pc will not be correct are:
322      *
323      *  - fault during translation (instruction fetch)
324      *  - fault from helper (not using GETPC() macro)
325      *
326      * Either way we need to return early as we can't resolve it here.
327      */
328     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
329         TranslationBlock *tb = tcg_tb_lookup(host_pc);
330         if (tb) {
331             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
332             return true;
333         }
334     }
335     return false;
336 }
337 
338 void page_init(void)
339 {
340     page_size_init();
341     page_table_config_init();
342 
343 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
344     {
345 #ifdef HAVE_KINFO_GETVMMAP
346         struct kinfo_vmentry *freep;
347         int i, cnt;
348 
349         freep = kinfo_getvmmap(getpid(), &cnt);
350         if (freep) {
351             mmap_lock();
352             for (i = 0; i < cnt; i++) {
353                 unsigned long startaddr, endaddr;
354 
355                 startaddr = freep[i].kve_start;
356                 endaddr = freep[i].kve_end;
357                 if (h2g_valid(startaddr)) {
358                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
359 
360                     if (h2g_valid(endaddr)) {
361                         endaddr = h2g(endaddr);
362                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
363                     } else {
364 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
365                         endaddr = ~0ul;
366                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
367 #endif
368                     }
369                 }
370             }
371             free(freep);
372             mmap_unlock();
373         }
374 #else
375         FILE *f;
376 
377         last_brk = (unsigned long)sbrk(0);
378 
379         f = fopen("/compat/linux/proc/self/maps", "r");
380         if (f) {
381             mmap_lock();
382 
383             do {
384                 unsigned long startaddr, endaddr;
385                 int n;
386 
387                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
388 
389                 if (n == 2 && h2g_valid(startaddr)) {
390                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
391 
392                     if (h2g_valid(endaddr)) {
393                         endaddr = h2g(endaddr);
394                     } else {
395                         endaddr = ~0ul;
396                     }
397                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
398                 }
399             } while (!feof(f));
400 
401             fclose(f);
402             mmap_unlock();
403         }
404 #endif
405     }
406 #endif
407 }
408 
409 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
410 {
411     PageDesc *pd;
412     void **lp;
413     int i;
414 
415     /* Level 1.  Always allocated.  */
416     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
417 
418     /* Level 2..N-1.  */
419     for (i = v_l2_levels; i > 0; i--) {
420         void **p = qatomic_rcu_read(lp);
421 
422         if (p == NULL) {
423             void *existing;
424 
425             if (!alloc) {
426                 return NULL;
427             }
428             p = g_new0(void *, V_L2_SIZE);
429             existing = qatomic_cmpxchg(lp, NULL, p);
430             if (unlikely(existing)) {
431                 g_free(p);
432                 p = existing;
433             }
434         }
435 
436         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
437     }
438 
439     pd = qatomic_rcu_read(lp);
440     if (pd == NULL) {
441         void *existing;
442 
443         if (!alloc) {
444             return NULL;
445         }
446         pd = g_new0(PageDesc, V_L2_SIZE);
447 #ifndef CONFIG_USER_ONLY
448         {
449             int i;
450 
451             for (i = 0; i < V_L2_SIZE; i++) {
452                 qemu_spin_init(&pd[i].lock);
453             }
454         }
455 #endif
456         existing = qatomic_cmpxchg(lp, NULL, pd);
457         if (unlikely(existing)) {
458 #ifndef CONFIG_USER_ONLY
459             {
460                 int i;
461 
462                 for (i = 0; i < V_L2_SIZE; i++) {
463                     qemu_spin_destroy(&pd[i].lock);
464                 }
465             }
466 #endif
467             g_free(pd);
468             pd = existing;
469         }
470     }
471 
472     return pd + (index & (V_L2_SIZE - 1));
473 }
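
/*
 * Illustrative only: how a page index is consumed by the walk above, using
 * the example configuration sketched after page_table_config_init() (6 L1
 * bits, two intermediate levels, V_L2_BITS == 10):
 *
 *   l1_map[(index >> 30) & 63]           level 1, statically allocated
 *     -> level[(index >> 20) & 1023]     i == 2
 *       -> level[(index >> 10) & 1023]   i == 1
 *         -> PageDesc[index & 1023]      leaf entry returned by this call
 *
 * Missing levels are allocated lazily and published with qatomic_cmpxchg(),
 * so a concurrent reader sees either NULL or a fully initialized table.
 */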
474 
475 /* In user-mode page locks aren't used; mmap_lock is enough */
476 #ifdef CONFIG_USER_ONLY
477 struct page_collection *
478 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
479 {
480     return NULL;
481 }
482 
483 void page_collection_unlock(struct page_collection *set)
484 { }
485 #else /* !CONFIG_USER_ONLY */
486 
487 #ifdef CONFIG_DEBUG_TCG
488 
489 static __thread GHashTable *ht_pages_locked_debug;
490 
491 static void ht_pages_locked_debug_init(void)
492 {
493     if (ht_pages_locked_debug) {
494         return;
495     }
496     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
497 }
498 
499 static bool page_is_locked(const PageDesc *pd)
500 {
501     PageDesc *found;
502 
503     ht_pages_locked_debug_init();
504     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
505     return !!found;
506 }
507 
508 static void page_lock__debug(PageDesc *pd)
509 {
510     ht_pages_locked_debug_init();
511     g_assert(!page_is_locked(pd));
512     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
513 }
514 
515 static void page_unlock__debug(const PageDesc *pd)
516 {
517     bool removed;
518 
519     ht_pages_locked_debug_init();
520     g_assert(page_is_locked(pd));
521     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
522     g_assert(removed);
523 }
524 
525 void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
526 {
527     if (unlikely(!page_is_locked(pd))) {
528         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
529                      pd, file, line);
530         abort();
531     }
532 }
533 
534 void assert_no_pages_locked(void)
535 {
536     ht_pages_locked_debug_init();
537     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
538 }
539 
540 #else /* !CONFIG_DEBUG_TCG */
541 
542 static inline void page_lock__debug(const PageDesc *pd) { }
543 static inline void page_unlock__debug(const PageDesc *pd) { }
544 
545 #endif /* CONFIG_DEBUG_TCG */
546 
547 void page_lock(PageDesc *pd)
548 {
549     page_lock__debug(pd);
550     qemu_spin_lock(&pd->lock);
551 }
552 
553 void page_unlock(PageDesc *pd)
554 {
555     qemu_spin_unlock(&pd->lock);
556     page_unlock__debug(pd);
557 }
558 
559 static inline struct page_entry *
560 page_entry_new(PageDesc *pd, tb_page_addr_t index)
561 {
562     struct page_entry *pe = g_malloc(sizeof(*pe));
563 
564     pe->index = index;
565     pe->pd = pd;
566     pe->locked = false;
567     return pe;
568 }
569 
570 static void page_entry_destroy(gpointer p)
571 {
572     struct page_entry *pe = p;
573 
574     g_assert(pe->locked);
575     page_unlock(pe->pd);
576     g_free(pe);
577 }
578 
579 /* returns false on success */
580 static bool page_entry_trylock(struct page_entry *pe)
581 {
582     bool busy;
583 
584     busy = qemu_spin_trylock(&pe->pd->lock);
585     if (!busy) {
586         g_assert(!pe->locked);
587         pe->locked = true;
588         page_lock__debug(pe->pd);
589     }
590     return busy;
591 }
592 
593 static void do_page_entry_lock(struct page_entry *pe)
594 {
595     page_lock(pe->pd);
596     g_assert(!pe->locked);
597     pe->locked = true;
598 }
599 
600 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
601 {
602     struct page_entry *pe = value;
603 
604     do_page_entry_lock(pe);
605     return FALSE;
606 }
607 
608 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
609 {
610     struct page_entry *pe = value;
611 
612     if (pe->locked) {
613         pe->locked = false;
614         page_unlock(pe->pd);
615     }
616     return FALSE;
617 }
618 
619 /*
620  * Trylock a page, and if successful, add the page to a collection.
621  * Returns true ("busy") if the page could not be locked; false otherwise.
622  */
623 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
624 {
625     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
626     struct page_entry *pe;
627     PageDesc *pd;
628 
629     pe = g_tree_lookup(set->tree, &index);
630     if (pe) {
631         return false;
632     }
633 
634     pd = page_find(index);
635     if (pd == NULL) {
636         return false;
637     }
638 
639     pe = page_entry_new(pd, index);
640     g_tree_insert(set->tree, &pe->index, pe);
641 
642     /*
643      * If this is either (1) the first insertion or (2) a page whose index
644      * is higher than any other so far, just lock the page and move on.
645      */
646     if (set->max == NULL || pe->index > set->max->index) {
647         set->max = pe;
648         do_page_entry_lock(pe);
649         return false;
650     }
651     /*
652      * Try to acquire the lock out of order; if busy, return busy so that
653      * we acquire locks in order.
654      */
655     return page_entry_trylock(pe);
656 }
657 
658 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
659 {
660     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
661     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
662 
663     if (a == b) {
664         return 0;
665     } else if (a < b) {
666         return -1;
667     }
668     return 1;
669 }
670 
671 /*
672  * Lock a range of pages [@start, @end) as well as the pages of all
673  * intersecting TBs.
674  * Locking order: acquire locks in ascending order of page index.
675  */
676 struct page_collection *
677 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
678 {
679     struct page_collection *set = g_malloc(sizeof(*set));
680     tb_page_addr_t index;
681     PageDesc *pd;
682 
683     start >>= TARGET_PAGE_BITS;
684     end   >>= TARGET_PAGE_BITS;
685     g_assert(start <= end);
686 
687     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
688                                 page_entry_destroy);
689     set->max = NULL;
690     assert_no_pages_locked();
691 
692  retry:
693     g_tree_foreach(set->tree, page_entry_lock, NULL);
694 
695     for (index = start; index <= end; index++) {
696         TranslationBlock *tb;
697         int n;
698 
699         pd = page_find(index);
700         if (pd == NULL) {
701             continue;
702         }
703         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
704             g_tree_foreach(set->tree, page_entry_unlock, NULL);
705             goto retry;
706         }
707         assert_page_locked(pd);
708         PAGE_FOR_EACH_TB(pd, tb, n) {
709             if (page_trylock_add(set, tb_page_addr0(tb)) ||
710                 (tb_page_addr1(tb) != -1 &&
711                  page_trylock_add(set, tb_page_addr1(tb)))) {
712                 /* drop all locks, and reacquire in order */
713                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
714                 goto retry;
715             }
716         }
717     }
718     return set;
719 }
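
/*
 * Illustrative usage sketch (not from the original source): callers bracket
 * a bulk invalidation with the pair above/below, e.g.
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... operate on the TBs of the locked pages ...
 *     page_collection_unlock(pages);
 *
 * All page locks are dropped in page_entry_destroy() when the tree is torn
 * down by page_collection_unlock().
 */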
720 
721 void page_collection_unlock(struct page_collection *set)
722 {
723     /* entries are unlocked and freed via page_entry_destroy */
724     g_tree_destroy(set->tree);
725     g_free(set);
726 }
727 
728 #endif /* !CONFIG_USER_ONLY */
729 
730 /* Called with mmap_lock held for user mode emulation.  */
731 TranslationBlock *tb_gen_code(CPUState *cpu,
732                               target_ulong pc, target_ulong cs_base,
733                               uint32_t flags, int cflags)
734 {
735     CPUArchState *env = cpu->env_ptr;
736     TranslationBlock *tb, *existing_tb;
737     tb_page_addr_t phys_pc;
738     tcg_insn_unit *gen_code_buf;
739     int gen_code_size, search_size, max_insns;
740 #ifdef CONFIG_PROFILER
741     TCGProfile *prof = &tcg_ctx->prof;
742     int64_t ti;
743 #endif
744     void *host_pc;
745 
746     assert_memory_lock();
747     qemu_thread_jit_write();
748 
749     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
750 
751     if (phys_pc == -1) {
752         /* Generate a one-shot TB with 1 insn in it */
753         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
754     }
755 
756     max_insns = cflags & CF_COUNT_MASK;
757     if (max_insns == 0) {
758         max_insns = TCG_MAX_INSNS;
759     }
760     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
761 
762  buffer_overflow:
763     tb = tcg_tb_alloc(tcg_ctx);
764     if (unlikely(!tb)) {
765         /* flush must be done */
766         tb_flush(cpu);
767         mmap_unlock();
768         /* Make the execution loop process the flush as soon as possible.  */
769         cpu->exception_index = EXCP_INTERRUPT;
770         cpu_loop_exit(cpu);
771     }
772 
773     gen_code_buf = tcg_ctx->code_gen_ptr;
774     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
775 #if !TARGET_TB_PCREL
776     tb->pc = pc;
777 #endif
778     tb->cs_base = cs_base;
779     tb->flags = flags;
780     tb->cflags = cflags;
781     tb->trace_vcpu_dstate = *cpu->trace_dstate;
782     tb_set_page_addr0(tb, phys_pc);
783     tb_set_page_addr1(tb, -1);
784     tcg_ctx->tb_cflags = cflags;
785  tb_overflow:
786 
787 #ifdef CONFIG_PROFILER
788     /* includes aborted translations because of exceptions */
789     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
790     ti = profile_getclock();
791 #endif
792 
793     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
794     if (unlikely(gen_code_size != 0)) {
795         goto error_return;
796     }
797 
798     tcg_func_start(tcg_ctx);
799 
800     tcg_ctx->cpu = env_cpu(env);
801     gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
802     assert(tb->size != 0);
803     tcg_ctx->cpu = NULL;
804     max_insns = tb->icount;
805 
806     trace_translate_block(tb, pc, tb->tc.ptr);
807 
808     /* generate machine code */
809     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
810     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
811     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
812     if (TCG_TARGET_HAS_direct_jump) {
813         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
814         tcg_ctx->tb_jmp_target_addr = NULL;
815     } else {
816         tcg_ctx->tb_jmp_insn_offset = NULL;
817         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
818     }
819 
820 #ifdef CONFIG_PROFILER
821     qatomic_set(&prof->tb_count, prof->tb_count + 1);
822     qatomic_set(&prof->interm_time,
823                 prof->interm_time + profile_getclock() - ti);
824     ti = profile_getclock();
825 #endif
826 
827     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
828     if (unlikely(gen_code_size < 0)) {
829  error_return:
830         switch (gen_code_size) {
831         case -1:
832             /*
833              * Overflow of code_gen_buffer, or the current slice of it.
834              *
835              * TODO: We don't need to re-do gen_intermediate_code, nor
836              * should we re-do the tcg optimization currently hidden
837              * inside tcg_gen_code.  All that should be required is to
838              * flush the TBs, allocate a new TB, re-initialize it per
839              * above, and re-do the actual code generation.
840              */
841             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
842                           "Restarting code generation for "
843                           "code_gen_buffer overflow\n");
844             goto buffer_overflow;
845 
846         case -2:
847             /*
848              * The code generated for the TranslationBlock is too large.
849              * The maximum size allowed by the unwind info is 64k.
850              * There may be stricter constraints from relocations
851              * in the tcg backend.
852              *
853              * Try again with half as many insns as we attempted this time.
854              * If a single insn overflows, there's a bug somewhere...
855              */
856             assert(max_insns > 1);
857             max_insns /= 2;
858             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
859                           "Restarting code generation with "
860                           "smaller translation block (max %d insns)\n",
861                           max_insns);
862             goto tb_overflow;
863 
864         default:
865             g_assert_not_reached();
866         }
867     }
868     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
869     if (unlikely(search_size < 0)) {
870         goto buffer_overflow;
871     }
872     tb->tc.size = gen_code_size;
873 
874 #ifdef CONFIG_PROFILER
875     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
876     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
877     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
878     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
879 #endif
880 
881 #ifdef DEBUG_DISAS
882     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
883         qemu_log_in_addr_range(pc)) {
884         FILE *logfile = qemu_log_trylock();
885         if (logfile) {
886             int code_size, data_size;
887             const tcg_target_ulong *rx_data_gen_ptr;
888             size_t chunk_start;
889             int insn = 0;
890 
891             if (tcg_ctx->data_gen_ptr) {
892                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
893                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
894                 data_size = gen_code_size - code_size;
895             } else {
896                 rx_data_gen_ptr = 0;
897                 code_size = gen_code_size;
898                 data_size = 0;
899             }
900 
901             /* Dump header and the first instruction */
902             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
903             fprintf(logfile,
904                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
905                     tcg_ctx->gen_insn_data[insn][0]);
906             chunk_start = tcg_ctx->gen_insn_end_off[insn];
907             disas(logfile, tb->tc.ptr, chunk_start);
908 
909             /*
910              * Dump each instruction chunk, wrapping up empty chunks into
911              * the next instruction. The whole array is offset so the
912              * first entry is the beginning of the 2nd instruction.
913              */
914             while (insn < tb->icount) {
915                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
916                 if (chunk_end > chunk_start) {
917                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
918                             tcg_ctx->gen_insn_data[insn][0]);
919                     disas(logfile, tb->tc.ptr + chunk_start,
920                           chunk_end - chunk_start);
921                     chunk_start = chunk_end;
922                 }
923                 insn++;
924             }
925 
926             if (chunk_start < code_size) {
927                 fprintf(logfile, "  -- tb slow paths + alignment\n");
928                 disas(logfile, tb->tc.ptr + chunk_start,
929                       code_size - chunk_start);
930             }
931 
932             /* Finally dump any data we may have after the block */
933             if (data_size) {
934                 int i;
935                 fprintf(logfile, "  data: [size=%d]\n", data_size);
936                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
937                     if (sizeof(tcg_target_ulong) == 8) {
938                         fprintf(logfile,
939                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
940                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
941                     } else if (sizeof(tcg_target_ulong) == 4) {
942                         fprintf(logfile,
943                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
944                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
945                     } else {
946                         qemu_build_not_reached();
947                     }
948                 }
949             }
950             fprintf(logfile, "\n");
951             qemu_log_unlock(logfile);
952         }
953     }
954 #endif
955 
956     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
957         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
958                  CODE_GEN_ALIGN));
959 
960     /* init jump list */
961     qemu_spin_init(&tb->jmp_lock);
962     tb->jmp_list_head = (uintptr_t)NULL;
963     tb->jmp_list_next[0] = (uintptr_t)NULL;
964     tb->jmp_list_next[1] = (uintptr_t)NULL;
965     tb->jmp_dest[0] = (uintptr_t)NULL;
966     tb->jmp_dest[1] = (uintptr_t)NULL;
967 
968     /* init original jump addresses which have been set during tcg_gen_code() */
969     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
970         tb_reset_jump(tb, 0);
971     }
972     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
973         tb_reset_jump(tb, 1);
974     }
975 
976     /*
977      * If the TB is not associated with a physical RAM page then it must be
978      * a temporary one-insn TB, and we have nothing left to do. Return early
979      * before attempting to link to other TBs or add to the lookup table.
980      */
981     if (tb_page_addr0(tb) == -1) {
982         return tb;
983     }
984 
985     /*
986      * Insert TB into the corresponding region tree before publishing it
987      * through QHT. Otherwise an unwind happening within the TB might fail
988      * to look the TB up by its host PC.
989      */
990     tcg_tb_insert(tb);
991 
992     /*
993      * No explicit memory barrier is required -- tb_link_page() makes the
994      * TB visible in a consistent state.
995      */
996     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
997     /* if the TB already exists, discard what we just translated */
998     if (unlikely(existing_tb != tb)) {
999         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1000 
1001         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1002         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1003         tcg_tb_remove(tb);
1004         return existing_tb;
1005     }
1006     return tb;
1007 }
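
/*
 * Illustrative usage sketch (an assumption about the callers, not taken from
 * this file): the execution loop in cpu-exec.c calls tb_gen_code() when
 * tb_lookup() misses, roughly
 *
 *     mmap_lock();                 // user-mode only
 *     tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 *     mmap_unlock();
 *
 * and then executes the freshly generated TB, or the pre-existing one if
 * another thread won the tb_link_page() race above.
 */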
1008 
1009 /* user-mode: call with mmap_lock held */
1010 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1011 {
1012     TranslationBlock *tb;
1013 
1014     assert_memory_lock();
1015 
1016     tb = tcg_tb_lookup(retaddr);
1017     if (tb) {
1018         /* We can use retranslation to find the PC.  */
1019         cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1020         tb_phys_invalidate(tb, -1);
1021     } else {
1022         /* The exception probably happened in a helper.  The CPU state should
1023            have been saved before calling it. Fetch the PC from there.  */
1024         CPUArchState *env = cpu->env_ptr;
1025         target_ulong pc, cs_base;
1026         tb_page_addr_t addr;
1027         uint32_t flags;
1028 
1029         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1030         addr = get_page_addr_code(env, pc);
1031         if (addr != -1) {
1032             tb_invalidate_phys_range(addr, addr + 1);
1033         }
1034     }
1035 }
1036 
1037 #ifndef CONFIG_USER_ONLY
1038 /*
1039  * In deterministic execution mode, instructions doing device I/Os
1040  * must be at the end of the TB.
1041  *
1042  * Called by softmmu_template.h, with iothread mutex not held.
1043  */
1044 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1045 {
1046     TranslationBlock *tb;
1047     CPUClass *cc;
1048     uint32_t n;
1049 
1050     tb = tcg_tb_lookup(retaddr);
1051     if (!tb) {
1052         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1053                   (void *)retaddr);
1054     }
1055     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1056 
1057     /*
1058      * Some guests must re-execute the branch when re-executing a delay
1059      * slot instruction.  When this is the case, adjust icount and N
1060      * to account for the re-execution of the branch.
1061      */
1062     n = 1;
1063     cc = CPU_GET_CLASS(cpu);
1064     if (cc->tcg_ops->io_recompile_replay_branch &&
1065         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1066         cpu_neg(cpu)->icount_decr.u16.low++;
1067         n = 2;
1068     }
1069 
1070     /*
1071      * Exit the loop and potentially generate a new TB executing just
1072      * the I/O insns. We also limit instrumentation to memory
1073      * operations only (which execute after completion) so we don't
1074      * double instrument the instruction.
1075      */
1076     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1077 
1078     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1079         target_ulong pc = log_pc(cpu, tb);
1080         if (qemu_log_in_addr_range(pc)) {
1081             qemu_log("cpu_io_recompile: rewound execution of TB to "
1082                      TARGET_FMT_lx "\n", pc);
1083         }
1084     }
1085 
1086     cpu_loop_exit_noexc(cpu);
1087 }
1088 
1089 static void print_qht_statistics(struct qht_stats hst, GString *buf)
1090 {
1091     uint32_t hgram_opts;
1092     size_t hgram_bins;
1093     char *hgram;
1094 
1095     if (!hst.head_buckets) {
1096         return;
1097     }
1098     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1099                            "(%0.2f%% head buckets used)\n",
1100                            hst.used_head_buckets, hst.head_buckets,
1101                            (double)hst.used_head_buckets /
1102                            hst.head_buckets * 100);
1103 
1104     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1105     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1106     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1107         hgram_opts |= QDIST_PR_NODECIMAL;
1108     }
1109     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1110     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1111                            "Histogram: %s\n",
1112                            qdist_avg(&hst.occupancy) * 100, hgram);
1113     g_free(hgram);
1114 
1115     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1116     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1117     if (hgram_bins > 10) {
1118         hgram_bins = 10;
1119     } else {
1120         hgram_bins = 0;
1121         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1122     }
1123     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1124     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1125                            "Histogram: %s\n",
1126                            qdist_avg(&hst.chain), hgram);
1127     g_free(hgram);
1128 }
1129 
1130 struct tb_tree_stats {
1131     size_t nb_tbs;
1132     size_t host_size;
1133     size_t target_size;
1134     size_t max_target_size;
1135     size_t direct_jmp_count;
1136     size_t direct_jmp2_count;
1137     size_t cross_page;
1138 };
1139 
1140 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
1141 {
1142     const TranslationBlock *tb = value;
1143     struct tb_tree_stats *tst = data;
1144 
1145     tst->nb_tbs++;
1146     tst->host_size += tb->tc.size;
1147     tst->target_size += tb->size;
1148     if (tb->size > tst->max_target_size) {
1149         tst->max_target_size = tb->size;
1150     }
1151     if (tb_page_addr1(tb) != -1) {
1152         tst->cross_page++;
1153     }
1154     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1155         tst->direct_jmp_count++;
1156         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1157             tst->direct_jmp2_count++;
1158         }
1159     }
1160     return false;
1161 }
1162 
1163 void dump_exec_info(GString *buf)
1164 {
1165     struct tb_tree_stats tst = {};
1166     struct qht_stats hst;
1167     size_t nb_tbs, flush_full, flush_part, flush_elide;
1168 
1169     tcg_tb_foreach(tb_tree_stats_iter, &tst);
1170     nb_tbs = tst.nb_tbs;
1171     /* XXX: avoid using doubles ? */
1172     g_string_append_printf(buf, "Translation buffer state:\n");
1173     /*
1174      * Report total code size including the padding and TB structs;
1175      * otherwise users might think "-accel tcg,tb-size" is not honoured.
1176      * For avg host size we use the precise numbers from tb_tree_stats though.
1177      */
1178     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
1179                            tcg_code_size(), tcg_code_capacity());
1180     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
1181     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
1182                            nb_tbs ? tst.target_size / nb_tbs : 0,
1183                            tst.max_target_size);
1184     g_string_append_printf(buf, "TB avg host size    %zu bytes "
1185                            "(expansion ratio: %0.1f)\n",
1186                            nb_tbs ? tst.host_size / nb_tbs : 0,
1187                            tst.target_size ?
1188                            (double)tst.host_size / tst.target_size : 0);
1189     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
1190                            tst.cross_page,
1191                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
1192     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
1193                            "(2 jumps=%zu %zu%%)\n",
1194                            tst.direct_jmp_count,
1195                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
1196                            tst.direct_jmp2_count,
1197                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
1198 
1199     qht_statistics_init(&tb_ctx.htable, &hst);
1200     print_qht_statistics(hst, buf);
1201     qht_statistics_destroy(&hst);
1202 
1203     g_string_append_printf(buf, "\nStatistics:\n");
1204     g_string_append_printf(buf, "TB flush count      %u\n",
1205                            qatomic_read(&tb_ctx.tb_flush_count));
1206     g_string_append_printf(buf, "TB invalidate count %u\n",
1207                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
1208 
1209     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
1210     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
1211     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
1212     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
1213     tcg_dump_info(buf);
1214 }
1215 
1216 #else /* CONFIG_USER_ONLY */
1217 
1218 void cpu_interrupt(CPUState *cpu, int mask)
1219 {
1220     g_assert(qemu_mutex_iothread_locked());
1221     cpu->interrupt_request |= mask;
1222     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
1223 }
1224 
1225 /*
1226  * Walks guest process memory "regions" one by one
1227  * and calls callback function 'fn' for each region.
1228  */
1229 struct walk_memory_regions_data {
1230     walk_memory_regions_fn fn;
1231     void *priv;
1232     target_ulong start;
1233     int prot;
1234 };
1235 
1236 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1237                                    target_ulong end, int new_prot)
1238 {
1239     if (data->start != -1u) {
1240         int rc = data->fn(data->priv, data->start, end, data->prot);
1241         if (rc != 0) {
1242             return rc;
1243         }
1244     }
1245 
1246     data->start = (new_prot ? end : -1u);
1247     data->prot = new_prot;
1248 
1249     return 0;
1250 }
1251 
1252 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1253                                  target_ulong base, int level, void **lp)
1254 {
1255     target_ulong pa;
1256     int i, rc;
1257 
1258     if (*lp == NULL) {
1259         return walk_memory_regions_end(data, base, 0);
1260     }
1261 
1262     if (level == 0) {
1263         PageDesc *pd = *lp;
1264 
1265         for (i = 0; i < V_L2_SIZE; ++i) {
1266             int prot = pd[i].flags;
1267 
1268             pa = base | (i << TARGET_PAGE_BITS);
1269             if (prot != data->prot) {
1270                 rc = walk_memory_regions_end(data, pa, prot);
1271                 if (rc != 0) {
1272                     return rc;
1273                 }
1274             }
1275         }
1276     } else {
1277         void **pp = *lp;
1278 
1279         for (i = 0; i < V_L2_SIZE; ++i) {
1280             pa = base | ((target_ulong)i <<
1281                 (TARGET_PAGE_BITS + V_L2_BITS * level));
1282             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1283             if (rc != 0) {
1284                 return rc;
1285             }
1286         }
1287     }
1288 
1289     return 0;
1290 }
1291 
1292 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1293 {
1294     struct walk_memory_regions_data data;
1295     uintptr_t i, l1_sz = v_l1_size;
1296 
1297     data.fn = fn;
1298     data.priv = priv;
1299     data.start = -1u;
1300     data.prot = 0;
1301 
1302     for (i = 0; i < l1_sz; i++) {
1303         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
1304         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
1305         if (rc != 0) {
1306             return rc;
1307         }
1308     }
1309 
1310     return walk_memory_regions_end(&data, 0, 0);
1311 }
1312 
1313 static int dump_region(void *priv, target_ulong start,
1314     target_ulong end, unsigned long prot)
1315 {
1316     FILE *f = (FILE *)priv;
1317 
1318     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
1319         " "TARGET_FMT_lx" %c%c%c\n",
1320         start, end, end - start,
1321         ((prot & PAGE_READ) ? 'r' : '-'),
1322         ((prot & PAGE_WRITE) ? 'w' : '-'),
1323         ((prot & PAGE_EXEC) ? 'x' : '-'));
1324 
1325     return 0;
1326 }
1327 
1328 /* dump memory mappings */
1329 void page_dump(FILE *f)
1330 {
1331     const int length = sizeof(target_ulong) * 2;
1332     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
1333             length, "start", length, "end", length, "size", "prot");
1334     walk_memory_regions(f, dump_region);
1335 }
1336 
1337 int page_get_flags(target_ulong address)
1338 {
1339     PageDesc *p;
1340 
1341     p = page_find(address >> TARGET_PAGE_BITS);
1342     if (!p) {
1343         return 0;
1344     }
1345     return p->flags;
1346 }
1347 
1348 /*
1349  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
1350  * By default, they are not kept.
1351  */
1352 #ifndef PAGE_TARGET_STICKY
1353 #define PAGE_TARGET_STICKY  0
1354 #endif
1355 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
1356 
1357 /* Modify the flags of a page and invalidate the code if necessary.
1358    The flag PAGE_WRITE_ORG is set automatically depending
1359    on PAGE_WRITE.  The mmap_lock should already be held.  */
1360 void page_set_flags(target_ulong start, target_ulong end, int flags)
1361 {
1362     target_ulong addr, len;
1363     bool reset, inval_tb = false;
1364 
1365     /* This function should never be called with addresses outside the
1366        guest address space.  If this assert fires, it probably indicates
1367        a missing call to h2g_valid.  */
1368     assert(end - 1 <= GUEST_ADDR_MAX);
1369     assert(start < end);
1370     /* Only set PAGE_ANON with new mappings. */
1371     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
1372     assert_memory_lock();
1373 
1374     start = start & TARGET_PAGE_MASK;
1375     end = TARGET_PAGE_ALIGN(end);
1376 
1377     if (flags & PAGE_WRITE) {
1378         flags |= PAGE_WRITE_ORG;
1379     }
1380     reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
1381     if (reset) {
1382         page_reset_target_data(start, end);
1383     }
1384     flags &= ~PAGE_RESET;
1385 
1386     for (addr = start, len = end - start;
1387          len != 0;
1388          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1389         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
1390 
1391         /*
1392          * If the page was executable, but is reset, or is no longer
1393          * executable, or has become writable, then invalidate any code.
1394          */
1395         if ((p->flags & PAGE_EXEC)
1396             && (reset ||
1397                 !(flags & PAGE_EXEC) ||
1398                 (flags & ~p->flags & PAGE_WRITE))) {
1399             inval_tb = true;
1400         }
1401         /* Using mprotect on a page does not change sticky bits. */
1402         p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
1403     }
1404 
1405     if (inval_tb) {
1406         tb_invalidate_phys_range(start, end);
1407     }
1408 }
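
/*
 * Illustrative usage sketch (a hedged assumption about the callers): the
 * user-mode mmap/mprotect emulation is expected to call this along the
 * lines of
 *
 *     page_set_flags(start, start + len, prot | PAGE_VALID | PAGE_RESET);
 *
 * so that any stale translations for the range are dropped via
 * tb_invalidate_phys_range() above.
 */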
1409 
1410 int page_check_range(target_ulong start, target_ulong len, int flags)
1411 {
1412     PageDesc *p;
1413     target_ulong end;
1414     target_ulong addr;
1415 
1416     /* This function should never be called with addresses outside the
1417        guest address space.  If this assert fires, it probably indicates
1418        a missing call to h2g_valid.  */
1419     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
1420         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
1421     }
1422 
1423     if (len == 0) {
1424         return 0;
1425     }
1426     if (start + len - 1 < start) {
1427         /* We've wrapped around.  */
1428         return -1;
1429     }
1430 
1431     /* must do this before we lose bits in the next step */
1432     end = TARGET_PAGE_ALIGN(start + len);
1433     start = start & TARGET_PAGE_MASK;
1434 
1435     for (addr = start, len = end - start;
1436          len != 0;
1437          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1438         p = page_find(addr >> TARGET_PAGE_BITS);
1439         if (!p) {
1440             return -1;
1441         }
1442         if (!(p->flags & PAGE_VALID)) {
1443             return -1;
1444         }
1445 
1446         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
1447             return -1;
1448         }
1449         if (flags & PAGE_WRITE) {
1450             if (!(p->flags & PAGE_WRITE_ORG)) {
1451                 return -1;
1452             }
1453             /* unprotect the page if it was made read-only because it
1454                contains translated code */
1455             if (!(p->flags & PAGE_WRITE)) {
1456                 if (!page_unprotect(addr, 0)) {
1457                     return -1;
1458                 }
1459             }
1460         }
1461     }
1462     return 0;
1463 }
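
/*
 * Illustrative usage sketch (hedged): guest syscall emulation typically
 * validates a user buffer before touching it, along the lines of
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -TARGET_EFAULT;    // hypothetical caller in linux-user
 *     }
 *
 * A return of 0 means every page in the range is valid and grants at least
 * the requested protections; the PAGE_WRITE case may transparently
 * unprotect pages that were write-protected to guard translated code.
 */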
1464 
1465 void page_protect(tb_page_addr_t page_addr)
1466 {
1467     target_ulong addr;
1468     PageDesc *p;
1469     int prot;
1470 
1471     p = page_find(page_addr >> TARGET_PAGE_BITS);
1472     if (p && (p->flags & PAGE_WRITE)) {
1473         /*
1474          * Force the host page to be non-writable (writes will take a page
1475          * fault + mprotect overhead).
1476          */
1477         page_addr &= qemu_host_page_mask;
1478         prot = 0;
1479         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1480              addr += TARGET_PAGE_SIZE) {
1481 
1482             p = page_find(addr >> TARGET_PAGE_BITS);
1483             if (!p) {
1484                 continue;
1485             }
1486             prot |= p->flags;
1487             p->flags &= ~PAGE_WRITE;
1488         }
1489         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1490                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1491     }
1492 }
1493 
1494 /* called from signal handler: invalidate the code and unprotect the
1495  * page. Return 0 if the fault was not handled, 1 if it was handled,
1496  * and 2 if it was handled but the caller must cause the TB to be
1497  * immediately exited. (We can only return 2 if the 'pc' argument is
1498  * non-zero.)
1499  */
1500 int page_unprotect(target_ulong address, uintptr_t pc)
1501 {
1502     unsigned int prot;
1503     bool current_tb_invalidated;
1504     PageDesc *p;
1505     target_ulong host_start, host_end, addr;
1506 
1507     /* Technically this isn't safe inside a signal handler.  However we
1508        know this only ever happens in a synchronous SEGV handler, so in
1509        practice it seems to be ok.  */
1510     mmap_lock();
1511 
1512     p = page_find(address >> TARGET_PAGE_BITS);
1513     if (!p) {
1514         mmap_unlock();
1515         return 0;
1516     }
1517 
1518     /* if the page was really writable, then we change its
1519        protection back to writable */
1520     if (p->flags & PAGE_WRITE_ORG) {
1521         current_tb_invalidated = false;
1522         if (p->flags & PAGE_WRITE) {
1523             /* If the page is actually marked WRITE then assume this is because
1524              * this thread raced with another one which got here first and
1525              * set the page to PAGE_WRITE and did the TB invalidate for us.
1526              */
1527 #ifdef TARGET_HAS_PRECISE_SMC
1528             TranslationBlock *current_tb = tcg_tb_lookup(pc);
1529             if (current_tb) {
1530                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
1531             }
1532 #endif
1533         } else {
1534             host_start = address & qemu_host_page_mask;
1535             host_end = host_start + qemu_host_page_size;
1536 
1537             prot = 0;
1538             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
1539                 p = page_find(addr >> TARGET_PAGE_BITS);
1540                 p->flags |= PAGE_WRITE;
1541                 prot |= p->flags;
1542 
1543                 /* and since the content will be modified, we must invalidate
1544                    the corresponding translated code. */
1545                 current_tb_invalidated |=
1546                     tb_invalidate_phys_page_unwind(addr, pc);
1547             }
1548             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
1549                      prot & PAGE_BITS);
1550         }
1551         mmap_unlock();
1552         /* If current TB was invalidated return to main loop */
1553         return current_tb_invalidated ? 2 : 1;
1554     }
1555     mmap_unlock();
1556     return 0;
1557 }
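
/*
 * Illustrative only (hedged): the host SIGSEGV path in user-exec.c is
 * expected to consume the return value above roughly as
 *
 *     switch (page_unprotect(guest_addr, host_pc)) {
 *     case 2:                          // handled, but the current TB
 *         cpu_loop_exit_noexc(cpu);    // was invalidated: exit it now
 *     case 1:
 *         return;                      // handled: retry the faulting write
 *     case 0:
 *         break;                       // not handled: deliver the signal
 *     }
 */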
1558 #endif /* CONFIG_USER_ONLY */
1559 
1560 /*
1561  * Called by generic code, e.g. at cpu reset after cpu creation,
1562  * therefore we must be prepared to allocate the jump cache.
1563  */
1564 void tcg_flush_jmp_cache(CPUState *cpu)
1565 {
1566     CPUJumpCache *jc = cpu->tb_jmp_cache;
1567 
1568     if (likely(jc)) {
1569         for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
1570             qatomic_set(&jc->array[i].tb, NULL);
1571         }
1572     } else {
1573         /* This should happen once during realize, and thus never race. */
1574         jc = g_new0(CPUJumpCache, 1);
1575         jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
1576         assert(jc == NULL);
1577     }
1578 }
1579 
1580 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
1581 void tcg_flush_softmmu_tlb(CPUState *cs)
1582 {
1583 #ifdef CONFIG_SOFTMMU
1584     tlb_flush(cs);
1585 #endif
1586 }
1587