xref: /openbmc/qemu/accel/tcg/translate-all.c (revision 0b5c91f7)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #ifdef _WIN32
20 #include <windows.h>
21 #endif
22 #include "qemu/osdep.h"
23 
24 
25 #include "qemu-common.h"
26 #define NO_CPU_IO_DEFS
27 #include "cpu.h"
28 #include "trace.h"
29 #include "disas/disas.h"
30 #include "exec/exec-all.h"
31 #include "tcg.h"
32 #if defined(CONFIG_USER_ONLY)
33 #include "qemu.h"
34 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
35 #include <sys/param.h>
36 #if __FreeBSD_version >= 700104
37 #define HAVE_KINFO_GETVMMAP
38 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
39 #include <sys/proc.h>
40 #include <machine/profile.h>
41 #define _KERNEL
42 #include <sys/user.h>
43 #undef _KERNEL
44 #undef sigqueue
45 #include <libutil.h>
46 #endif
47 #endif
48 #else
49 #include "exec/address-spaces.h"
50 #endif
51 
52 #include "exec/cputlb.h"
53 #include "exec/tb-hash.h"
54 #include "translate-all.h"
55 #include "qemu/bitmap.h"
56 #include "qemu/error-report.h"
57 #include "qemu/timer.h"
58 #include "qemu/main-loop.h"
59 #include "exec/log.h"
60 #include "sysemu/cpus.h"
61 
62 /* #define DEBUG_TB_INVALIDATE */
63 /* #define DEBUG_TB_FLUSH */
64 /* make various TB consistency checks */
65 /* #define DEBUG_TB_CHECK */
66 
67 #ifdef DEBUG_TB_INVALIDATE
68 #define DEBUG_TB_INVALIDATE_GATE 1
69 #else
70 #define DEBUG_TB_INVALIDATE_GATE 0
71 #endif
72 
73 #ifdef DEBUG_TB_FLUSH
74 #define DEBUG_TB_FLUSH_GATE 1
75 #else
76 #define DEBUG_TB_FLUSH_GATE 0
77 #endif
78 
79 #if !defined(CONFIG_USER_ONLY)
80 /* TB consistency checks only implemented for usermode emulation.  */
81 #undef DEBUG_TB_CHECK
82 #endif
83 
84 #ifdef DEBUG_TB_CHECK
85 #define DEBUG_TB_CHECK_GATE 1
86 #else
87 #define DEBUG_TB_CHECK_GATE 0
88 #endif
89 
90 /* Access to the various translation structures needs to be serialised via locks
91  * for consistency. This is automatic for SoftMMU-based system
92  * emulation due to its single-threaded nature. In user-mode emulation,
93  * access to the memory-related structures is protected by the
94  * mmap_lock.
95  */
96 #ifdef CONFIG_SOFTMMU
97 #define assert_memory_lock() tcg_debug_assert(have_tb_lock)
98 #else
99 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
100 #endif
101 
102 #define SMC_BITMAP_USE_THRESHOLD 10
103 
104 typedef struct PageDesc {
105     /* list of TBs intersecting this ram page */
106     uintptr_t first_tb;
107 #ifdef CONFIG_SOFTMMU
108     /* in order to optimize self-modifying code, we count the write
109        accesses to a given page and, past a threshold, use a bitmap */
110     unsigned long *code_bitmap;
111     unsigned int code_write_count;
112 #else
113     unsigned long flags;
114 #endif
115 #ifndef CONFIG_USER_ONLY
116     QemuSpin lock;
117 #endif
118 } PageDesc;
119 
120 /**
121  * struct page_entry - page descriptor entry
122  * @pd:     pointer to the &struct PageDesc of the page this entry represents
123  * @index:  page index of the page
124  * @locked: whether the page is locked
125  *
126  * This struct helps us keep track of the locked state of a page, without
127  * bloating &struct PageDesc.
128  *
129  * A page lock protects accesses to all fields of &struct PageDesc.
130  *
131  * See also: &struct page_collection.
132  */
133 struct page_entry {
134     PageDesc *pd;
135     tb_page_addr_t index;
136     bool locked;
137 };
138 
139 /**
140  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
141  * @tree:   Binary search tree (BST) of the pages, with key == page index
142  * @max:    Pointer to the page in @tree with the highest page index
143  *
144  * To avoid deadlock we lock pages in ascending order of page index.
145  * When operating on a set of pages, we need to keep track of them so that
146  * we can lock them in order and also unlock them later. For this we collect
147  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
148  * @tree implementation we use does not provide an O(1) operation to obtain the
149  * highest-ranked element, we use @max to keep track of the inserted page
150  * with the highest index. This is valuable because if a page is not in
151  * the tree and its index is higher than @max's, then we can lock it
152  * without breaking the locking order rule.
153  *
154  * Note on naming: 'struct page_set' would be shorter, but we already have a few
155  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
156  *
157  * See also: page_collection_lock().
158  */
159 struct page_collection {
160     GTree *tree;
161     struct page_entry *max;
162 };
163 
164 /* list iterators for lists of tagged pointers in TranslationBlock */
165 #define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
166     for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
167          tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
168              tb = (TranslationBlock *)((uintptr_t)tb & ~1))
169 
170 #define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
171     TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
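
/*
 * For example (illustrative), a TB that spans two guest pages is linked
 * into both pages' lists: tb_page_add() stores the TB pointer ORed with n
 * (0 or 1) in PageDesc::first_tb, so the iterator above recovers n from
 * bit 0 and follows tb->page_next[n], staying on the chain that belongs
 * to this particular page.
 */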
172 
173 /* In system mode we want L1_MAP to be based on ram offsets,
174    while in user mode we want it to be based on virtual addresses.  */
175 #if !defined(CONFIG_USER_ONLY)
176 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
177 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
178 #else
179 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
180 #endif
181 #else
182 # define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
183 #endif
184 
185 /* Size of the L2 (and L3, etc) page tables.  */
186 #define V_L2_BITS 10
187 #define V_L2_SIZE (1 << V_L2_BITS)
188 
189 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
190 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
191                   sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
192                   * BITS_PER_BYTE);
193 
194 /*
195  * L1 Mapping properties
196  */
197 static int v_l1_size;
198 static int v_l1_shift;
199 static int v_l2_levels;
200 
201 /* The bottom level has pointers to PageDesc, and is indexed by
202  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
203  */
204 #define V_L1_MIN_BITS 4
205 #define V_L1_MAX_BITS (V_L2_BITS + 3)
206 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
207 
208 static void *l1_map[V_L1_MAX_SIZE];
209 
210 /* code generation context */
211 TCGContext tcg_init_ctx;
212 __thread TCGContext *tcg_ctx;
213 TBContext tb_ctx;
214 bool parallel_cpus;
215 
216 /* translation block context */
217 static __thread int have_tb_lock;
218 
219 static void page_table_config_init(void)
220 {
221     uint32_t v_l1_bits;
222 
223     assert(TARGET_PAGE_BITS);
224     /* The bits remaining after N lower levels of page tables.  */
225     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
226     if (v_l1_bits < V_L1_MIN_BITS) {
227         v_l1_bits += V_L2_BITS;
228     }
229 
230     v_l1_size = 1 << v_l1_bits;
231     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
232     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
233 
234     assert(v_l1_bits <= V_L1_MAX_BITS);
235     assert(v_l1_shift % V_L2_BITS == 0);
236     assert(v_l2_levels >= 0);
237 }
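
/*
 * A worked example of the arithmetic above (illustrative numbers): with
 * TARGET_PAGE_BITS == 12 and L1_MAP_ADDR_SPACE_BITS == 64, the remainder
 * (64 - 12) % V_L2_BITS == 2 is below V_L1_MIN_BITS, so v_l1_bits becomes
 * 12: v_l1_size == 4096 entries, v_l1_shift == 64 - 12 - 12 == 40, and
 * v_l2_levels == 40 / 10 - 1 == 3 intermediate levels sit between the L1
 * table and the PageDesc leaves.
 */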
238 
239 #define assert_tb_locked() tcg_debug_assert(have_tb_lock)
240 #define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock)
241 
242 void tb_lock(void)
243 {
244     assert_tb_unlocked();
245     qemu_mutex_lock(&tb_ctx.tb_lock);
246     have_tb_lock++;
247 }
248 
249 void tb_unlock(void)
250 {
251     assert_tb_locked();
252     have_tb_lock--;
253     qemu_mutex_unlock(&tb_ctx.tb_lock);
254 }
255 
256 void tb_lock_reset(void)
257 {
258     if (have_tb_lock) {
259         qemu_mutex_unlock(&tb_ctx.tb_lock);
260         have_tb_lock = 0;
261     }
262 }
263 
264 void cpu_gen_init(void)
265 {
266     tcg_context_init(&tcg_init_ctx);
267 }
268 
269 /* Encode VAL as a signed leb128 sequence at P.
270    Return P incremented past the encoded value.  */
271 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
272 {
273     int more, byte;
274 
275     do {
276         byte = val & 0x7f;
277         val >>= 7;
278         more = !((val == 0 && (byte & 0x40) == 0)
279                  || (val == -1 && (byte & 0x40) != 0));
280         if (more) {
281             byte |= 0x80;
282         }
283         *p++ = byte;
284     } while (more);
285 
286     return p;
287 }
288 
289 /* Decode a signed leb128 sequence at *PP; increment *PP past the
290    decoded value.  Return the decoded value.  */
291 static target_long decode_sleb128(uint8_t **pp)
292 {
293     uint8_t *p = *pp;
294     target_long val = 0;
295     int byte, shift = 0;
296 
297     do {
298         byte = *p++;
299         val |= (target_ulong)(byte & 0x7f) << shift;
300         shift += 7;
301     } while (byte & 0x80);
302     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
303         val |= -(target_ulong)1 << shift;
304     }
305 
306     *pp = p;
307     return val;
308 }
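
/*
 * A minimal round-trip sketch of the two helpers above (kept out of the
 * build; the test values are arbitrary): 300 encodes as { 0xac, 0x02 } and
 * -1 as the single byte { 0x7f }, which decode_sleb128() sign-extends.
 */
#if 0
static void sleb128_selftest(void)
{
    target_long vals[] = { 0, 1, -1, 300, -123 };
    uint8_t buf[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(vals); i++) {
        uint8_t *end = encode_sleb128(buf, vals[i]);
        uint8_t *p = buf;

        assert(decode_sleb128(&p) == vals[i]);
        assert(p == end);
    }
}
#endif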
309 
310 /* Encode the data collected about the instructions while compiling TB.
311    Place the data at BLOCK, and return the number of bytes consumed.
312 
313    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
314    which come from the target's insn_start data, followed by a uintptr_t
315    which comes from the host pc of the end of the code implementing the insn.
316 
317    Each line of the table is encoded as sleb128 deltas from the previous
318    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
319    That is, the first column is seeded with the guest pc, the last column
320    with the host pc, and the middle columns with zeros.  */
321 
322 static int encode_search(TranslationBlock *tb, uint8_t *block)
323 {
324     uint8_t *highwater = tcg_ctx->code_gen_highwater;
325     uint8_t *p = block;
326     int i, j, n;
327 
328     for (i = 0, n = tb->icount; i < n; ++i) {
329         target_ulong prev;
330 
331         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
332             if (i == 0) {
333                 prev = (j == 0 ? tb->pc : 0);
334             } else {
335                 prev = tcg_ctx->gen_insn_data[i - 1][j];
336             }
337             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
338         }
339         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
340         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
341 
342         /* Test for (pending) buffer overflow.  The assumption is that any
343            one row beginning below the high water mark cannot overrun
344            the buffer completely.  Thus we can test for overflow after
345            encoding a row without having to check during encoding.  */
346         if (unlikely(p > highwater)) {
347             return -1;
348         }
349     }
350 
351     return p - block;
352 }
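
/*
 * A worked example (illustrative, assuming TARGET_INSN_START_WORDS == 1):
 * for a TB at guest pc 0x1000 with two insns whose insn_start values are
 * 0x1000 and 0x1004, and whose host code ends at offsets 40 and 72 from
 * tb->tc.ptr, the encoded rows are the deltas { 0, 40 } and { 4, 32 }.
 * cpu_restore_state_from_tb() below replays these deltas starting from
 * { tb->pc, tb->tc.ptr } until the accumulated host pc passes searched_pc.
 */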
353 
354 /* The cpu state corresponding to 'searched_pc' is restored.
355  * Called with tb_lock held.
356  * When reset_icount is true, the current TB will be interrupted and
357  * icount should be recalculated.
358  */
359 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
360                                      uintptr_t searched_pc, bool reset_icount)
361 {
362     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
363     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
364     CPUArchState *env = cpu->env_ptr;
365     uint8_t *p = tb->tc.ptr + tb->tc.size;
366     int i, j, num_insns = tb->icount;
367 #ifdef CONFIG_PROFILER
368     TCGProfile *prof = &tcg_ctx->prof;
369     int64_t ti = profile_getclock();
370 #endif
371 
372     searched_pc -= GETPC_ADJ;
373 
374     if (searched_pc < host_pc) {
375         return -1;
376     }
377 
378     /* Reconstruct the stored insn data while looking for the point at
379        which the end of the insn exceeds the searched_pc.  */
380     for (i = 0; i < num_insns; ++i) {
381         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
382             data[j] += decode_sleb128(&p);
383         }
384         host_pc += decode_sleb128(&p);
385         if (host_pc > searched_pc) {
386             goto found;
387         }
388     }
389     return -1;
390 
391  found:
392     if (reset_icount && (tb->cflags & CF_USE_ICOUNT)) {
393         assert(use_icount);
394         /* Reset the cycle counter to the start of the block
395            and shift it to the number of actually executed instructions */
396         cpu->icount_decr.u16.low += num_insns - i;
397     }
398     restore_state_to_opc(env, tb, data);
399 
400 #ifdef CONFIG_PROFILER
401     atomic_set(&prof->restore_time,
402                 prof->restore_time + profile_getclock() - ti);
403     atomic_set(&prof->restore_count, prof->restore_count + 1);
404 #endif
405     return 0;
406 }
407 
408 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
409 {
410     TranslationBlock *tb;
411     bool r = false;
412     uintptr_t check_offset;
413 
414     /* The host_pc has to be in the region of the current code buffer. If
415      * it is not, we will not be able to resolve it here. The two cases
416      * where host_pc will not be correct are:
417      *
418      *  - fault during translation (instruction fetch)
419      *  - fault from helper (not using GETPC() macro)
420      *
421      * Either way we need to return early to avoid blowing up on a
422      * recursive tb_lock() as we can't resolve it here.
423      *
424      * We are using unsigned arithmetic so if host_pc <
425      * tcg_init_ctx.code_gen_buffer check_offset will wrap to way
426      * above the code_gen_buffer_size
427      */
428     check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer;
429 
430     if (check_offset < tcg_init_ctx.code_gen_buffer_size) {
431         tb_lock();
432         tb = tcg_tb_lookup(host_pc);
433         if (tb) {
434             cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
435             if (tb->cflags & CF_NOCACHE) {
436                 /* one-shot translation, invalidate it immediately */
437                 tb_phys_invalidate(tb, -1);
438                 tcg_tb_remove(tb);
439             }
440             r = true;
441         }
442         tb_unlock();
443     }
444 
445     return r;
446 }
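
/*
 * For instance (illustrative addresses), with code_gen_buffer at
 * 0x7f0000000000 and a 1 GiB buffer, a host_pc of 0x55aa00001234 coming
 * from a helper that did not use GETPC() yields a check_offset that wraps
 * far above code_gen_buffer_size, so the lookup above is safely skipped.
 */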
447 
448 static void page_init(void)
449 {
450     page_size_init();
451     page_table_config_init();
452 
453 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
454     {
455 #ifdef HAVE_KINFO_GETVMMAP
456         struct kinfo_vmentry *freep;
457         int i, cnt;
458 
459         freep = kinfo_getvmmap(getpid(), &cnt);
460         if (freep) {
461             mmap_lock();
462             for (i = 0; i < cnt; i++) {
463                 unsigned long startaddr, endaddr;
464 
465                 startaddr = freep[i].kve_start;
466                 endaddr = freep[i].kve_end;
467                 if (h2g_valid(startaddr)) {
468                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
469 
470                     if (h2g_valid(endaddr)) {
471                         endaddr = h2g(endaddr);
472                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
473                     } else {
474 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
475                         endaddr = ~0ul;
476                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
477 #endif
478                     }
479                 }
480             }
481             free(freep);
482             mmap_unlock();
483         }
484 #else
485         FILE *f;
486 
487         last_brk = (unsigned long)sbrk(0);
488 
489         f = fopen("/compat/linux/proc/self/maps", "r");
490         if (f) {
491             mmap_lock();
492 
493             do {
494                 unsigned long startaddr, endaddr;
495                 int n;
496 
497                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
498 
499                 if (n == 2 && h2g_valid(startaddr)) {
500                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
501 
502                     if (h2g_valid(endaddr)) {
503                         endaddr = h2g(endaddr);
504                     } else {
505                         endaddr = ~0ul;
506                     }
507                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
508                 }
509             } while (!feof(f));
510 
511             fclose(f);
512             mmap_unlock();
513         }
514 #endif
515     }
516 #endif
517 }
518 
519 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
520 {
521     PageDesc *pd;
522     void **lp;
523     int i;
524 
525     /* Level 1.  Always allocated.  */
526     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
527 
528     /* Level 2..N-1.  */
529     for (i = v_l2_levels; i > 0; i--) {
530         void **p = atomic_rcu_read(lp);
531 
532         if (p == NULL) {
533             void *existing;
534 
535             if (!alloc) {
536                 return NULL;
537             }
538             p = g_new0(void *, V_L2_SIZE);
539             existing = atomic_cmpxchg(lp, NULL, p);
540             if (unlikely(existing)) {
541                 g_free(p);
542                 p = existing;
543             }
544         }
545 
546         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
547     }
548 
549     pd = atomic_rcu_read(lp);
550     if (pd == NULL) {
551         void *existing;
552 
553         if (!alloc) {
554             return NULL;
555         }
556         pd = g_new0(PageDesc, V_L2_SIZE);
557 #ifndef CONFIG_USER_ONLY
558         {
559             int i;
560 
561             for (i = 0; i < V_L2_SIZE; i++) {
562                 qemu_spin_init(&pd[i].lock);
563             }
564         }
565 #endif
566         existing = atomic_cmpxchg(lp, NULL, pd);
567         if (unlikely(existing)) {
568             g_free(pd);
569             pd = existing;
570         }
571     }
572 
573     return pd + (index & (V_L2_SIZE - 1));
574 }
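
/*
 * Continuing the worked example from page_table_config_init() (illustrative):
 * with v_l1_shift == 40 and v_l2_levels == 3, a page index is split into a
 * 12-bit L1 slot (bits 51..40), three 10-bit intermediate slots (bits 39..30,
 * 29..20 and 19..10) and a 10-bit leaf slot (bits 9..0) into the PageDesc
 * array.  Missing levels are allocated lazily and published with
 * atomic_cmpxchg(); the loser of a concurrent race frees its copy and adopts
 * the winner's.
 */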
575 
576 static inline PageDesc *page_find(tb_page_addr_t index)
577 {
578     return page_find_alloc(index, 0);
579 }
580 
581 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
582                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
583 
584 /* In user-mode page locks aren't used; mmap_lock is enough */
585 #ifdef CONFIG_USER_ONLY
586 static inline void page_lock(PageDesc *pd)
587 { }
588 
589 static inline void page_unlock(PageDesc *pd)
590 { }
591 
592 static inline void page_lock_tb(const TranslationBlock *tb)
593 { }
594 
595 static inline void page_unlock_tb(const TranslationBlock *tb)
596 { }
597 
598 struct page_collection *
599 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
600 {
601     return NULL;
602 }
603 
604 void page_collection_unlock(struct page_collection *set)
605 { }
606 #else /* !CONFIG_USER_ONLY */
607 
608 static inline void page_lock(PageDesc *pd)
609 {
610     qemu_spin_lock(&pd->lock);
611 }
612 
613 static inline void page_unlock(PageDesc *pd)
614 {
615     qemu_spin_unlock(&pd->lock);
616 }
617 
618 /* lock the page(s) of a TB in the correct acquisition order */
619 static inline void page_lock_tb(const TranslationBlock *tb)
620 {
621     page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
622 }
623 
624 static inline void page_unlock_tb(const TranslationBlock *tb)
625 {
626     page_unlock(page_find(tb->page_addr[0] >> TARGET_PAGE_BITS));
627     if (unlikely(tb->page_addr[1] != -1)) {
628         page_unlock(page_find(tb->page_addr[1] >> TARGET_PAGE_BITS));
629     }
630 }
631 
632 static inline struct page_entry *
633 page_entry_new(PageDesc *pd, tb_page_addr_t index)
634 {
635     struct page_entry *pe = g_malloc(sizeof(*pe));
636 
637     pe->index = index;
638     pe->pd = pd;
639     pe->locked = false;
640     return pe;
641 }
642 
643 static void page_entry_destroy(gpointer p)
644 {
645     struct page_entry *pe = p;
646 
647     g_assert(pe->locked);
648     page_unlock(pe->pd);
649     g_free(pe);
650 }
651 
652 /* returns false on success */
653 static bool page_entry_trylock(struct page_entry *pe)
654 {
655     bool busy;
656 
657     busy = qemu_spin_trylock(&pe->pd->lock);
658     if (!busy) {
659         g_assert(!pe->locked);
660         pe->locked = true;
661     }
662     return busy;
663 }
664 
665 static void do_page_entry_lock(struct page_entry *pe)
666 {
667     page_lock(pe->pd);
668     g_assert(!pe->locked);
669     pe->locked = true;
670 }
671 
672 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
673 {
674     struct page_entry *pe = value;
675 
676     do_page_entry_lock(pe);
677     return FALSE;
678 }
679 
680 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
681 {
682     struct page_entry *pe = value;
683 
684     if (pe->locked) {
685         pe->locked = false;
686         page_unlock(pe->pd);
687     }
688     return FALSE;
689 }
690 
691 /*
692  * Trylock a page, and if successful, add the page to a collection.
693  * Returns true ("busy") if the page could not be locked; false otherwise.
694  */
695 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
696 {
697     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
698     struct page_entry *pe;
699     PageDesc *pd;
700 
701     pe = g_tree_lookup(set->tree, &index);
702     if (pe) {
703         return false;
704     }
705 
706     pd = page_find(index);
707     if (pd == NULL) {
708         return false;
709     }
710 
711     pe = page_entry_new(pd, index);
712     g_tree_insert(set->tree, &pe->index, pe);
713 
714     /*
715      * If this is either (1) the first insertion or (2) a page whose index
716      * is higher than any other so far, just lock the page and move on.
717      */
718     if (set->max == NULL || pe->index > set->max->index) {
719         set->max = pe;
720         do_page_entry_lock(pe);
721         return false;
722     }
723     /*
724      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
725      * locks in order.
726      */
727     return page_entry_trylock(pe);
728 }
729 
730 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
731 {
732     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
733     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
734 
735     if (a == b) {
736         return 0;
737     } else if (a < b) {
738         return -1;
739     }
740     return 1;
741 }
742 
743 /*
744  * Lock a range of pages ([@start,@end[) as well as the pages of all
745  * intersecting TBs.
746  * Locking order: acquire locks in ascending order of page index.
747  */
748 struct page_collection *
749 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
750 {
751     struct page_collection *set = g_malloc(sizeof(*set));
752     tb_page_addr_t index;
753     PageDesc *pd;
754 
755     start >>= TARGET_PAGE_BITS;
756     end   >>= TARGET_PAGE_BITS;
757     g_assert(start <= end);
758 
759     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
760                                 page_entry_destroy);
761     set->max = NULL;
762 
763  retry:
764     g_tree_foreach(set->tree, page_entry_lock, NULL);
765 
766     for (index = start; index <= end; index++) {
767         TranslationBlock *tb;
768         int n;
769 
770         pd = page_find(index);
771         if (pd == NULL) {
772             continue;
773         }
774         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
775             g_tree_foreach(set->tree, page_entry_unlock, NULL);
776             goto retry;
777         }
778         PAGE_FOR_EACH_TB(pd, tb, n) {
779             if (page_trylock_add(set, tb->page_addr[0]) ||
780                 (tb->page_addr[1] != -1 &&
781                  page_trylock_add(set, tb->page_addr[1]))) {
782                 /* drop all locks, and reacquire in order */
783                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
784                 goto retry;
785             }
786         }
787     }
788     return set;
789 }
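
/*
 * Example of the retry above (illustrative): suppose pages 5 and 9 are
 * already locked (set->max has index 9) and a TB found on page 9 also spans
 * page 7.  Locking 7 outright would violate the ascending-order rule, so
 * page_trylock_add() only trylocks it; if that fails, every lock is dropped
 * and the walk restarts, this time taking 5, 7 and 9 in order through
 * page_entry_lock().
 */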
790 
791 void page_collection_unlock(struct page_collection *set)
792 {
793     /* entries are unlocked and freed via page_entry_destroy */
794     g_tree_destroy(set->tree);
795     g_free(set);
796 }
797 
798 #endif /* !CONFIG_USER_ONLY */
799 
800 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
801                            PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
802 {
803     PageDesc *p1, *p2;
804 
805     assert_memory_lock();
806     g_assert(phys1 != -1 && phys1 != phys2);
807     p1 = page_find_alloc(phys1 >> TARGET_PAGE_BITS, alloc);
808     if (ret_p1) {
809         *ret_p1 = p1;
810     }
811     if (likely(phys2 == -1)) {
812         page_lock(p1);
813         return;
814     }
815     p2 = page_find_alloc(phys2 >> TARGET_PAGE_BITS, alloc);
816     if (ret_p2) {
817         *ret_p2 = p2;
818     }
819     if (phys1 < phys2) {
820         page_lock(p1);
821         page_lock(p2);
822     } else {
823         page_lock(p2);
824         page_lock(p1);
825     }
826 }
827 
828 #if defined(CONFIG_USER_ONLY)
829 /* Currently it is not recommended to allocate big chunks of data in
830    user mode. This will change when a dedicated libc is used.  */
831 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
832    region in which the guest needs to run.  Revisit this.  */
833 #define USE_STATIC_CODE_GEN_BUFFER
834 #endif
835 
836 /* Minimum size of the code gen buffer.  This number is arbitrarily chosen,
837    but not so small that we can't have a fair number of TBs live.  */
838 #define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
839 
840 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
841    indicated, this is constrained by the range of direct branches on the
842    host cpu, as used by the TCG implementation of goto_tb.  */
843 #if defined(__x86_64__)
844 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
845 #elif defined(__sparc__)
846 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
847 #elif defined(__powerpc64__)
848 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
849 #elif defined(__powerpc__)
850 # define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
851 #elif defined(__aarch64__)
852 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
853 #elif defined(__s390x__)
854   /* We have a +- 4GB range on the branches; leave some slop.  */
855 # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
856 #elif defined(__mips__)
857   /* We have a 256MB branch region, but leave room to make sure the
858      main executable is also within that region.  */
859 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
860 #else
861 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
862 #endif
863 
864 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
865 
866 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
867   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
868    ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
869 
870 static inline size_t size_code_gen_buffer(size_t tb_size)
871 {
872     /* Size the buffer.  */
873     if (tb_size == 0) {
874 #ifdef USE_STATIC_CODE_GEN_BUFFER
875         tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
876 #else
877         /* ??? Needs adjustments.  */
878         /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
879            static buffer, we could size this on RESERVED_VA, on the text
880            segment size of the executable, or continue to use the default.  */
881         tb_size = (unsigned long)(ram_size / 4);
882 #endif
883     }
884     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
885         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
886     }
887     if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
888         tb_size = MAX_CODE_GEN_BUFFER_SIZE;
889     }
890     return tb_size;
891 }
892 
893 #ifdef __mips__
894 /* In order to use J and JAL within the code_gen_buffer, we require
895    that the buffer not cross a 256MB boundary.  */
896 static inline bool cross_256mb(void *addr, size_t size)
897 {
898     return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
899 }
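
/*
 * Worked example (illustrative): for addr == 0x0ff00000 and
 * size == 0x00200000, addr + size == 0x10100000 and the XOR is 0x1fe00000,
 * which has bits set above the low 28, so the buffer crosses a 256MB
 * boundary.  For addr == 0x10000000 with the same size the XOR is
 * 0x00200000 and the test is false.
 */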
900 
901 /* We weren't able to allocate a buffer without crossing that boundary,
902    so make do with the larger portion of the buffer that doesn't cross.
903    Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
904 static inline void *split_cross_256mb(void *buf1, size_t size1)
905 {
906     void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
907     size_t size2 = buf1 + size1 - buf2;
908 
909     size1 = buf2 - buf1;
910     if (size1 < size2) {
911         size1 = size2;
912         buf1 = buf2;
913     }
914 
915     tcg_ctx->code_gen_buffer_size = size1;
916     return buf1;
917 }
918 #endif
919 
920 #ifdef USE_STATIC_CODE_GEN_BUFFER
921 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
922     __attribute__((aligned(CODE_GEN_ALIGN)));
923 
924 static inline void *alloc_code_gen_buffer(void)
925 {
926     void *buf = static_code_gen_buffer;
927     void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
928     size_t size;
929 
930     /* page-align the beginning and end of the buffer */
931     buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
932     end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
933 
934     size = end - buf;
935 
936     /* Honor a command-line option limiting the size of the buffer.  */
937     if (size > tcg_ctx->code_gen_buffer_size) {
938         size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size,
939                                qemu_real_host_page_size);
940     }
941     tcg_ctx->code_gen_buffer_size = size;
942 
943 #ifdef __mips__
944     if (cross_256mb(buf, size)) {
945         buf = split_cross_256mb(buf, size);
946         size = tcg_ctx->code_gen_buffer_size;
947     }
948 #endif
949 
950     if (qemu_mprotect_rwx(buf, size)) {
951         abort();
952     }
953     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
954 
955     return buf;
956 }
957 #elif defined(_WIN32)
958 static inline void *alloc_code_gen_buffer(void)
959 {
960     size_t size = tcg_ctx->code_gen_buffer_size;
961     return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
962                         PAGE_EXECUTE_READWRITE);
963 }
964 #else
965 static inline void *alloc_code_gen_buffer(void)
966 {
967     int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
968     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
969     uintptr_t start = 0;
970     size_t size = tcg_ctx->code_gen_buffer_size;
971     void *buf;
972 
973     /* Constrain the position of the buffer based on the host cpu.
974        Note that these addresses are chosen in concert with the
975        addresses assigned in the relevant linker script file.  */
976 # if defined(__PIE__) || defined(__PIC__)
977     /* Don't bother setting a preferred location if we're building
978        a position-independent executable.  We're more likely to get
979        an address near the main executable if we let the kernel
980        choose the address.  */
981 # elif defined(__x86_64__) && defined(MAP_32BIT)
982     /* Force the memory down into low memory with the executable.
983        Leave the choice of exact location with the kernel.  */
984     flags |= MAP_32BIT;
985     /* Cannot expect to map more than 800MB in low memory.  */
986     if (size > 800u * 1024 * 1024) {
987         tcg_ctx->code_gen_buffer_size = size = 800u * 1024 * 1024;
988     }
989 # elif defined(__sparc__)
990     start = 0x40000000ul;
991 # elif defined(__s390x__)
992     start = 0x90000000ul;
993 # elif defined(__mips__)
994 #  if _MIPS_SIM == _ABI64
995     start = 0x128000000ul;
996 #  else
997     start = 0x08000000ul;
998 #  endif
999 # endif
1000 
1001     buf = mmap((void *)start, size, prot, flags, -1, 0);
1002     if (buf == MAP_FAILED) {
1003         return NULL;
1004     }
1005 
1006 #ifdef __mips__
1007     if (cross_256mb(buf, size)) {
1008         /* Try again, with the original still mapped, to avoid re-acquiring
1009            that 256mb crossing.  This time don't specify an address.  */
1010         size_t size2;
1011         void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
1012         switch ((int)(buf2 != MAP_FAILED)) {
1013         case 1:
1014             if (!cross_256mb(buf2, size)) {
1015                 /* Success!  Use the new buffer.  */
1016                 munmap(buf, size);
1017                 break;
1018             }
1019             /* Failure.  Work with what we had.  */
1020             munmap(buf2, size);
1021             /* fallthru */
1022         default:
1023             /* Split the original buffer.  Free the smaller half.  */
1024             buf2 = split_cross_256mb(buf, size);
1025             size2 = tcg_ctx->code_gen_buffer_size;
1026             if (buf == buf2) {
1027                 munmap(buf + size2, size - size2);
1028             } else {
1029                 munmap(buf, size - size2);
1030             }
1031             size = size2;
1032             break;
1033         }
1034         buf = buf2;
1035     }
1036 #endif
1037 
1038     /* Request large pages for the buffer.  */
1039     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1040 
1041     return buf;
1042 }
1043 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
1044 
1045 static inline void code_gen_alloc(size_t tb_size)
1046 {
1047     tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size);
1048     tcg_ctx->code_gen_buffer = alloc_code_gen_buffer();
1049     if (tcg_ctx->code_gen_buffer == NULL) {
1050         fprintf(stderr, "Could not allocate dynamic translator buffer\n");
1051         exit(1);
1052     }
1053     qemu_mutex_init(&tb_ctx.tb_lock);
1054 }
1055 
1056 static bool tb_cmp(const void *ap, const void *bp)
1057 {
1058     const TranslationBlock *a = ap;
1059     const TranslationBlock *b = bp;
1060 
1061     return a->pc == b->pc &&
1062         a->cs_base == b->cs_base &&
1063         a->flags == b->flags &&
1064         (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
1065         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
1066         a->page_addr[0] == b->page_addr[0] &&
1067         a->page_addr[1] == b->page_addr[1];
1068 }
1069 
1070 static void tb_htable_init(void)
1071 {
1072     unsigned int mode = QHT_MODE_AUTO_RESIZE;
1073 
1074     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
1075 }
1076 
1077 /* Must be called before using the QEMU cpus. 'tb_size' is the size
1078    (in bytes) allocated to the translation buffer. Zero means default
1079    size. */
1080 void tcg_exec_init(unsigned long tb_size)
1081 {
1082     tcg_allowed = true;
1083     cpu_gen_init();
1084     page_init();
1085     tb_htable_init();
1086     code_gen_alloc(tb_size);
1087 #if defined(CONFIG_SOFTMMU)
1088     /* There's no guest base to take into account, so go ahead and
1089        initialize the prologue now.  */
1090     tcg_prologue_init(tcg_ctx);
1091 #endif
1092 }
1093 
1094 /*
1095  * Allocate a new translation block. Returns NULL if the translation
1096  * buffer is full; the caller must then flush it and retry.
1097  *
1098  * Called with tb_lock held.
1099  */
1100 static TranslationBlock *tb_alloc(target_ulong pc)
1101 {
1102     TranslationBlock *tb;
1103 
1104     assert_tb_locked();
1105 
1106     tb = tcg_tb_alloc(tcg_ctx);
1107     if (unlikely(tb == NULL)) {
1108         return NULL;
1109     }
1110     return tb;
1111 }
1112 
1113 /* call with @p->lock held */
1114 static inline void invalidate_page_bitmap(PageDesc *p)
1115 {
1116 #ifdef CONFIG_SOFTMMU
1117     g_free(p->code_bitmap);
1118     p->code_bitmap = NULL;
1119     p->code_write_count = 0;
1120 #endif
1121 }
1122 
1123 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
1124 static void page_flush_tb_1(int level, void **lp)
1125 {
1126     int i;
1127 
1128     if (*lp == NULL) {
1129         return;
1130     }
1131     if (level == 0) {
1132         PageDesc *pd = *lp;
1133 
1134         for (i = 0; i < V_L2_SIZE; ++i) {
1135             page_lock(&pd[i]);
1136             pd[i].first_tb = (uintptr_t)NULL;
1137             invalidate_page_bitmap(pd + i);
1138             page_unlock(&pd[i]);
1139         }
1140     } else {
1141         void **pp = *lp;
1142 
1143         for (i = 0; i < V_L2_SIZE; ++i) {
1144             page_flush_tb_1(level - 1, pp + i);
1145         }
1146     }
1147 }
1148 
1149 static void page_flush_tb(void)
1150 {
1151     int i, l1_sz = v_l1_size;
1152 
1153     for (i = 0; i < l1_sz; i++) {
1154         page_flush_tb_1(v_l2_levels, l1_map + i);
1155     }
1156 }
1157 
1158 static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
1159 {
1160     const TranslationBlock *tb = value;
1161     size_t *size = data;
1162 
1163     *size += tb->tc.size;
1164     return false;
1165 }
1166 
1167 /* flush all the translation blocks */
1168 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
1169 {
1170     tb_lock();
1171 
1172     /* If it has already been done on request of another CPU,
1173      * just retry.
1174      */
1175     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
1176         goto done;
1177     }
1178 
1179     if (DEBUG_TB_FLUSH_GATE) {
1180         size_t nb_tbs = tcg_nb_tbs();
1181         size_t host_size = 0;
1182 
1183         tcg_tb_foreach(tb_host_size_iter, &host_size);
1184         printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
1185                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
1186     }
1187 
1188     CPU_FOREACH(cpu) {
1189         cpu_tb_jmp_cache_clear(cpu);
1190     }
1191 
1192     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
1193     page_flush_tb();
1194 
1195     tcg_region_reset_all();
1196     /* XXX: flush processor icache at this point if cache flush is
1197        expensive */
1198     atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1199 
1200 done:
1201     tb_unlock();
1202 }
1203 
1204 void tb_flush(CPUState *cpu)
1205 {
1206     if (tcg_enabled()) {
1207         unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count);
1208         async_safe_run_on_cpu(cpu, do_tb_flush,
1209                               RUN_ON_CPU_HOST_INT(tb_flush_count));
1210     }
1211 }
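
/*
 * Example of how concurrent flush requests coalesce (illustrative): if two
 * vCPUs both read tb_flush_count == 7 and queue do_tb_flush(7), the first
 * work item performs the flush and bumps the count to 8; the second sees
 * 7 != 8 and returns without flushing again.
 */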
1212 
1213 /*
1214  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1215  * so in order to prevent bit rot we compile them unconditionally in user-mode,
1216  * and let the optimizer get rid of them by wrapping their user-only callers
1217  * with if (DEBUG_TB_CHECK_GATE).
1218  */
1219 #ifdef CONFIG_USER_ONLY
1220 
1221 static void
1222 do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
1223 {
1224     TranslationBlock *tb = p;
1225     target_ulong addr = *(target_ulong *)userp;
1226 
1227     if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1228         printf("ERROR invalidate: address=" TARGET_FMT_lx
1229                " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1230     }
1231 }
1232 
1233 /* verify that all the pages have correct rights for code
1234  *
1235  * Called with tb_lock held.
1236  */
1237 static void tb_invalidate_check(target_ulong address)
1238 {
1239     address &= TARGET_PAGE_MASK;
1240     qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1241 }
1242 
1243 static void
1244 do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp)
1245 {
1246     TranslationBlock *tb = p;
1247     int flags1, flags2;
1248 
1249     flags1 = page_get_flags(tb->pc);
1250     flags2 = page_get_flags(tb->pc + tb->size - 1);
1251     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1252         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1253                (long)tb->pc, tb->size, flags1, flags2);
1254     }
1255 }
1256 
1257 /* verify that all the pages have correct rights for code */
1258 static void tb_page_check(void)
1259 {
1260     qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1261 }
1262 
1263 #endif /* CONFIG_USER_ONLY */
1264 
1265 /* call with @pd->lock held */
1266 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1267 {
1268     TranslationBlock *tb1;
1269     uintptr_t *pprev;
1270     unsigned int n1;
1271 
1272     pprev = &pd->first_tb;
1273     PAGE_FOR_EACH_TB(pd, tb1, n1) {
1274         if (tb1 == tb) {
1275             *pprev = tb1->page_next[n1];
1276             return;
1277         }
1278         pprev = &tb1->page_next[n1];
1279     }
1280     g_assert_not_reached();
1281 }
1282 
1283 /* remove the TB from a list of TBs jumping to the n-th jump target of the TB */
1284 static inline void tb_remove_from_jmp_list(TranslationBlock *tb, int n)
1285 {
1286     TranslationBlock *tb1;
1287     uintptr_t *ptb, ntb;
1288     unsigned int n1;
1289 
1290     ptb = &tb->jmp_list_next[n];
1291     if (*ptb) {
1292         /* find tb(n) in circular list */
1293         for (;;) {
1294             ntb = *ptb;
1295             n1 = ntb & 3;
1296             tb1 = (TranslationBlock *)(ntb & ~3);
1297             if (n1 == n && tb1 == tb) {
1298                 break;
1299             }
1300             if (n1 == 2) {
1301                 ptb = &tb1->jmp_list_first;
1302             } else {
1303                 ptb = &tb1->jmp_list_next[n1];
1304             }
1305         }
1306         /* now we can suppress tb(n) from the list */
1307         *ptb = tb->jmp_list_next[n];
1308 
1309         tb->jmp_list_next[n] = (uintptr_t)NULL;
1310     }
1311 }
1312 
1313 /* reset the jump entry 'n' of a TB so that it is not chained to
1314    another TB */
1315 static inline void tb_reset_jump(TranslationBlock *tb, int n)
1316 {
1317     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1318     tb_set_jmp_target(tb, n, addr);
1319 }
1320 
1321 /* remove any jumps to the TB */
1322 static inline void tb_jmp_unlink(TranslationBlock *tb)
1323 {
1324     TranslationBlock *tb1;
1325     uintptr_t *ptb, ntb;
1326     unsigned int n1;
1327 
1328     ptb = &tb->jmp_list_first;
1329     for (;;) {
1330         ntb = *ptb;
1331         n1 = ntb & 3;
1332         tb1 = (TranslationBlock *)(ntb & ~3);
1333         if (n1 == 2) {
1334             break;
1335         }
1336         tb_reset_jump(tb1, n1);
1337         *ptb = tb1->jmp_list_next[n1];
1338         tb1->jmp_list_next[n1] = (uintptr_t)NULL;
1339     }
1340 }
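
/*
 * Note on the tag bits used in the jump lists above: entries carry a 2-bit
 * tag in the pointer's low bits.  Tags 0 and 1 select which of the
 * predecessor's outgoing slots (jmp_list_next[0]/[1]) continues the chain;
 * tag 2 marks the end of the circular list, i.e. the destination TB itself
 * (tb_gen_code() initialises jmp_list_first to (uintptr_t)tb | 2).
 */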
1341 
1342 /* If @rm_from_page_list is set, call with the TB's pages' locks held */
1343 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1344 {
1345     CPUState *cpu;
1346     PageDesc *p;
1347     uint32_t h;
1348     tb_page_addr_t phys_pc;
1349 
1350     assert_tb_locked();
1351 
1352     atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1353 
1354     /* remove the TB from the hash list */
1355     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1356     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
1357                      tb->trace_vcpu_dstate);
1358     if (!qht_remove(&tb_ctx.htable, tb, h)) {
1359         return;
1360     }
1361 
1362     /* remove the TB from the page list */
1363     if (rm_from_page_list) {
1364         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1365         tb_page_remove(p, tb);
1366         invalidate_page_bitmap(p);
1367         if (tb->page_addr[1] != -1) {
1368             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1369             tb_page_remove(p, tb);
1370             invalidate_page_bitmap(p);
1371         }
1372     }
1373 
1374     /* remove the TB from each CPU's jump cache */
1375     h = tb_jmp_cache_hash_func(tb->pc);
1376     CPU_FOREACH(cpu) {
1377         if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1378             atomic_set(&cpu->tb_jmp_cache[h], NULL);
1379         }
1380     }
1381 
1382     /* suppress this TB from the two jump lists */
1383     tb_remove_from_jmp_list(tb, 0);
1384     tb_remove_from_jmp_list(tb, 1);
1385 
1386     /* suppress any remaining jumps to this TB */
1387     tb_jmp_unlink(tb);
1388 
1389     atomic_set(&tcg_ctx->tb_phys_invalidate_count,
1390                tcg_ctx->tb_phys_invalidate_count + 1);
1391 }
1392 
1393 static void tb_phys_invalidate__locked(TranslationBlock *tb)
1394 {
1395     do_tb_phys_invalidate(tb, true);
1396 }
1397 
1398 /* invalidate one TB
1399  *
1400  * Called with tb_lock held.
1401  */
1402 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1403 {
1404     if (page_addr == -1) {
1405         page_lock_tb(tb);
1406         do_tb_phys_invalidate(tb, true);
1407         page_unlock_tb(tb);
1408     } else {
1409         do_tb_phys_invalidate(tb, false);
1410     }
1411 }
1412 
1413 #ifdef CONFIG_SOFTMMU
1414 /* call with @p->lock held */
1415 static void build_page_bitmap(PageDesc *p)
1416 {
1417     int n, tb_start, tb_end;
1418     TranslationBlock *tb;
1419 
1420     p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1421 
1422     PAGE_FOR_EACH_TB(p, tb, n) {
1423         /* NOTE: this is subtle as a TB may span two physical pages */
1424         if (n == 0) {
1425             /* NOTE: tb_end may be after the end of the page, but
1426                it is not a problem */
1427             tb_start = tb->pc & ~TARGET_PAGE_MASK;
1428             tb_end = tb_start + tb->size;
1429             if (tb_end > TARGET_PAGE_SIZE) {
1430                 tb_end = TARGET_PAGE_SIZE;
1431             }
1432         } else {
1433             tb_start = 0;
1434             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1435         }
1436         bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1437     }
1438 }
1439 #endif
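
/*
 * Worked example (illustrative, TARGET_PAGE_SIZE == 4096): a 20-byte TB
 * starting at guest pc 0x3ff8 sets bits 0xff8..0xfff in its first page's
 * bitmap (tb_end is clamped to TARGET_PAGE_SIZE) and bits 0x0..0xb in the
 * second page's bitmap, so the SMC fast path can skip guest writes that
 * touch no translated code.
 */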
1440 
1441 /* add the tb to the target page and protect it if necessary
1442  *
1443  * Called with mmap_lock held for user-mode emulation.
1444  * Called with @p->lock held.
1445  */
1446 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1447                                unsigned int n, tb_page_addr_t page_addr)
1448 {
1449 #ifndef CONFIG_USER_ONLY
1450     bool page_already_protected;
1451 #endif
1452 
1453     assert_memory_lock();
1454 
1455     tb->page_addr[n] = page_addr;
1456     tb->page_next[n] = p->first_tb;
1457 #ifndef CONFIG_USER_ONLY
1458     page_already_protected = p->first_tb != (uintptr_t)NULL;
1459 #endif
1460     p->first_tb = (uintptr_t)tb | n;
1461     invalidate_page_bitmap(p);
1462 
1463 #if defined(CONFIG_USER_ONLY)
1464     if (p->flags & PAGE_WRITE) {
1465         target_ulong addr;
1466         PageDesc *p2;
1467         int prot;
1468 
1469         /* force the host page as non-writable (writes will have a
1470            page fault + mprotect overhead) */
1471         page_addr &= qemu_host_page_mask;
1472         prot = 0;
1473         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1474             addr += TARGET_PAGE_SIZE) {
1475 
1476             p2 = page_find(addr >> TARGET_PAGE_BITS);
1477             if (!p2) {
1478                 continue;
1479             }
1480             prot |= p2->flags;
1481             p2->flags &= ~PAGE_WRITE;
1482         }
1483         mprotect(g2h(page_addr), qemu_host_page_size,
1484                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1485         if (DEBUG_TB_INVALIDATE_GATE) {
1486             printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1487         }
1488     }
1489 #else
1490     /* if some code is already present, then the page is already
1491        protected. So we only need to protect it when the first TB is
1492        allocated in a physical page */
1493     if (!page_already_protected) {
1494         tlb_protect_code(page_addr);
1495     }
1496 #endif
1497 }
1498 
1499 /* add a new TB and link it to the physical page tables. phys_page2 is
1500  * (-1) to indicate that only one page contains the TB.
1501  *
1502  * Called with mmap_lock held for user-mode emulation.
1503  */
1504 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1505                          tb_page_addr_t phys_page2)
1506 {
1507     PageDesc *p;
1508     PageDesc *p2 = NULL;
1509     uint32_t h;
1510 
1511     assert_memory_lock();
1512 
1513     /*
1514      * Add the TB to the page list, acquiring the pages' locks first.
1515      */
1516     page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1517     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1518     if (p2) {
1519         tb_page_add(p2, tb, 1, phys_page2);
1520     } else {
1521         tb->page_addr[1] = -1;
1522     }
1523 
1524     if (p2) {
1525         page_unlock(p2);
1526     }
1527     page_unlock(p);
1528 
1529     /* add in the hash table */
1530     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
1531                      tb->trace_vcpu_dstate);
1532     qht_insert(&tb_ctx.htable, tb, h, NULL);
1533 
1534 #ifdef CONFIG_USER_ONLY
1535     if (DEBUG_TB_CHECK_GATE) {
1536         tb_page_check();
1537     }
1538 #endif
1539 }
1540 
1541 /* Called with mmap_lock held for user mode emulation.  */
1542 TranslationBlock *tb_gen_code(CPUState *cpu,
1543                               target_ulong pc, target_ulong cs_base,
1544                               uint32_t flags, int cflags)
1545 {
1546     CPUArchState *env = cpu->env_ptr;
1547     TranslationBlock *tb;
1548     tb_page_addr_t phys_pc, phys_page2;
1549     target_ulong virt_page2;
1550     tcg_insn_unit *gen_code_buf;
1551     int gen_code_size, search_size;
1552 #ifdef CONFIG_PROFILER
1553     TCGProfile *prof = &tcg_ctx->prof;
1554     int64_t ti;
1555 #endif
1556     assert_memory_lock();
1557 
1558     phys_pc = get_page_addr_code(env, pc);
1559 
1560  buffer_overflow:
1561     tb = tb_alloc(pc);
1562     if (unlikely(!tb)) {
1563         /* flush must be done */
1564         tb_flush(cpu);
1565         mmap_unlock();
1566         /* Make the execution loop process the flush as soon as possible.  */
1567         cpu->exception_index = EXCP_INTERRUPT;
1568         cpu_loop_exit(cpu);
1569     }
1570 
1571     gen_code_buf = tcg_ctx->code_gen_ptr;
1572     tb->tc.ptr = gen_code_buf;
1573     tb->pc = pc;
1574     tb->cs_base = cs_base;
1575     tb->flags = flags;
1576     tb->cflags = cflags;
1577     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1578     tcg_ctx->tb_cflags = cflags;
1579 
1580 #ifdef CONFIG_PROFILER
1581     /* includes aborted translations because of exceptions */
1582     atomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1583     ti = profile_getclock();
1584 #endif
1585 
1586     tcg_func_start(tcg_ctx);
1587 
1588     tcg_ctx->cpu = ENV_GET_CPU(env);
1589     gen_intermediate_code(cpu, tb);
1590     tcg_ctx->cpu = NULL;
1591 
1592     trace_translate_block(tb, tb->pc, tb->tc.ptr);
1593 
1594     /* generate machine code */
1595     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1596     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1597     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1598     if (TCG_TARGET_HAS_direct_jump) {
1599         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1600         tcg_ctx->tb_jmp_target_addr = NULL;
1601     } else {
1602         tcg_ctx->tb_jmp_insn_offset = NULL;
1603         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1604     }
1605 
1606 #ifdef CONFIG_PROFILER
1607     atomic_set(&prof->tb_count, prof->tb_count + 1);
1608     atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti);
1609     ti = profile_getclock();
1610 #endif
1611 
1612     /* ??? Overflow could be handled better here.  In particular, we
1613        don't need to re-do gen_intermediate_code, nor should we re-do
1614        the tcg optimization currently hidden inside tcg_gen_code.  All
1615        that should be required is to flush the TBs, allocate a new TB,
1616        re-initialize it per above, and re-do the actual code generation.  */
1617     gen_code_size = tcg_gen_code(tcg_ctx, tb);
1618     if (unlikely(gen_code_size < 0)) {
1619         goto buffer_overflow;
1620     }
1621     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1622     if (unlikely(search_size < 0)) {
1623         goto buffer_overflow;
1624     }
1625     tb->tc.size = gen_code_size;
1626 
1627 #ifdef CONFIG_PROFILER
1628     atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1629     atomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1630     atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1631     atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1632 #endif
1633 
1634 #ifdef DEBUG_DISAS
1635     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1636         qemu_log_in_addr_range(tb->pc)) {
1637         qemu_log_lock();
1638         qemu_log("OUT: [size=%d]\n", gen_code_size);
1639         if (tcg_ctx->data_gen_ptr) {
1640             size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
1641             size_t data_size = gen_code_size - code_size;
1642             size_t i;
1643 
1644             log_disas(tb->tc.ptr, code_size);
1645 
1646             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1647                 if (sizeof(tcg_target_ulong) == 8) {
1648                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1649                              (uintptr_t)tcg_ctx->data_gen_ptr + i,
1650                              *(uint64_t *)(tcg_ctx->data_gen_ptr + i));
1651                 } else {
1652                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1653                              (uintptr_t)tcg_ctx->data_gen_ptr + i,
1654                              *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
1655                 }
1656             }
1657         } else {
1658             log_disas(tb->tc.ptr, gen_code_size);
1659         }
1660         qemu_log("\n");
1661         qemu_log_flush();
1662         qemu_log_unlock();
1663     }
1664 #endif
1665 
1666     atomic_set(&tcg_ctx->code_gen_ptr, (void *)
1667         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1668                  CODE_GEN_ALIGN));
1669 
1670     /* init jump list */
1671     assert(((uintptr_t)tb & 3) == 0);
1672     tb->jmp_list_first = (uintptr_t)tb | 2;
1673     tb->jmp_list_next[0] = (uintptr_t)NULL;
1674     tb->jmp_list_next[1] = (uintptr_t)NULL;
1675 
1676     /* init original jump addresses which have been set during tcg_gen_code() */
1677     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1678         tb_reset_jump(tb, 0);
1679     }
1680     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1681         tb_reset_jump(tb, 1);
1682     }
1683 
1684     /* check next page if needed */
1685     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1686     phys_page2 = -1;
1687     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1688         phys_page2 = get_page_addr_code(env, virt_page2);
1689     }
1690     /* As long as consistency of the TB structures is provided by tb_lock in user
1691      * mode and is implicit in single-threaded softmmu emulation, no explicit
1692      * memory barrier is required before tb_link_page() makes the TB visible
1693      * through the physical hash table and physical page list.
1694      */
1695     tb_link_page(tb, phys_pc, phys_page2);
1696     tcg_tb_insert(tb);
1697     return tb;
1698 }
1699 
1700 /*
1701  * Call with all @pages locked.
1702  * @p must be non-NULL.
1703  */
1704 static void
1705 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1706                                       PageDesc *p, tb_page_addr_t start,
1707                                       tb_page_addr_t end,
1708                                       int is_cpu_write_access)
1709 {
1710     TranslationBlock *tb;
1711     tb_page_addr_t tb_start, tb_end;
1712     int n;
1713 #ifdef TARGET_HAS_PRECISE_SMC
1714     CPUState *cpu = current_cpu;
1715     CPUArchState *env = NULL;
1716     int current_tb_not_found = is_cpu_write_access;
1717     TranslationBlock *current_tb = NULL;
1718     int current_tb_modified = 0;
1719     target_ulong current_pc = 0;
1720     target_ulong current_cs_base = 0;
1721     uint32_t current_flags = 0;
1722 #endif /* TARGET_HAS_PRECISE_SMC */
1723 
1724     assert_memory_lock();
1725     assert_tb_locked();
1726 
1727 #if defined(TARGET_HAS_PRECISE_SMC)
1728     if (cpu != NULL) {
1729         env = cpu->env_ptr;
1730     }
1731 #endif
1732 
1733     /* we remove all the TBs in the range [start, end[ */
1734     /* XXX: see if in some cases it could be faster to invalidate all
1735        the code */
1736     PAGE_FOR_EACH_TB(p, tb, n) {
1737         /* NOTE: this is subtle as a TB may span two physical pages */
1738         if (n == 0) {
1739             /* NOTE: tb_end may be after the end of the page, but
1740                it is not a problem */
1741             tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1742             tb_end = tb_start + tb->size;
1743         } else {
1744             tb_start = tb->page_addr[1];
1745             tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1746         }
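        /* i.e. does the TB overlap the half-open range [start, end)? */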
1747         if (!(tb_end <= start || tb_start >= end)) {
1748 #ifdef TARGET_HAS_PRECISE_SMC
1749             if (current_tb_not_found) {
1750                 current_tb_not_found = 0;
1751                 current_tb = NULL;
1752                 if (cpu->mem_io_pc) {
1753                     /* now we have a real cpu fault */
1754                     current_tb = tcg_tb_lookup(cpu->mem_io_pc);
1755                 }
1756             }
1757             if (current_tb == tb &&
1758                 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1759                 /* If we are modifying the current TB, we must stop
1760                 its execution. We could be more precise by checking
1761                 that the modification is after the current PC, but it
1762                 would require a specialized function to partially
1763                 restore the CPU state */
1764 
1765                 current_tb_modified = 1;
1766                 cpu_restore_state_from_tb(cpu, current_tb,
1767                                           cpu->mem_io_pc, true);
1768                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1769                                      &current_flags);
1770             }
1771 #endif /* TARGET_HAS_PRECISE_SMC */
1772             tb_phys_invalidate__locked(tb);
1773         }
1774     }
1775 #if !defined(CONFIG_USER_ONLY)
1776     /* if no code remaining, no need to continue to use slow writes */
1777     if (!p->first_tb) {
1778         invalidate_page_bitmap(p);
1779         tlb_unprotect_code(start);
1780     }
1781 #endif
1782 #ifdef TARGET_HAS_PRECISE_SMC
1783     if (current_tb_modified) {
1784         page_collection_unlock(pages);
1785         /* Force execution of one insn next time.  */
1786         cpu->cflags_next_tb = 1 | curr_cflags();
1787         cpu_loop_exit_noexc(cpu);
1788     }
1789 #endif
1790 }
1791 
1792 /*
1793  * Invalidate all TBs which intersect with the target physical address range
1794  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1795  * 'is_cpu_write_access' should be true if called from a real cpu write
1796  * access: the virtual CPU will exit the current TB if code is modified inside
1797  * this TB.
1798  *
1799  * Called with tb_lock/mmap_lock held for user-mode emulation
1800  * Called with tb_lock held for system-mode emulation
1801  */
1802 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1803                                    int is_cpu_write_access)
1804 {
1805     struct page_collection *pages;
1806     PageDesc *p;
1807 
1808     assert_memory_lock();
1809     assert_tb_locked();
1810 
1811     p = page_find(start >> TARGET_PAGE_BITS);
1812     if (p == NULL) {
1813         return;
1814     }
1815     pages = page_collection_lock(start, end);
1816     tb_invalidate_phys_page_range__locked(pages, p, start, end,
1817                                           is_cpu_write_access);
1818     page_collection_unlock(pages);
1819 }
1820 
1821 /*
1822  * Invalidate all TBs which intersect with the target physical address range
1823  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1824  * 'is_cpu_write_access' should be true if called from a real cpu write
1825  * access: the virtual CPU will exit the current TB if code is modified inside
1826  * this TB.
1827  *
1828  * Called with mmap_lock held for user-mode emulation, grabs tb_lock
1829  * Called with tb_lock held for system-mode emulation
1830  */
1831 static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
1832 {
1833     struct page_collection *pages;
1834     tb_page_addr_t next;
1835 
1836     pages = page_collection_lock(start, end);
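    /*
     * tb_invalidate_phys_page_range__locked() expects [start, end) to stay
     * within one page, so walk the range one target page at a time.
     */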
1837     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1838          start < end;
1839          start = next, next += TARGET_PAGE_SIZE) {
1840         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1841         tb_page_addr_t bound = MIN(next, end);
1842 
1843         if (pd == NULL) {
1844             continue;
1845         }
1846         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1847     }
1848     page_collection_unlock(pages);
1849 }
1850 
1851 #ifdef CONFIG_SOFTMMU
1852 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1853 {
1854     assert_tb_locked();
1855     tb_invalidate_phys_range_1(start, end);
1856 }
1857 #else
1858 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1859 {
1860     assert_memory_lock();
1861     tb_lock();
1862     tb_invalidate_phys_range_1(start, end);
1863     tb_unlock();
1864 }
1865 #endif
1866 
1867 #ifdef CONFIG_SOFTMMU
1868 /* len must be <= 8 and start must be a multiple of len.
1869  * Called via softmmu_template.h when code areas are written to with
1870  * iothread mutex not held.
1871  */
1872 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1873 {
1874     struct page_collection *pages;
1875     PageDesc *p;
1876 
1877 #if 0
1878     if (1) {
1879         qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1880                   cpu_single_env->mem_io_vaddr, len,
1881                   cpu_single_env->eip,
1882                   cpu_single_env->eip +
1883                   (intptr_t)cpu_single_env->segs[R_CS].base);
1884     }
1885 #endif
1886     assert_memory_lock();
1887 
1888     p = page_find(start >> TARGET_PAGE_BITS);
1889     if (!p) {
1890         return;
1891     }
1892 
1893     pages = page_collection_lock(start, start + len);
1894     if (!p->code_bitmap &&
1895         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
1896         build_page_bitmap(p);
1897     }
1898     if (p->code_bitmap) {
1899         unsigned int nr;
1900         unsigned long b;
1901 
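        /*
         * code_bitmap presumably has one bit per byte of this page that is
         * covered by some TB; if none of the len bits starting at the page
         * offset are set, the write cannot hit translated code and the
         * costly invalidation below can be skipped.
         */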
1902         nr = start & ~TARGET_PAGE_MASK;
1903         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
1904         if (b & ((1 << len) - 1)) {
1905             goto do_invalidate;
1906         }
1907     } else {
1908     do_invalidate:
1909         tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1);
1910     }
1911     page_collection_unlock(pages);
1912 }
1913 #else
1914 /* Called with mmap_lock held. If pc is not 0 then it indicates the
1915  * host PC of the faulting store instruction that caused this invalidate.
1916  * Returns true if the caller needs to abort execution of the current
1917  * TB (because it was modified by this store and the guest CPU has
1918  * precise-SMC semantics).
1919  */
1920 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
1921 {
1922     TranslationBlock *tb;
1923     PageDesc *p;
1924     int n;
1925 #ifdef TARGET_HAS_PRECISE_SMC
1926     TranslationBlock *current_tb = NULL;
1927     CPUState *cpu = current_cpu;
1928     CPUArchState *env = NULL;
1929     int current_tb_modified = 0;
1930     target_ulong current_pc = 0;
1931     target_ulong current_cs_base = 0;
1932     uint32_t current_flags = 0;
1933 #endif
1934 
1935     assert_memory_lock();
1936 
1937     addr &= TARGET_PAGE_MASK;
1938     p = page_find(addr >> TARGET_PAGE_BITS);
1939     if (!p) {
1940         return false;
1941     }
1942 
1943     tb_lock();
1944 #ifdef TARGET_HAS_PRECISE_SMC
1945     if (p->first_tb && pc != 0) {
1946         current_tb = tcg_tb_lookup(pc);
1947     }
1948     if (cpu != NULL) {
1949         env = cpu->env_ptr;
1950     }
1951 #endif
1952     PAGE_FOR_EACH_TB(p, tb, n) {
1953 #ifdef TARGET_HAS_PRECISE_SMC
1954         if (current_tb == tb &&
1955             (current_tb->cflags & CF_COUNT_MASK) != 1) {
1956                 /* If we are modifying the current TB, we must stop
1957                    its execution. We could be more precise by checking
1958                    that the modification is after the current PC, but it
1959                    would require a specialized function to partially
1960                    restore the CPU state */
1961 
1962             current_tb_modified = 1;
1963             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
1964             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1965                                  &current_flags);
1966         }
1967 #endif /* TARGET_HAS_PRECISE_SMC */
1968         tb_phys_invalidate(tb, addr);
1969     }
1970     p->first_tb = (uintptr_t)NULL;
1971 #ifdef TARGET_HAS_PRECISE_SMC
1972     if (current_tb_modified) {
1973         /* Force execution of one insn next time.  */
1974         cpu->cflags_next_tb = 1 | curr_cflags();
1975         /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
1976          * back into the cpu_exec loop. */
1977         return true;
1978     }
1979 #endif
1980     tb_unlock();
1981 
1982     return false;
1983 }
1984 #endif
1985 
1986 #if !defined(CONFIG_USER_ONLY)
1987 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
1988 {
1989     ram_addr_t ram_addr;
1990     MemoryRegion *mr;
1991     hwaddr l = 1;
1992 
1993     rcu_read_lock();
1994     mr = address_space_translate(as, addr, &addr, &l, false, attrs);
1995     if (!(memory_region_is_ram(mr)
1996           || memory_region_is_romd(mr))) {
1997         rcu_read_unlock();
1998         return;
1999     }
2000     ram_addr = memory_region_get_ram_addr(mr) + addr;
2001     tb_lock();
2002     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
2003     tb_unlock();
2004     rcu_read_unlock();
2005 }
2006 #endif /* !defined(CONFIG_USER_ONLY) */
2007 
2008 /* Called with tb_lock held.  */
2009 void tb_check_watchpoint(CPUState *cpu)
2010 {
2011     TranslationBlock *tb;
2012 
2013     tb = tcg_tb_lookup(cpu->mem_io_pc);
2014     if (tb) {
2015         /* We can use retranslation to find the PC.  */
2016         cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc, true);
2017         tb_phys_invalidate(tb, -1);
2018     } else {
2019         /* The exception probably happened in a helper.  The CPU state should
2020            have been saved before calling it. Fetch the PC from there.  */
2021         CPUArchState *env = cpu->env_ptr;
2022         target_ulong pc, cs_base;
2023         tb_page_addr_t addr;
2024         uint32_t flags;
2025 
2026         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2027         addr = get_page_addr_code(env, pc);
2028         tb_invalidate_phys_range(addr, addr + 1);
2029     }
2030 }
2031 
2032 #ifndef CONFIG_USER_ONLY
2033 /* in deterministic execution mode, instructions doing device I/Os
2034  * must be at the end of the TB.
2035  *
2036  * Called by softmmu_template.h, with iothread mutex not held.
2037  */
2038 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2039 {
2040 #if defined(TARGET_MIPS) || defined(TARGET_SH4)
2041     CPUArchState *env = cpu->env_ptr;
2042 #endif
2043     TranslationBlock *tb;
2044     uint32_t n;
2045 
2046     tb_lock();
2047     tb = tcg_tb_lookup(retaddr);
2048     if (!tb) {
2049         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2050                   (void *)retaddr);
2051     }
2052     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2053 
2054     /* On MIPS and SH, delay slot instructions can only be restarted if
2055        they were already the first instruction in the TB.  If this is not
2056        the first instruction in a TB then re-execute the preceding
2057        branch.  */
2058     n = 1;
2059 #if defined(TARGET_MIPS)
2060     if ((env->hflags & MIPS_HFLAG_BMASK) != 0
2061         && env->active_tc.PC != tb->pc) {
2062         env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
2063         cpu->icount_decr.u16.low++;
2064         env->hflags &= ~MIPS_HFLAG_BMASK;
2065         n = 2;
2066     }
2067 #elif defined(TARGET_SH4)
2068     if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
2069         && env->pc != tb->pc) {
2070         env->pc -= 2;
2071         cpu->icount_decr.u16.low++;
2072         env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
2073         n = 2;
2074     }
2075 #endif
2076 
2077     /* Generate a new TB executing the I/O insn.  */
2078     cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
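    /* n lands in the CF_COUNT_MASK field, so the replacement TB should
     * execute at most n instructions (2 when a delay-slot branch must be
     * replayed together with the I/O insn). */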
2079 
2080     if (tb->cflags & CF_NOCACHE) {
2081         if (tb->orig_tb) {
2082             /* Invalidate original TB if this TB was generated in
2083              * cpu_exec_nocache() */
2084             tb_phys_invalidate(tb->orig_tb, -1);
2085         }
2086         tcg_tb_remove(tb);
2087     }
2088 
2089     /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
2090      * the first in the TB) then we end up generating a whole new TB and
2091      * repeating the fault, which is horribly inefficient.
2092      * Better would be to execute just this insn uncached, or generate a
2093      * second new TB.
2094      *
2095      * cpu_loop_exit_noexc will longjmp back to cpu_exec where the
2096      * tb_lock gets reset.
2097      */
2098     cpu_loop_exit_noexc(cpu);
2099 }
2100 
2101 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
2102 {
2103     unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
2104 
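    /*
     * All jump-cache entries for addresses on this page live in the
     * TB_JMP_PAGE_SIZE consecutive slots starting at i0, so clearing the
     * page is a simple contiguous sweep.
     */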
2105     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
2106         atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
2107     }
2108 }
2109 
2110 void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
2111 {
2112     /* Discard jump cache entries for any tb which might potentially
2113        overlap the flushed page.  */
2114     tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
2115     tb_jmp_cache_clear_page(cpu, addr);
2116 }
2117 
2118 static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
2119                                  struct qht_stats hst)
2120 {
2121     uint32_t hgram_opts;
2122     size_t hgram_bins;
2123     char *hgram;
2124 
2125     if (!hst.head_buckets) {
2126         return;
2127     }
2128     cpu_fprintf(f, "TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2129                 hst.used_head_buckets, hst.head_buckets,
2130                 (double)hst.used_head_buckets / hst.head_buckets * 100);
2131 
2132     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2133     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2134     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2135         hgram_opts |= QDIST_PR_NODECIMAL;
2136     }
2137     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2138     cpu_fprintf(f, "TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2139                 qdist_avg(&hst.occupancy) * 100, hgram);
2140     g_free(hgram);
2141 
2142     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2143     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2144     if (hgram_bins > 10) {
2145         hgram_bins = 10;
2146     } else {
2147         hgram_bins = 0;
2148         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2149     }
2150     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2151     cpu_fprintf(f, "TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2152                 qdist_avg(&hst.chain), hgram);
2153     g_free(hgram);
2154 }
2155 
2156 struct tb_tree_stats {
2157     size_t nb_tbs;
2158     size_t host_size;
2159     size_t target_size;
2160     size_t max_target_size;
2161     size_t direct_jmp_count;
2162     size_t direct_jmp2_count;
2163     size_t cross_page;
2164 };
2165 
2166 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2167 {
2168     const TranslationBlock *tb = value;
2169     struct tb_tree_stats *tst = data;
2170 
2171     tst->nb_tbs++;
2172     tst->host_size += tb->tc.size;
2173     tst->target_size += tb->size;
2174     if (tb->size > tst->max_target_size) {
2175         tst->max_target_size = tb->size;
2176     }
2177     if (tb->page_addr[1] != -1) {
2178         tst->cross_page++;
2179     }
2180     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2181         tst->direct_jmp_count++;
2182         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2183             tst->direct_jmp2_count++;
2184         }
2185     }
2186     return false;
2187 }
2188 
2189 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
2190 {
2191     struct tb_tree_stats tst = {};
2192     struct qht_stats hst;
2193     size_t nb_tbs;
2194 
2195     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2196     nb_tbs = tst.nb_tbs;
2197     /* XXX: avoid using doubles ? */
2198     cpu_fprintf(f, "Translation buffer state:\n");
2199     /*
2200      * Report total code size including the padding and TB structs;
2201      * otherwise users might think "-tb-size" is not honoured.
2202      * For avg host size we use the precise numbers from tb_tree_stats though.
2203      */
2204     cpu_fprintf(f, "gen code size       %zu/%zu\n",
2205                 tcg_code_size(), tcg_code_capacity());
2206     cpu_fprintf(f, "TB count            %zu\n", nb_tbs);
2207     cpu_fprintf(f, "TB avg target size  %zu max=%zu bytes\n",
2208                 nb_tbs ? tst.target_size / nb_tbs : 0,
2209                 tst.max_target_size);
2210     cpu_fprintf(f, "TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2211                 nb_tbs ? tst.host_size / nb_tbs : 0,
2212                 tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2213     cpu_fprintf(f, "cross page TB count %zu (%zu%%)\n", tst.cross_page,
2214             nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2215     cpu_fprintf(f, "direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2216                 tst.direct_jmp_count,
2217                 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2218                 tst.direct_jmp2_count,
2219                 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2220 
2221     qht_statistics_init(&tb_ctx.htable, &hst);
2222     print_qht_statistics(f, cpu_fprintf, hst);
2223     qht_statistics_destroy(&hst);
2224 
2225     cpu_fprintf(f, "\nStatistics:\n");
2226     cpu_fprintf(f, "TB flush count      %u\n",
2227                 atomic_read(&tb_ctx.tb_flush_count));
2228     cpu_fprintf(f, "TB invalidate count %zu\n", tcg_tb_phys_invalidate_count());
2229     cpu_fprintf(f, "TLB flush count     %zu\n", tlb_flush_count());
2230     tcg_dump_info(f, cpu_fprintf);
2231 }
2232 
2233 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
2234 {
2235     tcg_dump_op_count(f, cpu_fprintf);
2236 }
2237 
2238 #else /* CONFIG_USER_ONLY */
2239 
2240 void cpu_interrupt(CPUState *cpu, int mask)
2241 {
2242     g_assert(qemu_mutex_iothread_locked());
2243     cpu->interrupt_request |= mask;
2244     cpu->icount_decr.u16.high = -1;
2245 }
2246 
2247 /*
2248  * Walks guest process memory "regions" one by one
2249  * and calls callback function 'fn' for each region.
2250  */
2251 struct walk_memory_regions_data {
2252     walk_memory_regions_fn fn;
2253     void *priv;
2254     target_ulong start;
2255     int prot;
2256 };
2257 
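/*
 * Close the region currently being accumulated (if any) at @end and start
 * a new one with protection @new_prot; data->start == -1u serves as the
 * "no open region" marker.
 */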
2258 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2259                                    target_ulong end, int new_prot)
2260 {
2261     if (data->start != -1u) {
2262         int rc = data->fn(data->priv, data->start, end, data->prot);
2263         if (rc != 0) {
2264             return rc;
2265         }
2266     }
2267 
2268     data->start = (new_prot ? end : -1u);
2269     data->prot = new_prot;
2270 
2271     return 0;
2272 }
2273 
2274 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2275                                  target_ulong base, int level, void **lp)
2276 {
2277     target_ulong pa;
2278     int i, rc;
2279 
2280     if (*lp == NULL) {
2281         return walk_memory_regions_end(data, base, 0);
2282     }
2283 
2284     if (level == 0) {
2285         PageDesc *pd = *lp;
2286 
2287         for (i = 0; i < V_L2_SIZE; ++i) {
2288             int prot = pd[i].flags;
2289 
2290             pa = base | (i << TARGET_PAGE_BITS);
2291             if (prot != data->prot) {
2292                 rc = walk_memory_regions_end(data, pa, prot);
2293                 if (rc != 0) {
2294                     return rc;
2295                 }
2296             }
2297         }
2298     } else {
2299         void **pp = *lp;
2300 
2301         for (i = 0; i < V_L2_SIZE; ++i) {
2302             pa = base | ((target_ulong)i <<
2303                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2304             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2305             if (rc != 0) {
2306                 return rc;
2307             }
2308         }
2309     }
2310 
2311     return 0;
2312 }
2313 
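/*
 * Illustrative sketch only (not part of the original code): a
 * walk_memory_regions() callback with the same shape as dump_region()
 * below.  The name count_exec and the use of priv are hypothetical.
 */
#if 0
static int count_exec(void *priv, target_ulong start,
                      target_ulong end, unsigned long prot)
{
    /* Accumulate the number of executable bytes into *priv. */
    if (prot & PAGE_EXEC) {
        *(target_ulong *)priv += end - start;
    }
    return 0;   /* a non-zero return value stops the walk */
}

/* Possible use:
 *     target_ulong exec_bytes = 0;
 *     walk_memory_regions(&exec_bytes, count_exec);
 */
#endif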
2314 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2315 {
2316     struct walk_memory_regions_data data;
2317     uintptr_t i, l1_sz = v_l1_size;
2318 
2319     data.fn = fn;
2320     data.priv = priv;
2321     data.start = -1u;
2322     data.prot = 0;
2323 
2324     for (i = 0; i < l1_sz; i++) {
2325         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2326         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2327         if (rc != 0) {
2328             return rc;
2329         }
2330     }
2331 
2332     return walk_memory_regions_end(&data, 0, 0);
2333 }
2334 
2335 static int dump_region(void *priv, target_ulong start,
2336     target_ulong end, unsigned long prot)
2337 {
2338     FILE *f = (FILE *)priv;
2339 
2340     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2341         " "TARGET_FMT_lx" %c%c%c\n",
2342         start, end, end - start,
2343         ((prot & PAGE_READ) ? 'r' : '-'),
2344         ((prot & PAGE_WRITE) ? 'w' : '-'),
2345         ((prot & PAGE_EXEC) ? 'x' : '-'));
2346 
2347     return 0;
2348 }
2349 
2350 /* dump memory mappings */
2351 void page_dump(FILE *f)
2352 {
2353     const int length = sizeof(target_ulong) * 2;
2354     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2355             length, "start", length, "end", length, "size", "prot");
2356     walk_memory_regions(f, dump_region);
2357 }
2358 
2359 int page_get_flags(target_ulong address)
2360 {
2361     PageDesc *p;
2362 
2363     p = page_find(address >> TARGET_PAGE_BITS);
2364     if (!p) {
2365         return 0;
2366     }
2367     return p->flags;
2368 }
2369 
2370 /* Modify the flags of a page and invalidate the code if necessary.
2371    The flag PAGE_WRITE_ORG is set automatically depending
2372    on PAGE_WRITE.  The mmap_lock should already be held.  */
2373 void page_set_flags(target_ulong start, target_ulong end, int flags)
2374 {
2375     target_ulong addr, len;
2376 
2377     /* This function should never be called with addresses outside the
2378        guest address space.  If this assert fires, it probably indicates
2379        a missing call to h2g_valid.  */
2380 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2381     assert(end <= ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2382 #endif
2383     assert(start < end);
2384     assert_memory_lock();
2385 
2386     start = start & TARGET_PAGE_MASK;
2387     end = TARGET_PAGE_ALIGN(end);
2388 
2389     if (flags & PAGE_WRITE) {
2390         flags |= PAGE_WRITE_ORG;
2391     }
2392 
2393     for (addr = start, len = end - start;
2394          len != 0;
2395          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2396         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2397 
2398         /* If a write-protected page that contains code is being made
2399            writable, invalidate the code inside it.  */
2400         if (!(p->flags & PAGE_WRITE) &&
2401             (flags & PAGE_WRITE) &&
2402             p->first_tb) {
2403             tb_invalidate_phys_page(addr, 0);
2404         }
2405         p->flags = flags;
2406     }
2407 }
2408 
2409 int page_check_range(target_ulong start, target_ulong len, int flags)
2410 {
2411     PageDesc *p;
2412     target_ulong end;
2413     target_ulong addr;
2414 
2415     /* This function should never be called with addresses outside the
2416        guest address space.  If this assert fires, it probably indicates
2417        a missing call to h2g_valid.  */
2418 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2419     assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2420 #endif
2421 
2422     if (len == 0) {
2423         return 0;
2424     }
2425     if (start + len - 1 < start) {
2426         /* We've wrapped around.  */
2427         return -1;
2428     }
2429 
2430     /* must be done before we lose bits in the next step */
2431     end = TARGET_PAGE_ALIGN(start + len);
2432     start = start & TARGET_PAGE_MASK;
2433 
2434     for (addr = start, len = end - start;
2435          len != 0;
2436          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2437         p = page_find(addr >> TARGET_PAGE_BITS);
2438         if (!p) {
2439             return -1;
2440         }
2441         if (!(p->flags & PAGE_VALID)) {
2442             return -1;
2443         }
2444 
2445         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2446             return -1;
2447         }
2448         if (flags & PAGE_WRITE) {
2449             if (!(p->flags & PAGE_WRITE_ORG)) {
2450                 return -1;
2451             }
2452             /* unprotect the page if it was put read-only because it
2453                contains translated code */
2454             if (!(p->flags & PAGE_WRITE)) {
2455                 if (!page_unprotect(addr, 0)) {
2456                     return -1;
2457                 }
2458             }
2459         }
2460     }
2461     return 0;
2462 }
2463 
2464 /* called from signal handler: invalidate the code and unprotect the
2465  * page. Return 0 if the fault was not handled, 1 if it was handled,
2466  * and 2 if it was handled but the caller must cause the TB to be
2467  * immediately exited. (We can only return 2 if the 'pc' argument is
2468  * non-zero.)
2469  */
2470 int page_unprotect(target_ulong address, uintptr_t pc)
2471 {
2472     unsigned int prot;
2473     bool current_tb_invalidated;
2474     PageDesc *p;
2475     target_ulong host_start, host_end, addr;
2476 
2477     /* Technically this isn't safe inside a signal handler.  However we
2478        know this only ever happens in a synchronous SEGV handler, so in
2479        practice it seems to be ok.  */
2480     mmap_lock();
2481 
2482     p = page_find(address >> TARGET_PAGE_BITS);
2483     if (!p) {
2484         mmap_unlock();
2485         return 0;
2486     }
2487 
2488     /* if the page was really writable, then we change its
2489        protection back to writable */
2490     if (p->flags & PAGE_WRITE_ORG) {
2491         current_tb_invalidated = false;
2492         if (p->flags & PAGE_WRITE) {
2493             /* If the page is actually marked WRITE then assume this is because
2494              * this thread raced with another one which got here first and
2495              * set the page to PAGE_WRITE and did the TB invalidate for us.
2496              */
2497 #ifdef TARGET_HAS_PRECISE_SMC
2498             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2499             if (current_tb) {
2500                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2501             }
2502 #endif
2503         } else {
2504             host_start = address & qemu_host_page_mask;
2505             host_end = host_start + qemu_host_page_size;
2506 
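            /*
             * The host page size can be larger than TARGET_PAGE_SIZE, in
             * which case a single mprotect() below covers several guest
             * pages; walk them all, invalidating their TBs and merging
             * their flags into prot.
             */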
2507             prot = 0;
2508             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2509                 p = page_find(addr >> TARGET_PAGE_BITS);
2510                 p->flags |= PAGE_WRITE;
2511                 prot |= p->flags;
2512 
2513                 /* and since the content will be modified, we must invalidate
2514                    the corresponding translated code. */
2515                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2516 #ifdef CONFIG_USER_ONLY
2517                 if (DEBUG_TB_CHECK_GATE) {
2518                     tb_invalidate_check(addr);
2519                 }
2520 #endif
2521             }
2522             mprotect((void *)g2h(host_start), qemu_host_page_size,
2523                      prot & PAGE_BITS);
2524         }
2525         mmap_unlock();
2526         /* If the current TB was invalidated, return to the main loop */
2527         return current_tb_invalidated ? 2 : 1;
2528     }
2529     mmap_unlock();
2530     return 0;
2531 }
2532 #endif /* CONFIG_USER_ONLY */
2533 
2534 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2535 void tcg_flush_softmmu_tlb(CPUState *cs)
2536 {
2537 #ifdef CONFIG_SOFTMMU
2538     tlb_flush(cs);
2539 #endif
2540 }
2541