/*
 *  Host code generation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#ifdef _WIN32
#include <windows.h>
#endif
#include "qemu/osdep.h"


#include "qemu-common.h"
#define NO_CPU_IO_DEFS
#include "cpu.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/address-spaces.h"
#endif

#include "exec/cputlb.h"
#include "exec/tb-hash.h"
#include "translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "exec/log.h"
#include "sysemu/cpus.h"

/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
/* make various TB consistency checks */
/* #define DEBUG_TB_CHECK */

#ifdef DEBUG_TB_INVALIDATE
#define DEBUG_TB_INVALIDATE_GATE 1
#else
#define DEBUG_TB_INVALIDATE_GATE 0
#endif

#ifdef DEBUG_TB_FLUSH
#define DEBUG_TB_FLUSH_GATE 1
#else
#define DEBUG_TB_FLUSH_GATE 0
#endif

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#ifdef DEBUG_TB_CHECK
#define DEBUG_TB_CHECK_GATE 1
#else
#define DEBUG_TB_CHECK_GATE 0
#endif

/* Access to the various translation structures needs to be serialised
 * via locks for consistency. This is automatic for SoftMMU-based system
 * emulation due to its single-threaded nature. In user-mode emulation,
 * accesses to the memory-related structures are protected by the
 * mmap_lock.
 */
#ifdef CONFIG_SOFTMMU
#define assert_memory_lock() tcg_debug_assert(have_tb_lock)
#else
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#endif

#define SMC_BITMAP_USE_THRESHOLD 10

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    uintptr_t first_tb;
#ifdef CONFIG_SOFTMMU
    /* in order to optimize self-modifying code, we count the number
       of lookups we do to a given page so that we can use a bitmap */
    unsigned long *code_bitmap;
    unsigned int code_write_count;
#else
    unsigned long flags;
#endif
#ifndef CONFIG_USER_ONLY
    QemuSpin lock;
#endif
} PageDesc;

/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};

/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};
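
/*
 * Usage sketch (illustrative): a caller that must modify every TB
 * intersecting a physical range locks all affected pages in index order
 * through a collection and releases them in a single call:
 *
 *     struct page_collection *pages;
 *
 *     pages = page_collection_lock(start, end);
 *     ... modify or invalidate TBs in [start, end[ ...
 *     page_collection_unlock(pages);
 */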

/* list iterators for lists of tagged pointers in TranslationBlock */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)

#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
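
/*
 * Worked example (illustrative): TranslationBlock pointers are at least
 * two-byte aligned, so bit 0 of each list word is free to carry a tag.
 * If a page's first_tb equals ((uintptr_t)tb | 1), then tb intersects
 * the page as its second page (n == 1), and the next element of the
 * chain is the tagged pointer stored in tb->page_next[1].  A typical walk:
 *
 *     TranslationBlock *tb;
 *     int n;
 *
 *     PAGE_FOR_EACH_TB(pd, tb, n) {
 *         ... tb->page_addr[n] refers to pd's page here ...
 *     }
 */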

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * L1 Mapping properties
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/* The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

static void *l1_map[V_L1_MAX_SIZE];

/* code generation context */
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
TBContext tb_ctx;
bool parallel_cpus;

/* translation block context */
static __thread int have_tb_lock;

static void page_table_config_init(void)
{
    uint32_t v_l1_bits;

    assert(TARGET_PAGE_BITS);
    /* The bits remaining after N lower levels of page tables.  */
    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    if (v_l1_bits < V_L1_MIN_BITS) {
        v_l1_bits += V_L2_BITS;
    }

    v_l1_size = 1 << v_l1_bits;
    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    v_l2_levels = v_l1_shift / V_L2_BITS - 1;

    assert(v_l1_bits <= V_L1_MAX_BITS);
    assert(v_l1_shift % V_L2_BITS == 0);
    assert(v_l2_levels >= 0);
}
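
/*
 * Worked example (illustrative): assume L1_MAP_ADDR_SPACE_BITS == 32 and
 * TARGET_PAGE_BITS == 12, leaving 20 bits of page index.  (32 - 12) % 10
 * is 0, which is below V_L1_MIN_BITS, so v_l1_bits becomes 10: a
 * 1024-entry L1 table indexed by the top 10 bits, v_l1_shift == 10 and
 * v_l2_levels == 0, i.e. each L1 entry points directly at a leaf array
 * of V_L2_SIZE PageDescs indexed by the low 10 bits.
 */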

#define assert_tb_locked() tcg_debug_assert(have_tb_lock)
#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock)

void tb_lock(void)
{
    assert_tb_unlocked();
    qemu_mutex_lock(&tb_ctx.tb_lock);
    have_tb_lock++;
}

void tb_unlock(void)
{
    assert_tb_locked();
    have_tb_lock--;
    qemu_mutex_unlock(&tb_ctx.tb_lock);
}

void tb_lock_reset(void)
{
    if (have_tb_lock) {
        qemu_mutex_unlock(&tb_ctx.tb_lock);
        have_tb_lock = 0;
    }
}

void cpu_gen_init(void)
{
    tcg_context_init(&tcg_init_ctx);
}

/* Encode VAL as a signed leb128 sequence at P.
   Return P incremented past the encoded value.  */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/* Decode a signed leb128 sequence at *PP; increment *PP past the
   decoded value.  Return the decoded value.  */
static target_long decode_sleb128(uint8_t **pp)
{
    uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        target_ulong prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (j == 0 ? tb->pc : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

/* The CPU state corresponding to 'searched_pc' is restored.
 * When reset_icount is true, the current TB will be interrupted and
 * icount should be recalculated.
 */
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                     uintptr_t searched_pc, bool reset_icount)
{
    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
    CPUArchState *env = cpu->env_ptr;
    uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif

    searched_pc -= GETPC_ADJ;

    if (searched_pc < host_pc) {
        return -1;
    }

    /* Reconstruct the stored insn data while looking for the point at
       which the end of the insn exceeds the searched_pc.  */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        host_pc += decode_sleb128(&p);
        if (host_pc > searched_pc) {
            goto found;
        }
    }
    return -1;

 found:
    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
        assert(use_icount);
        /* Reset the cycle counter to the start of the block
           and shift it by the number of actually executed instructions */
        cpu->icount_decr.u16.low += num_insns - i;
    }
    restore_state_to_opc(env, tb, data);

#ifdef CONFIG_PROFILER
    atomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    atomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
    return 0;
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
{
    TranslationBlock *tb;
    bool r = false;
    uintptr_t check_offset;

    /* The host_pc has to be in the region of the current code buffer.
     * If it is not, we will not be able to resolve it here. The two
     * cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early to avoid blowing up on a
     * recursive tb_lock() as we can't resolve it here.
     *
     * We are using unsigned arithmetic, so if host_pc <
     * tcg_init_ctx.code_gen_buffer, check_offset will wrap to way
     * above code_gen_buffer_size.
     */
    check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer;

    if (check_offset < tcg_init_ctx.code_gen_buffer_size) {
        tb_lock();
        tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
            if (tb_cflags(tb) & CF_NOCACHE) {
                /* one-shot translation, invalidate it immediately */
                tb_phys_invalidate(tb, -1);
                tcg_tb_remove(tb);
            }
            r = true;
        }
        tb_unlock();
    }

    return r;
}
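
/*
 * Usage sketch (illustrative): a TCG helper that faults typically
 * resolves the precise guest state with something like
 *
 *     cpu_restore_state(cs, GETPC(), true);
 *
 * where GETPC() captures the host return address inside the generated
 * code, which is what tcg_tb_lookup() needs to find the current TB.
 */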

static void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (i = v_l2_levels; i > 0; i--) {
        void **p = atomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = atomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = atomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }
        pd = g_new0(PageDesc, V_L2_SIZE);
#ifndef CONFIG_USER_ONLY
        {
            int i;

            for (i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_init(&pd[i].lock);
            }
        }
#endif
        existing = atomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}

static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);

/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY

#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

static inline void page_lock(PageDesc *pd)
{ }

static inline void page_unlock(PageDesc *pd)
{ }

static inline void page_lock_tb(const TranslationBlock *tb)
{ }

static inline void page_unlock_tb(const TranslationBlock *tb)
{ }

struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

void page_collection_unlock(struct page_collection *set)
{ }
#else /* !CONFIG_USER_ONLY */

#ifdef CONFIG_DEBUG_TCG

static __thread GHashTable *ht_pages_locked_debug;

static void ht_pages_locked_debug_init(void)
{
    if (ht_pages_locked_debug) {
        return;
    }
    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
}

static bool page_is_locked(const PageDesc *pd)
{
    PageDesc *found;

    ht_pages_locked_debug_init();
    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    return !!found;
}

static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}

static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}

static void
do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}

#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)

void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}

#else /* !CONFIG_DEBUG_TCG */

#define assert_page_locked(pd)

static inline void page_lock__debug(const PageDesc *pd)
{
}

static inline void page_unlock__debug(const PageDesc *pd)
{
}

#endif /* CONFIG_DEBUG_TCG */

static inline void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

static inline void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}

/* lock the page(s) of a TB in the correct acquisition order */
static inline void page_lock_tb(const TranslationBlock *tb)
{
    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
}

static inline void page_unlock_tb(const TranslationBlock *tb)
{
    page_unlock(page_find(tb->page_addr[0] >> TARGET_PAGE_BITS));
    if (unlikely(tb->page_addr[1] != -1)) {
        page_unlock(page_find(tb->page_addr[1] >> TARGET_PAGE_BITS));
    }
}

static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
    struct page_entry *pe = g_malloc(sizeof(*pe));

    pe->index = index;
    pe->pd = pd;
    pe->locked = false;
    return pe;
}

static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}

/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
    bool busy;

    busy = qemu_spin_trylock(&pe->pd->lock);
    if (!busy) {
        g_assert(!pe->locked);
        pe->locked = true;
        page_lock__debug(pe->pd);
    }
    return busy;
}

static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}

static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}

static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    if (pe->locked) {
        pe->locked = false;
        page_unlock(pe->pd);
    }
    return FALSE;
}

/*
 * Trylock a page, and if successful, add the page to a collection.
 * Returns true ("busy") if the page could not be locked; false otherwise.
 */
static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
{
    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    struct page_entry *pe;
    PageDesc *pd;

    pe = g_tree_lookup(set->tree, &index);
    if (pe) {
        return false;
    }

    pd = page_find(index);
    if (pd == NULL) {
        return false;
    }

    pe = page_entry_new(pd, index);
    g_tree_insert(set->tree, &pe->index, pe);

    /*
     * If this is either (1) the first insertion or (2) a page whose index
     * is higher than any other so far, just lock the page and move on.
     */
    if (set->max == NULL || pe->index > set->max->index) {
        set->max = pe;
        do_page_entry_lock(pe);
        return false;
    }
    /*
     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
     * locks in order.
     */
    return page_entry_trylock(pe);
}
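
/*
 * Example scenario (illustrative): suppose the collection already holds
 * pages 5 and 9, both locked, and a TB turns out to also span page 3.
 * Since 3 < 9 == set->max->index, locking page 3 now would be out of
 * order, so page_entry_trylock() is used; if it reports busy, the caller
 * (page_collection_lock() below) drops every lock and retries, this time
 * acquiring pages 3, 5 and 9 in ascending order.
 */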

static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
{
    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    tb_page_addr_t b = *(const tb_page_addr_t *)bp;

    if (a == b) {
        return 0;
    } else if (a < b) {
        return -1;
    }
    return 1;
}

/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    end   >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb->page_addr[0]) ||
                (tb->page_addr[1] != -1 &&
                 page_trylock_add(set, tb->page_addr[1]))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}

void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}

#endif /* !CONFIG_USER_ONLY */

static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
{
    PageDesc *p1, *p2;

    assert_memory_lock();
    g_assert(phys1 != -1 && phys1 != phys2);
    p1 = page_find_alloc(phys1 >> TARGET_PAGE_BITS, alloc);
    if (ret_p1) {
        *ret_p1 = p1;
    }
    if (likely(phys2 == -1)) {
        page_lock(p1);
        return;
    }
    p2 = page_find_alloc(phys2 >> TARGET_PAGE_BITS, alloc);
    if (ret_p2) {
        *ret_p2 = p2;
    }
    if (phys1 < phys2) {
        page_lock(p1);
        page_lock(p2);
    } else {
        page_lock(p2);
        page_lock(p1);
    }
}

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc is used.  */
/* ??? 64-bit hosts ought to have no problem mmapping data outside the
   region in which the guest needs to run.  Revisit this.  */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

/* Minimum size of the code gen buffer.  This number is randomly chosen,
   but not so small that we can't have a fair number of TBs live.  */
#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)

/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
   indicated, this is constrained by the range of direct branches on the
   host cpu, as used by the TCG implementation of goto_tb.  */
#if defined(__x86_64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
#elif defined(__s390x__)
  /* We have a +- 4GB range on the branches; leave some slop.  */
# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
#elif defined(__mips__)
  /* We have a 256MB branch region, but leave room to make sure the
     main executable is also within that region.  */
# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
#else
# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

static inline size_t size_code_gen_buffer(size_t tb_size)
{
    /* Size the buffer.  */
    if (tb_size == 0) {
#ifdef USE_STATIC_CODE_GEN_BUFFER
        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* ??? Needs adjustments.  */
        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
           static buffer, we could size this on RESERVED_VA, on the text
           segment size of the executable, or continue to use the default.  */
        tb_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }
    return tb_size;
}
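
/*
 * For instance (illustrative): a request of 512 KB is rounded up to
 * MIN_CODE_GEN_BUFFER_SIZE (1 MB), while tb_size == 0 with
 * USE_STATIC_CODE_GEN_BUFFER defined selects the 32 MB default.
 */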

#ifdef __mips__
/* In order to use J and JAL within the code_gen_buffer, we require
   that the buffer not cross a 256MB boundary.  */
static inline bool cross_256mb(void *addr, size_t size)
{
    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
}
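
/*
 * Example (illustrative): addr == 0x0ff00000 with size == 0x00200000 ends
 * at 0x10100000; the XOR of the two ends is 0x1fe00000, which is non-zero
 * after masking with ~0x0fffffff, so this buffer crosses a 256MB boundary
 * and J/JAL could not reach all of it.
 */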

/* We weren't able to allocate a buffer without crossing that boundary,
   so make do with the larger portion of the buffer that doesn't cross.
   Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
static inline void *split_cross_256mb(void *buf1, size_t size1)
{
    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
    size_t size2 = buf1 + size1 - buf2;

    size1 = buf2 - buf1;
    if (size1 < size2) {
        size1 = size2;
        buf1 = buf2;
    }

    tcg_ctx->code_gen_buffer_size = size1;
    return buf1;
}
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static inline void *alloc_code_gen_buffer(void)
{
    void *buf = static_code_gen_buffer;
    void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    size_t size;

    /* page-align the beginning and end of the buffer */
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer.  */
    if (size > tcg_ctx->code_gen_buffer_size) {
        size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size,
                               qemu_real_host_page_size);
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        buf = split_cross_256mb(buf, size);
        size = tcg_ctx->code_gen_buffer_size;
    }
#endif

    if (qemu_mprotect_rwx(buf, size)) {
        abort();
    }
    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

    return buf;
}
#elif defined(_WIN32)
static inline void *alloc_code_gen_buffer(void)
{
    size_t size = tcg_ctx->code_gen_buffer_size;
    return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                        PAGE_EXECUTE_READWRITE);
}
#else
static inline void *alloc_code_gen_buffer(void)
{
    int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    uintptr_t start = 0;
    size_t size = tcg_ctx->code_gen_buffer_size;
    void *buf;

    /* Constrain the position of the buffer based on the host cpu.
       Note that these addresses are chosen in concert with the
       addresses assigned in the relevant linker script file.  */
# if defined(__PIE__) || defined(__PIC__)
    /* Don't bother setting a preferred location if we're building
       a position-independent executable.  We're more likely to get
       an address near the main executable if we let the kernel
       choose the address.  */
# elif defined(__x86_64__) && defined(MAP_32BIT)
    /* Force the memory down into low memory with the executable.
       Leave the choice of exact location with the kernel.  */
    flags |= MAP_32BIT;
    /* Cannot expect to map more than 800MB in low memory.  */
    if (size > 800u * 1024 * 1024) {
        tcg_ctx->code_gen_buffer_size = size = 800u * 1024 * 1024;
    }
# elif defined(__sparc__)
    start = 0x40000000ul;
# elif defined(__s390x__)
    start = 0x90000000ul;
# elif defined(__mips__)
#  if _MIPS_SIM == _ABI64
    start = 0x128000000ul;
#  else
    start = 0x08000000ul;
#  endif
# endif

    buf = mmap((void *)start, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        return NULL;
    }

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        /* Try again, with the original still mapped, to avoid re-acquiring
           that 256mb crossing.  This time don't specify an address.  */
        size_t size2;
        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
        switch ((int)(buf2 != MAP_FAILED)) {
        case 1:
            if (!cross_256mb(buf2, size)) {
                /* Success!  Use the new buffer.  */
                munmap(buf, size);
                break;
            }
            /* Failure.  Work with what we had.  */
            munmap(buf2, size);
            /* fallthru */
        default:
            /* Split the original buffer.  Free the smaller half.  */
            buf2 = split_cross_256mb(buf, size);
            size2 = tcg_ctx->code_gen_buffer_size;
            if (buf == buf2) {
                munmap(buf + size2, size - size2);
            } else {
                munmap(buf, size - size2);
            }
            size = size2;
            break;
        }
        buf = buf2;
    }
#endif

    /* Request large pages for the buffer.  */
    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

    return buf;
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

static inline void code_gen_alloc(size_t tb_size)
{
    tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size);
    tcg_ctx->code_gen_buffer = alloc_code_gen_buffer();
    if (tcg_ctx->code_gen_buffer == NULL) {
        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
        exit(1);
    }
    qemu_mutex_init(&tb_ctx.tb_lock);
}

static bool tb_cmp(const void *ap, const void *bp)
{
    const TranslationBlock *a = ap;
    const TranslationBlock *b = bp;

    return a->pc == b->pc &&
        a->cs_base == b->cs_base &&
        a->flags == b->flags &&
        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
        a->page_addr[0] == b->page_addr[0] &&
        a->page_addr[1] == b->page_addr[1];
}

static void tb_htable_init(void)
{
    unsigned int mode = QHT_MODE_AUTO_RESIZE;

    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
}

/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    tcg_allowed = true;
    cpu_gen_init();
    page_init();
    tb_htable_init();
    code_gen_alloc(tb_size);
#if defined(CONFIG_SOFTMMU)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(tcg_ctx);
#endif
}
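
/*
 * Usage note (illustrative): callers typically pass 0 to get the default
 * sizing from size_code_gen_buffer(), e.g. tcg_exec_init(0), or forward a
 * user-requested translation-buffer size in bytes.
 */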

/*
 * Allocate a new translation block. Flush the translation buffer if
 * too many translation blocks or too much generated code.
 *
 * Called with tb_lock held.
 */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    assert_tb_locked();

    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(tb == NULL)) {
        return NULL;
    }
    return tb;
}

/* call with @p->lock held */
static inline void invalidate_page_bitmap(PageDesc *p)
{
    assert_page_locked(p);
#ifdef CONFIG_SOFTMMU
    g_free(p->code_bitmap);
    p->code_bitmap = NULL;
    p->code_write_count = 0;
#endif
}

/* Set to NULL all the 'first_tb' fields in all PageDescs. */
static void page_flush_tb_1(int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            page_lock(&pd[i]);
            pd[i].first_tb = (uintptr_t)NULL;
            invalidate_page_bitmap(pd + i);
            page_unlock(&pd[i]);
        }
    } else {
        void **pp = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            page_flush_tb_1(level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i, l1_sz = v_l1_size;

    for (i = 0; i < l1_sz; i++) {
        page_flush_tb_1(v_l2_levels, l1_map + i);
    }
}

static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    size_t *size = data;

    *size += tb->tc.size;
    return false;
}

/* flush all the translation blocks */
static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
{
    tb_lock();

    /* If it has already been done on request of another CPU,
     * just retry.
     */
    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
        goto done;
    }

    if (DEBUG_TB_FLUSH_GATE) {
        size_t nb_tbs = tcg_nb_tbs();
        size_t host_size = 0;

        tcg_tb_foreach(tb_host_size_iter, &host_size);
        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
    }

    CPU_FOREACH(cpu) {
        cpu_tb_jmp_cache_clear(cpu);
    }

    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
    page_flush_tb();

    tcg_region_reset_all();
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);

done:
    tb_unlock();
}

void tb_flush(CPUState *cpu)
{
    if (tcg_enabled()) {
        unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count);
        async_safe_run_on_cpu(cpu, do_tb_flush,
                              RUN_ON_CPU_HOST_INT(tb_flush_count));
    }
}

/*
 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
 * so in order to prevent bit rot we compile them unconditionally in user-mode,
 * and let the optimizer get rid of them by wrapping their user-only callers
 * with if (DEBUG_TB_CHECK_GATE).
 */
#ifdef CONFIG_USER_ONLY

static void
do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
{
    TranslationBlock *tb = p;
    target_ulong addr = *(target_ulong *)userp;

    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
        printf("ERROR invalidate: address=" TARGET_FMT_lx
               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
    }
}

/* verify that all the pages have correct rights for code
 *
 * Called with tb_lock held.
 */
static void tb_invalidate_check(target_ulong address)
{
    address &= TARGET_PAGE_MASK;
    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
}

static void
do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp)
{
    TranslationBlock *tb = p;
    int flags1, flags2;

    flags1 = page_get_flags(tb->pc);
    flags2 = page_get_flags(tb->pc + tb->size - 1);
    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
               (long)tb->pc, tb->size, flags1, flags2);
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
}

#endif /* CONFIG_USER_ONLY */

/* call with @pd->lock held */
static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    uintptr_t *pprev;
    unsigned int n1;

    assert_page_locked(pd);
    pprev = &pd->first_tb;
    PAGE_FOR_EACH_TB(pd, tb1, n1) {
        if (tb1 == tb) {
            *pprev = tb1->page_next[n1];
            return;
        }
        pprev = &tb1->page_next[n1];
    }
    g_assert_not_reached();
}

/* remove @orig from its @n_orig-th jump list */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = atomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = atomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jump_unlink(dest). Seeing another destination here would
         * be a bug, because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
    tb_set_jmp_target(tb, n, addr);
}

/* remove any jumps to the TB */
static inline void tb_jmp_unlink(TranslationBlock *dest)
{
    TranslationBlock *tb;
    int n;

    qemu_spin_lock(&dest->jmp_lock);

    TB_FOR_EACH_JMP(dest, tb, n) {
        tb_reset_jump(tb, n);
        atomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
        /* No need to clear the list entry; setting the dest ptr is enough */
    }
    dest->jmp_list_head = (uintptr_t)NULL;

    qemu_spin_unlock(&dest->jmp_lock);
}

/* If @rm_from_page_list is set, call with the TB's pages' locks held */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    CPUState *cpu;
    PageDesc *p;
    uint32_t h;
    tb_page_addr_t phys_pc;

    assert_tb_locked();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
                     tb->trace_vcpu_dstate);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(p, tb);
        invalidate_page_bitmap(p);
        if (tb->page_addr[1] != -1) {
            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
            tb_page_remove(p, tb);
            invalidate_page_bitmap(p);
        }
    }

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
            atomic_set(&cpu->tb_jmp_cache[h], NULL);
        }
    }

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    atomic_set(&tcg_ctx->tb_phys_invalidate_count,
               tcg_ctx->tb_phys_invalidate_count + 1);
}

static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    do_tb_phys_invalidate(tb, true);
}

/* invalidate one TB
 *
 * Called with tb_lock held.
 */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    if (page_addr == -1) {
        page_lock_tb(tb);
        do_tb_phys_invalidate(tb, true);
        page_unlock_tb(tb);
    } else {
        do_tb_phys_invalidate(tb, false);
    }
}

#ifdef CONFIG_SOFTMMU
/* call with @p->lock held */
static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    assert_page_locked(p);
    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);

    PAGE_FOR_EACH_TB(p, tb, n) {
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE) {
                tb_end = TARGET_PAGE_SIZE;
            }
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
    }
}
#endif

/* add the tb to the target page and protect it if necessary
 *
 * Called with mmap_lock held for user-mode emulation.
 * Called with @p->lock held.
 */
static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
                               unsigned int n, tb_page_addr_t page_addr)
{
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    assert_page_locked(p);

    tb->page_addr[n] = page_addr;
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != (uintptr_t)NULL;
#endif
    p->first_tb = (uintptr_t)tb | n;
    invalidate_page_bitmap(p);

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non-writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find(addr >> TARGET_PAGE_BITS);
            if (!p2) {
                continue;
            }
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
        if (DEBUG_TB_INVALIDATE_GATE) {
            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
        }
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif
}

/* add a new TB and link it to the physical page tables. phys_page2 is
 * (-1) to indicate that only one page contains the TB.
 *
 * Called with mmap_lock held for user-mode emulation.
 *
 * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
 * Note that in !user-mode, another thread might have already added a TB
 * for the same block of guest code that @tb corresponds to. In that case,
 * the caller should discard the original @tb, and use instead the returned TB.
 */
static TranslationBlock *
tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
             tb_page_addr_t phys_page2)
{
    PageDesc *p;
    PageDesc *p2 = NULL;
    void *existing_tb = NULL;
    uint32_t h;

    assert_memory_lock();

    /*
1624      * We keep the locks held until after inserting the TB in the hash table,
1625      * so that if the insertion fails we know for sure that the TBs are still
1626      * in the page descriptors.
1627      * Note that inserting into the hash table first isn't an option, since
1628      * we can only insert TBs that are fully initialized.
1629      */
1630     page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1631     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1632     if (p2) {
1633         tb_page_add(p2, tb, 1, phys_page2);
1634     } else {
1635         tb->page_addr[1] = -1;
1636     }
1637 
1638     /* add in the hash table */
1639     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
1640                      tb->trace_vcpu_dstate);
1641     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1642 
1643     /* remove TB from the page(s) if we couldn't insert it */
1644     if (unlikely(existing_tb)) {
1645         tb_page_remove(p, tb);
1646         invalidate_page_bitmap(p);
1647         if (p2) {
1648             tb_page_remove(p2, tb);
1649             invalidate_page_bitmap(p2);
1650         }
1651         tb = existing_tb;
1652     }
1653 
1654     if (p2) {
1655         page_unlock(p2);
1656     }
1657     page_unlock(p);
1658 
1659 #ifdef CONFIG_USER_ONLY
1660     if (DEBUG_TB_CHECK_GATE) {
1661         tb_page_check();
1662     }
1663 #endif
1664     return tb;
1665 }
1666 
1667 /* Called with mmap_lock held for user mode emulation.  */
1668 TranslationBlock *tb_gen_code(CPUState *cpu,
1669                               target_ulong pc, target_ulong cs_base,
1670                               uint32_t flags, int cflags)
1671 {
1672     CPUArchState *env = cpu->env_ptr;
1673     TranslationBlock *tb, *existing_tb;
1674     tb_page_addr_t phys_pc, phys_page2;
1675     target_ulong virt_page2;
1676     tcg_insn_unit *gen_code_buf;
1677     int gen_code_size, search_size;
1678 #ifdef CONFIG_PROFILER
1679     TCGProfile *prof = &tcg_ctx->prof;
1680     int64_t ti;
1681 #endif
1682     assert_memory_lock();
1683 
1684     phys_pc = get_page_addr_code(env, pc);
1685 
1686  buffer_overflow:
1687     tb = tb_alloc(pc);
1688     if (unlikely(!tb)) {
1689         /* flush must be done */
1690         tb_flush(cpu);
1691         mmap_unlock();
1692         /* Make the execution loop process the flush as soon as possible.  */
1693         cpu->exception_index = EXCP_INTERRUPT;
1694         cpu_loop_exit(cpu);
1695     }
1696 
1697     gen_code_buf = tcg_ctx->code_gen_ptr;
1698     tb->tc.ptr = gen_code_buf;
1699     tb->pc = pc;
1700     tb->cs_base = cs_base;
1701     tb->flags = flags;
1702     tb->cflags = cflags;
1703     tb->trace_vcpu_dstate = *cpu->trace_dstate;
1704     tcg_ctx->tb_cflags = cflags;
1705 
1706 #ifdef CONFIG_PROFILER
1707     /* includes aborted translations because of exceptions */
1708     atomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1709     ti = profile_getclock();
1710 #endif
1711 
1712     tcg_func_start(tcg_ctx);
1713 
1714     tcg_ctx->cpu = ENV_GET_CPU(env);
1715     gen_intermediate_code(cpu, tb);
1716     tcg_ctx->cpu = NULL;
1717 
1718     trace_translate_block(tb, tb->pc, tb->tc.ptr);
1719 
1720     /* generate machine code */
1721     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1722     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1723     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1724     if (TCG_TARGET_HAS_direct_jump) {
1725         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1726         tcg_ctx->tb_jmp_target_addr = NULL;
1727     } else {
1728         tcg_ctx->tb_jmp_insn_offset = NULL;
1729         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1730     }
1731 
1732 #ifdef CONFIG_PROFILER
1733     atomic_set(&prof->tb_count, prof->tb_count + 1);
1734     atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti);
1735     ti = profile_getclock();
1736 #endif
1737 
1738     /* ??? Overflow could be handled better here.  In particular, we
1739        don't need to re-do gen_intermediate_code, nor should we re-do
1740        the tcg optimization currently hidden inside tcg_gen_code.  All
1741        that should be required is to flush the TBs, allocate a new TB,
1742        re-initialize it per above, and re-do the actual code generation.  */
1743     gen_code_size = tcg_gen_code(tcg_ctx, tb);
1744     if (unlikely(gen_code_size < 0)) {
1745         goto buffer_overflow;
1746     }
1747     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1748     if (unlikely(search_size < 0)) {
1749         goto buffer_overflow;
1750     }
1751     tb->tc.size = gen_code_size;
1752 
1753 #ifdef CONFIG_PROFILER
1754     atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1755     atomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1756     atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1757     atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1758 #endif
1759 
1760 #ifdef DEBUG_DISAS
1761     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1762         qemu_log_in_addr_range(tb->pc)) {
1763         qemu_log_lock();
1764         qemu_log("OUT: [size=%d]\n", gen_code_size);
1765         if (tcg_ctx->data_gen_ptr) {
1766             size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
1767             size_t data_size = gen_code_size - code_size;
1768             size_t i;
1769 
1770             log_disas(tb->tc.ptr, code_size);
1771 
1772             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1773                 if (sizeof(tcg_target_ulong) == 8) {
1774                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1775                              (uintptr_t)tcg_ctx->data_gen_ptr + i,
1776                              *(uint64_t *)(tcg_ctx->data_gen_ptr + i));
1777                 } else {
1778                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1779                              (uintptr_t)tcg_ctx->data_gen_ptr + i,
1780                              *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
1781                 }
1782             }
1783         } else {
1784             log_disas(tb->tc.ptr, gen_code_size);
1785         }
1786         qemu_log("\n");
1787         qemu_log_flush();
1788         qemu_log_unlock();
1789     }
1790 #endif
1791 
1792     atomic_set(&tcg_ctx->code_gen_ptr, (void *)
1793         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1794                  CODE_GEN_ALIGN));
1795 
1796     /* init jump list */
1797     qemu_spin_init(&tb->jmp_lock);
1798     tb->jmp_list_head = (uintptr_t)NULL;
1799     tb->jmp_list_next[0] = (uintptr_t)NULL;
1800     tb->jmp_list_next[1] = (uintptr_t)NULL;
1801     tb->jmp_dest[0] = (uintptr_t)NULL;
1802     tb->jmp_dest[1] = (uintptr_t)NULL;
1803 
1804     /* init original jump addresses which have been set during tcg_gen_code() */
1805     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1806         tb_reset_jump(tb, 0);
1807     }
1808     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1809         tb_reset_jump(tb, 1);
1810     }
1811 
1812     /* check next page if needed */
1813     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1814     phys_page2 = -1;
1815     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1816         phys_page2 = get_page_addr_code(env, virt_page2);
1817     }
1818     /* As long as consistency of the TB stuff is provided by tb_lock in user
1819      * mode and is implicit in single-threaded softmmu emulation, no explicit
1820      * memory barrier is required before tb_link_page() makes the TB visible
1821      * through the physical hash table and physical page list.
1822      */
1823     existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1824     /* if the TB already exists, discard what we just translated */
1825     if (unlikely(existing_tb != tb)) {
1826         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1827 
1828         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1829         atomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1830         return existing_tb;
1831     }
1832     tcg_tb_insert(tb);
1833     return tb;
1834 }
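
/*
 * Illustrative sketch (not part of the original source): a lookup miss in
 * the execution loop might fall back to tb_gen_code() roughly as below.
 * The helper name tb_lookup_miss() is hypothetical, and the softmmu
 * tb_lock discipline is omitted for brevity.
 *
 *     static TranslationBlock *tb_lookup_miss(CPUState *cpu, target_ulong pc,
 *                                             target_ulong cs_base,
 *                                             uint32_t flags, uint32_t cf_mask)
 *     {
 *         TranslationBlock *tb;
 *
 *         mmap_lock();
 *         tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
 *         mmap_unlock();
 *         return tb;
 *     }
 */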
1835 
1836 /*
1837  * Call with all @pages locked.
1838  * @p must be non-NULL.
1839  */
1840 static void
1841 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1842                                       PageDesc *p, tb_page_addr_t start,
1843                                       tb_page_addr_t end,
1844                                       int is_cpu_write_access)
1845 {
1846     TranslationBlock *tb;
1847     tb_page_addr_t tb_start, tb_end;
1848     int n;
1849 #ifdef TARGET_HAS_PRECISE_SMC
1850     CPUState *cpu = current_cpu;
1851     CPUArchState *env = NULL;
1852     int current_tb_not_found = is_cpu_write_access;
1853     TranslationBlock *current_tb = NULL;
1854     int current_tb_modified = 0;
1855     target_ulong current_pc = 0;
1856     target_ulong current_cs_base = 0;
1857     uint32_t current_flags = 0;
1858 #endif /* TARGET_HAS_PRECISE_SMC */
1859 
1860     assert_page_locked(p);
1861 
1862 #if defined(TARGET_HAS_PRECISE_SMC)
1863     if (cpu != NULL) {
1864         env = cpu->env_ptr;
1865     }
1866 #endif
1867 
1868     /* we remove all the TBs in the range [start, end[ */
1869     /* XXX: see if in some cases it could be faster to invalidate all
1870        the code */
1871     PAGE_FOR_EACH_TB(p, tb, n) {
1872         assert_page_locked(p);
1873         /* NOTE: this is subtle as a TB may span two physical pages */
1874         if (n == 0) {
1875             /* NOTE: tb_end may be after the end of the page, but
1876                it is not a problem */
1877             tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1878             tb_end = tb_start + tb->size;
1879         } else {
1880             tb_start = tb->page_addr[1];
1881             tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1882         }
1883         if (!(tb_end <= start || tb_start >= end)) {
1884 #ifdef TARGET_HAS_PRECISE_SMC
1885             if (current_tb_not_found) {
1886                 current_tb_not_found = 0;
1887                 current_tb = NULL;
1888                 if (cpu->mem_io_pc) {
1889                     /* now we have a real cpu fault */
1890                     current_tb = tcg_tb_lookup(cpu->mem_io_pc);
1891                 }
1892             }
1893             if (current_tb == tb &&
1894                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1895                 /* If we are modifying the current TB, we must stop
1896                    its execution. We could be more precise by checking
1897                    that the modification is after the current PC, but it
1898                    would require a specialized function to partially
1899                    restore the CPU state */
1900 
1901                 current_tb_modified = 1;
1902                 cpu_restore_state_from_tb(cpu, current_tb,
1903                                           cpu->mem_io_pc, true);
1904                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1905                                      &current_flags);
1906             }
1907 #endif /* TARGET_HAS_PRECISE_SMC */
1908             tb_phys_invalidate__locked(tb);
1909         }
1910     }
1911 #if !defined(CONFIG_USER_ONLY)
1912     /* if no code remaining, no need to continue to use slow writes */
1913     if (!p->first_tb) {
1914         invalidate_page_bitmap(p);
1915         tlb_unprotect_code(start);
1916     }
1917 #endif
1918 #ifdef TARGET_HAS_PRECISE_SMC
1919     if (current_tb_modified) {
1920         page_collection_unlock(pages);
1921         /* Force execution of one insn next time.  */
1922         cpu->cflags_next_tb = 1 | curr_cflags();
1923         cpu_loop_exit_noexc(cpu);
1924     }
1925 #endif
1926 }
1927 
1928 /*
1929  * Invalidate all TBs which intersect with the target physical address range
1930  * [start;end[. NOTE: start and end must refer to the *same* physical page.
1931  * 'is_cpu_write_access' should be true if called from a real cpu write
1932  * access: the virtual CPU will exit the current TB if code is modified inside
1933  * this TB.
1934  *
1935  * Called with tb_lock/mmap_lock held for user-mode emulation
1936  * Called with tb_lock held for system-mode emulation
1937  */
1938 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1939                                    int is_cpu_write_access)
1940 {
1941     struct page_collection *pages;
1942     PageDesc *p;
1943 
1944     assert_memory_lock();
1945     assert_tb_locked();
1946 
1947     p = page_find(start >> TARGET_PAGE_BITS);
1948     if (p == NULL) {
1949         return;
1950     }
1951     pages = page_collection_lock(start, end);
1952     tb_invalidate_phys_page_range__locked(pages, p, start, end,
1953                                           is_cpu_write_access);
1954     page_collection_unlock(pages);
1955 }
1956 
1957 /*
1958  * Invalidate all TBs which intersect with the target physical address range
1959  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1960  * 'is_cpu_write_access' should be true if called from a real cpu write
1961  * access: the virtual CPU will exit the current TB if code is modified inside
1962  * this TB.
1963  *
1964  * Called with mmap_lock held for user-mode emulation, grabs tb_lock
1965  * Called with tb_lock held for system-mode emulation
1966  */
1967 static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
1968 {
1969     struct page_collection *pages;
1970     tb_page_addr_t next;
1971 
1972     pages = page_collection_lock(start, end);
1973     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1974          start < end;
1975          start = next, next += TARGET_PAGE_SIZE) {
1976         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1977         tb_page_addr_t bound = MIN(next, end);
1978 
1979         if (pd == NULL) {
1980             continue;
1981         }
1982         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1983     }
1984     page_collection_unlock(pages);
1985 }
1986 
1987 #ifdef CONFIG_SOFTMMU
1988 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1989 {
1990     assert_tb_locked();
1991     tb_invalidate_phys_range_1(start, end);
1992 }
1993 #else
1994 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1995 {
1996     assert_memory_lock();
1997     tb_lock();
1998     tb_invalidate_phys_range_1(start, end);
1999     tb_unlock();
2000 }
2001 #endif
2002 
2003 #ifdef CONFIG_SOFTMMU
2004 /* len must be <= 8 and start must be a multiple of len.
2005  * Called via softmmu_template.h when code areas are written to with
2006  * iothread mutex not held.
2007  */
2008 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
2009 {
2010     struct page_collection *pages;
2011     PageDesc *p;
2012 
2013 #if 0
2014     if (1) {
2015         qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
2016                   cpu_single_env->mem_io_vaddr, len,
2017                   cpu_single_env->eip,
2018                   cpu_single_env->eip +
2019                   (intptr_t)cpu_single_env->segs[R_CS].base);
2020     }
2021 #endif
2022     assert_memory_lock();
2023 
2024     p = page_find(start >> TARGET_PAGE_BITS);
2025     if (!p) {
2026         return;
2027     }
2028 
2029     pages = page_collection_lock(start, start + len);
2030     assert_page_locked(p);
2031     if (!p->code_bitmap &&
2032         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
2033         build_page_bitmap(p);
2034     }
2035     if (p->code_bitmap) {
2036         unsigned int nr;
2037         unsigned long b;
2038 
2039         nr = start & ~TARGET_PAGE_MASK;
2040         b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
2041         if (b & ((1 << len) - 1)) {
2042             goto do_invalidate;
2043         }
2044     } else {
2045     do_invalidate:
2046         tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1);
2047     }
2048     page_collection_unlock(pages);
2049 }
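
/*
 * Worked example for the bitmap test above (illustrative, assuming 4 KiB
 * target pages and 64-bit longs): an 8-byte write at page offset 0x48
 * yields nr = 0x48, so the check reads code_bitmap[1] (BIT_WORD(0x48))
 * shifted right by 8 (0x48 % 64); if any of the low 8 bits are set,
 * translated code overlaps bytes 0x48..0x4f and the range is invalidated.
 */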
2050 #else
2051 /* Called with mmap_lock held. If pc is not 0 then it indicates the
2052  * host PC of the faulting store instruction that caused this invalidate.
2053  * Returns true if the caller needs to abort execution of the current
2054  * TB (because it was modified by this store and the guest CPU has
2055  * precise-SMC semantics).
2056  */
2057 static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
2058 {
2059     TranslationBlock *tb;
2060     PageDesc *p;
2061     int n;
2062 #ifdef TARGET_HAS_PRECISE_SMC
2063     TranslationBlock *current_tb = NULL;
2064     CPUState *cpu = current_cpu;
2065     CPUArchState *env = NULL;
2066     int current_tb_modified = 0;
2067     target_ulong current_pc = 0;
2068     target_ulong current_cs_base = 0;
2069     uint32_t current_flags = 0;
2070 #endif
2071 
2072     assert_memory_lock();
2073 
2074     addr &= TARGET_PAGE_MASK;
2075     p = page_find(addr >> TARGET_PAGE_BITS);
2076     if (!p) {
2077         return false;
2078     }
2079 
2080     tb_lock();
2081 #ifdef TARGET_HAS_PRECISE_SMC
2082     if (p->first_tb && pc != 0) {
2083         current_tb = tcg_tb_lookup(pc);
2084     }
2085     if (cpu != NULL) {
2086         env = cpu->env_ptr;
2087     }
2088 #endif
2089     assert_page_locked(p);
2090     PAGE_FOR_EACH_TB(p, tb, n) {
2091 #ifdef TARGET_HAS_PRECISE_SMC
2092         if (current_tb == tb &&
2093             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2094             /* If we are modifying the current TB, we must stop
2095                its execution. We could be more precise by checking
2096                that the modification is after the current PC, but it
2097                would require a specialized function to partially
2098                restore the CPU state */
2099 
2100             current_tb_modified = 1;
2101             cpu_restore_state_from_tb(cpu, current_tb, pc, true);
2102             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2103                                  &current_flags);
2104         }
2105 #endif /* TARGET_HAS_PRECISE_SMC */
2106         tb_phys_invalidate(tb, addr);
2107     }
2108     p->first_tb = (uintptr_t)NULL;
2109 #ifdef TARGET_HAS_PRECISE_SMC
2110     if (current_tb_modified) {
2111         /* Force execution of one insn next time.  */
2112         cpu->cflags_next_tb = 1 | curr_cflags();
2113         /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
2114          * back into the cpu_exec loop. */
2115         return true;
2116     }
2117 #endif
2118     tb_unlock();
2119 
2120     return false;
2121 }
2122 #endif
2123 
2124 #if !defined(CONFIG_USER_ONLY)
2125 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
2126 {
2127     ram_addr_t ram_addr;
2128     MemoryRegion *mr;
2129     hwaddr l = 1;
2130 
2131     rcu_read_lock();
2132     mr = address_space_translate(as, addr, &addr, &l, false, attrs);
2133     if (!(memory_region_is_ram(mr)
2134           || memory_region_is_romd(mr))) {
2135         rcu_read_unlock();
2136         return;
2137     }
2138     ram_addr = memory_region_get_ram_addr(mr) + addr;
2139     tb_lock();
2140     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
2141     tb_unlock();
2142     rcu_read_unlock();
2143 }
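
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * breakpoint-insertion path could use this to force retranslation of any
 * TB covering the breakpoint address:
 *
 *     tb_invalidate_phys_addr(cpu->as, bp_addr, MEMTXATTRS_UNSPECIFIED);
 */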
2144 #endif /* !defined(CONFIG_USER_ONLY) */
2145 
2146 /* Called with tb_lock held.  */
2147 void tb_check_watchpoint(CPUState *cpu)
2148 {
2149     TranslationBlock *tb;
2150 
2151     tb = tcg_tb_lookup(cpu->mem_io_pc);
2152     if (tb) {
2153         /* We can use retranslation to find the PC.  */
2154         cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc, true);
2155         tb_phys_invalidate(tb, -1);
2156     } else {
2157         /* The exception probably happened in a helper.  The CPU state should
2158            have been saved before calling it. Fetch the PC from there.  */
2159         CPUArchState *env = cpu->env_ptr;
2160         target_ulong pc, cs_base;
2161         tb_page_addr_t addr;
2162         uint32_t flags;
2163 
2164         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2165         addr = get_page_addr_code(env, pc);
2166         tb_invalidate_phys_range(addr, addr + 1);
2167     }
2168 }
2169 
2170 #ifndef CONFIG_USER_ONLY
2171 /* in deterministic execution mode, instructions doing device I/O
2172  * must be at the end of the TB.
2173  *
2174  * Called by softmmu_template.h, with iothread mutex not held.
2175  */
2176 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2177 {
2178 #if defined(TARGET_MIPS) || defined(TARGET_SH4)
2179     CPUArchState *env = cpu->env_ptr;
2180 #endif
2181     TranslationBlock *tb;
2182     uint32_t n;
2183 
2184     tb_lock();
2185     tb = tcg_tb_lookup(retaddr);
2186     if (!tb) {
2187         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2188                   (void *)retaddr);
2189     }
2190     cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2191 
2192     /* On MIPS and SH, delay slot instructions can only be restarted if
2193        they were already the first instruction in the TB.  If this is not
2194        the first instruction in a TB then re-execute the preceding
2195        branch.  */
2196     n = 1;
2197 #if defined(TARGET_MIPS)
2198     if ((env->hflags & MIPS_HFLAG_BMASK) != 0
2199         && env->active_tc.PC != tb->pc) {
2200         env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
2201         cpu->icount_decr.u16.low++;
2202         env->hflags &= ~MIPS_HFLAG_BMASK;
2203         n = 2;
2204     }
2205 #elif defined(TARGET_SH4)
2206     if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
2207         && env->pc != tb->pc) {
2208         env->pc -= 2;
2209         cpu->icount_decr.u16.low++;
2210         env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
2211         n = 2;
2212     }
2213 #endif
2214 
2215     /* Generate a new TB executing the I/O insn.  */
2216     cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
2217 
2218     if (tb_cflags(tb) & CF_NOCACHE) {
2219         if (tb->orig_tb) {
2220             /* Invalidate original TB if this TB was generated in
2221              * cpu_exec_nocache() */
2222             tb_phys_invalidate(tb->orig_tb, -1);
2223         }
2224         tcg_tb_remove(tb);
2225     }
2226 
2227     /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
2228      * the first in the TB) then we end up generating a whole new TB and
2229      * repeating the fault, which is horribly inefficient.
2230      * Better would be to execute just this insn uncached, or generate a
2231      * second new TB.
2232      *
2233      * cpu_loop_exit_noexc will longjmp back to cpu_exec where the
2234      * tb_lock gets reset.
2235      */
2236     cpu_loop_exit_noexc(cpu);
2237 }
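
/*
 * Illustrative note (not part of the original source): with n == 2 on
 * MIPS, cflags_next_tb requests a TB of exactly two insns (the branch
 * plus its delay slot), and CF_LAST_IO marks the final insn as the one
 * performing the device access, so the I/O ends the TB as required.
 */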
2238 
2239 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
2240 {
2241     unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
2242 
2243     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
2244         atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
2245     }
2246 }
2247 
2248 void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
2249 {
2250     /* Discard jump cache entries for any tb which might overlap the
2251        flushed page.  */
2252     tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
2253     tb_jmp_cache_clear_page(cpu, addr);
2254 }
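
/*
 * Worked example (illustrative, 4 KiB target pages): a TB starting at
 * addr - 0x40 can extend across the page boundary into the flushed page.
 * Its jump-cache slot is hashed from its start address, which lies in the
 * preceding page, hence both pages' slots are discarded above.
 */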
2255 
2256 static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
2257                                  struct qht_stats hst)
2258 {
2259     uint32_t hgram_opts;
2260     size_t hgram_bins;
2261     char *hgram;
2262 
2263     if (!hst.head_buckets) {
2264         return;
2265     }
2266     cpu_fprintf(f, "TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2267                 hst.used_head_buckets, hst.head_buckets,
2268                 (double)hst.used_head_buckets / hst.head_buckets * 100);
2269 
2270     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2271     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2272     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2273         hgram_opts |= QDIST_PR_NODECIMAL;
2274     }
2275     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2276     cpu_fprintf(f, "TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2277                 qdist_avg(&hst.occupancy) * 100, hgram);
2278     g_free(hgram);
2279 
2280     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2281     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2282     if (hgram_bins > 10) {
2283         hgram_bins = 10;
2284     } else {
2285         hgram_bins = 0;
2286         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2287     }
2288     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2289     cpu_fprintf(f, "TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2290                 qdist_avg(&hst.chain), hgram);
2291     g_free(hgram);
2292 }
2293 
2294 struct tb_tree_stats {
2295     size_t nb_tbs;
2296     size_t host_size;
2297     size_t target_size;
2298     size_t max_target_size;
2299     size_t direct_jmp_count;
2300     size_t direct_jmp2_count;
2301     size_t cross_page;
2302 };
2303 
2304 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2305 {
2306     const TranslationBlock *tb = value;
2307     struct tb_tree_stats *tst = data;
2308 
2309     tst->nb_tbs++;
2310     tst->host_size += tb->tc.size;
2311     tst->target_size += tb->size;
2312     if (tb->size > tst->max_target_size) {
2313         tst->max_target_size = tb->size;
2314     }
2315     if (tb->page_addr[1] != -1) {
2316         tst->cross_page++;
2317     }
2318     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2319         tst->direct_jmp_count++;
2320         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2321             tst->direct_jmp2_count++;
2322         }
2323     }
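    /* returning false keeps the traversal going (GTraverseFunc stops
     * as soon as the callback returns TRUE) */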
2324     return false;
2325 }
2326 
2327 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
2328 {
2329     struct tb_tree_stats tst = {};
2330     struct qht_stats hst;
2331     size_t nb_tbs;
2332 
2333     tcg_tb_foreach(tb_tree_stats_iter, &tst);
2334     nb_tbs = tst.nb_tbs;
2335     /* XXX: avoid using doubles ? */
2336     cpu_fprintf(f, "Translation buffer state:\n");
2337     /*
2338      * Report total code size including the padding and TB structs;
2339      * otherwise users might think "-tb-size" is not honoured.
2340      * For avg host size we use the precise numbers from tb_tree_stats though.
2341      */
2342     cpu_fprintf(f, "gen code size       %zu/%zu\n",
2343                 tcg_code_size(), tcg_code_capacity());
2344     cpu_fprintf(f, "TB count            %zu\n", nb_tbs);
2345     cpu_fprintf(f, "TB avg target size  %zu max=%zu bytes\n",
2346                 nb_tbs ? tst.target_size / nb_tbs : 0,
2347                 tst.max_target_size);
2348     cpu_fprintf(f, "TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2349                 nb_tbs ? tst.host_size / nb_tbs : 0,
2350                 tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2351     cpu_fprintf(f, "cross page TB count %zu (%zu%%)\n", tst.cross_page,
2352                 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2353     cpu_fprintf(f, "direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2354                 tst.direct_jmp_count,
2355                 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2356                 tst.direct_jmp2_count,
2357                 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2358 
2359     qht_statistics_init(&tb_ctx.htable, &hst);
2360     print_qht_statistics(f, cpu_fprintf, hst);
2361     qht_statistics_destroy(&hst);
2362 
2363     cpu_fprintf(f, "\nStatistics:\n");
2364     cpu_fprintf(f, "TB flush count      %u\n",
2365                 atomic_read(&tb_ctx.tb_flush_count));
2366     cpu_fprintf(f, "TB invalidate count %zu\n", tcg_tb_phys_invalidate_count());
2367     cpu_fprintf(f, "TLB flush count     %zu\n", tlb_flush_count());
2368     tcg_dump_info(f, cpu_fprintf);
2369 }
2370 
2371 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
2372 {
2373     tcg_dump_op_count(f, cpu_fprintf);
2374 }
2375 
2376 #else /* CONFIG_USER_ONLY */
2377 
2378 void cpu_interrupt(CPUState *cpu, int mask)
2379 {
2380     g_assert(qemu_mutex_iothread_locked());
2381     cpu->interrupt_request |= mask;
2382     cpu->icount_decr.u16.high = -1;
2383 }
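
/*
 * Illustrative usage sketch (not part of the original source):
 *
 *     qemu_mutex_lock_iothread();
 *     cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
 *     qemu_mutex_unlock_iothread();
 *
 * Writing -1 to icount_decr.u16.high makes the translated code's count
 * check go negative, so the vCPU drops back into the main loop at the
 * next TB boundary and services the request.
 */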
2384 
2385 /*
2386  * Walks guest process memory "regions" one by one
2387  * and calls callback function 'fn' for each region.
2388  */
2389 struct walk_memory_regions_data {
2390     walk_memory_regions_fn fn;
2391     void *priv;
2392     target_ulong start;
2393     int prot;
2394 };
2395 
2396 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2397                                    target_ulong end, int new_prot)
2398 {
2399     if (data->start != -1u) {
2400         int rc = data->fn(data->priv, data->start, end, data->prot);
2401         if (rc != 0) {
2402             return rc;
2403         }
2404     }
2405 
2406     data->start = (new_prot ? end : -1u);
2407     data->prot = new_prot;
2408 
2409     return 0;
2410 }
2411 
2412 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2413                                  target_ulong base, int level, void **lp)
2414 {
2415     target_ulong pa;
2416     int i, rc;
2417 
2418     if (*lp == NULL) {
2419         return walk_memory_regions_end(data, base, 0);
2420     }
2421 
2422     if (level == 0) {
2423         PageDesc *pd = *lp;
2424 
2425         for (i = 0; i < V_L2_SIZE; ++i) {
2426             int prot = pd[i].flags;
2427 
2428             pa = base | (i << TARGET_PAGE_BITS);
2429             if (prot != data->prot) {
2430                 rc = walk_memory_regions_end(data, pa, prot);
2431                 if (rc != 0) {
2432                     return rc;
2433                 }
2434             }
2435         }
2436     } else {
2437         void **pp = *lp;
2438 
2439         for (i = 0; i < V_L2_SIZE; ++i) {
2440             pa = base | ((target_ulong)i <<
2441                 (TARGET_PAGE_BITS + V_L2_BITS * level));
2442             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2443             if (rc != 0) {
2444                 return rc;
2445             }
2446         }
2447     }
2448 
2449     return 0;
2450 }
2451 
2452 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2453 {
2454     struct walk_memory_regions_data data;
2455     uintptr_t i, l1_sz = v_l1_size;
2456 
2457     data.fn = fn;
2458     data.priv = priv;
2459     data.start = -1u;
2460     data.prot = 0;
2461 
2462     for (i = 0; i < l1_sz; i++) {
2463         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2464         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2465         if (rc != 0) {
2466             return rc;
2467         }
2468     }
2469 
2470     return walk_memory_regions_end(&data, 0, 0);
2471 }
2472 
2473 static int dump_region(void *priv, target_ulong start,
2474     target_ulong end, unsigned long prot)
2475 {
2476     FILE *f = (FILE *)priv;
2477 
2478     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2479         " "TARGET_FMT_lx" %c%c%c\n",
2480         start, end, end - start,
2481         ((prot & PAGE_READ) ? 'r' : '-'),
2482         ((prot & PAGE_WRITE) ? 'w' : '-'),
2483         ((prot & PAGE_EXEC) ? 'x' : '-'));
2484 
2485     return 0;
2486 }
2487 
2488 /* dump memory mappings */
2489 void page_dump(FILE *f)
2490 {
2491     const int length = sizeof(target_ulong) * 2;
2492     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2493             length, "start", length, "end", length, "size", "prot");
2494     walk_memory_regions(f, dump_region);
2495 }
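
/*
 * Example output (illustrative values for a 32-bit guest):
 *
 *     start    end      size     prot
 *     00010000-00094000 00084000 r-x
 *     00400000-00401000 00001000 rw-
 */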
2496 
2497 int page_get_flags(target_ulong address)
2498 {
2499     PageDesc *p;
2500 
2501     p = page_find(address >> TARGET_PAGE_BITS);
2502     if (!p) {
2503         return 0;
2504     }
2505     return p->flags;
2506 }
2507 
2508 /* Modify the flags of a page and invalidate the code if necessary.
2509    The flag PAGE_WRITE_ORG is positioned automatically depending
2510    on PAGE_WRITE.  The mmap_lock should already be held.  */
2511 void page_set_flags(target_ulong start, target_ulong end, int flags)
2512 {
2513     target_ulong addr, len;
2514 
2515     /* This function should never be called with addresses outside the
2516        guest address space.  If this assert fires, it probably indicates
2517        a missing call to h2g_valid.  */
2518 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2519     assert(end <= ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2520 #endif
2521     assert(start < end);
2522     assert_memory_lock();
2523 
2524     start = start & TARGET_PAGE_MASK;
2525     end = TARGET_PAGE_ALIGN(end);
2526 
2527     if (flags & PAGE_WRITE) {
2528         flags |= PAGE_WRITE_ORG;
2529     }
2530 
2531     for (addr = start, len = end - start;
2532          len != 0;
2533          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2534         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2535 
2536         /* If the write protection bit is set, then we invalidate
2537            the code inside.  */
2538         if (!(p->flags & PAGE_WRITE) &&
2539             (flags & PAGE_WRITE) &&
2540             p->first_tb) {
2541             tb_invalidate_phys_page(addr, 0);
2542         }
2543         p->flags = flags;
2544     }
2545 }
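
/*
 * Illustrative sketch (not part of the original source): a target mmap()
 * emulation might register a fresh writable anonymous mapping like so:
 *
 *     page_set_flags(start, start + len,
 *                    PAGE_VALID | PAGE_READ | PAGE_WRITE);
 *
 * Since PAGE_WRITE is set, PAGE_WRITE_ORG is added automatically, which
 * later allows page_unprotect() to restore write access after the page
 * has been write-protected to guard translated code.
 */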
2546 
2547 int page_check_range(target_ulong start, target_ulong len, int flags)
2548 {
2549     PageDesc *p;
2550     target_ulong end;
2551     target_ulong addr;
2552 
2553     /* This function should never be called with addresses outside the
2554        guest address space.  If this assert fires, it probably indicates
2555        a missing call to h2g_valid.  */
2556 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2557     assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2558 #endif
2559 
2560     if (len == 0) {
2561         return 0;
2562     }
2563     if (start + len - 1 < start) {
2564         /* We've wrapped around.  */
2565         return -1;
2566     }
2567 
2568     /* must do this before we lose bits in the next step */
2569     end = TARGET_PAGE_ALIGN(start + len);
2570     start = start & TARGET_PAGE_MASK;
2571 
2572     for (addr = start, len = end - start;
2573          len != 0;
2574          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2575         p = page_find(addr >> TARGET_PAGE_BITS);
2576         if (!p) {
2577             return -1;
2578         }
2579         if (!(p->flags & PAGE_VALID)) {
2580             return -1;
2581         }
2582 
2583         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2584             return -1;
2585         }
2586         if (flags & PAGE_WRITE) {
2587             if (!(p->flags & PAGE_WRITE_ORG)) {
2588                 return -1;
2589             }
2590             /* unprotect the page if it was put read-only because it
2591                contains translated code */
2592             if (!(p->flags & PAGE_WRITE)) {
2593                 if (!page_unprotect(addr, 0)) {
2594                     return -1;
2595                 }
2596             }
2597         }
2598     }
2599     return 0;
2600 }
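
/*
 * Illustrative sketch (not part of the original source): syscall emulation
 * might validate a guest buffer up front; TARGET_EFAULT is the linux-user
 * errno convention assumed here.
 *
 *     if (page_check_range(guest_addr, size, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -TARGET_EFAULT;
 *     }
 */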
2601 
2602 /* called from signal handler: invalidate the code and unprotect the
2603  * page. Return 0 if the fault was not handled, 1 if it was handled,
2604  * and 2 if it was handled but the caller must cause the TB to be
2605  * immediately exited. (We can only return 2 if the 'pc' argument is
2606  * non-zero.)
2607  */
2608 int page_unprotect(target_ulong address, uintptr_t pc)
2609 {
2610     unsigned int prot;
2611     bool current_tb_invalidated;
2612     PageDesc *p;
2613     target_ulong host_start, host_end, addr;
2614 
2615     /* Technically this isn't safe inside a signal handler.  However we
2616        know this only ever happens in a synchronous SEGV handler, so in
2617        practice it seems to be ok.  */
2618     mmap_lock();
2619 
2620     p = page_find(address >> TARGET_PAGE_BITS);
2621     if (!p) {
2622         mmap_unlock();
2623         return 0;
2624     }
2625 
2626     /* if the page was really writable, then we change its
2627        protection back to writable */
2628     if (p->flags & PAGE_WRITE_ORG) {
2629         current_tb_invalidated = false;
2630         if (p->flags & PAGE_WRITE) {
2631             /* If the page is actually marked WRITE then assume this is because
2632              * this thread raced with another one which got here first and
2633              * set the page to PAGE_WRITE and did the TB invalidate for us.
2634              */
2635 #ifdef TARGET_HAS_PRECISE_SMC
2636             TranslationBlock *current_tb = tcg_tb_lookup(pc);
2637             if (current_tb) {
2638                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2639             }
2640 #endif
2641         } else {
2642             host_start = address & qemu_host_page_mask;
2643             host_end = host_start + qemu_host_page_size;
2644 
2645             prot = 0;
2646             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2647                 p = page_find(addr >> TARGET_PAGE_BITS);
2648                 p->flags |= PAGE_WRITE;
2649                 prot |= p->flags;
2650 
2651                 /* and since the content will be modified, we must invalidate
2652                    the corresponding translated code. */
2653                 current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2654 #ifdef CONFIG_USER_ONLY
2655                 if (DEBUG_TB_CHECK_GATE) {
2656                     tb_invalidate_check(addr);
2657                 }
2658 #endif
2659             }
2660             mprotect((void *)g2h(host_start), qemu_host_page_size,
2661                      prot & PAGE_BITS);
2662         }
2663         mmap_unlock();
2664         /* If current TB was invalidated return to main loop */
2665         return current_tb_invalidated ? 2 : 1;
2666     }
2667     mmap_unlock();
2668     return 0;
2669 }
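
/*
 * Illustrative sketch (not part of the original source): a user-mode SEGV
 * handler might dispatch on the return value roughly as follows, where
 * fault_addr and host_pc are hypothetical locals:
 *
 *     switch (page_unprotect(h2g(fault_addr), host_pc)) {
 *     case 2:  // handled, but the current TB was invalidated: exit it
 *         cpu_loop_exit_noexc(cpu);  // does not return
 *     case 1:  // handled: retry the faulting access
 *         return 1;
 *     default: // not a write-protection fault of ours
 *         break;
 *     }
 */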
2670 #endif /* CONFIG_USER_ONLY */
2671 
2672 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2673 void tcg_flush_softmmu_tlb(CPUState *cs)
2674 {
2675 #ifdef CONFIG_SOFTMMU
2676     tlb_flush(cs);
2677 #endif
2678 }
2679