xref: /openbmc/qemu/accel/tcg/tb-maint.c (revision 55abfc1f)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/tb-flush.h"
27 #include "exec/translate-all.h"
28 #include "sysemu/tcg.h"
29 #include "tcg/tcg.h"
30 #include "tb-hash.h"
31 #include "tb-context.h"
32 #include "internal-common.h"
33 #include "internal-target.h"
34 
35 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Each link is a uintptr_t holding a TranslationBlock pointer whose least
 * significant bit is a tag.  While iterating, @tb is the untagged pointer
 * and @n is the tag of the link that led to it; the following link is read
 * from tb->field[n].  Iteration ends when the untagged pointer is NULL.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Walk the list rooted at @head_tb->jmp_list_head via jmp_list_next[]. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
44 
45 static bool tb_cmp(const void *ap, const void *bp)
46 {
47     const TranslationBlock *a = ap;
48     const TranslationBlock *b = bp;
49 
50     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
51             a->cs_base == b->cs_base &&
52             a->flags == b->flags &&
53             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
54             tb_page_addr0(a) == tb_page_addr0(b) &&
55             tb_page_addr1(a) == tb_page_addr1(b));
56 }
57 
58 void tb_htable_init(void)
59 {
60     unsigned int mode = QHT_MODE_AUTO_RESIZE;
61 
62     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
63 }
64 
65 typedef struct PageDesc PageDesc;
66 
67 #ifdef CONFIG_USER_ONLY
68 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * The @pd argument is therefore ignored and only mmap_lock is checked.
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

/* User-mode has no per-page locks, so there is nothing to acquire here. */
static inline void tb_lock_pages(const TranslationBlock *tb) { }
75 
76 /*
77  * For user-only, since we are protecting all of memory with a single lock,
78  * and because the two pages of a TranslationBlock are always contiguous,
79  * use a single data structure to record all TranslationBlocks.
80  */
81 static IntervalTreeRoot tb_root;
82 
83 static void tb_remove_all(void)
84 {
85     assert_memory_lock();
86     memset(&tb_root, 0, sizeof(tb_root));
87 }
88 
89 /* Call with mmap_lock held. */
90 static void tb_record(TranslationBlock *tb)
91 {
92     vaddr addr;
93     int flags;
94 
95     assert_memory_lock();
96     tb->itree.last = tb->itree.start + tb->size - 1;
97 
98     /* translator_loop() must have made all TB pages non-writable */
99     addr = tb_page_addr0(tb);
100     flags = page_get_flags(addr);
101     assert(!(flags & PAGE_WRITE));
102 
103     addr = tb_page_addr1(tb);
104     if (addr != -1) {
105         flags = page_get_flags(addr);
106         assert(!(flags & PAGE_WRITE));
107     }
108 
109     interval_tree_insert(&tb->itree, &tb_root);
110 }
111 
112 /* Call with mmap_lock held. */
113 static void tb_remove(TranslationBlock *tb)
114 {
115     assert_memory_lock();
116     interval_tree_remove(&tb->itree, &tb_root);
117 }
118 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * Iterate over every TB intersecting [start, last] in the interval tree.
 * @pagedesc is not used by this user-mode variant.  The successor N is
 * fetched before the loop body runs, so the body does not need T to stay
 * in the tree in order to advance.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
    for (T = foreach_tb_first(start, last),             \
         N = foreach_tb_next(T, start, last);           \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, last))

/* Iterator state for PAGE_FOR_EACH_TB: the pre-fetched next TB. */
typedef TranslationBlock *PageForEachNext;
127 
128 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
129                                         tb_page_addr_t last)
130 {
131     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
132     return n ? container_of(n, TranslationBlock, itree) : NULL;
133 }
134 
135 static PageForEachNext foreach_tb_next(PageForEachNext tb,
136                                        tb_page_addr_t start,
137                                        tb_page_addr_t last)
138 {
139     IntervalTreeNode *n;
140 
141     if (tb) {
142         n = interval_tree_iter_next(&tb->itree, start, last);
143         if (n) {
144             return container_of(n, TranslationBlock, itree);
145         }
146     }
147     return NULL;
148 }
149 
150 #else
151 /*
152  * In system mode we want L1_MAP to be based on ram offsets.
153  */
154 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
155 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
156 #else
157 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
158 #endif
159 
160 /* Size of the L2 (and L3, etc) page tables.  */
161 #define V_L2_BITS 10
162 #define V_L2_SIZE (1 << V_L2_BITS)
163 
164 /*
165  * L1 Mapping properties
166  */
167 static int v_l1_size;
168 static int v_l1_shift;
169 static int v_l2_levels;
170 
171 /*
172  * The bottom level has pointers to PageDesc, and is indexed by
173  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
174  */
175 #define V_L1_MIN_BITS 4
176 #define V_L1_MAX_BITS (V_L2_BITS + 3)
177 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
178 
179 static void *l1_map[V_L1_MAX_SIZE];
180 
/* Per-page descriptor stored in the bottom level of l1_map. */
struct PageDesc {
    /* taken and released through page_lock()/page_unlock() */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page; a tagged pointer whose low
     * bit is the index into the TB's page_next[] array
     */
    uintptr_t first_tb;
};
186 
187 void page_table_config_init(void)
188 {
189     uint32_t v_l1_bits;
190 
191     assert(TARGET_PAGE_BITS);
192     /* The bits remaining after N lower levels of page tables.  */
193     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
194     if (v_l1_bits < V_L1_MIN_BITS) {
195         v_l1_bits += V_L2_BITS;
196     }
197 
198     v_l1_size = 1 << v_l1_bits;
199     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
200     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
201 
202     assert(v_l1_bits <= V_L1_MAX_BITS);
203     assert(v_l1_shift % V_L2_BITS == 0);
204     assert(v_l2_levels >= 0);
205 }
206 
/*
 * Look up the PageDesc for page @index in the multi-level l1_map.
 *
 * @index: page index (callers pass an address shifted by TARGET_PAGE_BITS)
 * @alloc: when true, missing intermediate tables and the leaf PageDesc
 *         array are allocated on demand; when false, a missing level
 *         makes the function return NULL.
 *
 * New tables are published with a compare-and-swap, so no lock is taken
 * here; a thread that loses the race frees its own allocation and uses
 * the table installed by the winner.
 */
static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (int i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                /* Someone else installed a table first: use theirs. */
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    /* Bottom level: an array of V_L2_SIZE PageDesc. */
    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }

        pd = g_new0(PageDesc, V_L2_SIZE);
        for (int i = 0; i < V_L2_SIZE; i++) {
            qemu_spin_init(&pd[i].lock);
        }

        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
            /* Lost the race: tear down our array and use the winner's. */
            for (int i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_destroy(&pd[i].lock);
            }
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}
261 
262 static inline PageDesc *page_find(tb_page_addr_t index)
263 {
264     return page_find_alloc(index, false);
265 }
266 
267 /**
268  * struct page_entry - page descriptor entry
269  * @pd:     pointer to the &struct PageDesc of the page this entry represents
270  * @index:  page index of the page
271  * @locked: whether the page is locked
272  *
273  * This struct helps us keep track of the locked state of a page, without
274  * bloating &struct PageDesc.
275  *
276  * A page lock protects accesses to all fields of &struct PageDesc.
277  *
278  * See also: &struct page_collection.
279  */
280 struct page_entry {
281     PageDesc *pd;
282     tb_page_addr_t index;
283     bool locked;
284 };
285 
286 /**
287  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
288  * @tree:   Binary search tree (BST) of the pages, with key == page index
289  * @max:    Pointer to the page in @tree with the highest page index
290  *
291  * To avoid deadlock we lock pages in ascending order of page index.
292  * When operating on a set of pages, we need to keep track of them so that
293  * we can lock them in order and also unlock them later. For this we collect
294  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
295  * @tree implementation we use does not provide an O(1) operation to obtain the
296  * highest-ranked element, we use @max to keep track of the inserted page
297  * with the highest index. This is valuable because if a page is not in
298  * the tree and its index is higher than @max's, then we can lock it
299  * without breaking the locking order rule.
300  *
301  * Note on naming: 'struct page_set' would be shorter, but we already have a few
302  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
303  *
304  * See also: page_collection_lock().
305  */
306 struct page_collection {
307     QTree *tree;
308     struct page_entry *max;
309 };
310 
/*
 * System-mode iteration over the TBs of one page.  The iterator state is
 * the tag of the current link, i.e. the index into tb->page_next[].
 * @start and @last are not used by this variant; the list rooted at
 * @pagedesc->first_tb is walked instead.
 */
typedef int PageForEachNext;
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
314 
315 #ifdef CONFIG_DEBUG_TCG
316 
317 static __thread GHashTable *ht_pages_locked_debug;
318 
319 static void ht_pages_locked_debug_init(void)
320 {
321     if (ht_pages_locked_debug) {
322         return;
323     }
324     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
325 }
326 
327 static bool page_is_locked(const PageDesc *pd)
328 {
329     PageDesc *found;
330 
331     ht_pages_locked_debug_init();
332     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
333     return !!found;
334 }
335 
336 static void page_lock__debug(PageDesc *pd)
337 {
338     ht_pages_locked_debug_init();
339     g_assert(!page_is_locked(pd));
340     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
341 }
342 
343 static void page_unlock__debug(const PageDesc *pd)
344 {
345     bool removed;
346 
347     ht_pages_locked_debug_init();
348     g_assert(page_is_locked(pd));
349     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
350     g_assert(removed);
351 }
352 
353 static void do_assert_page_locked(const PageDesc *pd,
354                                   const char *file, int line)
355 {
356     if (unlikely(!page_is_locked(pd))) {
357         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
358                      pd, file, line);
359         abort();
360     }
361 }
362 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
363 
364 void assert_no_pages_locked(void)
365 {
366     ht_pages_locked_debug_init();
367     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
368 }
369 
370 #else /* !CONFIG_DEBUG_TCG */
371 
/* Without CONFIG_DEBUG_TCG the lock-tracking hooks compile to nothing. */
static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }
static inline void assert_page_locked(const PageDesc *pd) { }
375 
376 #endif /* CONFIG_DEBUG_TCG */
377 
378 static void page_lock(PageDesc *pd)
379 {
380     page_lock__debug(pd);
381     qemu_spin_lock(&pd->lock);
382 }
383 
384 /* Like qemu_spin_trylock, returns false on success */
385 static bool page_trylock(PageDesc *pd)
386 {
387     bool busy = qemu_spin_trylock(&pd->lock);
388     if (!busy) {
389         page_lock__debug(pd);
390     }
391     return busy;
392 }
393 
394 static void page_unlock(PageDesc *pd)
395 {
396     qemu_spin_unlock(&pd->lock);
397     page_unlock__debug(pd);
398 }
399 
400 void tb_lock_page0(tb_page_addr_t paddr)
401 {
402     page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
403 }
404 
405 void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
406 {
407     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
408     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
409     PageDesc *pd0, *pd1;
410 
411     if (pindex0 == pindex1) {
412         /* Identical pages, and the first page is already locked. */
413         return;
414     }
415 
416     pd1 = page_find_alloc(pindex1, true);
417     if (pindex0 < pindex1) {
418         /* Correct locking order, we may block. */
419         page_lock(pd1);
420         return;
421     }
422 
423     /* Incorrect locking order, we cannot block lest we deadlock. */
424     if (!page_trylock(pd1)) {
425         return;
426     }
427 
428     /*
429      * Drop the lock on page0 and get both page locks in the right order.
430      * Restart translation via longjmp.
431      */
432     pd0 = page_find_alloc(pindex0, false);
433     page_unlock(pd0);
434     page_lock(pd1);
435     page_lock(pd0);
436     siglongjmp(tcg_ctx->jmp_trans, -3);
437 }
438 
439 void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
440 {
441     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
442     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
443 
444     if (pindex0 != pindex1) {
445         page_unlock(page_find_alloc(pindex1, false));
446     }
447 }
448 
449 static void tb_lock_pages(TranslationBlock *tb)
450 {
451     tb_page_addr_t paddr0 = tb_page_addr0(tb);
452     tb_page_addr_t paddr1 = tb_page_addr1(tb);
453     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
454     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
455 
456     if (unlikely(paddr0 == -1)) {
457         return;
458     }
459     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
460         if (pindex0 < pindex1) {
461             page_lock(page_find_alloc(pindex0, true));
462             page_lock(page_find_alloc(pindex1, true));
463             return;
464         }
465         page_lock(page_find_alloc(pindex1, true));
466     }
467     page_lock(page_find_alloc(pindex0, true));
468 }
469 
470 void tb_unlock_pages(TranslationBlock *tb)
471 {
472     tb_page_addr_t paddr0 = tb_page_addr0(tb);
473     tb_page_addr_t paddr1 = tb_page_addr1(tb);
474     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
475     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
476 
477     if (unlikely(paddr0 == -1)) {
478         return;
479     }
480     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
481         page_unlock(page_find_alloc(pindex1, false));
482     }
483     page_unlock(page_find_alloc(pindex0, false));
484 }
485 
486 static inline struct page_entry *
487 page_entry_new(PageDesc *pd, tb_page_addr_t index)
488 {
489     struct page_entry *pe = g_malloc(sizeof(*pe));
490 
491     pe->index = index;
492     pe->pd = pd;
493     pe->locked = false;
494     return pe;
495 }
496 
497 static void page_entry_destroy(gpointer p)
498 {
499     struct page_entry *pe = p;
500 
501     g_assert(pe->locked);
502     page_unlock(pe->pd);
503     g_free(pe);
504 }
505 
506 /* returns false on success */
507 static bool page_entry_trylock(struct page_entry *pe)
508 {
509     bool busy = page_trylock(pe->pd);
510     if (!busy) {
511         g_assert(!pe->locked);
512         pe->locked = true;
513     }
514     return busy;
515 }
516 
517 static void do_page_entry_lock(struct page_entry *pe)
518 {
519     page_lock(pe->pd);
520     g_assert(!pe->locked);
521     pe->locked = true;
522 }
523 
524 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
525 {
526     struct page_entry *pe = value;
527 
528     do_page_entry_lock(pe);
529     return FALSE;
530 }
531 
532 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
533 {
534     struct page_entry *pe = value;
535 
536     if (pe->locked) {
537         pe->locked = false;
538         page_unlock(pe->pd);
539     }
540     return FALSE;
541 }
542 
543 /*
544  * Trylock a page, and if successful, add the page to a collection.
545  * Returns true ("busy") if the page could not be locked; false otherwise.
546  */
547 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
548 {
549     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
550     struct page_entry *pe;
551     PageDesc *pd;
552 
553     pe = q_tree_lookup(set->tree, &index);
554     if (pe) {
555         return false;
556     }
557 
558     pd = page_find(index);
559     if (pd == NULL) {
560         return false;
561     }
562 
563     pe = page_entry_new(pd, index);
564     q_tree_insert(set->tree, &pe->index, pe);
565 
566     /*
567      * If this is either (1) the first insertion or (2) a page whose index
568      * is higher than any other so far, just lock the page and move on.
569      */
570     if (set->max == NULL || pe->index > set->max->index) {
571         set->max = pe;
572         do_page_entry_lock(pe);
573         return false;
574     }
575     /*
576      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
577      * locks in order.
578      */
579     return page_entry_trylock(pe);
580 }
581 
582 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
583 {
584     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
585     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
586 
587     if (a == b) {
588         return 0;
589     } else if (a < b) {
590         return -1;
591     }
592     return 1;
593 }
594 
/*
 * Lock a range of pages ([@start,@last]) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * @start and @last are addresses; they are converted to page indexes on
 * entry.  Returns a newly allocated collection that owns the locks; it is
 * released with page_collection_unlock().
 */
static struct page_collection *page_collection_lock(tb_page_addr_t start,
                                                    tb_page_addr_t last)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    last >>= TARGET_PAGE_BITS;
    g_assert(start <= last);

    /* Entries are unlocked and freed by page_entry_destroy. */
    set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /* (Re)lock every page already in the tree; a no-op on the first pass. */
    q_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= last; index++) {
        TranslationBlock *tb;
        PageForEachNext n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* busy: drop all locks, and reacquire in order */
            q_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* Also collect the pages of every TB that touches this page. */
        PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                q_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
644 
645 static void page_collection_unlock(struct page_collection *set)
646 {
647     /* entries are unlocked and freed via page_entry_destroy */
648     q_tree_destroy(set->tree);
649     g_free(set);
650 }
651 
652 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
653 static void tb_remove_all_1(int level, void **lp)
654 {
655     int i;
656 
657     if (*lp == NULL) {
658         return;
659     }
660     if (level == 0) {
661         PageDesc *pd = *lp;
662 
663         for (i = 0; i < V_L2_SIZE; ++i) {
664             page_lock(&pd[i]);
665             pd[i].first_tb = (uintptr_t)NULL;
666             page_unlock(&pd[i]);
667         }
668     } else {
669         void **pp = *lp;
670 
671         for (i = 0; i < V_L2_SIZE; ++i) {
672             tb_remove_all_1(level - 1, pp + i);
673         }
674     }
675 }
676 
677 static void tb_remove_all(void)
678 {
679     int i, l1_sz = v_l1_size;
680 
681     for (i = 0; i < l1_sz; i++) {
682         tb_remove_all_1(v_l2_levels, l1_map + i);
683     }
684 }
685 
686 /*
687  * Add the tb in the target page and protect it if necessary.
688  * Called with @p->lock held.
689  */
690 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
691 {
692     bool page_already_protected;
693 
694     assert_page_locked(p);
695 
696     tb->page_next[n] = p->first_tb;
697     page_already_protected = p->first_tb != 0;
698     p->first_tb = (uintptr_t)tb | n;
699 
700     /*
701      * If some code is already present, then the pages are already
702      * protected. So we handle the case where only the first TB is
703      * allocated in a physical page.
704      */
705     if (!page_already_protected) {
706         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
707     }
708 }
709 
710 static void tb_record(TranslationBlock *tb)
711 {
712     tb_page_addr_t paddr0 = tb_page_addr0(tb);
713     tb_page_addr_t paddr1 = tb_page_addr1(tb);
714     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
715     tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
716 
717     assert(paddr0 != -1);
718     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
719         tb_page_add(page_find_alloc(pindex1, false), tb, 1);
720     }
721     tb_page_add(page_find_alloc(pindex0, false), tb, 0);
722 }
723 
724 static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
725 {
726     TranslationBlock *tb1;
727     uintptr_t *pprev;
728     PageForEachNext n1;
729 
730     assert_page_locked(pd);
731     pprev = &pd->first_tb;
732     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
733         if (tb1 == tb) {
734             *pprev = tb1->page_next[n1];
735             return;
736         }
737         pprev = &tb1->page_next[n1];
738     }
739     g_assert_not_reached();
740 }
741 
742 static void tb_remove(TranslationBlock *tb)
743 {
744     tb_page_addr_t paddr0 = tb_page_addr0(tb);
745     tb_page_addr_t paddr1 = tb_page_addr1(tb);
746     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
747     tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
748 
749     assert(paddr0 != -1);
750     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
751         tb_page_remove(page_find_alloc(pindex1, false), tb);
752     }
753     tb_page_remove(page_find_alloc(pindex0, false), tb);
754 }
755 #endif /* CONFIG_USER_ONLY */
756 
757 /* flush all the translation blocks */
758 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
759 {
760     bool did_flush = false;
761 
762     mmap_lock();
763     /* If it is already been done on request of another CPU, just retry. */
764     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
765         goto done;
766     }
767     did_flush = true;
768 
769     CPU_FOREACH(cpu) {
770         tcg_flush_jmp_cache(cpu);
771     }
772 
773     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
774     tb_remove_all();
775 
776     tcg_region_reset_all();
777     /* XXX: flush processor icache at this point if cache flush is expensive */
778     qatomic_inc(&tb_ctx.tb_flush_count);
779 
780 done:
781     mmap_unlock();
782     if (did_flush) {
783         qemu_plugin_flush_cb();
784     }
785 }
786 
787 void tb_flush(CPUState *cpu)
788 {
789     if (tcg_enabled()) {
790         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
791 
792         if (cpu_in_serial_context(cpu)) {
793             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
794         } else {
795             async_safe_run_on_cpu(cpu, do_tb_flush,
796                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
797         }
798     }
799 }
800 
/*
 * remove @orig from its @n_orig-th jump list
 *
 * @orig:   the TB whose outgoing jump is being torn down
 * @n_orig: which of @orig's jump slots (index into jmp_dest[]) to remove
 *
 * The destination TB is taken from orig->jmp_dest[n_orig]; if there is
 * none, nothing needs to be done.  Otherwise @orig is unlinked from the
 * destination's incoming-jump list under the destination's jmp_lock.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jump_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            /* splice the entry out by redirecting the link pointing at it */
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
849 
850 /*
851  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
852  */
853 void tb_reset_jump(TranslationBlock *tb, int n)
854 {
855     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
856     tb_set_jmp_target(tb, n, addr);
857 }
858 
859 /* remove any jumps to the TB */
860 static inline void tb_jmp_unlink(TranslationBlock *dest)
861 {
862     TranslationBlock *tb;
863     int n;
864 
865     qemu_spin_lock(&dest->jmp_lock);
866 
867     TB_FOR_EACH_JMP(dest, tb, n) {
868         tb_reset_jump(tb, n);
869         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
870         /* No need to clear the list entry; setting the dest ptr is enough */
871     }
872     dest->jmp_list_head = (uintptr_t)NULL;
873 
874     qemu_spin_unlock(&dest->jmp_lock);
875 }
876 
877 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
878 {
879     CPUState *cpu;
880 
881     if (tb_cflags(tb) & CF_PCREL) {
882         /* A TB may be at any virtual address */
883         CPU_FOREACH(cpu) {
884             tcg_flush_jmp_cache(cpu);
885         }
886     } else {
887         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
888 
889         CPU_FOREACH(cpu) {
890             CPUJumpCache *jc = cpu->tb_jmp_cache;
891 
892             if (qatomic_read(&jc->array[h].tb) == tb) {
893                 qatomic_set(&jc->array[h].tb, NULL);
894             }
895         }
896     }
897 }
898 
/*
 * Invalidate @tb: mark it CF_INVALID, remove it from the hash table,
 * optionally from the page list(s), from the jump caches, and undo all
 * jump chaining from and to it.
 *
 * In user-mode, call with mmap_lock held.
 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 * locks held.
 */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    uint32_t h;
    tb_page_addr_t phys_pc;
    /* sampled before CF_INVALID is set, so the hash matches the insertion */
    uint32_t orig_cflags = tb_cflags(tb);

    assert_memory_lock();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb_page_addr0(tb);
    h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
                     tb->flags, tb->cs_base, orig_cflags);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        /* not in the table: stop here, the remaining steps are skipped */
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        tb_remove(tb);
    }

    /* remove the TB from the per-CPU jump caches */
    tb_jmp_cache_inval_tb(tb);

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
                tb_ctx.tb_phys_invalidate_count + 1);
}
943 
944 static void tb_phys_invalidate__locked(TranslationBlock *tb)
945 {
946     qemu_thread_jit_write();
947     do_tb_phys_invalidate(tb, true);
948     qemu_thread_jit_execute();
949 }
950 
951 /*
952  * Invalidate one TB.
953  * Called with mmap_lock held in user-mode.
954  */
955 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
956 {
957     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
958         tb_lock_pages(tb);
959         do_tb_phys_invalidate(tb, true);
960         tb_unlock_pages(tb);
961     } else {
962         do_tb_phys_invalidate(tb, false);
963     }
964 }
965 
966 /*
967  * Add a new TB and link it to the physical page tables.
968  * Called with mmap_lock held for user-mode emulation.
969  *
970  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
971  * Note that in !user-mode, another thread might have already added a TB
972  * for the same block of guest code that @tb corresponds to. In that case,
973  * the caller should discard the original @tb, and use instead the returned TB.
974  */
975 TranslationBlock *tb_link_page(TranslationBlock *tb)
976 {
977     void *existing_tb = NULL;
978     uint32_t h;
979 
980     assert_memory_lock();
981     tcg_debug_assert(!(tb->cflags & CF_INVALID));
982 
983     tb_record(tb);
984 
985     /* add in the hash table */
986     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
987                      tb->flags, tb->cs_base, tb->cflags);
988     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
989 
990     /* remove TB from the page(s) if we couldn't insert it */
991     if (unlikely(existing_tb)) {
992         tb_remove(tb);
993         tb_unlock_pages(tb);
994         return existing_tb;
995     }
996 
997     tb_unlock_pages(tb);
998     return tb;
999 }
1000 
1001 #ifdef CONFIG_USER_ONLY
1002 /*
1003  * Invalidate all TBs which intersect with the target address range.
1004  * Called with mmap_lock held for user-mode emulation.
1005  * NOTE: this function must not be called while a TB is running.
1006  */
1007 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1008 {
1009     TranslationBlock *tb;
1010     PageForEachNext n;
1011 
1012     assert_memory_lock();
1013 
1014     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1015         tb_phys_invalidate__locked(tb);
1016     }
1017 }
1018 
1019 /*
1020  * Invalidate all TBs which intersect with the target address page @addr.
1021  * Called with mmap_lock held for user-mode emulation
1022  * NOTE: this function must not be called while a TB is running.
1023  */
1024 void tb_invalidate_phys_page(tb_page_addr_t addr)
1025 {
1026     tb_page_addr_t start, last;
1027 
1028     start = addr & TARGET_PAGE_MASK;
1029     last = addr | ~TARGET_PAGE_MASK;
1030     tb_invalidate_phys_range(start, last);
1031 }
1032 
1033 /*
1034  * Called with mmap_lock held. If pc is not 0 then it indicates the
1035  * host PC of the faulting store instruction that caused this invalidate.
1036  * Returns true if the caller needs to abort execution of the current
1037  * TB (because it was modified by this store and the guest CPU has
1038  * precise-SMC semantics).
1039  */
1040 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1041 {
1042     TranslationBlock *current_tb;
1043     bool current_tb_modified;
1044     TranslationBlock *tb;
1045     PageForEachNext n;
1046     tb_page_addr_t last;
1047 
1048     /*
1049      * Without precise smc semantics, or when outside of a TB,
1050      * we can skip to invalidate.
1051      */
1052 #ifndef TARGET_HAS_PRECISE_SMC
1053     pc = 0;
1054 #endif
1055     if (!pc) {
1056         tb_invalidate_phys_page(addr);
1057         return false;
1058     }
1059 
1060     assert_memory_lock();
1061     current_tb = tcg_tb_lookup(pc);
1062 
1063     last = addr | ~TARGET_PAGE_MASK;
1064     addr &= TARGET_PAGE_MASK;
1065     current_tb_modified = false;
1066 
1067     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1068         if (current_tb == tb &&
1069             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1070             /*
1071              * If we are modifying the current TB, we must stop its
1072              * execution. We could be more precise by checking that
1073              * the modification is after the current PC, but it would
1074              * require a specialized function to partially restore
1075              * the CPU state.
1076              */
1077             current_tb_modified = true;
1078             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1079         }
1080         tb_phys_invalidate__locked(tb);
1081     }
1082 
1083     if (current_tb_modified) {
1084         /* Force execution of one insn next time.  */
1085         CPUState *cpu = current_cpu;
1086         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1087         return true;
1088     }
1089     return false;
1090 }
1091 #else
1092 /*
1093  * @p must be non-NULL.
1094  * Call with all @pages locked.
1095  */
1096 static void
1097 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1098                                       PageDesc *p, tb_page_addr_t start,
1099                                       tb_page_addr_t last,
1100                                       uintptr_t retaddr)
1101 {
1102     TranslationBlock *tb;
1103     PageForEachNext n;
1104 #ifdef TARGET_HAS_PRECISE_SMC
1105     bool current_tb_modified = false;
1106     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1107 #endif /* TARGET_HAS_PRECISE_SMC */
1108 
1109     /* Range may not cross a page. */
1110     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1111 
1112     /*
1113      * We remove all the TBs in the range [start, last].
1114      * XXX: see if in some cases it could be faster to invalidate all the code
1115      */
1116     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1117         tb_page_addr_t tb_start, tb_last;
1118 
1119         /* NOTE: this is subtle as a TB may span two physical pages */
1120         tb_start = tb_page_addr0(tb);
1121         tb_last = tb_start + tb->size - 1;
1122         if (n == 0) {
1123             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1124         } else {
1125             tb_start = tb_page_addr1(tb);
1126             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1127         }
1128         if (!(tb_last < start || tb_start > last)) {
1129 #ifdef TARGET_HAS_PRECISE_SMC
1130             if (current_tb == tb &&
1131                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1132                 /*
1133                  * If we are modifying the current TB, we must stop
1134                  * its execution. We could be more precise by checking
1135                  * that the modification is after the current PC, but it
1136                  * would require a specialized function to partially
1137                  * restore the CPU state.
1138                  */
1139                 current_tb_modified = true;
1140                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1141             }
1142 #endif /* TARGET_HAS_PRECISE_SMC */
1143             tb_phys_invalidate__locked(tb);
1144         }
1145     }
1146 
1147     /* if no code remaining, no need to continue to use slow writes */
1148     if (!p->first_tb) {
1149         tlb_unprotect_code(start);
1150     }
1151 
1152 #ifdef TARGET_HAS_PRECISE_SMC
1153     if (current_tb_modified) {
1154         page_collection_unlock(pages);
1155         /* Force execution of one insn next time.  */
1156         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1157         mmap_unlock();
1158         cpu_loop_exit_noexc(current_cpu);
1159     }
1160 #endif
1161 }
1162 
1163 /*
1164  * Invalidate all TBs which intersect with the target physical
1165  * address page @addr.
1166  */
1167 void tb_invalidate_phys_page(tb_page_addr_t addr)
1168 {
1169     struct page_collection *pages;
1170     tb_page_addr_t start, last;
1171     PageDesc *p;
1172 
1173     p = page_find(addr >> TARGET_PAGE_BITS);
1174     if (p == NULL) {
1175         return;
1176     }
1177 
1178     start = addr & TARGET_PAGE_MASK;
1179     last = addr | ~TARGET_PAGE_MASK;
1180     pages = page_collection_lock(start, last);
1181     tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
1182     page_collection_unlock(pages);
1183 }
1184 
1185 /*
1186  * Invalidate all TBs which intersect with the target physical address range
1187  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1188  * 'is_cpu_write_access' should be true if called from a real cpu write
1189  * access: the virtual CPU will exit the current TB if code is modified inside
1190  * this TB.
1191  */
1192 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1193 {
1194     struct page_collection *pages;
1195     tb_page_addr_t index, index_last;
1196 
1197     pages = page_collection_lock(start, last);
1198 
1199     index_last = last >> TARGET_PAGE_BITS;
1200     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1201         PageDesc *pd = page_find(index);
1202         tb_page_addr_t page_start, page_last;
1203 
1204         if (pd == NULL) {
1205             continue;
1206         }
1207         assert_page_locked(pd);
1208         page_start = index << TARGET_PAGE_BITS;
1209         page_last = page_start | ~TARGET_PAGE_MASK;
1210         page_last = MIN(page_last, last);
1211         tb_invalidate_phys_page_range__locked(pages, pd,
1212                                               page_start, page_last, 0);
1213     }
1214     page_collection_unlock(pages);
1215 }
1216 
1217 /*
1218  * Call with all @pages in the range [@start, @start + len[ locked.
1219  */
1220 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1221                                                  tb_page_addr_t start,
1222                                                  unsigned len, uintptr_t ra)
1223 {
1224     PageDesc *p;
1225 
1226     p = page_find(start >> TARGET_PAGE_BITS);
1227     if (!p) {
1228         return;
1229     }
1230 
1231     assert_page_locked(p);
1232     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1233 }
1234 
1235 /*
1236  * len must be <= 8 and start must be a multiple of len.
1237  * Called via softmmu_template.h when code areas are written to with
1238  * iothread mutex not held.
1239  */
1240 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1241                                    unsigned size,
1242                                    uintptr_t retaddr)
1243 {
1244     struct page_collection *pages;
1245 
1246     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1247     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1248     page_collection_unlock(pages);
1249 }
1250 
1251 #endif /* CONFIG_USER_ONLY */
1252