xref: /openbmc/qemu/accel/tcg/tb-maint.c (revision 93e0932b)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "exec/cputlb.h"
23 #include "exec/log.h"
24 #include "exec/exec-all.h"
25 #include "exec/translate-all.h"
26 #include "sysemu/tcg.h"
27 #include "tcg/tcg.h"
28 #include "tb-hash.h"
29 #include "tb-context.h"
30 #include "internal.h"
31 
32 
/*
 * Iterate over a list of tagged TranslationBlock pointers.
 *
 * Every link is a uintptr_t: bit 0 is a tag selecting which element of
 * @field the list continues through in the pointed-to TB, and the
 * remaining bits are the TB address.  On each iteration @tb holds the
 * untagged pointer and @n holds the tag; the walk stops as soon as the
 * untagged pointer is NULL.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1,                                                \
         tb = (TranslationBlock *)((head) & ~1);                        \
         tb;                                                            \
         tb = (TranslationBlock *)tb->field[n],                         \
         n = (uintptr_t)tb & 1,                                         \
         tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Walk the jmp_list_head/jmp_list_next[] list hanging off @head_tb. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
41 
42 static bool tb_cmp(const void *ap, const void *bp)
43 {
44     const TranslationBlock *a = ap;
45     const TranslationBlock *b = bp;
46 
47     return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
48             a->cs_base == b->cs_base &&
49             a->flags == b->flags &&
50             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
51             a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
52             tb_page_addr0(a) == tb_page_addr0(b) &&
53             tb_page_addr1(a) == tb_page_addr1(b));
54 }
55 
56 void tb_htable_init(void)
57 {
58     unsigned int mode = QHT_MODE_AUTO_RESIZE;
59 
60     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
61 }
62 
63 typedef struct PageDesc PageDesc;
64 
65 #ifdef CONFIG_USER_ONLY
66 
67 /*
68  * In user-mode page locks aren't used; mmap_lock is enough.
69  */
70 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
71 
72 static inline void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
73                                   PageDesc **ret_p2, tb_page_addr_t phys2,
74                                   bool alloc)
75 {
76     *ret_p1 = NULL;
77     *ret_p2 = NULL;
78 }
79 
80 static inline void page_unlock(PageDesc *pd) { }
81 static inline void page_lock_tb(const TranslationBlock *tb) { }
82 static inline void page_unlock_tb(const TranslationBlock *tb) { }
83 
84 /*
85  * For user-only, since we are protecting all of memory with a single lock,
86  * and because the two pages of a TranslationBlock are always contiguous,
87  * use a single data structure to record all TranslationBlocks.
88  */
89 static IntervalTreeRoot tb_root;
90 
91 static void tb_remove_all(void)
92 {
93     assert_memory_lock();
94     memset(&tb_root, 0, sizeof(tb_root));
95 }
96 
97 /* Call with mmap_lock held. */
98 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
99 {
100     target_ulong addr;
101     int flags;
102 
103     assert_memory_lock();
104     tb->itree.last = tb->itree.start + tb->size - 1;
105 
106     /* translator_loop() must have made all TB pages non-writable */
107     addr = tb_page_addr0(tb);
108     flags = page_get_flags(addr);
109     assert(!(flags & PAGE_WRITE));
110 
111     addr = tb_page_addr1(tb);
112     if (addr != -1) {
113         flags = page_get_flags(addr);
114         assert(!(flags & PAGE_WRITE));
115     }
116 
117     interval_tree_insert(&tb->itree, &tb_root);
118 }
119 
120 /* Call with mmap_lock held. */
121 static void tb_remove(TranslationBlock *tb)
122 {
123     assert_memory_lock();
124     interval_tree_remove(&tb->itree, &tb_root);
125 }
126 
127 /* TODO: For now, still shared with translate-all.c for system mode. */
128 #define PAGE_FOR_EACH_TB(start, end, pagedesc, T, N)    \
129     for (T = foreach_tb_first(start, end),              \
130          N = foreach_tb_next(T, start, end);            \
131          T != NULL;                                     \
132          T = N, N = foreach_tb_next(N, start, end))
133 
134 typedef TranslationBlock *PageForEachNext;
135 
136 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
137                                         tb_page_addr_t end)
138 {
139     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, end - 1);
140     return n ? container_of(n, TranslationBlock, itree) : NULL;
141 }
142 
143 static PageForEachNext foreach_tb_next(PageForEachNext tb,
144                                        tb_page_addr_t start,
145                                        tb_page_addr_t end)
146 {
147     IntervalTreeNode *n;
148 
149     if (tb) {
150         n = interval_tree_iter_next(&tb->itree, start, end - 1);
151         if (n) {
152             return container_of(n, TranslationBlock, itree);
153         }
154     }
155     return NULL;
156 }
157 
158 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 * The number of address bits covered is the target physical address space
 * width, capped at the width of a host long.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties, filled in by page_table_config_init():
 *   v_l1_size   - number of l1_map[] entries actually used
 *   v_l1_shift  - right shift turning a page index into its l1_map[] index
 *   v_l2_levels - number of intermediate table levels that
 *                 page_find_alloc() descends through below l1_map[]
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * Bounds on the number of index bits page_table_config_init() may give to
 * the first level: anything from 4 to (V_L2_BITS + 3) bits, depending on
 * target page size.  l1_map[] is sized for the upper bound.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

static void *l1_map[V_L1_MAX_SIZE];
188 
189 struct PageDesc {
190     QemuSpin lock;
191     /* list of TBs intersecting this ram page */
192     uintptr_t first_tb;
193 };
194 
195 void page_table_config_init(void)
196 {
197     uint32_t v_l1_bits;
198 
199     assert(TARGET_PAGE_BITS);
200     /* The bits remaining after N lower levels of page tables.  */
201     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
202     if (v_l1_bits < V_L1_MIN_BITS) {
203         v_l1_bits += V_L2_BITS;
204     }
205 
206     v_l1_size = 1 << v_l1_bits;
207     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
208     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
209 
210     assert(v_l1_bits <= V_L1_MAX_BITS);
211     assert(v_l1_shift % V_L2_BITS == 0);
212     assert(v_l2_levels >= 0);
213 }
214 
215 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
216 {
217     PageDesc *pd;
218     void **lp;
219     int i;
220 
221     /* Level 1.  Always allocated.  */
222     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
223 
224     /* Level 2..N-1.  */
225     for (i = v_l2_levels; i > 0; i--) {
226         void **p = qatomic_rcu_read(lp);
227 
228         if (p == NULL) {
229             void *existing;
230 
231             if (!alloc) {
232                 return NULL;
233             }
234             p = g_new0(void *, V_L2_SIZE);
235             existing = qatomic_cmpxchg(lp, NULL, p);
236             if (unlikely(existing)) {
237                 g_free(p);
238                 p = existing;
239             }
240         }
241 
242         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
243     }
244 
245     pd = qatomic_rcu_read(lp);
246     if (pd == NULL) {
247         void *existing;
248 
249         if (!alloc) {
250             return NULL;
251         }
252 
253         pd = g_new0(PageDesc, V_L2_SIZE);
254         for (int i = 0; i < V_L2_SIZE; i++) {
255             qemu_spin_init(&pd[i].lock);
256         }
257 
258         existing = qatomic_cmpxchg(lp, NULL, pd);
259         if (unlikely(existing)) {
260             for (int i = 0; i < V_L2_SIZE; i++) {
261                 qemu_spin_destroy(&pd[i].lock);
262             }
263             g_free(pd);
264             pd = existing;
265         }
266     }
267 
268     return pd + (index & (V_L2_SIZE - 1));
269 }
270 
271 static inline PageDesc *page_find(tb_page_addr_t index)
272 {
273     return page_find_alloc(index, false);
274 }
275 
276 /**
277  * struct page_entry - page descriptor entry
278  * @pd:     pointer to the &struct PageDesc of the page this entry represents
279  * @index:  page index of the page
280  * @locked: whether the page is locked
281  *
282  * This struct helps us keep track of the locked state of a page, without
283  * bloating &struct PageDesc.
284  *
285  * A page lock protects accesses to all fields of &struct PageDesc.
286  *
287  * See also: &struct page_collection.
288  */
289 struct page_entry {
290     PageDesc *pd;
291     tb_page_addr_t index;
292     bool locked;
293 };
294 
295 /**
296  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
297  * @tree:   Binary search tree (BST) of the pages, with key == page index
298  * @max:    Pointer to the page in @tree with the highest page index
299  *
300  * To avoid deadlock we lock pages in ascending order of page index.
301  * When operating on a set of pages, we need to keep track of them so that
302  * we can lock them in order and also unlock them later. For this we collect
303  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
304  * @tree implementation we use does not provide an O(1) operation to obtain the
305  * highest-ranked element, we use @max to keep track of the inserted page
306  * with the highest index. This is valuable because if a page is not in
307  * the tree and its index is higher than @max's, then we can lock it
308  * without breaking the locking order rule.
309  *
310  * Note on naming: 'struct page_set' would be shorter, but we already have a few
311  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
312  *
313  * See also: page_collection_lock().
314  */
315 struct page_collection {
316     GTree *tree;
317     struct page_entry *max;
318 };
319 
/* Cursor type used as the @n argument of the system-mode PAGE_FOR_EACH_TB. */
typedef int PageForEachNext;

/*
 * System-mode flavour: walk the tagged TB list rooted at
 * @pagedesc->first_tb, following each TB's page_next[] links.
 * @start and @end are ignored.
 */
#define PAGE_FOR_EACH_TB(start, end, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
323 
324 #ifdef CONFIG_DEBUG_TCG
325 
326 static __thread GHashTable *ht_pages_locked_debug;
327 
328 static void ht_pages_locked_debug_init(void)
329 {
330     if (ht_pages_locked_debug) {
331         return;
332     }
333     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
334 }
335 
336 static bool page_is_locked(const PageDesc *pd)
337 {
338     PageDesc *found;
339 
340     ht_pages_locked_debug_init();
341     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
342     return !!found;
343 }
344 
345 static void page_lock__debug(PageDesc *pd)
346 {
347     ht_pages_locked_debug_init();
348     g_assert(!page_is_locked(pd));
349     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
350 }
351 
352 static void page_unlock__debug(const PageDesc *pd)
353 {
354     bool removed;
355 
356     ht_pages_locked_debug_init();
357     g_assert(page_is_locked(pd));
358     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
359     g_assert(removed);
360 }
361 
362 static void do_assert_page_locked(const PageDesc *pd,
363                                   const char *file, int line)
364 {
365     if (unlikely(!page_is_locked(pd))) {
366         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
367                      pd, file, line);
368         abort();
369     }
370 }
371 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
372 
373 void assert_no_pages_locked(void)
374 {
375     ht_pages_locked_debug_init();
376     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
377 }
378 
379 #else /* !CONFIG_DEBUG_TCG */
380 
381 static inline void page_lock__debug(const PageDesc *pd) { }
382 static inline void page_unlock__debug(const PageDesc *pd) { }
383 static inline void assert_page_locked(const PageDesc *pd) { }
384 
385 #endif /* CONFIG_DEBUG_TCG */
386 
387 static void page_lock(PageDesc *pd)
388 {
389     page_lock__debug(pd);
390     qemu_spin_lock(&pd->lock);
391 }
392 
393 static void page_unlock(PageDesc *pd)
394 {
395     qemu_spin_unlock(&pd->lock);
396     page_unlock__debug(pd);
397 }
398 
399 static inline struct page_entry *
400 page_entry_new(PageDesc *pd, tb_page_addr_t index)
401 {
402     struct page_entry *pe = g_malloc(sizeof(*pe));
403 
404     pe->index = index;
405     pe->pd = pd;
406     pe->locked = false;
407     return pe;
408 }
409 
410 static void page_entry_destroy(gpointer p)
411 {
412     struct page_entry *pe = p;
413 
414     g_assert(pe->locked);
415     page_unlock(pe->pd);
416     g_free(pe);
417 }
418 
419 /* returns false on success */
420 static bool page_entry_trylock(struct page_entry *pe)
421 {
422     bool busy;
423 
424     busy = qemu_spin_trylock(&pe->pd->lock);
425     if (!busy) {
426         g_assert(!pe->locked);
427         pe->locked = true;
428         page_lock__debug(pe->pd);
429     }
430     return busy;
431 }
432 
433 static void do_page_entry_lock(struct page_entry *pe)
434 {
435     page_lock(pe->pd);
436     g_assert(!pe->locked);
437     pe->locked = true;
438 }
439 
440 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
441 {
442     struct page_entry *pe = value;
443 
444     do_page_entry_lock(pe);
445     return FALSE;
446 }
447 
448 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
449 {
450     struct page_entry *pe = value;
451 
452     if (pe->locked) {
453         pe->locked = false;
454         page_unlock(pe->pd);
455     }
456     return FALSE;
457 }
458 
459 /*
460  * Trylock a page, and if successful, add the page to a collection.
461  * Returns true ("busy") if the page could not be locked; false otherwise.
462  */
463 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
464 {
465     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
466     struct page_entry *pe;
467     PageDesc *pd;
468 
469     pe = g_tree_lookup(set->tree, &index);
470     if (pe) {
471         return false;
472     }
473 
474     pd = page_find(index);
475     if (pd == NULL) {
476         return false;
477     }
478 
479     pe = page_entry_new(pd, index);
480     g_tree_insert(set->tree, &pe->index, pe);
481 
482     /*
483      * If this is either (1) the first insertion or (2) a page whose index
484      * is higher than any other so far, just lock the page and move on.
485      */
486     if (set->max == NULL || pe->index > set->max->index) {
487         set->max = pe;
488         do_page_entry_lock(pe);
489         return false;
490     }
491     /*
492      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
493      * locks in order.
494      */
495     return page_entry_trylock(pe);
496 }
497 
498 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
499 {
500     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
501     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
502 
503     if (a == b) {
504         return 0;
505     } else if (a < b) {
506         return -1;
507     }
508     return 1;
509 }
510 
511 /*
512  * Lock a range of pages ([@start,@end[) as well as the pages of all
513  * intersecting TBs.
514  * Locking order: acquire locks in ascending order of page index.
515  */
516 static struct page_collection *page_collection_lock(tb_page_addr_t start,
517                                                     tb_page_addr_t end)
518 {
519     struct page_collection *set = g_malloc(sizeof(*set));
520     tb_page_addr_t index;
521     PageDesc *pd;
522 
523     start >>= TARGET_PAGE_BITS;
524     end   >>= TARGET_PAGE_BITS;
525     g_assert(start <= end);
526 
527     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
528                                 page_entry_destroy);
529     set->max = NULL;
530     assert_no_pages_locked();
531 
532  retry:
533     g_tree_foreach(set->tree, page_entry_lock, NULL);
534 
535     for (index = start; index <= end; index++) {
536         TranslationBlock *tb;
537         PageForEachNext n;
538 
539         pd = page_find(index);
540         if (pd == NULL) {
541             continue;
542         }
543         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
544             g_tree_foreach(set->tree, page_entry_unlock, NULL);
545             goto retry;
546         }
547         assert_page_locked(pd);
548         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
549             if (page_trylock_add(set, tb_page_addr0(tb)) ||
550                 (tb_page_addr1(tb) != -1 &&
551                  page_trylock_add(set, tb_page_addr1(tb)))) {
552                 /* drop all locks, and reacquire in order */
553                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
554                 goto retry;
555             }
556         }
557     }
558     return set;
559 }
560 
561 static void page_collection_unlock(struct page_collection *set)
562 {
563     /* entries are unlocked and freed via page_entry_destroy */
564     g_tree_destroy(set->tree);
565     g_free(set);
566 }
567 
568 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
569 static void tb_remove_all_1(int level, void **lp)
570 {
571     int i;
572 
573     if (*lp == NULL) {
574         return;
575     }
576     if (level == 0) {
577         PageDesc *pd = *lp;
578 
579         for (i = 0; i < V_L2_SIZE; ++i) {
580             page_lock(&pd[i]);
581             pd[i].first_tb = (uintptr_t)NULL;
582             page_unlock(&pd[i]);
583         }
584     } else {
585         void **pp = *lp;
586 
587         for (i = 0; i < V_L2_SIZE; ++i) {
588             tb_remove_all_1(level - 1, pp + i);
589         }
590     }
591 }
592 
593 static void tb_remove_all(void)
594 {
595     int i, l1_sz = v_l1_size;
596 
597     for (i = 0; i < l1_sz; i++) {
598         tb_remove_all_1(v_l2_levels, l1_map + i);
599     }
600 }
601 
602 /*
603  * Add the tb in the target page and protect it if necessary.
604  * Called with @p->lock held.
605  */
606 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
607                                unsigned int n)
608 {
609     bool page_already_protected;
610 
611     assert_page_locked(p);
612 
613     tb->page_next[n] = p->first_tb;
614     page_already_protected = p->first_tb != 0;
615     p->first_tb = (uintptr_t)tb | n;
616 
617     /*
618      * If some code is already present, then the pages are already
619      * protected. So we handle the case where only the first TB is
620      * allocated in a physical page.
621      */
622     if (!page_already_protected) {
623         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
624     }
625 }
626 
627 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
628 {
629     tb_page_add(p1, tb, 0);
630     if (unlikely(p2)) {
631         tb_page_add(p2, tb, 1);
632     }
633 }
634 
635 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
636 {
637     TranslationBlock *tb1;
638     uintptr_t *pprev;
639     PageForEachNext n1;
640 
641     assert_page_locked(pd);
642     pprev = &pd->first_tb;
643     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
644         if (tb1 == tb) {
645             *pprev = tb1->page_next[n1];
646             return;
647         }
648         pprev = &tb1->page_next[n1];
649     }
650     g_assert_not_reached();
651 }
652 
653 static void tb_remove(TranslationBlock *tb)
654 {
655     PageDesc *pd;
656 
657     pd = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
658     tb_page_remove(pd, tb);
659     if (unlikely(tb->page_addr[1] != -1)) {
660         pd = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
661         tb_page_remove(pd, tb);
662     }
663 }
664 
665 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
666                            PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
667 {
668     PageDesc *p1, *p2;
669     tb_page_addr_t page1;
670     tb_page_addr_t page2;
671 
672     assert_memory_lock();
673     g_assert(phys1 != -1);
674 
675     page1 = phys1 >> TARGET_PAGE_BITS;
676     page2 = phys2 >> TARGET_PAGE_BITS;
677 
678     p1 = page_find_alloc(page1, alloc);
679     if (ret_p1) {
680         *ret_p1 = p1;
681     }
682     if (likely(phys2 == -1)) {
683         page_lock(p1);
684         return;
685     } else if (page1 == page2) {
686         page_lock(p1);
687         if (ret_p2) {
688             *ret_p2 = p1;
689         }
690         return;
691     }
692     p2 = page_find_alloc(page2, alloc);
693     if (ret_p2) {
694         *ret_p2 = p2;
695     }
696     if (page1 < page2) {
697         page_lock(p1);
698         page_lock(p2);
699     } else {
700         page_lock(p2);
701         page_lock(p1);
702     }
703 }
704 
705 /* lock the page(s) of a TB in the correct acquisition order */
706 static void page_lock_tb(const TranslationBlock *tb)
707 {
708     page_lock_pair(NULL, tb_page_addr0(tb), NULL, tb_page_addr1(tb), false);
709 }
710 
711 static void page_unlock_tb(const TranslationBlock *tb)
712 {
713     PageDesc *p1 = page_find(tb_page_addr0(tb) >> TARGET_PAGE_BITS);
714 
715     page_unlock(p1);
716     if (unlikely(tb_page_addr1(tb) != -1)) {
717         PageDesc *p2 = page_find(tb_page_addr1(tb) >> TARGET_PAGE_BITS);
718 
719         if (p2 != p1) {
720             page_unlock(p2);
721         }
722     }
723 }
724 #endif /* CONFIG_USER_ONLY */
725 
726 /* flush all the translation blocks */
727 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
728 {
729     bool did_flush = false;
730 
731     mmap_lock();
732     /* If it is already been done on request of another CPU, just retry. */
733     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
734         goto done;
735     }
736     did_flush = true;
737 
738     CPU_FOREACH(cpu) {
739         tcg_flush_jmp_cache(cpu);
740     }
741 
742     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
743     tb_remove_all();
744 
745     tcg_region_reset_all();
746     /* XXX: flush processor icache at this point if cache flush is expensive */
747     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
748 
749 done:
750     mmap_unlock();
751     if (did_flush) {
752         qemu_plugin_flush_cb();
753     }
754 }
755 
756 void tb_flush(CPUState *cpu)
757 {
758     if (tcg_enabled()) {
759         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
760 
761         if (cpu_in_exclusive_context(cpu)) {
762             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
763         } else {
764             async_safe_run_on_cpu(cpu, do_tb_flush,
765                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
766         }
767     }
768 }
769 
770 /* remove @orig from its @n_orig-th jump list */
771 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
772 {
773     uintptr_t ptr, ptr_locked;
774     TranslationBlock *dest;
775     TranslationBlock *tb;
776     uintptr_t *pprev;
777     int n;
778 
779     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
780     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
781     dest = (TranslationBlock *)(ptr & ~1);
782     if (dest == NULL) {
783         return;
784     }
785 
786     qemu_spin_lock(&dest->jmp_lock);
787     /*
788      * While acquiring the lock, the jump might have been removed if the
789      * destination TB was invalidated; check again.
790      */
791     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
792     if (ptr_locked != ptr) {
793         qemu_spin_unlock(&dest->jmp_lock);
794         /*
795          * The only possibility is that the jump was unlinked via
796          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
797          * because we set the LSB above.
798          */
799         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
800         return;
801     }
802     /*
803      * We first acquired the lock, and since the destination pointer matches,
804      * we know for sure that @orig is in the jmp list.
805      */
806     pprev = &dest->jmp_list_head;
807     TB_FOR_EACH_JMP(dest, tb, n) {
808         if (tb == orig && n == n_orig) {
809             *pprev = tb->jmp_list_next[n];
810             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
811             qemu_spin_unlock(&dest->jmp_lock);
812             return;
813         }
814         pprev = &tb->jmp_list_next[n];
815     }
816     g_assert_not_reached();
817 }
818 
819 /*
820  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
821  */
822 void tb_reset_jump(TranslationBlock *tb, int n)
823 {
824     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
825     tb_set_jmp_target(tb, n, addr);
826 }
827 
828 /* remove any jumps to the TB */
829 static inline void tb_jmp_unlink(TranslationBlock *dest)
830 {
831     TranslationBlock *tb;
832     int n;
833 
834     qemu_spin_lock(&dest->jmp_lock);
835 
836     TB_FOR_EACH_JMP(dest, tb, n) {
837         tb_reset_jump(tb, n);
838         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
839         /* No need to clear the list entry; setting the dest ptr is enough */
840     }
841     dest->jmp_list_head = (uintptr_t)NULL;
842 
843     qemu_spin_unlock(&dest->jmp_lock);
844 }
845 
846 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
847 {
848     CPUState *cpu;
849 
850     if (TARGET_TB_PCREL) {
851         /* A TB may be at any virtual address */
852         CPU_FOREACH(cpu) {
853             tcg_flush_jmp_cache(cpu);
854         }
855     } else {
856         uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
857 
858         CPU_FOREACH(cpu) {
859             CPUJumpCache *jc = cpu->tb_jmp_cache;
860 
861             if (qatomic_read(&jc->array[h].tb) == tb) {
862                 qatomic_set(&jc->array[h].tb, NULL);
863             }
864         }
865     }
866 }
867 
868 /*
869  * In user-mode, call with mmap_lock held.
870  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
871  * locks held.
872  */
873 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
874 {
875     uint32_t h;
876     tb_page_addr_t phys_pc;
877     uint32_t orig_cflags = tb_cflags(tb);
878 
879     assert_memory_lock();
880 
881     /* make sure no further incoming jumps will be chained to this TB */
882     qemu_spin_lock(&tb->jmp_lock);
883     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
884     qemu_spin_unlock(&tb->jmp_lock);
885 
886     /* remove the TB from the hash list */
887     phys_pc = tb_page_addr0(tb);
888     h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
889                      tb->flags, orig_cflags, tb->trace_vcpu_dstate);
890     if (!qht_remove(&tb_ctx.htable, tb, h)) {
891         return;
892     }
893 
894     /* remove the TB from the page list */
895     if (rm_from_page_list) {
896         tb_remove(tb);
897     }
898 
899     /* remove the TB from the hash list */
900     tb_jmp_cache_inval_tb(tb);
901 
902     /* suppress this TB from the two jump lists */
903     tb_remove_from_jmp_list(tb, 0);
904     tb_remove_from_jmp_list(tb, 1);
905 
906     /* suppress any remaining jumps to this TB */
907     tb_jmp_unlink(tb);
908 
909     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
910                 tb_ctx.tb_phys_invalidate_count + 1);
911 }
912 
913 static void tb_phys_invalidate__locked(TranslationBlock *tb)
914 {
915     qemu_thread_jit_write();
916     do_tb_phys_invalidate(tb, true);
917     qemu_thread_jit_execute();
918 }
919 
920 /*
921  * Invalidate one TB.
922  * Called with mmap_lock held in user-mode.
923  */
924 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
925 {
926     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
927         page_lock_tb(tb);
928         do_tb_phys_invalidate(tb, true);
929         page_unlock_tb(tb);
930     } else {
931         do_tb_phys_invalidate(tb, false);
932     }
933 }
934 
935 /*
936  * Add a new TB and link it to the physical page tables. phys_page2 is
937  * (-1) to indicate that only one page contains the TB.
938  *
939  * Called with mmap_lock held for user-mode emulation.
940  *
941  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
942  * Note that in !user-mode, another thread might have already added a TB
943  * for the same block of guest code that @tb corresponds to. In that case,
944  * the caller should discard the original @tb, and use instead the returned TB.
945  */
946 TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
947                                tb_page_addr_t phys_page2)
948 {
949     PageDesc *p;
950     PageDesc *p2 = NULL;
951     void *existing_tb = NULL;
952     uint32_t h;
953 
954     assert_memory_lock();
955     tcg_debug_assert(!(tb->cflags & CF_INVALID));
956 
957     /*
958      * Add the TB to the page list, acquiring first the pages's locks.
959      * We keep the locks held until after inserting the TB in the hash table,
960      * so that if the insertion fails we know for sure that the TBs are still
961      * in the page descriptors.
962      * Note that inserting into the hash table first isn't an option, since
963      * we can only insert TBs that are fully initialized.
964      */
965     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
966     tb_record(tb, p, p2);
967 
968     /* add in the hash table */
969     h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
970                      tb->flags, tb->cflags, tb->trace_vcpu_dstate);
971     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
972 
973     /* remove TB from the page(s) if we couldn't insert it */
974     if (unlikely(existing_tb)) {
975         tb_remove(tb);
976         tb = existing_tb;
977     }
978 
979     if (p2 && p2 != p) {
980         page_unlock(p2);
981     }
982     page_unlock(p);
983     return tb;
984 }
985 
986 #ifdef CONFIG_USER_ONLY
987 /*
988  * Invalidate all TBs which intersect with the target address range.
989  * Called with mmap_lock held for user-mode emulation.
990  * NOTE: this function must not be called while a TB is running.
991  */
992 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
993 {
994     TranslationBlock *tb;
995     PageForEachNext n;
996 
997     assert_memory_lock();
998 
999     PAGE_FOR_EACH_TB(start, end, unused, tb, n) {
1000         tb_phys_invalidate__locked(tb);
1001     }
1002 }
1003 
1004 /*
1005  * Invalidate all TBs which intersect with the target address page @addr.
1006  * Called with mmap_lock held for user-mode emulation
1007  * NOTE: this function must not be called while a TB is running.
1008  */
1009 void tb_invalidate_phys_page(tb_page_addr_t addr)
1010 {
1011     tb_page_addr_t start, end;
1012 
1013     start = addr & TARGET_PAGE_MASK;
1014     end = start + TARGET_PAGE_SIZE;
1015     tb_invalidate_phys_range(start, end);
1016 }
1017 
1018 /*
1019  * Called with mmap_lock held. If pc is not 0 then it indicates the
1020  * host PC of the faulting store instruction that caused this invalidate.
1021  * Returns true if the caller needs to abort execution of the current
1022  * TB (because it was modified by this store and the guest CPU has
1023  * precise-SMC semantics).
1024  */
1025 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1026 {
1027     TranslationBlock *current_tb;
1028     bool current_tb_modified;
1029     TranslationBlock *tb;
1030     PageForEachNext n;
1031 
1032     /*
1033      * Without precise smc semantics, or when outside of a TB,
1034      * we can skip to invalidate.
1035      */
1036 #ifndef TARGET_HAS_PRECISE_SMC
1037     pc = 0;
1038 #endif
1039     if (!pc) {
1040         tb_invalidate_phys_page(addr);
1041         return false;
1042     }
1043 
1044     assert_memory_lock();
1045     current_tb = tcg_tb_lookup(pc);
1046 
1047     addr &= TARGET_PAGE_MASK;
1048     current_tb_modified = false;
1049 
1050     PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) {
1051         if (current_tb == tb &&
1052             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1053             /*
1054              * If we are modifying the current TB, we must stop its
1055              * execution. We could be more precise by checking that
1056              * the modification is after the current PC, but it would
1057              * require a specialized function to partially restore
1058              * the CPU state.
1059              */
1060             current_tb_modified = true;
1061             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1062         }
1063         tb_phys_invalidate__locked(tb);
1064     }
1065 
1066     if (current_tb_modified) {
1067         /* Force execution of one insn next time.  */
1068         CPUState *cpu = current_cpu;
1069         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1070         return true;
1071     }
1072     return false;
1073 }
1074 #else
1075 /*
1076  * @p must be non-NULL.
1077  * Call with all @pages locked.
1078  */
1079 static void
1080 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1081                                       PageDesc *p, tb_page_addr_t start,
1082                                       tb_page_addr_t end,
1083                                       uintptr_t retaddr)
1084 {
1085     TranslationBlock *tb;
1086     tb_page_addr_t tb_start, tb_end;
1087     PageForEachNext n;
1088 #ifdef TARGET_HAS_PRECISE_SMC
1089     bool current_tb_modified = false;
1090     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1091 #endif /* TARGET_HAS_PRECISE_SMC */
1092 
1093     /*
1094      * We remove all the TBs in the range [start, end[.
1095      * XXX: see if in some cases it could be faster to invalidate all the code
1096      */
1097     PAGE_FOR_EACH_TB(start, end, p, tb, n) {
1098         /* NOTE: this is subtle as a TB may span two physical pages */
1099         if (n == 0) {
1100             /* NOTE: tb_end may be after the end of the page, but
1101                it is not a problem */
1102             tb_start = tb_page_addr0(tb);
1103             tb_end = tb_start + tb->size;
1104         } else {
1105             tb_start = tb_page_addr1(tb);
1106             tb_end = tb_start + ((tb_page_addr0(tb) + tb->size)
1107                                  & ~TARGET_PAGE_MASK);
1108         }
1109         if (!(tb_end <= start || tb_start >= end)) {
1110 #ifdef TARGET_HAS_PRECISE_SMC
1111             if (current_tb == tb &&
1112                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1113                 /*
1114                  * If we are modifying the current TB, we must stop
1115                  * its execution. We could be more precise by checking
1116                  * that the modification is after the current PC, but it
1117                  * would require a specialized function to partially
1118                  * restore the CPU state.
1119                  */
1120                 current_tb_modified = true;
1121                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1122             }
1123 #endif /* TARGET_HAS_PRECISE_SMC */
1124             tb_phys_invalidate__locked(tb);
1125         }
1126     }
1127 
1128     /* if no code remaining, no need to continue to use slow writes */
1129     if (!p->first_tb) {
1130         tlb_unprotect_code(start);
1131     }
1132 
1133 #ifdef TARGET_HAS_PRECISE_SMC
1134     if (current_tb_modified) {
1135         page_collection_unlock(pages);
1136         /* Force execution of one insn next time.  */
1137         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1138         mmap_unlock();
1139         cpu_loop_exit_noexc(current_cpu);
1140     }
1141 #endif
1142 }
1143 
1144 /*
1145  * Invalidate all TBs which intersect with the target physical
1146  * address page @addr.
1147  */
1148 void tb_invalidate_phys_page(tb_page_addr_t addr)
1149 {
1150     struct page_collection *pages;
1151     tb_page_addr_t start, end;
1152     PageDesc *p;
1153 
1154     p = page_find(addr >> TARGET_PAGE_BITS);
1155     if (p == NULL) {
1156         return;
1157     }
1158 
1159     start = addr & TARGET_PAGE_MASK;
1160     end = start + TARGET_PAGE_SIZE;
1161     pages = page_collection_lock(start, end);
1162     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1163     page_collection_unlock(pages);
1164 }
1165 
1166 /*
1167  * Invalidate all TBs which intersect with the target physical address range
1168  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1169  * 'is_cpu_write_access' should be true if called from a real cpu write
1170  * access: the virtual CPU will exit the current TB if code is modified inside
1171  * this TB.
1172  */
1173 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1174 {
1175     struct page_collection *pages;
1176     tb_page_addr_t next;
1177 
1178     pages = page_collection_lock(start, end);
1179     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1180          start < end;
1181          start = next, next += TARGET_PAGE_SIZE) {
1182         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1183         tb_page_addr_t bound = MIN(next, end);
1184 
1185         if (pd == NULL) {
1186             continue;
1187         }
1188         assert_page_locked(pd);
1189         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1190     }
1191     page_collection_unlock(pages);
1192 }
1193 
1194 /*
1195  * Call with all @pages in the range [@start, @start + len[ locked.
1196  */
1197 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1198                                                  tb_page_addr_t start,
1199                                                  unsigned len, uintptr_t ra)
1200 {
1201     PageDesc *p;
1202 
1203     p = page_find(start >> TARGET_PAGE_BITS);
1204     if (!p) {
1205         return;
1206     }
1207 
1208     assert_page_locked(p);
1209     tb_invalidate_phys_page_range__locked(pages, p, start, start + len, ra);
1210 }
1211 
1212 /*
1213  * len must be <= 8 and start must be a multiple of len.
1214  * Called via softmmu_template.h when code areas are written to with
1215  * iothread mutex not held.
1216  */
1217 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1218                                    unsigned size,
1219                                    uintptr_t retaddr)
1220 {
1221     struct page_collection *pages;
1222 
1223     pages = page_collection_lock(ram_addr, ram_addr + size);
1224     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1225     page_collection_unlock(pages);
1226 }
1227 
1228 #endif /* CONFIG_USER_ONLY */
1229