xref: /openbmc/qemu/accel/tcg/tb-maint.c (revision 6c1e3906)
1 /*
 * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/tb-flush.h"
27 #include "exec/translate-all.h"
28 #include "sysemu/tcg.h"
29 #include "tcg/tcg.h"
30 #include "tb-hash.h"
31 #include "tb-context.h"
32 #include "internal.h"
33 
34 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Each link is a uintptr_t: bit 0 is extracted into @n and the remaining
 * bits are the TranslationBlock pointer stored into @tb.  The next link is
 * read from tb->field[n].  Iteration stops once the untagged pointer is NULL.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
40 
/*
 * Walk the tagged list starting at @head_tb->jmp_list_head, following
 * the jmp_list_next[] links; @n receives the tag bit of each link.
 */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
43 
44 static bool tb_cmp(const void *ap, const void *bp)
45 {
46     const TranslationBlock *a = ap;
47     const TranslationBlock *b = bp;
48 
49     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
50             a->cs_base == b->cs_base &&
51             a->flags == b->flags &&
52             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
53             a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
54             tb_page_addr0(a) == tb_page_addr0(b) &&
55             tb_page_addr1(a) == tb_page_addr1(b));
56 }
57 
58 void tb_htable_init(void)
59 {
60     unsigned int mode = QHT_MODE_AUTO_RESIZE;
61 
62     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
63 }
64 
/* Forward declaration so both the user-only and system code can name it. */
typedef struct PageDesc PageDesc;
66 
67 #ifdef CONFIG_USER_ONLY
68 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * The @pd argument is therefore ignored; only ownership of mmap_lock
 * is checked, via tcg_debug_assert().
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
73 
74 static inline void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
75                                   PageDesc **ret_p2, tb_page_addr_t phys2,
76                                   bool alloc)
77 {
78     *ret_p1 = NULL;
79     *ret_p2 = NULL;
80 }
81 
82 static inline void page_unlock(PageDesc *pd) { }
83 static inline void page_lock_tb(const TranslationBlock *tb) { }
84 static inline void page_unlock_tb(const TranslationBlock *tb) { }
85 
/*
 * For user-only, since we are protecting all of memory with a single lock,
 * and because the two pages of a TranslationBlock are always contiguous,
 * use a single data structure to record all TranslationBlocks.
 *
 * The nodes stored in this tree are the 'itree' members embedded in each
 * TranslationBlock.
 */
static IntervalTreeRoot tb_root;
92 
93 static void tb_remove_all(void)
94 {
95     assert_memory_lock();
96     memset(&tb_root, 0, sizeof(tb_root));
97 }
98 
99 /* Call with mmap_lock held. */
100 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
101 {
102     target_ulong addr;
103     int flags;
104 
105     assert_memory_lock();
106     tb->itree.last = tb->itree.start + tb->size - 1;
107 
108     /* translator_loop() must have made all TB pages non-writable */
109     addr = tb_page_addr0(tb);
110     flags = page_get_flags(addr);
111     assert(!(flags & PAGE_WRITE));
112 
113     addr = tb_page_addr1(tb);
114     if (addr != -1) {
115         flags = page_get_flags(addr);
116         assert(!(flags & PAGE_WRITE));
117     }
118 
119     interval_tree_insert(&tb->itree, &tb_root);
120 }
121 
122 /* Call with mmap_lock held. */
123 static void tb_remove(TranslationBlock *tb)
124 {
125     assert_memory_lock();
126     interval_tree_remove(&tb->itree, &tb_root);
127 }
128 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * Iterate over the TBs returned by foreach_tb_first()/foreach_tb_next()
 * for the range [@start, @last].  The successor @N is fetched before the
 * loop body runs on @T.  @pagedesc is not used by this variant.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
    for (T = foreach_tb_first(start, last),             \
         N = foreach_tb_next(T, start, last);           \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, last))

/* Iteration cursor for PAGE_FOR_EACH_TB: the next TB to visit, or NULL. */
typedef TranslationBlock *PageForEachNext;
137 
138 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
139                                         tb_page_addr_t last)
140 {
141     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
142     return n ? container_of(n, TranslationBlock, itree) : NULL;
143 }
144 
145 static PageForEachNext foreach_tb_next(PageForEachNext tb,
146                                        tb_page_addr_t start,
147                                        tb_page_addr_t last)
148 {
149     IntervalTreeNode *n;
150 
151     if (tb) {
152         n = interval_tree_iter_next(&tb->itree, start, last);
153         if (n) {
154             return container_of(n, TranslationBlock, itree);
155         }
156     }
157     return NULL;
158 }
159 
160 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 * The covered width is the smaller of HOST_LONG_BITS and
 * TARGET_PHYS_ADDR_SPACE_BITS.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
169 
/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties
 *
 * All three values are computed by page_table_config_init():
 * v_l1_size is the number of L1 entries in use, v_l1_shift the shift
 * applied to a page index to obtain its L1 slot, and v_l2_levels the
 * number of intermediate table levels walked by page_find_alloc().
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Statically sized for the largest L1; only v_l1_size entries are used. */
static void *l1_map[V_L1_MAX_SIZE];
190 
/* Per-page descriptor; arrays of these form the last level under l1_map. */
struct PageDesc {
    /* spinlock taken by page_lock() and released by page_unlock() */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page: a tagged pointer
     * ((uintptr_t)tb | n) chained through tb->page_next[n]; 0 when empty
     */
    uintptr_t first_tb;
};
196 
197 void page_table_config_init(void)
198 {
199     uint32_t v_l1_bits;
200 
201     assert(TARGET_PAGE_BITS);
202     /* The bits remaining after N lower levels of page tables.  */
203     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
204     if (v_l1_bits < V_L1_MIN_BITS) {
205         v_l1_bits += V_L2_BITS;
206     }
207 
208     v_l1_size = 1 << v_l1_bits;
209     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
210     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
211 
212     assert(v_l1_bits <= V_L1_MAX_BITS);
213     assert(v_l1_shift % V_L2_BITS == 0);
214     assert(v_l2_levels >= 0);
215 }
216 
217 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
218 {
219     PageDesc *pd;
220     void **lp;
221     int i;
222 
223     /* Level 1.  Always allocated.  */
224     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
225 
226     /* Level 2..N-1.  */
227     for (i = v_l2_levels; i > 0; i--) {
228         void **p = qatomic_rcu_read(lp);
229 
230         if (p == NULL) {
231             void *existing;
232 
233             if (!alloc) {
234                 return NULL;
235             }
236             p = g_new0(void *, V_L2_SIZE);
237             existing = qatomic_cmpxchg(lp, NULL, p);
238             if (unlikely(existing)) {
239                 g_free(p);
240                 p = existing;
241             }
242         }
243 
244         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
245     }
246 
247     pd = qatomic_rcu_read(lp);
248     if (pd == NULL) {
249         void *existing;
250 
251         if (!alloc) {
252             return NULL;
253         }
254 
255         pd = g_new0(PageDesc, V_L2_SIZE);
256         for (int i = 0; i < V_L2_SIZE; i++) {
257             qemu_spin_init(&pd[i].lock);
258         }
259 
260         existing = qatomic_cmpxchg(lp, NULL, pd);
261         if (unlikely(existing)) {
262             for (int i = 0; i < V_L2_SIZE; i++) {
263                 qemu_spin_destroy(&pd[i].lock);
264             }
265             g_free(pd);
266             pd = existing;
267         }
268     }
269 
270     return pd + (index & (V_L2_SIZE - 1));
271 }
272 
273 static inline PageDesc *page_find(tb_page_addr_t index)
274 {
275     return page_find_alloc(index, false);
276 }
277 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked; set by do_page_entry_lock() and
 *          page_entry_trylock(), cleared by page_entry_unlock()
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
296 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index;
 *          created in page_collection_lock() with page_entry_destroy()
 *          registered as its destroy callback
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    QTree *tree;
    struct page_entry *max;
};
321 
/*
 * System-mode iteration: walk @pagedesc's first_tb list through the
 * page_next[] links.  The cursor @n receives the tag bit of each link;
 * @start and @last are not used by this variant.
 */
typedef int PageForEachNext;
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
325 
326 #ifdef CONFIG_DEBUG_TCG
327 
328 static __thread GHashTable *ht_pages_locked_debug;
329 
330 static void ht_pages_locked_debug_init(void)
331 {
332     if (ht_pages_locked_debug) {
333         return;
334     }
335     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
336 }
337 
338 static bool page_is_locked(const PageDesc *pd)
339 {
340     PageDesc *found;
341 
342     ht_pages_locked_debug_init();
343     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
344     return !!found;
345 }
346 
347 static void page_lock__debug(PageDesc *pd)
348 {
349     ht_pages_locked_debug_init();
350     g_assert(!page_is_locked(pd));
351     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
352 }
353 
354 static void page_unlock__debug(const PageDesc *pd)
355 {
356     bool removed;
357 
358     ht_pages_locked_debug_init();
359     g_assert(page_is_locked(pd));
360     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
361     g_assert(removed);
362 }
363 
364 static void do_assert_page_locked(const PageDesc *pd,
365                                   const char *file, int line)
366 {
367     if (unlikely(!page_is_locked(pd))) {
368         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
369                      pd, file, line);
370         abort();
371     }
372 }
373 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
374 
375 void assert_no_pages_locked(void)
376 {
377     ht_pages_locked_debug_init();
378     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
379 }
380 
381 #else /* !CONFIG_DEBUG_TCG */
382 
383 static inline void page_lock__debug(const PageDesc *pd) { }
384 static inline void page_unlock__debug(const PageDesc *pd) { }
385 static inline void assert_page_locked(const PageDesc *pd) { }
386 
387 #endif /* CONFIG_DEBUG_TCG */
388 
389 static void page_lock(PageDesc *pd)
390 {
391     page_lock__debug(pd);
392     qemu_spin_lock(&pd->lock);
393 }
394 
395 static void page_unlock(PageDesc *pd)
396 {
397     qemu_spin_unlock(&pd->lock);
398     page_unlock__debug(pd);
399 }
400 
401 static inline struct page_entry *
402 page_entry_new(PageDesc *pd, tb_page_addr_t index)
403 {
404     struct page_entry *pe = g_malloc(sizeof(*pe));
405 
406     pe->index = index;
407     pe->pd = pd;
408     pe->locked = false;
409     return pe;
410 }
411 
412 static void page_entry_destroy(gpointer p)
413 {
414     struct page_entry *pe = p;
415 
416     g_assert(pe->locked);
417     page_unlock(pe->pd);
418     g_free(pe);
419 }
420 
421 /* returns false on success */
422 static bool page_entry_trylock(struct page_entry *pe)
423 {
424     bool busy;
425 
426     busy = qemu_spin_trylock(&pe->pd->lock);
427     if (!busy) {
428         g_assert(!pe->locked);
429         pe->locked = true;
430         page_lock__debug(pe->pd);
431     }
432     return busy;
433 }
434 
435 static void do_page_entry_lock(struct page_entry *pe)
436 {
437     page_lock(pe->pd);
438     g_assert(!pe->locked);
439     pe->locked = true;
440 }
441 
442 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
443 {
444     struct page_entry *pe = value;
445 
446     do_page_entry_lock(pe);
447     return FALSE;
448 }
449 
450 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
451 {
452     struct page_entry *pe = value;
453 
454     if (pe->locked) {
455         pe->locked = false;
456         page_unlock(pe->pd);
457     }
458     return FALSE;
459 }
460 
461 /*
462  * Trylock a page, and if successful, add the page to a collection.
463  * Returns true ("busy") if the page could not be locked; false otherwise.
464  */
465 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
466 {
467     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
468     struct page_entry *pe;
469     PageDesc *pd;
470 
471     pe = q_tree_lookup(set->tree, &index);
472     if (pe) {
473         return false;
474     }
475 
476     pd = page_find(index);
477     if (pd == NULL) {
478         return false;
479     }
480 
481     pe = page_entry_new(pd, index);
482     q_tree_insert(set->tree, &pe->index, pe);
483 
484     /*
485      * If this is either (1) the first insertion or (2) a page whose index
486      * is higher than any other so far, just lock the page and move on.
487      */
488     if (set->max == NULL || pe->index > set->max->index) {
489         set->max = pe;
490         do_page_entry_lock(pe);
491         return false;
492     }
493     /*
494      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
495      * locks in order.
496      */
497     return page_entry_trylock(pe);
498 }
499 
500 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
501 {
502     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
503     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
504 
505     if (a == b) {
506         return 0;
507     } else if (a < b) {
508         return -1;
509     }
510     return 1;
511 }
512 
/*
 * Lock a range of pages ([@start,@last]) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * @start and @last are addresses; they are converted to page indexes
 * below.  No page may be locked by the calling thread on entry.
 *
 * Returns a newly allocated collection holding every locked page; release
 * it with page_collection_unlock().
 */
static struct page_collection *page_collection_lock(tb_page_addr_t start,
                                                    tb_page_addr_t last)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    last >>= TARGET_PAGE_BITS;
    g_assert(start <= last);

    set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /*
     * (Re)lock every page already collected.  The tree is empty on the
     * first pass; after a failed trylock it holds the pages found so far.
     * NOTE(review): presumably q_tree_foreach() visits keys in ascending
     * order, which is what the locking rule requires - confirm.
     */
    q_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= last; index++) {
        TranslationBlock *tb;
        PageForEachNext n;

        pd = page_find(index);
        if (pd == NULL) {
            /* no descriptor for this page: nothing to lock */
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* busy: drop all locks, and reacquire in order */
            q_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* also collect the page(s) of every TB listed on this page */
        PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                q_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
562 
563 static void page_collection_unlock(struct page_collection *set)
564 {
565     /* entries are unlocked and freed via page_entry_destroy */
566     q_tree_destroy(set->tree);
567     g_free(set);
568 }
569 
570 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
571 static void tb_remove_all_1(int level, void **lp)
572 {
573     int i;
574 
575     if (*lp == NULL) {
576         return;
577     }
578     if (level == 0) {
579         PageDesc *pd = *lp;
580 
581         for (i = 0; i < V_L2_SIZE; ++i) {
582             page_lock(&pd[i]);
583             pd[i].first_tb = (uintptr_t)NULL;
584             page_unlock(&pd[i]);
585         }
586     } else {
587         void **pp = *lp;
588 
589         for (i = 0; i < V_L2_SIZE; ++i) {
590             tb_remove_all_1(level - 1, pp + i);
591         }
592     }
593 }
594 
595 static void tb_remove_all(void)
596 {
597     int i, l1_sz = v_l1_size;
598 
599     for (i = 0; i < l1_sz; i++) {
600         tb_remove_all_1(v_l2_levels, l1_map + i);
601     }
602 }
603 
604 /*
605  * Add the tb in the target page and protect it if necessary.
606  * Called with @p->lock held.
607  */
608 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
609                                unsigned int n)
610 {
611     bool page_already_protected;
612 
613     assert_page_locked(p);
614 
615     tb->page_next[n] = p->first_tb;
616     page_already_protected = p->first_tb != 0;
617     p->first_tb = (uintptr_t)tb | n;
618 
619     /*
620      * If some code is already present, then the pages are already
621      * protected. So we handle the case where only the first TB is
622      * allocated in a physical page.
623      */
624     if (!page_already_protected) {
625         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
626     }
627 }
628 
629 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
630 {
631     tb_page_add(p1, tb, 0);
632     if (unlikely(p2)) {
633         tb_page_add(p2, tb, 1);
634     }
635 }
636 
637 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
638 {
639     TranslationBlock *tb1;
640     uintptr_t *pprev;
641     PageForEachNext n1;
642 
643     assert_page_locked(pd);
644     pprev = &pd->first_tb;
645     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
646         if (tb1 == tb) {
647             *pprev = tb1->page_next[n1];
648             return;
649         }
650         pprev = &tb1->page_next[n1];
651     }
652     g_assert_not_reached();
653 }
654 
655 static void tb_remove(TranslationBlock *tb)
656 {
657     PageDesc *pd;
658 
659     pd = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
660     tb_page_remove(pd, tb);
661     if (unlikely(tb->page_addr[1] != -1)) {
662         pd = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
663         tb_page_remove(pd, tb);
664     }
665 }
666 
667 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
668                            PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
669 {
670     PageDesc *p1, *p2;
671     tb_page_addr_t page1;
672     tb_page_addr_t page2;
673 
674     assert_memory_lock();
675     g_assert(phys1 != -1);
676 
677     page1 = phys1 >> TARGET_PAGE_BITS;
678     page2 = phys2 >> TARGET_PAGE_BITS;
679 
680     p1 = page_find_alloc(page1, alloc);
681     if (ret_p1) {
682         *ret_p1 = p1;
683     }
684     if (likely(phys2 == -1)) {
685         page_lock(p1);
686         return;
687     } else if (page1 == page2) {
688         page_lock(p1);
689         if (ret_p2) {
690             *ret_p2 = p1;
691         }
692         return;
693     }
694     p2 = page_find_alloc(page2, alloc);
695     if (ret_p2) {
696         *ret_p2 = p2;
697     }
698     if (page1 < page2) {
699         page_lock(p1);
700         page_lock(p2);
701     } else {
702         page_lock(p2);
703         page_lock(p1);
704     }
705 }
706 
707 /* lock the page(s) of a TB in the correct acquisition order */
708 static void page_lock_tb(const TranslationBlock *tb)
709 {
710     page_lock_pair(NULL, tb_page_addr0(tb), NULL, tb_page_addr1(tb), false);
711 }
712 
713 static void page_unlock_tb(const TranslationBlock *tb)
714 {
715     PageDesc *p1 = page_find(tb_page_addr0(tb) >> TARGET_PAGE_BITS);
716 
717     page_unlock(p1);
718     if (unlikely(tb_page_addr1(tb) != -1)) {
719         PageDesc *p2 = page_find(tb_page_addr1(tb) >> TARGET_PAGE_BITS);
720 
721         if (p2 != p1) {
722             page_unlock(p2);
723         }
724     }
725 }
726 #endif /* CONFIG_USER_ONLY */
727 
728 /* flush all the translation blocks */
729 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
730 {
731     bool did_flush = false;
732 
733     mmap_lock();
734     /* If it is already been done on request of another CPU, just retry. */
735     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
736         goto done;
737     }
738     did_flush = true;
739 
740     CPU_FOREACH(cpu) {
741         tcg_flush_jmp_cache(cpu);
742     }
743 
744     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
745     tb_remove_all();
746 
747     tcg_region_reset_all();
748     /* XXX: flush processor icache at this point if cache flush is expensive */
749     qatomic_inc(&tb_ctx.tb_flush_count);
750 
751 done:
752     mmap_unlock();
753     if (did_flush) {
754         qemu_plugin_flush_cb();
755     }
756 }
757 
758 void tb_flush(CPUState *cpu)
759 {
760     if (tcg_enabled()) {
761         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
762 
763         if (cpu_in_serial_context(cpu)) {
764             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
765         } else {
766             async_safe_run_on_cpu(cpu, do_tb_flush,
767                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
768         }
769     }
770 }
771 
/*
 * remove @orig from its @n_orig-th jump list
 *
 * The destination TB is read from orig->jmp_dest[@n_orig]; the list that
 * is edited is the destination's jmp_list_head list, under the
 * destination's jmp_lock.  Nothing is done when there is no destination.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jump_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    /* pprev tracks the slot that points at the entry being examined */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
820 
821 /*
822  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
823  */
824 void tb_reset_jump(TranslationBlock *tb, int n)
825 {
826     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
827     tb_set_jmp_target(tb, n, addr);
828 }
829 
830 /* remove any jumps to the TB */
831 static inline void tb_jmp_unlink(TranslationBlock *dest)
832 {
833     TranslationBlock *tb;
834     int n;
835 
836     qemu_spin_lock(&dest->jmp_lock);
837 
838     TB_FOR_EACH_JMP(dest, tb, n) {
839         tb_reset_jump(tb, n);
840         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
841         /* No need to clear the list entry; setting the dest ptr is enough */
842     }
843     dest->jmp_list_head = (uintptr_t)NULL;
844 
845     qemu_spin_unlock(&dest->jmp_lock);
846 }
847 
848 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
849 {
850     CPUState *cpu;
851 
852     if (tb_cflags(tb) & CF_PCREL) {
853         /* A TB may be at any virtual address */
854         CPU_FOREACH(cpu) {
855             tcg_flush_jmp_cache(cpu);
856         }
857     } else {
858         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
859 
860         CPU_FOREACH(cpu) {
861             CPUJumpCache *jc = cpu->tb_jmp_cache;
862 
863             if (qatomic_read(&jc->array[h].tb) == tb) {
864                 qatomic_set(&jc->array[h].tb, NULL);
865             }
866         }
867     }
868 }
869 
/*
 * Invalidate @tb: mark it CF_INVALID, remove it from the hash table,
 * optionally from its page list(s), from the per-CPU jump caches and
 * from all jump lists.
 *
 * In user-mode, call with mmap_lock held.
 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 * locks held.
 */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    uint32_t h;
    tb_page_addr_t phys_pc;
    /* sampled before CF_INVALID is set; used for the hash below */
    uint32_t orig_cflags = tb_cflags(tb);

    assert_memory_lock();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb_page_addr0(tb);
    h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
                     tb->flags, orig_cflags, tb->trace_vcpu_dstate);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        /* the TB was not removed from the hash table: nothing more to do */
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        tb_remove(tb);
    }

    /* remove the TB from the per-CPU jump caches */
    tb_jmp_cache_inval_tb(tb);

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
                tb_ctx.tb_phys_invalidate_count + 1);
}
914 
915 static void tb_phys_invalidate__locked(TranslationBlock *tb)
916 {
917     qemu_thread_jit_write();
918     do_tb_phys_invalidate(tb, true);
919     qemu_thread_jit_execute();
920 }
921 
922 /*
923  * Invalidate one TB.
924  * Called with mmap_lock held in user-mode.
925  */
926 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
927 {
928     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
929         page_lock_tb(tb);
930         do_tb_phys_invalidate(tb, true);
931         page_unlock_tb(tb);
932     } else {
933         do_tb_phys_invalidate(tb, false);
934     }
935 }
936 
937 /*
938  * Add a new TB and link it to the physical page tables. phys_page2 is
939  * (-1) to indicate that only one page contains the TB.
940  *
941  * Called with mmap_lock held for user-mode emulation.
942  *
943  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
944  * Note that in !user-mode, another thread might have already added a TB
945  * for the same block of guest code that @tb corresponds to. In that case,
946  * the caller should discard the original @tb, and use instead the returned TB.
947  */
948 TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
949                                tb_page_addr_t phys_page2)
950 {
951     PageDesc *p;
952     PageDesc *p2 = NULL;
953     void *existing_tb = NULL;
954     uint32_t h;
955 
956     assert_memory_lock();
957     tcg_debug_assert(!(tb->cflags & CF_INVALID));
958 
959     /*
960      * Add the TB to the page list, acquiring first the pages's locks.
961      * We keep the locks held until after inserting the TB in the hash table,
962      * so that if the insertion fails we know for sure that the TBs are still
963      * in the page descriptors.
964      * Note that inserting into the hash table first isn't an option, since
965      * we can only insert TBs that are fully initialized.
966      */
967     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
968     tb_record(tb, p, p2);
969 
970     /* add in the hash table */
971     h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
972                      tb->flags, tb->cflags, tb->trace_vcpu_dstate);
973     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
974 
975     /* remove TB from the page(s) if we couldn't insert it */
976     if (unlikely(existing_tb)) {
977         tb_remove(tb);
978         tb = existing_tb;
979     }
980 
981     if (p2 && p2 != p) {
982         page_unlock(p2);
983     }
984     page_unlock(p);
985     return tb;
986 }
987 
988 #ifdef CONFIG_USER_ONLY
989 /*
990  * Invalidate all TBs which intersect with the target address range.
991  * Called with mmap_lock held for user-mode emulation.
992  * NOTE: this function must not be called while a TB is running.
993  */
994 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
995 {
996     TranslationBlock *tb;
997     PageForEachNext n;
998 
999     assert_memory_lock();
1000 
1001     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1002         tb_phys_invalidate__locked(tb);
1003     }
1004 }
1005 
1006 /*
1007  * Invalidate all TBs which intersect with the target address page @addr.
1008  * Called with mmap_lock held for user-mode emulation
1009  * NOTE: this function must not be called while a TB is running.
1010  */
1011 void tb_invalidate_phys_page(tb_page_addr_t addr)
1012 {
1013     tb_page_addr_t start, last;
1014 
1015     start = addr & TARGET_PAGE_MASK;
1016     last = addr | ~TARGET_PAGE_MASK;
1017     tb_invalidate_phys_range(start, last);
1018 }
1019 
1020 /*
1021  * Called with mmap_lock held. If pc is not 0 then it indicates the
1022  * host PC of the faulting store instruction that caused this invalidate.
1023  * Returns true if the caller needs to abort execution of the current
1024  * TB (because it was modified by this store and the guest CPU has
1025  * precise-SMC semantics).
1026  */
1027 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1028 {
1029     TranslationBlock *current_tb;
1030     bool current_tb_modified;
1031     TranslationBlock *tb;
1032     PageForEachNext n;
1033     tb_page_addr_t last;
1034 
1035     /*
1036      * Without precise smc semantics, or when outside of a TB,
1037      * we can skip to invalidate.
1038      */
1039 #ifndef TARGET_HAS_PRECISE_SMC
1040     pc = 0;
1041 #endif
1042     if (!pc) {
1043         tb_invalidate_phys_page(addr);
1044         return false;
1045     }
1046 
1047     assert_memory_lock();
1048     current_tb = tcg_tb_lookup(pc);
1049 
1050     last = addr | ~TARGET_PAGE_MASK;
1051     addr &= TARGET_PAGE_MASK;
1052     current_tb_modified = false;
1053 
1054     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1055         if (current_tb == tb &&
1056             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1057             /*
1058              * If we are modifying the current TB, we must stop its
1059              * execution. We could be more precise by checking that
1060              * the modification is after the current PC, but it would
1061              * require a specialized function to partially restore
1062              * the CPU state.
1063              */
1064             current_tb_modified = true;
1065             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1066         }
1067         tb_phys_invalidate__locked(tb);
1068     }
1069 
1070     if (current_tb_modified) {
1071         /* Force execution of one insn next time.  */
1072         CPUState *cpu = current_cpu;
1073         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1074         return true;
1075     }
1076     return false;
1077 }
1078 #else
1079 /*
1080  * @p must be non-NULL.
1081  * Call with all @pages locked.
1082  */
1083 static void
1084 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1085                                       PageDesc *p, tb_page_addr_t start,
1086                                       tb_page_addr_t last,
1087                                       uintptr_t retaddr)
1088 {
1089     TranslationBlock *tb;
1090     PageForEachNext n;
1091 #ifdef TARGET_HAS_PRECISE_SMC
1092     bool current_tb_modified = false;
1093     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1094 #endif /* TARGET_HAS_PRECISE_SMC */
1095 
1096     /*
1097      * We remove all the TBs in the range [start, last].
1098      * XXX: see if in some cases it could be faster to invalidate all the code
1099      */
1100     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1101         tb_page_addr_t tb_start, tb_last;
1102 
1103         /* NOTE: this is subtle as a TB may span two physical pages */
1104         tb_start = tb_page_addr0(tb);
1105         tb_last = tb_start + tb->size - 1;
1106         if (n == 0) {
1107             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1108         } else {
1109             tb_start = tb_page_addr1(tb);
1110             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1111         }
1112         if (!(tb_last < start || tb_start > last)) {
1113 #ifdef TARGET_HAS_PRECISE_SMC
1114             if (current_tb == tb &&
1115                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1116                 /*
1117                  * If we are modifying the current TB, we must stop
1118                  * its execution. We could be more precise by checking
1119                  * that the modification is after the current PC, but it
1120                  * would require a specialized function to partially
1121                  * restore the CPU state.
1122                  */
1123                 current_tb_modified = true;
1124                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1125             }
1126 #endif /* TARGET_HAS_PRECISE_SMC */
1127             tb_phys_invalidate__locked(tb);
1128         }
1129     }
1130 
1131     /* if no code remaining, no need to continue to use slow writes */
1132     if (!p->first_tb) {
1133         tlb_unprotect_code(start);
1134     }
1135 
1136 #ifdef TARGET_HAS_PRECISE_SMC
1137     if (current_tb_modified) {
1138         page_collection_unlock(pages);
1139         /* Force execution of one insn next time.  */
1140         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1141         mmap_unlock();
1142         cpu_loop_exit_noexc(current_cpu);
1143     }
1144 #endif
1145 }
1146 
1147 /*
1148  * Invalidate all TBs which intersect with the target physical
1149  * address page @addr.
1150  */
1151 void tb_invalidate_phys_page(tb_page_addr_t addr)
1152 {
1153     struct page_collection *pages;
1154     tb_page_addr_t start, last;
1155     PageDesc *p;
1156 
1157     p = page_find(addr >> TARGET_PAGE_BITS);
1158     if (p == NULL) {
1159         return;
1160     }
1161 
1162     start = addr & TARGET_PAGE_MASK;
1163     last = addr | ~TARGET_PAGE_MASK;
1164     pages = page_collection_lock(start, last);
1165     tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
1166     page_collection_unlock(pages);
1167 }
1168 
1169 /*
1170  * Invalidate all TBs which intersect with the target physical address range
1171  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1172  * 'is_cpu_write_access' should be true if called from a real cpu write
1173  * access: the virtual CPU will exit the current TB if code is modified inside
1174  * this TB.
1175  */
1176 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1177 {
1178     struct page_collection *pages;
1179     tb_page_addr_t index, index_last;
1180 
1181     pages = page_collection_lock(start, last);
1182 
1183     index_last = last >> TARGET_PAGE_BITS;
1184     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1185         PageDesc *pd = page_find(index);
1186         tb_page_addr_t bound;
1187 
1188         if (pd == NULL) {
1189             continue;
1190         }
1191         assert_page_locked(pd);
1192         bound = (index << TARGET_PAGE_BITS) | ~TARGET_PAGE_MASK;
1193         bound = MIN(bound, last);
1194         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1195     }
1196     page_collection_unlock(pages);
1197 }
1198 
1199 /*
1200  * Call with all @pages in the range [@start, @start + len[ locked.
1201  */
1202 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1203                                                  tb_page_addr_t start,
1204                                                  unsigned len, uintptr_t ra)
1205 {
1206     PageDesc *p;
1207 
1208     p = page_find(start >> TARGET_PAGE_BITS);
1209     if (!p) {
1210         return;
1211     }
1212 
1213     assert_page_locked(p);
1214     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1215 }
1216 
1217 /*
1218  * len must be <= 8 and start must be a multiple of len.
1219  * Called via softmmu_template.h when code areas are written to with
1220  * iothread mutex not held.
1221  */
1222 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1223                                    unsigned size,
1224                                    uintptr_t retaddr)
1225 {
1226     struct page_collection *pages;
1227 
1228     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1229     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1230     page_collection_unlock(pages);
1231 }
1232 
1233 #endif /* CONFIG_USER_ONLY */
1234