xref: /openbmc/qemu/accel/tcg/tb-maint.c (revision 197a137290103993b33f93c90e788ab4984f103a)
1 /*
 * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "exec/cputlb.h"
23 #include "exec/log.h"
24 #include "exec/exec-all.h"
25 #include "exec/tb-flush.h"
26 #include "exec/translate-all.h"
27 #include "sysemu/tcg.h"
28 #include "tcg/tcg.h"
29 #include "tb-hash.h"
30 #include "tb-context.h"
31 #include "internal.h"
32 
33 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Every link is a uintptr_t whose bit 0 is a tag and whose remaining bits
 * are a TranslationBlock pointer.  At each step the tag of the current link
 * is stored in @n and the untagged pointer in @tb; the following link is
 * then read from tb->field[n].  The loop ends when the untagged pointer is
 * NULL.
 *
 * @head is expanded twice and @tb / @n are assigned to, so pass plain
 * lvalues without side effects.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
39 
/*
 * Walk the tagged list that starts at @head_tb->jmp_list_head and is
 * chained through jmp_list_next[]; @tb receives each element and @n the
 * tag stored alongside its link.
 */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
42 
43 static bool tb_cmp(const void *ap, const void *bp)
44 {
45     const TranslationBlock *a = ap;
46     const TranslationBlock *b = bp;
47 
48     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
49             a->cs_base == b->cs_base &&
50             a->flags == b->flags &&
51             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
52             a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
53             tb_page_addr0(a) == tb_page_addr0(b) &&
54             tb_page_addr1(a) == tb_page_addr1(b));
55 }
56 
57 void tb_htable_init(void)
58 {
59     unsigned int mode = QHT_MODE_AUTO_RESIZE;
60 
61     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
62 }
63 
/*
 * Forward declaration: struct PageDesc is only given a body in the
 * !CONFIG_USER_ONLY part of this file; user-mode code only passes
 * pointers to it around.
 */
typedef struct PageDesc PageDesc;
65 
66 #ifdef CONFIG_USER_ONLY
67 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * The @pd argument is therefore ignored and the debug assertion only
 * checks have_mmap_lock().
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
72 
73 static inline void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
74                                   PageDesc **ret_p2, tb_page_addr_t phys2,
75                                   bool alloc)
76 {
77     *ret_p1 = NULL;
78     *ret_p2 = NULL;
79 }
80 
81 static inline void page_unlock(PageDesc *pd) { }
82 static inline void page_lock_tb(const TranslationBlock *tb) { }
83 static inline void page_unlock_tb(const TranslationBlock *tb) { }
84 
/*
 * For user-only, since we are protecting all of memory with a single lock,
 * and because the two pages of a TranslationBlock are always contiguous,
 * use a single data structure to record all TranslationBlocks.
 *
 * Nodes are the 'itree' member embedded in each TranslationBlock; they are
 * inserted by tb_record() and removed by tb_remove() / tb_remove_all().
 */
static IntervalTreeRoot tb_root;
91 
92 static void tb_remove_all(void)
93 {
94     assert_memory_lock();
95     memset(&tb_root, 0, sizeof(tb_root));
96 }
97 
98 /* Call with mmap_lock held. */
99 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
100 {
101     target_ulong addr;
102     int flags;
103 
104     assert_memory_lock();
105     tb->itree.last = tb->itree.start + tb->size - 1;
106 
107     /* translator_loop() must have made all TB pages non-writable */
108     addr = tb_page_addr0(tb);
109     flags = page_get_flags(addr);
110     assert(!(flags & PAGE_WRITE));
111 
112     addr = tb_page_addr1(tb);
113     if (addr != -1) {
114         flags = page_get_flags(addr);
115         assert(!(flags & PAGE_WRITE));
116     }
117 
118     interval_tree_insert(&tb->itree, &tb_root);
119 }
120 
121 /* Call with mmap_lock held. */
122 static void tb_remove(TranslationBlock *tb)
123 {
124     assert_memory_lock();
125     interval_tree_remove(&tb->itree, &tb_root);
126 }
127 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * User-mode iteration over every TB whose interval intersects
 * [start, end - 1].  @pagedesc is not used by this variant.  @N always
 * holds the element following @T and is fetched before the loop body runs;
 * presumably this is what lets the body remove @T from the tree -- confirm
 * against the interval-tree API.
 */
#define PAGE_FOR_EACH_TB(start, end, pagedesc, T, N)    \
    for (T = foreach_tb_first(start, end),              \
         N = foreach_tb_next(T, start, end);            \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, end))

/* Look-ahead cursor type for PAGE_FOR_EACH_TB: the next TB, or NULL. */
typedef TranslationBlock *PageForEachNext;
136 
137 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
138                                         tb_page_addr_t end)
139 {
140     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, end - 1);
141     return n ? container_of(n, TranslationBlock, itree) : NULL;
142 }
143 
144 static PageForEachNext foreach_tb_next(PageForEachNext tb,
145                                        tb_page_addr_t start,
146                                        tb_page_addr_t end)
147 {
148     IntervalTreeNode *n;
149 
150     if (tb) {
151         n = interval_tree_iter_next(&tb->itree, start, end - 1);
152         if (n) {
153             return container_of(n, TranslationBlock, itree);
154         }
155     }
156     return NULL;
157 }
158 
159 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 *
 * L1_MAP_ADDR_SPACE_BITS is the smaller of HOST_LONG_BITS and
 * TARGET_PHYS_ADDR_SPACE_BITS.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
168 
/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties, all computed once by page_table_config_init():
 *   v_l1_size   - number of level-1 entries in use (1 << v_l1_bits)
 *   v_l1_shift  - right shift applied to a page index to get its L1 slot
 *   v_l2_levels - number of intermediate levels walked by
 *                 page_find_alloc() before reaching the PageDesc arrays
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 *
 * NOTE(review): in page_table_config_init() these two limits bound
 * v_l1_bits, i.e. the width of the level-1 index, so "bottom level" above
 * looks like it describes the l1_map[] array rather than the PageDesc
 * arrays -- confirm the intended wording.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Level-1 table, sized for the largest permitted v_l1_bits. */
static void *l1_map[V_L1_MAX_SIZE];
189 
/*
 * Per-page descriptor (system mode only).  Instances live in arrays of
 * V_L2_SIZE elements allocated by page_find_alloc().
 */
struct PageDesc {
    /* taken/released through page_lock() / page_unlock() */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page; a tagged pointer walked with
     * PAGE_FOR_EACH_TB, 0 when the list is empty
     */
    uintptr_t first_tb;
};
195 
196 void page_table_config_init(void)
197 {
198     uint32_t v_l1_bits;
199 
200     assert(TARGET_PAGE_BITS);
201     /* The bits remaining after N lower levels of page tables.  */
202     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
203     if (v_l1_bits < V_L1_MIN_BITS) {
204         v_l1_bits += V_L2_BITS;
205     }
206 
207     v_l1_size = 1 << v_l1_bits;
208     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
209     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
210 
211     assert(v_l1_bits <= V_L1_MAX_BITS);
212     assert(v_l1_shift % V_L2_BITS == 0);
213     assert(v_l2_levels >= 0);
214 }
215 
216 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
217 {
218     PageDesc *pd;
219     void **lp;
220     int i;
221 
222     /* Level 1.  Always allocated.  */
223     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
224 
225     /* Level 2..N-1.  */
226     for (i = v_l2_levels; i > 0; i--) {
227         void **p = qatomic_rcu_read(lp);
228 
229         if (p == NULL) {
230             void *existing;
231 
232             if (!alloc) {
233                 return NULL;
234             }
235             p = g_new0(void *, V_L2_SIZE);
236             existing = qatomic_cmpxchg(lp, NULL, p);
237             if (unlikely(existing)) {
238                 g_free(p);
239                 p = existing;
240             }
241         }
242 
243         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
244     }
245 
246     pd = qatomic_rcu_read(lp);
247     if (pd == NULL) {
248         void *existing;
249 
250         if (!alloc) {
251             return NULL;
252         }
253 
254         pd = g_new0(PageDesc, V_L2_SIZE);
255         for (int i = 0; i < V_L2_SIZE; i++) {
256             qemu_spin_init(&pd[i].lock);
257         }
258 
259         existing = qatomic_cmpxchg(lp, NULL, pd);
260         if (unlikely(existing)) {
261             for (int i = 0; i < V_L2_SIZE; i++) {
262                 qemu_spin_destroy(&pd[i].lock);
263             }
264             g_free(pd);
265             pd = existing;
266         }
267     }
268 
269     return pd + (index & (V_L2_SIZE - 1));
270 }
271 
272 static inline PageDesc *page_find(tb_page_addr_t index)
273 {
274     return page_find_alloc(index, false);
275 }
276 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * @locked is set by page_entry_trylock() and do_page_entry_lock(), cleared
 * by page_entry_unlock(), and asserted to be set by page_entry_destroy().
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
295 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    /* keys point at the 'index' member of the stored page_entry */
    GTree *tree;
    /* NULL until the first entry has been inserted */
    struct page_entry *max;
};
320 
/*
 * System-mode iteration over the TBs listed in a PageDesc.
 *
 * The cursor @n is the tag (0 or 1) taken from the current link; it selects
 * which page_next[] slot of @tb continues the list.  The @start and @end
 * arguments are not used by this variant.
 */
typedef int PageForEachNext;
#define PAGE_FOR_EACH_TB(start, end, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
324 
325 #ifdef CONFIG_DEBUG_TCG
326 
327 static __thread GHashTable *ht_pages_locked_debug;
328 
329 static void ht_pages_locked_debug_init(void)
330 {
331     if (ht_pages_locked_debug) {
332         return;
333     }
334     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
335 }
336 
337 static bool page_is_locked(const PageDesc *pd)
338 {
339     PageDesc *found;
340 
341     ht_pages_locked_debug_init();
342     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
343     return !!found;
344 }
345 
346 static void page_lock__debug(PageDesc *pd)
347 {
348     ht_pages_locked_debug_init();
349     g_assert(!page_is_locked(pd));
350     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
351 }
352 
353 static void page_unlock__debug(const PageDesc *pd)
354 {
355     bool removed;
356 
357     ht_pages_locked_debug_init();
358     g_assert(page_is_locked(pd));
359     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
360     g_assert(removed);
361 }
362 
363 static void do_assert_page_locked(const PageDesc *pd,
364                                   const char *file, int line)
365 {
366     if (unlikely(!page_is_locked(pd))) {
367         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
368                      pd, file, line);
369         abort();
370     }
371 }
372 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
373 
374 void assert_no_pages_locked(void)
375 {
376     ht_pages_locked_debug_init();
377     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
378 }
379 
380 #else /* !CONFIG_DEBUG_TCG */
381 
382 static inline void page_lock__debug(const PageDesc *pd) { }
383 static inline void page_unlock__debug(const PageDesc *pd) { }
384 static inline void assert_page_locked(const PageDesc *pd) { }
385 
386 #endif /* CONFIG_DEBUG_TCG */
387 
388 static void page_lock(PageDesc *pd)
389 {
390     page_lock__debug(pd);
391     qemu_spin_lock(&pd->lock);
392 }
393 
394 static void page_unlock(PageDesc *pd)
395 {
396     qemu_spin_unlock(&pd->lock);
397     page_unlock__debug(pd);
398 }
399 
400 static inline struct page_entry *
401 page_entry_new(PageDesc *pd, tb_page_addr_t index)
402 {
403     struct page_entry *pe = g_malloc(sizeof(*pe));
404 
405     pe->index = index;
406     pe->pd = pd;
407     pe->locked = false;
408     return pe;
409 }
410 
411 static void page_entry_destroy(gpointer p)
412 {
413     struct page_entry *pe = p;
414 
415     g_assert(pe->locked);
416     page_unlock(pe->pd);
417     g_free(pe);
418 }
419 
420 /* returns false on success */
421 static bool page_entry_trylock(struct page_entry *pe)
422 {
423     bool busy;
424 
425     busy = qemu_spin_trylock(&pe->pd->lock);
426     if (!busy) {
427         g_assert(!pe->locked);
428         pe->locked = true;
429         page_lock__debug(pe->pd);
430     }
431     return busy;
432 }
433 
434 static void do_page_entry_lock(struct page_entry *pe)
435 {
436     page_lock(pe->pd);
437     g_assert(!pe->locked);
438     pe->locked = true;
439 }
440 
441 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
442 {
443     struct page_entry *pe = value;
444 
445     do_page_entry_lock(pe);
446     return FALSE;
447 }
448 
449 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
450 {
451     struct page_entry *pe = value;
452 
453     if (pe->locked) {
454         pe->locked = false;
455         page_unlock(pe->pd);
456     }
457     return FALSE;
458 }
459 
460 /*
461  * Trylock a page, and if successful, add the page to a collection.
462  * Returns true ("busy") if the page could not be locked; false otherwise.
463  */
464 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
465 {
466     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
467     struct page_entry *pe;
468     PageDesc *pd;
469 
470     pe = g_tree_lookup(set->tree, &index);
471     if (pe) {
472         return false;
473     }
474 
475     pd = page_find(index);
476     if (pd == NULL) {
477         return false;
478     }
479 
480     pe = page_entry_new(pd, index);
481     g_tree_insert(set->tree, &pe->index, pe);
482 
483     /*
484      * If this is either (1) the first insertion or (2) a page whose index
485      * is higher than any other so far, just lock the page and move on.
486      */
487     if (set->max == NULL || pe->index > set->max->index) {
488         set->max = pe;
489         do_page_entry_lock(pe);
490         return false;
491     }
492     /*
493      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
494      * locks in order.
495      */
496     return page_entry_trylock(pe);
497 }
498 
499 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
500 {
501     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
502     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
503 
504     if (a == b) {
505         return 0;
506     } else if (a < b) {
507         return -1;
508     }
509     return 1;
510 }
511 
512 /*
513  * Lock a range of pages ([@start,@end[) as well as the pages of all
514  * intersecting TBs.
515  * Locking order: acquire locks in ascending order of page index.
516  */
517 static struct page_collection *page_collection_lock(tb_page_addr_t start,
518                                                     tb_page_addr_t end)
519 {
520     struct page_collection *set = g_malloc(sizeof(*set));
521     tb_page_addr_t index;
522     PageDesc *pd;
523 
524     start >>= TARGET_PAGE_BITS;
525     end   >>= TARGET_PAGE_BITS;
526     g_assert(start <= end);
527 
528     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
529                                 page_entry_destroy);
530     set->max = NULL;
531     assert_no_pages_locked();
532 
533  retry:
534     g_tree_foreach(set->tree, page_entry_lock, NULL);
535 
536     for (index = start; index <= end; index++) {
537         TranslationBlock *tb;
538         PageForEachNext n;
539 
540         pd = page_find(index);
541         if (pd == NULL) {
542             continue;
543         }
544         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
545             g_tree_foreach(set->tree, page_entry_unlock, NULL);
546             goto retry;
547         }
548         assert_page_locked(pd);
549         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
550             if (page_trylock_add(set, tb_page_addr0(tb)) ||
551                 (tb_page_addr1(tb) != -1 &&
552                  page_trylock_add(set, tb_page_addr1(tb)))) {
553                 /* drop all locks, and reacquire in order */
554                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
555                 goto retry;
556             }
557         }
558     }
559     return set;
560 }
561 
562 static void page_collection_unlock(struct page_collection *set)
563 {
564     /* entries are unlocked and freed via page_entry_destroy */
565     g_tree_destroy(set->tree);
566     g_free(set);
567 }
568 
569 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
570 static void tb_remove_all_1(int level, void **lp)
571 {
572     int i;
573 
574     if (*lp == NULL) {
575         return;
576     }
577     if (level == 0) {
578         PageDesc *pd = *lp;
579 
580         for (i = 0; i < V_L2_SIZE; ++i) {
581             page_lock(&pd[i]);
582             pd[i].first_tb = (uintptr_t)NULL;
583             page_unlock(&pd[i]);
584         }
585     } else {
586         void **pp = *lp;
587 
588         for (i = 0; i < V_L2_SIZE; ++i) {
589             tb_remove_all_1(level - 1, pp + i);
590         }
591     }
592 }
593 
594 static void tb_remove_all(void)
595 {
596     int i, l1_sz = v_l1_size;
597 
598     for (i = 0; i < l1_sz; i++) {
599         tb_remove_all_1(v_l2_levels, l1_map + i);
600     }
601 }
602 
603 /*
604  * Add the tb in the target page and protect it if necessary.
605  * Called with @p->lock held.
606  */
607 static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
608                                unsigned int n)
609 {
610     bool page_already_protected;
611 
612     assert_page_locked(p);
613 
614     tb->page_next[n] = p->first_tb;
615     page_already_protected = p->first_tb != 0;
616     p->first_tb = (uintptr_t)tb | n;
617 
618     /*
619      * If some code is already present, then the pages are already
620      * protected. So we handle the case where only the first TB is
621      * allocated in a physical page.
622      */
623     if (!page_already_protected) {
624         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
625     }
626 }
627 
628 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
629 {
630     tb_page_add(p1, tb, 0);
631     if (unlikely(p2)) {
632         tb_page_add(p2, tb, 1);
633     }
634 }
635 
636 static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
637 {
638     TranslationBlock *tb1;
639     uintptr_t *pprev;
640     PageForEachNext n1;
641 
642     assert_page_locked(pd);
643     pprev = &pd->first_tb;
644     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
645         if (tb1 == tb) {
646             *pprev = tb1->page_next[n1];
647             return;
648         }
649         pprev = &tb1->page_next[n1];
650     }
651     g_assert_not_reached();
652 }
653 
654 static void tb_remove(TranslationBlock *tb)
655 {
656     PageDesc *pd;
657 
658     pd = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
659     tb_page_remove(pd, tb);
660     if (unlikely(tb->page_addr[1] != -1)) {
661         pd = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
662         tb_page_remove(pd, tb);
663     }
664 }
665 
666 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
667                            PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
668 {
669     PageDesc *p1, *p2;
670     tb_page_addr_t page1;
671     tb_page_addr_t page2;
672 
673     assert_memory_lock();
674     g_assert(phys1 != -1);
675 
676     page1 = phys1 >> TARGET_PAGE_BITS;
677     page2 = phys2 >> TARGET_PAGE_BITS;
678 
679     p1 = page_find_alloc(page1, alloc);
680     if (ret_p1) {
681         *ret_p1 = p1;
682     }
683     if (likely(phys2 == -1)) {
684         page_lock(p1);
685         return;
686     } else if (page1 == page2) {
687         page_lock(p1);
688         if (ret_p2) {
689             *ret_p2 = p1;
690         }
691         return;
692     }
693     p2 = page_find_alloc(page2, alloc);
694     if (ret_p2) {
695         *ret_p2 = p2;
696     }
697     if (page1 < page2) {
698         page_lock(p1);
699         page_lock(p2);
700     } else {
701         page_lock(p2);
702         page_lock(p1);
703     }
704 }
705 
706 /* lock the page(s) of a TB in the correct acquisition order */
707 static void page_lock_tb(const TranslationBlock *tb)
708 {
709     page_lock_pair(NULL, tb_page_addr0(tb), NULL, tb_page_addr1(tb), false);
710 }
711 
712 static void page_unlock_tb(const TranslationBlock *tb)
713 {
714     PageDesc *p1 = page_find(tb_page_addr0(tb) >> TARGET_PAGE_BITS);
715 
716     page_unlock(p1);
717     if (unlikely(tb_page_addr1(tb) != -1)) {
718         PageDesc *p2 = page_find(tb_page_addr1(tb) >> TARGET_PAGE_BITS);
719 
720         if (p2 != p1) {
721             page_unlock(p2);
722         }
723     }
724 }
725 #endif /* CONFIG_USER_ONLY */
726 
727 /* flush all the translation blocks */
728 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
729 {
730     bool did_flush = false;
731 
732     mmap_lock();
733     /* If it is already been done on request of another CPU, just retry. */
734     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
735         goto done;
736     }
737     did_flush = true;
738 
739     CPU_FOREACH(cpu) {
740         tcg_flush_jmp_cache(cpu);
741     }
742 
743     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
744     tb_remove_all();
745 
746     tcg_region_reset_all();
747     /* XXX: flush processor icache at this point if cache flush is expensive */
748     qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
749 
750 done:
751     mmap_unlock();
752     if (did_flush) {
753         qemu_plugin_flush_cb();
754     }
755 }
756 
757 void tb_flush(CPUState *cpu)
758 {
759     if (tcg_enabled()) {
760         unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
761 
762         if (cpu_in_exclusive_context(cpu)) {
763             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
764         } else {
765             async_safe_run_on_cpu(cpu, do_tb_flush,
766                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
767         }
768     }
769 }
770 
771 /* remove @orig from its @n_orig-th jump list */
772 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
773 {
774     uintptr_t ptr, ptr_locked;
775     TranslationBlock *dest;
776     TranslationBlock *tb;
777     uintptr_t *pprev;
778     int n;
779 
780     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
781     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
782     dest = (TranslationBlock *)(ptr & ~1);
783     if (dest == NULL) {
784         return;
785     }
786 
787     qemu_spin_lock(&dest->jmp_lock);
788     /*
789      * While acquiring the lock, the jump might have been removed if the
790      * destination TB was invalidated; check again.
791      */
792     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
793     if (ptr_locked != ptr) {
794         qemu_spin_unlock(&dest->jmp_lock);
795         /*
796          * The only possibility is that the jump was unlinked via
797          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
798          * because we set the LSB above.
799          */
800         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
801         return;
802     }
803     /*
804      * We first acquired the lock, and since the destination pointer matches,
805      * we know for sure that @orig is in the jmp list.
806      */
807     pprev = &dest->jmp_list_head;
808     TB_FOR_EACH_JMP(dest, tb, n) {
809         if (tb == orig && n == n_orig) {
810             *pprev = tb->jmp_list_next[n];
811             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
812             qemu_spin_unlock(&dest->jmp_lock);
813             return;
814         }
815         pprev = &tb->jmp_list_next[n];
816     }
817     g_assert_not_reached();
818 }
819 
820 /*
821  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
822  */
823 void tb_reset_jump(TranslationBlock *tb, int n)
824 {
825     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
826     tb_set_jmp_target(tb, n, addr);
827 }
828 
829 /* remove any jumps to the TB */
830 static inline void tb_jmp_unlink(TranslationBlock *dest)
831 {
832     TranslationBlock *tb;
833     int n;
834 
835     qemu_spin_lock(&dest->jmp_lock);
836 
837     TB_FOR_EACH_JMP(dest, tb, n) {
838         tb_reset_jump(tb, n);
839         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
840         /* No need to clear the list entry; setting the dest ptr is enough */
841     }
842     dest->jmp_list_head = (uintptr_t)NULL;
843 
844     qemu_spin_unlock(&dest->jmp_lock);
845 }
846 
847 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
848 {
849     CPUState *cpu;
850 
851     if (tb_cflags(tb) & CF_PCREL) {
852         /* A TB may be at any virtual address */
853         CPU_FOREACH(cpu) {
854             tcg_flush_jmp_cache(cpu);
855         }
856     } else {
857         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
858 
859         CPU_FOREACH(cpu) {
860             CPUJumpCache *jc = cpu->tb_jmp_cache;
861 
862             if (qatomic_read(&jc->array[h].tb) == tb) {
863                 qatomic_set(&jc->array[h].tb, NULL);
864             }
865         }
866     }
867 }
868 
869 /*
870  * In user-mode, call with mmap_lock held.
871  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
872  * locks held.
873  */
874 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
875 {
876     uint32_t h;
877     tb_page_addr_t phys_pc;
878     uint32_t orig_cflags = tb_cflags(tb);
879 
880     assert_memory_lock();
881 
882     /* make sure no further incoming jumps will be chained to this TB */
883     qemu_spin_lock(&tb->jmp_lock);
884     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
885     qemu_spin_unlock(&tb->jmp_lock);
886 
887     /* remove the TB from the hash list */
888     phys_pc = tb_page_addr0(tb);
889     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
890                      tb->flags, orig_cflags, tb->trace_vcpu_dstate);
891     if (!qht_remove(&tb_ctx.htable, tb, h)) {
892         return;
893     }
894 
895     /* remove the TB from the page list */
896     if (rm_from_page_list) {
897         tb_remove(tb);
898     }
899 
900     /* remove the TB from the hash list */
901     tb_jmp_cache_inval_tb(tb);
902 
903     /* suppress this TB from the two jump lists */
904     tb_remove_from_jmp_list(tb, 0);
905     tb_remove_from_jmp_list(tb, 1);
906 
907     /* suppress any remaining jumps to this TB */
908     tb_jmp_unlink(tb);
909 
910     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
911                 tb_ctx.tb_phys_invalidate_count + 1);
912 }
913 
914 static void tb_phys_invalidate__locked(TranslationBlock *tb)
915 {
916     qemu_thread_jit_write();
917     do_tb_phys_invalidate(tb, true);
918     qemu_thread_jit_execute();
919 }
920 
921 /*
922  * Invalidate one TB.
923  * Called with mmap_lock held in user-mode.
924  */
925 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
926 {
927     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
928         page_lock_tb(tb);
929         do_tb_phys_invalidate(tb, true);
930         page_unlock_tb(tb);
931     } else {
932         do_tb_phys_invalidate(tb, false);
933     }
934 }
935 
936 /*
937  * Add a new TB and link it to the physical page tables. phys_page2 is
938  * (-1) to indicate that only one page contains the TB.
939  *
940  * Called with mmap_lock held for user-mode emulation.
941  *
942  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
943  * Note that in !user-mode, another thread might have already added a TB
944  * for the same block of guest code that @tb corresponds to. In that case,
945  * the caller should discard the original @tb, and use instead the returned TB.
946  */
947 TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
948                                tb_page_addr_t phys_page2)
949 {
950     PageDesc *p;
951     PageDesc *p2 = NULL;
952     void *existing_tb = NULL;
953     uint32_t h;
954 
955     assert_memory_lock();
956     tcg_debug_assert(!(tb->cflags & CF_INVALID));
957 
958     /*
959      * Add the TB to the page list, acquiring first the pages's locks.
960      * We keep the locks held until after inserting the TB in the hash table,
961      * so that if the insertion fails we know for sure that the TBs are still
962      * in the page descriptors.
963      * Note that inserting into the hash table first isn't an option, since
964      * we can only insert TBs that are fully initialized.
965      */
966     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
967     tb_record(tb, p, p2);
968 
969     /* add in the hash table */
970     h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
971                      tb->flags, tb->cflags, tb->trace_vcpu_dstate);
972     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
973 
974     /* remove TB from the page(s) if we couldn't insert it */
975     if (unlikely(existing_tb)) {
976         tb_remove(tb);
977         tb = existing_tb;
978     }
979 
980     if (p2 && p2 != p) {
981         page_unlock(p2);
982     }
983     page_unlock(p);
984     return tb;
985 }
986 
987 #ifdef CONFIG_USER_ONLY
988 /*
989  * Invalidate all TBs which intersect with the target address range.
990  * Called with mmap_lock held for user-mode emulation.
991  * NOTE: this function must not be called while a TB is running.
992  */
993 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
994 {
995     TranslationBlock *tb;
996     PageForEachNext n;
997 
998     assert_memory_lock();
999 
1000     PAGE_FOR_EACH_TB(start, end, unused, tb, n) {
1001         tb_phys_invalidate__locked(tb);
1002     }
1003 }
1004 
1005 /*
1006  * Invalidate all TBs which intersect with the target address page @addr.
1007  * Called with mmap_lock held for user-mode emulation
1008  * NOTE: this function must not be called while a TB is running.
1009  */
1010 void tb_invalidate_phys_page(tb_page_addr_t addr)
1011 {
1012     tb_page_addr_t start, end;
1013 
1014     start = addr & TARGET_PAGE_MASK;
1015     end = start + TARGET_PAGE_SIZE;
1016     tb_invalidate_phys_range(start, end);
1017 }
1018 
1019 /*
1020  * Called with mmap_lock held. If pc is not 0 then it indicates the
1021  * host PC of the faulting store instruction that caused this invalidate.
1022  * Returns true if the caller needs to abort execution of the current
1023  * TB (because it was modified by this store and the guest CPU has
1024  * precise-SMC semantics).
1025  */
1026 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1027 {
1028     TranslationBlock *current_tb;
1029     bool current_tb_modified;
1030     TranslationBlock *tb;
1031     PageForEachNext n;
1032 
1033     /*
1034      * Without precise smc semantics, or when outside of a TB,
1035      * we can skip to invalidate.
1036      */
1037 #ifndef TARGET_HAS_PRECISE_SMC
1038     pc = 0;
1039 #endif
1040     if (!pc) {
1041         tb_invalidate_phys_page(addr);
1042         return false;
1043     }
1044 
1045     assert_memory_lock();
1046     current_tb = tcg_tb_lookup(pc);
1047 
1048     addr &= TARGET_PAGE_MASK;
1049     current_tb_modified = false;
1050 
1051     PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) {
1052         if (current_tb == tb &&
1053             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1054             /*
1055              * If we are modifying the current TB, we must stop its
1056              * execution. We could be more precise by checking that
1057              * the modification is after the current PC, but it would
1058              * require a specialized function to partially restore
1059              * the CPU state.
1060              */
1061             current_tb_modified = true;
1062             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1063         }
1064         tb_phys_invalidate__locked(tb);
1065     }
1066 
1067     if (current_tb_modified) {
1068         /* Force execution of one insn next time.  */
1069         CPUState *cpu = current_cpu;
1070         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1071         return true;
1072     }
1073     return false;
1074 }
1075 #else
1076 /*
1077  * @p must be non-NULL.
1078  * Call with all @pages locked.
1079  */
1080 static void
1081 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1082                                       PageDesc *p, tb_page_addr_t start,
1083                                       tb_page_addr_t end,
1084                                       uintptr_t retaddr)
1085 {
1086     TranslationBlock *tb;
1087     tb_page_addr_t tb_start, tb_end;
1088     PageForEachNext n;
1089 #ifdef TARGET_HAS_PRECISE_SMC
1090     bool current_tb_modified = false;
1091     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1092 #endif /* TARGET_HAS_PRECISE_SMC */
1093 
1094     /*
1095      * We remove all the TBs in the range [start, end[.
1096      * XXX: see if in some cases it could be faster to invalidate all the code
1097      */
1098     PAGE_FOR_EACH_TB(start, end, p, tb, n) {
1099         /* NOTE: this is subtle as a TB may span two physical pages */
1100         if (n == 0) {
1101             /* NOTE: tb_end may be after the end of the page, but
1102                it is not a problem */
1103             tb_start = tb_page_addr0(tb);
1104             tb_end = tb_start + tb->size;
1105         } else {
1106             tb_start = tb_page_addr1(tb);
1107             tb_end = tb_start + ((tb_page_addr0(tb) + tb->size)
1108                                  & ~TARGET_PAGE_MASK);
1109         }
1110         if (!(tb_end <= start || tb_start >= end)) {
1111 #ifdef TARGET_HAS_PRECISE_SMC
1112             if (current_tb == tb &&
1113                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1114                 /*
1115                  * If we are modifying the current TB, we must stop
1116                  * its execution. We could be more precise by checking
1117                  * that the modification is after the current PC, but it
1118                  * would require a specialized function to partially
1119                  * restore the CPU state.
1120                  */
1121                 current_tb_modified = true;
1122                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1123             }
1124 #endif /* TARGET_HAS_PRECISE_SMC */
1125             tb_phys_invalidate__locked(tb);
1126         }
1127     }
1128 
1129     /* if no code remaining, no need to continue to use slow writes */
1130     if (!p->first_tb) {
1131         tlb_unprotect_code(start);
1132     }
1133 
1134 #ifdef TARGET_HAS_PRECISE_SMC
1135     if (current_tb_modified) {
1136         page_collection_unlock(pages);
1137         /* Force execution of one insn next time.  */
1138         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1139         mmap_unlock();
1140         cpu_loop_exit_noexc(current_cpu);
1141     }
1142 #endif
1143 }
1144 
1145 /*
1146  * Invalidate all TBs which intersect with the target physical
1147  * address page @addr.
1148  */
1149 void tb_invalidate_phys_page(tb_page_addr_t addr)
1150 {
1151     struct page_collection *pages;
1152     tb_page_addr_t start, end;
1153     PageDesc *p;
1154 
1155     p = page_find(addr >> TARGET_PAGE_BITS);
1156     if (p == NULL) {
1157         return;
1158     }
1159 
1160     start = addr & TARGET_PAGE_MASK;
1161     end = start + TARGET_PAGE_SIZE;
1162     pages = page_collection_lock(start, end);
1163     tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1164     page_collection_unlock(pages);
1165 }
1166 
1167 /*
1168  * Invalidate all TBs which intersect with the target physical address range
1169  * [start;end[. NOTE: start and end may refer to *different* physical pages.
1170  * 'is_cpu_write_access' should be true if called from a real cpu write
1171  * access: the virtual CPU will exit the current TB if code is modified inside
1172  * this TB.
1173  */
1174 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
1175 {
1176     struct page_collection *pages;
1177     tb_page_addr_t next;
1178 
1179     pages = page_collection_lock(start, end);
1180     for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1181          start < end;
1182          start = next, next += TARGET_PAGE_SIZE) {
1183         PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1184         tb_page_addr_t bound = MIN(next, end);
1185 
1186         if (pd == NULL) {
1187             continue;
1188         }
1189         assert_page_locked(pd);
1190         tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1191     }
1192     page_collection_unlock(pages);
1193 }
1194 
1195 /*
1196  * Call with all @pages in the range [@start, @start + len[ locked.
1197  */
1198 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1199                                                  tb_page_addr_t start,
1200                                                  unsigned len, uintptr_t ra)
1201 {
1202     PageDesc *p;
1203 
1204     p = page_find(start >> TARGET_PAGE_BITS);
1205     if (!p) {
1206         return;
1207     }
1208 
1209     assert_page_locked(p);
1210     tb_invalidate_phys_page_range__locked(pages, p, start, start + len, ra);
1211 }
1212 
1213 /*
1214  * len must be <= 8 and start must be a multiple of len.
1215  * Called via softmmu_template.h when code areas are written to with
1216  * iothread mutex not held.
1217  */
1218 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1219                                    unsigned size,
1220                                    uintptr_t retaddr)
1221 {
1222     struct page_collection *pages;
1223 
1224     pages = page_collection_lock(ram_addr, ram_addr + size);
1225     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1226     page_collection_unlock(pages);
1227 }
1228 
1229 #endif /* CONFIG_USER_ONLY */
1230