xref: /openbmc/qemu/accel/tcg/user-exec.c (revision e4751d34)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translate-all.h"
28 #include "exec/helper-proto.h"
29 #include "qemu/atomic128.h"
30 #include "trace/trace-root.h"
31 #include "tcg/tcg-ldst.h"
32 #include "internal-common.h"
33 #include "internal-target.h"
34 #include "user-retaddr.h"
35 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault is
 * being classified:
 *   0      - no helper access in flight (fault came from generated code),
 *   1      - a host read performed for translation ("execution"),
 *   other  - the host return address of the helper doing a memory access.
 */
__thread uintptr_t helper_retaddr;
37 
38 //#define DEBUG_SIGNAL
39 
40 /*
41  * Adjust the pc to pass to cpu_restore_state; return the memop type.
42  */
43 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
44 {
45     switch (helper_retaddr) {
46     default:
47         /*
48          * Fault during host memory operation within a helper function.
49          * The helper's host return address, saved here, gives us a
50          * pointer into the generated code that will unwind to the
51          * correct guest pc.
52          */
53         *pc = helper_retaddr;
54         break;
55 
56     case 0:
57         /*
58          * Fault during host memory operation within generated code.
59          * (Or, a unrelated bug within qemu, but we can't tell from here).
60          *
61          * We take the host pc from the signal frame.  However, we cannot
62          * use that value directly.  Within cpu_restore_state_from_tb, we
63          * assume PC comes from GETPC(), as used by the helper functions,
64          * so we adjust the address by -GETPC_ADJ to form an address that
65          * is within the call insn, so that the address does not accidentally
66          * match the beginning of the next guest insn.  However, when the
67          * pc comes from the signal frame it points to the actual faulting
68          * host memory insn and not the return from a call insn.
69          *
70          * Therefore, adjust to compensate for what will be done later
71          * by cpu_restore_state_from_tb.
72          */
73         *pc += GETPC_ADJ;
74         break;
75 
76     case 1:
77         /*
78          * Fault during host read for translation, or loosely, "execution".
79          *
80          * The guest pc is already pointing to the start of the TB for which
81          * code is being generated.  If the guest translator manages the
82          * page crossings correctly, this is exactly the correct address
83          * (and if the translator doesn't handle page boundaries correctly
84          * there's little we can do about that here).  Therefore, do not
85          * trigger the unwinder.
86          */
87         *pc = 0;
88         return MMU_INST_FETCH;
89     }
90 
91     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
92 }
93 
94 /**
95  * handle_sigsegv_accerr_write:
96  * @cpu: the cpu context
97  * @old_set: the sigset_t from the signal ucontext_t
98  * @host_pc: the host pc, adjusted for the signal
99  * @guest_addr: the guest address of the fault
100  *
101  * Return true if the write fault has been handled, and should be re-tried.
102  *
103  * Note that it is important that we don't call page_unprotect() unless
104  * this is really a "write to nonwritable page" fault, because
105  * page_unprotect() assumes that if it is called for an access to
106  * a page that's writable this means we had two threads racing and
107  * another thread got there first and already made the page writable;
108  * so we will retry the access. If we were to call page_unprotect()
109  * for some other kind of fault that should really be passed to the
110  * guest, we'd end up in an infinite loop of retrying the faulting access.
111  */
112 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
113                                  uintptr_t host_pc, abi_ptr guest_addr)
114 {
115     switch (page_unprotect(guest_addr, host_pc)) {
116     case 0:
117         /*
118          * Fault not caused by a page marked unwritable to protect
119          * cached translations, must be the guest binary's problem.
120          */
121         return false;
122     case 1:
123         /*
124          * Fault caused by protection of cached translation; TBs
125          * invalidated, so resume execution.
126          */
127         return true;
128     case 2:
129         /*
130          * Fault caused by protection of cached translation, and the
131          * currently executing TB was modified and must be exited immediately.
132          */
133         sigprocmask(SIG_SETMASK, old_set, NULL);
134         cpu_loop_exit_noexc(cpu);
135         /* NORETURN */
136     default:
137         g_assert_not_reached();
138     }
139 }
140 
/*
 * One contiguous guest address range sharing a single set of page flags.
 * Nodes are linked into pageflags_root through @itree, keyed by the
 * inclusive range [itree.start, itree.last].
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is dropped */
    IntervalTreeNode itree;     /* linkage and [start, last] bounds */
    int flags;                  /* PAGE_* bits that apply to the whole range */
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
148 
149 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
150 {
151     IntervalTreeNode *n;
152 
153     n = interval_tree_iter_first(&pageflags_root, start, last);
154     return n ? container_of(n, PageFlagsNode, itree) : NULL;
155 }
156 
157 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
158                                      target_ulong last)
159 {
160     IntervalTreeNode *n;
161 
162     n = interval_tree_iter_next(&p->itree, start, last);
163     return n ? container_of(n, PageFlagsNode, itree) : NULL;
164 }
165 
166 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
167 {
168     IntervalTreeNode *n;
169     int rc = 0;
170 
171     mmap_lock();
172     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
173          n != NULL;
174          n = interval_tree_iter_next(n, 0, -1)) {
175         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
176 
177         rc = fn(priv, n->start, n->last + 1, p->flags);
178         if (rc != 0) {
179             break;
180         }
181     }
182     mmap_unlock();
183 
184     return rc;
185 }
186 
187 static int dump_region(void *priv, target_ulong start,
188                        target_ulong end, unsigned long prot)
189 {
190     FILE *f = (FILE *)priv;
191 
192     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
193             start, end, end - start,
194             ((prot & PAGE_READ) ? 'r' : '-'),
195             ((prot & PAGE_WRITE) ? 'w' : '-'),
196             ((prot & PAGE_EXEC) ? 'x' : '-'));
197     return 0;
198 }
199 
200 /* dump memory mappings */
201 void page_dump(FILE *f)
202 {
203     const int length = sizeof(target_ulong) * 2;
204 
205     fprintf(f, "%-*s %-*s %-*s %s\n",
206             length, "start", length, "end", length, "size", "prot");
207     walk_memory_regions(f, dump_region);
208 }
209 
210 int page_get_flags(target_ulong address)
211 {
212     PageFlagsNode *p = pageflags_find(address, address);
213 
214     /*
215      * See util/interval-tree.c re lockless lookups: no false positives but
216      * there are false negatives.  If we find nothing, retry with the mmap
217      * lock acquired.
218      */
219     if (p) {
220         return p->flags;
221     }
222     if (have_mmap_lock()) {
223         return 0;
224     }
225 
226     mmap_lock();
227     p = pageflags_find(address, address);
228     mmap_unlock();
229     return p ? p->flags : 0;
230 }
231 
232 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
233 static void pageflags_create(target_ulong start, target_ulong last, int flags)
234 {
235     PageFlagsNode *p = g_new(PageFlagsNode, 1);
236 
237     p->itree.start = start;
238     p->itree.last = last;
239     p->flags = flags;
240     interval_tree_insert(&p->itree, &pageflags_root);
241 }
242 
243 /* A subroutine of page_set_flags: remove everything in [start,last]. */
244 static bool pageflags_unset(target_ulong start, target_ulong last)
245 {
246     bool inval_tb = false;
247 
248     while (true) {
249         PageFlagsNode *p = pageflags_find(start, last);
250         target_ulong p_last;
251 
252         if (!p) {
253             break;
254         }
255 
256         if (p->flags & PAGE_EXEC) {
257             inval_tb = true;
258         }
259 
260         interval_tree_remove(&p->itree, &pageflags_root);
261         p_last = p->itree.last;
262 
263         if (p->itree.start < start) {
264             /* Truncate the node from the end, or split out the middle. */
265             p->itree.last = start - 1;
266             interval_tree_insert(&p->itree, &pageflags_root);
267             if (last < p_last) {
268                 pageflags_create(last + 1, p_last, p->flags);
269                 break;
270             }
271         } else if (p_last <= last) {
272             /* Range completely covers node -- remove it. */
273             g_free_rcu(p, rcu);
274         } else {
275             /* Truncate the node from the start. */
276             p->itree.start = last + 1;
277             interval_tree_insert(&p->itree, &pageflags_root);
278             break;
279         }
280     }
281 
282     return inval_tb;
283 }
284 
285 /*
286  * A subroutine of page_set_flags: nothing overlaps [start,last],
287  * but check adjacent mappings and maybe merge into a single range.
288  */
289 static void pageflags_create_merge(target_ulong start, target_ulong last,
290                                    int flags)
291 {
292     PageFlagsNode *next = NULL, *prev = NULL;
293 
294     if (start > 0) {
295         prev = pageflags_find(start - 1, start - 1);
296         if (prev) {
297             if (prev->flags == flags) {
298                 interval_tree_remove(&prev->itree, &pageflags_root);
299             } else {
300                 prev = NULL;
301             }
302         }
303     }
304     if (last + 1 != 0) {
305         next = pageflags_find(last + 1, last + 1);
306         if (next) {
307             if (next->flags == flags) {
308                 interval_tree_remove(&next->itree, &pageflags_root);
309             } else {
310                 next = NULL;
311             }
312         }
313     }
314 
315     if (prev) {
316         if (next) {
317             prev->itree.last = next->itree.last;
318             g_free_rcu(next, rcu);
319         } else {
320             prev->itree.last = last;
321         }
322         interval_tree_insert(&prev->itree, &pageflags_root);
323     } else if (next) {
324         next->itree.start = start;
325         interval_tree_insert(&next->itree, &pageflags_root);
326     } else {
327         pageflags_create(start, last, flags);
328     }
329 }
330 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 *
 * PAGE_STICKY is the set of flag bits that page_set_flags() leaves
 * untouched on an existing mapping unless PAGE_RESET was requested:
 * it passes ~PAGE_STICKY as the clear mask in that case.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
339 
/*
 * A subroutine of page_set_flags: update the flags recorded for
 * [start,last].
 *
 * For every existing node overlapping the range, the new flags are
 * (old_flags & ~clear_flags) | set_flags.  Parts of the range with no
 * existing node receive @set_flags (and nothing is recorded for them
 * when @set_flags is 0).  Nodes are split, trimmed, extended or freed as
 * needed; the function loops via the "restart" label until the whole
 * range has been processed.
 *
 * Returns true if an overlapping node with PAGE_EXEC either loses
 * PAGE_EXEC or gains PAGE_WRITE.
 */
static bool pageflags_set_clear(target_ulong start, target_ulong last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    target_ulong p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing recorded in what remains of the range. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node; it may be modified or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            /* No flags left: the node is no longer needed. */
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Node begins below the range: keep its head unchanged. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: middle plus tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                /* Range reaches at least the end of the node. */
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Continue with whatever lies beyond this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Range begins at or below the node's start. */
            if (start < p_start && set_flags) {
                /* Gap in front of the node gets the plain set_flags. */
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Node extends past the range: trim its front. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node is wholly covered: update in place or drop it. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    /* Continue with whatever lies beyond this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Extend the node downwards to cover the front of the range. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        /* Keep the head of the node with its original flags. */
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* More nodes may overlap the range: look again. */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        /* Keep the tail of the node with its original flags. */
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node wholly covered by the range: drop it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
479 
480 /*
481  * Modify the flags of a page and invalidate the code if necessary.
482  * The flag PAGE_WRITE_ORG is positioned automatically depending
483  * on PAGE_WRITE.  The mmap_lock should already be held.
484  */
485 void page_set_flags(target_ulong start, target_ulong last, int flags)
486 {
487     bool reset = false;
488     bool inval_tb = false;
489 
490     /* This function should never be called with addresses outside the
491        guest address space.  If this assert fires, it probably indicates
492        a missing call to h2g_valid.  */
493     assert(start <= last);
494     assert(last <= GUEST_ADDR_MAX);
495     /* Only set PAGE_ANON with new mappings. */
496     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
497     assert_memory_lock();
498 
499     start &= TARGET_PAGE_MASK;
500     last |= ~TARGET_PAGE_MASK;
501 
502     if (!(flags & PAGE_VALID)) {
503         flags = 0;
504     } else {
505         reset = flags & PAGE_RESET;
506         flags &= ~PAGE_RESET;
507         if (flags & PAGE_WRITE) {
508             flags |= PAGE_WRITE_ORG;
509         }
510     }
511 
512     if (!flags || reset) {
513         page_reset_target_data(start, last);
514         inval_tb |= pageflags_unset(start, last);
515     }
516     if (flags) {
517         inval_tb |= pageflags_set_clear(start, last, flags,
518                                         ~(reset ? 0 : PAGE_STICKY));
519     }
520     if (inval_tb) {
521         tb_invalidate_phys_range(start, last);
522     }
523 }
524 
525 bool page_check_range(target_ulong start, target_ulong len, int flags)
526 {
527     target_ulong last;
528     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
529     bool ret;
530 
531     if (len == 0) {
532         return true;  /* trivial length */
533     }
534 
535     last = start + len - 1;
536     if (last < start) {
537         return false; /* wrap around */
538     }
539 
540     locked = have_mmap_lock();
541     while (true) {
542         PageFlagsNode *p = pageflags_find(start, last);
543         int missing;
544 
545         if (!p) {
546             if (!locked) {
547                 /*
548                  * Lockless lookups have false negatives.
549                  * Retry with the lock held.
550                  */
551                 mmap_lock();
552                 locked = -1;
553                 p = pageflags_find(start, last);
554             }
555             if (!p) {
556                 ret = false; /* entire region invalid */
557                 break;
558             }
559         }
560         if (start < p->itree.start) {
561             ret = false; /* initial bytes invalid */
562             break;
563         }
564 
565         missing = flags & ~p->flags;
566         if (missing & ~PAGE_WRITE) {
567             ret = false; /* page doesn't match */
568             break;
569         }
570         if (missing & PAGE_WRITE) {
571             if (!(p->flags & PAGE_WRITE_ORG)) {
572                 ret = false; /* page not writable */
573                 break;
574             }
575             /* Asking about writable, but has been protected: undo. */
576             if (!page_unprotect(start, 0)) {
577                 ret = false;
578                 break;
579             }
580             /* TODO: page_unprotect should take a range, not a single page. */
581             if (last - start < TARGET_PAGE_SIZE) {
582                 ret = true; /* ok */
583                 break;
584             }
585             start += TARGET_PAGE_SIZE;
586             continue;
587         }
588 
589         if (last <= p->itree.last) {
590             ret = true; /* ok */
591             break;
592         }
593         start = p->itree.last + 1;
594     }
595 
596     /* Release the lock if acquired locally. */
597     if (locked < 0) {
598         mmap_unlock();
599     }
600     return ret;
601 }
602 
603 bool page_check_range_empty(target_ulong start, target_ulong last)
604 {
605     assert(last >= start);
606     assert_memory_lock();
607     return pageflags_find(start, last) == NULL;
608 }
609 
610 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
611                                    target_ulong len, target_ulong align)
612 {
613     target_ulong len_m1, align_m1;
614 
615     assert(min <= max);
616     assert(max <= GUEST_ADDR_MAX);
617     assert(len != 0);
618     assert(is_power_of_2(align));
619     assert_memory_lock();
620 
621     len_m1 = len - 1;
622     align_m1 = align - 1;
623 
624     /* Iteratively narrow the search region. */
625     while (1) {
626         PageFlagsNode *p;
627 
628         /* Align min and double-check there's enough space remaining. */
629         min = (min + align_m1) & ~align_m1;
630         if (min > max) {
631             return -1;
632         }
633         if (len_m1 > max - min) {
634             return -1;
635         }
636 
637         p = pageflags_find(min, min + len_m1);
638         if (p == NULL) {
639             /* Found! */
640             return min;
641         }
642         if (max <= p->itree.last) {
643             /* Existing allocation fills the remainder of the search region. */
644             return -1;
645         }
646         /* Skip across existing allocation. */
647         min = p->itree.last + 1;
648     }
649 }
650 
651 void page_protect(tb_page_addr_t address)
652 {
653     PageFlagsNode *p;
654     target_ulong start, last;
655     int host_page_size = qemu_real_host_page_size();
656     int prot;
657 
658     assert_memory_lock();
659 
660     if (host_page_size <= TARGET_PAGE_SIZE) {
661         start = address & TARGET_PAGE_MASK;
662         last = start + TARGET_PAGE_SIZE - 1;
663     } else {
664         start = address & -host_page_size;
665         last = start + host_page_size - 1;
666     }
667 
668     p = pageflags_find(start, last);
669     if (!p) {
670         return;
671     }
672     prot = p->flags;
673 
674     if (unlikely(p->itree.last < last)) {
675         /* More than one protection region covers the one host page. */
676         assert(TARGET_PAGE_SIZE < host_page_size);
677         while ((p = pageflags_next(p, start, last)) != NULL) {
678             prot |= p->flags;
679         }
680     }
681 
682     if (prot & PAGE_WRITE) {
683         pageflags_set_clear(start, last, 0, PAGE_WRITE);
684         mprotect(g2h_untagged(start), last - start + 1,
685                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
686     }
687 }
688 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address of the faulting write
 * @pc: host pc passed on to the TB invalidation/lookup routines;
 *      callers in this file also pass 0
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Report whether the TB containing pc is among those invalidated. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        int host_page_size = qemu_real_host_page_size();
        target_ulong start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* A single target page is the unit of protection. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /*
             * One host page spans several target pages: visit each of
             * them and accumulate the union of their flags.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    /* Only originally-writable pages regain PAGE_WRITE. */
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* The host mapping is given read permission in place of exec. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_BITS);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
775 
776 static int probe_access_internal(CPUArchState *env, vaddr addr,
777                                  int fault_size, MMUAccessType access_type,
778                                  bool nonfault, uintptr_t ra)
779 {
780     int acc_flag;
781     bool maperr;
782 
783     switch (access_type) {
784     case MMU_DATA_STORE:
785         acc_flag = PAGE_WRITE_ORG;
786         break;
787     case MMU_DATA_LOAD:
788         acc_flag = PAGE_READ;
789         break;
790     case MMU_INST_FETCH:
791         acc_flag = PAGE_EXEC;
792         break;
793     default:
794         g_assert_not_reached();
795     }
796 
797     if (guest_addr_valid_untagged(addr)) {
798         int page_flags = page_get_flags(addr);
799         if (page_flags & acc_flag) {
800             if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE)
801                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
802                 return TLB_MMIO;
803             }
804             return 0; /* success */
805         }
806         maperr = !(page_flags & PAGE_VALID);
807     } else {
808         maperr = true;
809     }
810 
811     if (nonfault) {
812         return TLB_INVALID_MASK;
813     }
814 
815     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
816 }
817 
818 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
819                        MMUAccessType access_type, int mmu_idx,
820                        bool nonfault, void **phost, uintptr_t ra)
821 {
822     int flags;
823 
824     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
825     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
826     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
827     return flags;
828 }
829 
830 void *probe_access(CPUArchState *env, vaddr addr, int size,
831                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
832 {
833     int flags;
834 
835     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
836     flags = probe_access_internal(env, addr, size, access_type, false, ra);
837     g_assert((flags & ~TLB_MMIO) == 0);
838 
839     return size ? g2h(env_cpu(env), addr) : NULL;
840 }
841 
842 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
843                                         void **hostp)
844 {
845     int flags;
846 
847     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
848     g_assert(flags == 0);
849 
850     if (hostp) {
851         *hostp = g2h_untagged(addr);
852     }
853     return addr;
854 }
855 
856 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or 40%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
/* Number of target pages whose data share one TargetPageDataNode. */
#define TPD_PAGES  64
/* Address mask selecting the TPD_PAGES-page region containing an address. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/*
 * Per-page target data for one region of TPD_PAGES pages; each page owns
 * TARGET_PAGE_DATA_SIZE bytes of @data, indexed by page within the region.
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is dropped */
    IntervalTreeNode itree;     /* linkage and [start, last] of the region */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* Interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
872 
873 void page_reset_target_data(target_ulong start, target_ulong last)
874 {
875     IntervalTreeNode *n, *next;
876 
877     assert_memory_lock();
878 
879     start &= TARGET_PAGE_MASK;
880     last |= ~TARGET_PAGE_MASK;
881 
882     for (n = interval_tree_iter_first(&targetdata_root, start, last),
883          next = n ? interval_tree_iter_next(n, start, last) : NULL;
884          n != NULL;
885          n = next,
886          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
887         target_ulong n_start, n_last, p_ofs, p_len;
888         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
889 
890         if (n->start >= start && n->last <= last) {
891             interval_tree_remove(n, &targetdata_root);
892             g_free_rcu(t, rcu);
893             continue;
894         }
895 
896         if (n->start < start) {
897             n_start = start;
898             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
899         } else {
900             n_start = n->start;
901             p_ofs = 0;
902         }
903         n_last = MIN(last, n->last);
904         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
905 
906         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
907                p_len * TARGET_PAGE_DATA_SIZE);
908     }
909 }
910 
911 void *page_get_target_data(target_ulong address)
912 {
913     IntervalTreeNode *n;
914     TargetPageDataNode *t;
915     target_ulong page, region, p_ofs;
916 
917     page = address & TARGET_PAGE_MASK;
918     region = address & TBD_MASK;
919 
920     n = interval_tree_iter_first(&targetdata_root, page, page);
921     if (!n) {
922         /*
923          * See util/interval-tree.c re lockless lookups: no false positives
924          * but there are false negatives.  If we find nothing, retry with
925          * the mmap lock acquired.  We also need the lock for the
926          * allocation + insert.
927          */
928         mmap_lock();
929         n = interval_tree_iter_first(&targetdata_root, page, page);
930         if (!n) {
931             t = g_malloc0(sizeof(TargetPageDataNode)
932                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
933             n = &t->itree;
934             n->start = region;
935             n->last = region | ~TBD_MASK;
936             interval_tree_insert(n, &targetdata_root);
937         }
938         mmap_unlock();
939     }
940 
941     t = container_of(n, TargetPageDataNode, itree);
942     p_ofs = (page - region) >> TARGET_PAGE_BITS;
943     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
944 }
945 #else
946 void page_reset_target_data(target_ulong start, target_ulong last) { }
947 #endif /* TARGET_PAGE_DATA_SIZE */
948 
949 /* The system-mode versions of these helpers are in cputlb.c.  */
950 
951 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
952                             MemOp mop, uintptr_t ra, MMUAccessType type)
953 {
954     int a_bits = get_alignment_bits(mop);
955     void *ret;
956 
957     /* Enforce guest required alignment.  */
958     if (unlikely(addr & ((1 << a_bits) - 1))) {
959         cpu_loop_exit_sigbus(cpu, addr, type, ra);
960     }
961 
962     ret = g2h(cpu, addr);
963     set_helper_retaddr(ra);
964     return ret;
965 }
966 
967 #include "ldst_atomicity.c.inc"
968 
969 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
970                           uintptr_t ra, MMUAccessType access_type)
971 {
972     void *haddr;
973     uint8_t ret;
974 
975     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
976     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
977     ret = ldub_p(haddr);
978     clear_helper_retaddr();
979     return ret;
980 }
981 
982 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
983                            uintptr_t ra, MMUAccessType access_type)
984 {
985     void *haddr;
986     uint16_t ret;
987     MemOp mop = get_memop(oi);
988 
989     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
990     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
991     ret = load_atom_2(cpu, ra, haddr, mop);
992     clear_helper_retaddr();
993 
994     if (mop & MO_BSWAP) {
995         ret = bswap16(ret);
996     }
997     return ret;
998 }
999 
1000 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1001                            uintptr_t ra, MMUAccessType access_type)
1002 {
1003     void *haddr;
1004     uint32_t ret;
1005     MemOp mop = get_memop(oi);
1006 
1007     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1008     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1009     ret = load_atom_4(cpu, ra, haddr, mop);
1010     clear_helper_retaddr();
1011 
1012     if (mop & MO_BSWAP) {
1013         ret = bswap32(ret);
1014     }
1015     return ret;
1016 }
1017 
1018 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1019                            uintptr_t ra, MMUAccessType access_type)
1020 {
1021     void *haddr;
1022     uint64_t ret;
1023     MemOp mop = get_memop(oi);
1024 
1025     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1026     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1027     ret = load_atom_8(cpu, ra, haddr, mop);
1028     clear_helper_retaddr();
1029 
1030     if (mop & MO_BSWAP) {
1031         ret = bswap64(ret);
1032     }
1033     return ret;
1034 }
1035 
1036 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1037                           MemOpIdx oi, uintptr_t ra)
1038 {
1039     void *haddr;
1040     Int128 ret;
1041     MemOp mop = get_memop(oi);
1042 
1043     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1044     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1045     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1046     ret = load_atom_16(cpu, ra, haddr, mop);
1047     clear_helper_retaddr();
1048 
1049     if (mop & MO_BSWAP) {
1050         ret = bswap128(ret);
1051     }
1052     return ret;
1053 }
1054 
1055 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1056                        MemOpIdx oi, uintptr_t ra)
1057 {
1058     void *haddr;
1059 
1060     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1061     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1062     stb_p(haddr, val);
1063     clear_helper_retaddr();
1064 }
1065 
1066 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1067                        MemOpIdx oi, uintptr_t ra)
1068 {
1069     void *haddr;
1070     MemOp mop = get_memop(oi);
1071 
1072     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1073     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1074 
1075     if (mop & MO_BSWAP) {
1076         val = bswap16(val);
1077     }
1078     store_atom_2(cpu, ra, haddr, mop, val);
1079     clear_helper_retaddr();
1080 }
1081 
1082 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1083                        MemOpIdx oi, uintptr_t ra)
1084 {
1085     void *haddr;
1086     MemOp mop = get_memop(oi);
1087 
1088     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1089     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1090 
1091     if (mop & MO_BSWAP) {
1092         val = bswap32(val);
1093     }
1094     store_atom_4(cpu, ra, haddr, mop, val);
1095     clear_helper_retaddr();
1096 }
1097 
1098 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1099                        MemOpIdx oi, uintptr_t ra)
1100 {
1101     void *haddr;
1102     MemOp mop = get_memop(oi);
1103 
1104     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1105     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1106 
1107     if (mop & MO_BSWAP) {
1108         val = bswap64(val);
1109     }
1110     store_atom_8(cpu, ra, haddr, mop, val);
1111     clear_helper_retaddr();
1112 }
1113 
1114 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1115                         MemOpIdx oi, uintptr_t ra)
1116 {
1117     void *haddr;
1118     MemOpIdx mop = get_memop(oi);
1119 
1120     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1121     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1122 
1123     if (mop & MO_BSWAP) {
1124         val = bswap128(val);
1125     }
1126     store_atom_16(cpu, ra, haddr, mop, val);
1127     clear_helper_retaddr();
1128 }
1129 
1130 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1131 {
1132     uint32_t ret;
1133 
1134     set_helper_retaddr(1);
1135     ret = ldub_p(g2h_untagged(ptr));
1136     clear_helper_retaddr();
1137     return ret;
1138 }
1139 
1140 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1141 {
1142     uint32_t ret;
1143 
1144     set_helper_retaddr(1);
1145     ret = lduw_p(g2h_untagged(ptr));
1146     clear_helper_retaddr();
1147     return ret;
1148 }
1149 
1150 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1151 {
1152     uint32_t ret;
1153 
1154     set_helper_retaddr(1);
1155     ret = ldl_p(g2h_untagged(ptr));
1156     clear_helper_retaddr();
1157     return ret;
1158 }
1159 
1160 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1161 {
1162     uint64_t ret;
1163 
1164     set_helper_retaddr(1);
1165     ret = ldq_p(g2h_untagged(ptr));
1166     clear_helper_retaddr();
1167     return ret;
1168 }
1169 
1170 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1171                          MemOpIdx oi, uintptr_t ra)
1172 {
1173     void *haddr;
1174     uint8_t ret;
1175 
1176     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1177     ret = ldub_p(haddr);
1178     clear_helper_retaddr();
1179     return ret;
1180 }
1181 
1182 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1183                           MemOpIdx oi, uintptr_t ra)
1184 {
1185     void *haddr;
1186     uint16_t ret;
1187 
1188     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1189     ret = lduw_p(haddr);
1190     clear_helper_retaddr();
1191     if (get_memop(oi) & MO_BSWAP) {
1192         ret = bswap16(ret);
1193     }
1194     return ret;
1195 }
1196 
1197 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1198                           MemOpIdx oi, uintptr_t ra)
1199 {
1200     void *haddr;
1201     uint32_t ret;
1202 
1203     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1204     ret = ldl_p(haddr);
1205     clear_helper_retaddr();
1206     if (get_memop(oi) & MO_BSWAP) {
1207         ret = bswap32(ret);
1208     }
1209     return ret;
1210 }
1211 
1212 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1213                           MemOpIdx oi, uintptr_t ra)
1214 {
1215     void *haddr;
1216     uint64_t ret;
1217 
1218     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1219     ret = ldq_p(haddr);
1220     clear_helper_retaddr();
1221     if (get_memop(oi) & MO_BSWAP) {
1222         ret = bswap64(ret);
1223     }
1224     return ret;
1225 }
1226 
1227 #include "ldst_common.c.inc"
1228 
1229 /*
1230  * Do not allow unaligned operations to proceed.  Return the host address.
1231  */
1232 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1233                                int size, uintptr_t retaddr)
1234 {
1235     MemOp mop = get_memop(oi);
1236     int a_bits = get_alignment_bits(mop);
1237     void *ret;
1238 
1239     /* Enforce guest required alignment.  */
1240     if (unlikely(addr & ((1 << a_bits) - 1))) {
1241         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1242     }
1243 
1244     /* Enforce qemu required alignment.  */
1245     if (unlikely(addr & (size - 1))) {
1246         cpu_loop_exit_atomic(cpu, retaddr);
1247     }
1248 
1249     ret = g2h(cpu, addr);
1250     set_helper_retaddr(retaddr);
1251     return ret;
1252 }
1253 
1254 #include "atomic_common.c.inc"
1255 
1256 /*
1257  * First set of functions passes in OI and RETADDR.
1258  * This makes them callable from other helpers.
1259  */
1260 
1261 #define ATOMIC_NAME(X) \
1262     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1263 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1264 
1265 #define DATA_SIZE 1
1266 #include "atomic_template.h"
1267 
1268 #define DATA_SIZE 2
1269 #include "atomic_template.h"
1270 
1271 #define DATA_SIZE 4
1272 #include "atomic_template.h"
1273 
1274 #ifdef CONFIG_ATOMIC64
1275 #define DATA_SIZE 8
1276 #include "atomic_template.h"
1277 #endif
1278 
1279 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1280 #define DATA_SIZE 16
1281 #include "atomic_template.h"
1282 #endif
1283