xref: /openbmc/qemu/accel/tcg/user-exec.c (revision 7cac7aa7040a823c585f1578a38f28e83c8bf3e1)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "qemu/main-loop.h"
28 #include "exec/translate-all.h"
29 #include "exec/page-protection.h"
30 #include "exec/helper-proto.h"
31 #include "qemu/atomic128.h"
32 #include "trace.h"
33 #include "tcg/tcg-ldst.h"
34 #include "internal-common.h"
35 #include "internal-target.h"
36 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault is
 * being handled:
 *   0     - the fault is attributed to generated code;
 *   1     - the fault is attributed to a host read done for translation;
 *   other - the value is used as the host return address of the helper
 *           that was performing the guest memory access.
 */
__thread uintptr_t helper_retaddr;
38 
39 //#define DEBUG_SIGNAL
40 
41 void cpu_interrupt(CPUState *cpu, int mask)
42 {
43     g_assert(bql_locked());
44     cpu->interrupt_request |= mask;
45     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
46 }
47 
48 /*
49  * Adjust the pc to pass to cpu_restore_state; return the memop type.
50  */
51 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
52 {
53     switch (helper_retaddr) {
54     default:
55         /*
56          * Fault during host memory operation within a helper function.
57          * The helper's host return address, saved here, gives us a
58          * pointer into the generated code that will unwind to the
59          * correct guest pc.
60          */
61         *pc = helper_retaddr;
62         break;
63 
64     case 0:
65         /*
66          * Fault during host memory operation within generated code.
67          * (Or, a unrelated bug within qemu, but we can't tell from here).
68          *
69          * We take the host pc from the signal frame.  However, we cannot
70          * use that value directly.  Within cpu_restore_state_from_tb, we
71          * assume PC comes from GETPC(), as used by the helper functions,
72          * so we adjust the address by -GETPC_ADJ to form an address that
73          * is within the call insn, so that the address does not accidentally
74          * match the beginning of the next guest insn.  However, when the
75          * pc comes from the signal frame it points to the actual faulting
76          * host memory insn and not the return from a call insn.
77          *
78          * Therefore, adjust to compensate for what will be done later
79          * by cpu_restore_state_from_tb.
80          */
81         *pc += GETPC_ADJ;
82         break;
83 
84     case 1:
85         /*
86          * Fault during host read for translation, or loosely, "execution".
87          *
88          * The guest pc is already pointing to the start of the TB for which
89          * code is being generated.  If the guest translator manages the
90          * page crossings correctly, this is exactly the correct address
91          * (and if the translator doesn't handle page boundaries correctly
92          * there's little we can do about that here).  Therefore, do not
93          * trigger the unwinder.
94          */
95         *pc = 0;
96         return MMU_INST_FETCH;
97     }
98 
99     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
100 }
101 
102 /**
103  * handle_sigsegv_accerr_write:
104  * @cpu: the cpu context
105  * @old_set: the sigset_t from the signal ucontext_t
106  * @host_pc: the host pc, adjusted for the signal
107  * @guest_addr: the guest address of the fault
108  *
109  * Return true if the write fault has been handled, and should be re-tried.
110  *
111  * Note that it is important that we don't call page_unprotect() unless
112  * this is really a "write to nonwritable page" fault, because
113  * page_unprotect() assumes that if it is called for an access to
114  * a page that's writable this means we had two threads racing and
115  * another thread got there first and already made the page writable;
116  * so we will retry the access. If we were to call page_unprotect()
117  * for some other kind of fault that should really be passed to the
118  * guest, we'd end up in an infinite loop of retrying the faulting access.
119  */
120 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
121                                  uintptr_t host_pc, abi_ptr guest_addr)
122 {
123     switch (page_unprotect(guest_addr, host_pc)) {
124     case 0:
125         /*
126          * Fault not caused by a page marked unwritable to protect
127          * cached translations, must be the guest binary's problem.
128          */
129         return false;
130     case 1:
131         /*
132          * Fault caused by protection of cached translation; TBs
133          * invalidated, so resume execution.
134          */
135         return true;
136     case 2:
137         /*
138          * Fault caused by protection of cached translation, and the
139          * currently executing TB was modified and must be exited immediately.
140          */
141         sigprocmask(SIG_SETMASK, old_set, NULL);
142         cpu_loop_exit_noexc(cpu);
143         /* NORETURN */
144     default:
145         g_assert_not_reached();
146     }
147 }
148 
/*
 * One node of the page-flags interval tree: the guest address range
 * [itree.start, itree.last] together with the flags that apply to it.
 */
typedef struct PageFlagsNode {
    /* Passed to g_free_rcu() when the node is released. */
    struct rcu_head rcu;
    /* Interval-tree linkage; holds the start/last bounds of the range. */
    IntervalTreeNode itree;
    /* PAGE_* bits recorded for the range. */
    int flags;
} PageFlagsNode;

/* Root of the interval tree that holds every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
156 
157 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
158 {
159     IntervalTreeNode *n;
160 
161     n = interval_tree_iter_first(&pageflags_root, start, last);
162     return n ? container_of(n, PageFlagsNode, itree) : NULL;
163 }
164 
165 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
166                                      target_ulong last)
167 {
168     IntervalTreeNode *n;
169 
170     n = interval_tree_iter_next(&p->itree, start, last);
171     return n ? container_of(n, PageFlagsNode, itree) : NULL;
172 }
173 
174 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
175 {
176     IntervalTreeNode *n;
177     int rc = 0;
178 
179     mmap_lock();
180     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
181          n != NULL;
182          n = interval_tree_iter_next(n, 0, -1)) {
183         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
184 
185         rc = fn(priv, n->start, n->last + 1, p->flags);
186         if (rc != 0) {
187             break;
188         }
189     }
190     mmap_unlock();
191 
192     return rc;
193 }
194 
195 static int dump_region(void *priv, target_ulong start,
196                        target_ulong end, unsigned long prot)
197 {
198     FILE *f = (FILE *)priv;
199 
200     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
201             start, end, end - start,
202             ((prot & PAGE_READ) ? 'r' : '-'),
203             ((prot & PAGE_WRITE) ? 'w' : '-'),
204             ((prot & PAGE_EXEC) ? 'x' : '-'));
205     return 0;
206 }
207 
208 /* dump memory mappings */
209 void page_dump(FILE *f)
210 {
211     const int length = sizeof(target_ulong) * 2;
212 
213     fprintf(f, "%-*s %-*s %-*s %s\n",
214             length, "start", length, "end", length, "size", "prot");
215     walk_memory_regions(f, dump_region);
216 }
217 
218 int page_get_flags(target_ulong address)
219 {
220     PageFlagsNode *p = pageflags_find(address, address);
221 
222     /*
223      * See util/interval-tree.c re lockless lookups: no false positives but
224      * there are false negatives.  If we find nothing, retry with the mmap
225      * lock acquired.
226      */
227     if (p) {
228         return p->flags;
229     }
230     if (have_mmap_lock()) {
231         return 0;
232     }
233 
234     mmap_lock();
235     p = pageflags_find(address, address);
236     mmap_unlock();
237     return p ? p->flags : 0;
238 }
239 
240 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
241 static void pageflags_create(target_ulong start, target_ulong last, int flags)
242 {
243     PageFlagsNode *p = g_new(PageFlagsNode, 1);
244 
245     p->itree.start = start;
246     p->itree.last = last;
247     p->flags = flags;
248     interval_tree_insert(&p->itree, &pageflags_root);
249 }
250 
251 /* A subroutine of page_set_flags: remove everything in [start,last]. */
252 static bool pageflags_unset(target_ulong start, target_ulong last)
253 {
254     bool inval_tb = false;
255 
256     while (true) {
257         PageFlagsNode *p = pageflags_find(start, last);
258         target_ulong p_last;
259 
260         if (!p) {
261             break;
262         }
263 
264         if (p->flags & PAGE_EXEC) {
265             inval_tb = true;
266         }
267 
268         interval_tree_remove(&p->itree, &pageflags_root);
269         p_last = p->itree.last;
270 
271         if (p->itree.start < start) {
272             /* Truncate the node from the end, or split out the middle. */
273             p->itree.last = start - 1;
274             interval_tree_insert(&p->itree, &pageflags_root);
275             if (last < p_last) {
276                 pageflags_create(last + 1, p_last, p->flags);
277                 break;
278             }
279         } else if (p_last <= last) {
280             /* Range completely covers node -- remove it. */
281             g_free_rcu(p, rcu);
282         } else {
283             /* Truncate the node from the start. */
284             p->itree.start = last + 1;
285             interval_tree_insert(&p->itree, &pageflags_root);
286             break;
287         }
288     }
289 
290     return inval_tb;
291 }
292 
293 /*
294  * A subroutine of page_set_flags: nothing overlaps [start,last],
295  * but check adjacent mappings and maybe merge into a single range.
296  */
297 static void pageflags_create_merge(target_ulong start, target_ulong last,
298                                    int flags)
299 {
300     PageFlagsNode *next = NULL, *prev = NULL;
301 
302     if (start > 0) {
303         prev = pageflags_find(start - 1, start - 1);
304         if (prev) {
305             if (prev->flags == flags) {
306                 interval_tree_remove(&prev->itree, &pageflags_root);
307             } else {
308                 prev = NULL;
309             }
310         }
311     }
312     if (last + 1 != 0) {
313         next = pageflags_find(last + 1, last + 1);
314         if (next) {
315             if (next->flags == flags) {
316                 interval_tree_remove(&next->itree, &pageflags_root);
317             } else {
318                 next = NULL;
319             }
320         }
321     }
322 
323     if (prev) {
324         if (next) {
325             prev->itree.last = next->itree.last;
326             g_free_rcu(next, rcu);
327         } else {
328             prev->itree.last = last;
329         }
330         interval_tree_insert(&prev->itree, &pageflags_root);
331     } else if (next) {
332         next->itree.start = start;
333         interval_tree_insert(&next->itree, &pageflags_root);
334     } else {
335         pageflags_create(start, last, flags);
336     }
337 }
338 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() leaves out of the clear mask it passes
 * to pageflags_set_clear() unless the caller supplied PAGE_RESET.
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
347 
/*
 * A subroutine of page_set_flags (also called by page_protect and
 * page_unprotect): update the flags of [start,last].
 *
 * For each existing node overlapping the range, the new flags of the
 * overlapping part are (old_flags & ~clear_flags) | set_flags.  Parts of
 * the range with no existing node receive @set_flags when it is non-zero.
 * Nodes that extend outside the range are split so that the outside
 * parts keep their original flags; a part whose resulting flags are zero
 * is removed.
 *
 * Returns true if an overlapping node had PAGE_EXEC and the update either
 * drops PAGE_EXEC or newly adds PAGE_WRITE.
 */
static bool pageflags_set_clear(target_ulong start, target_ulong last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    target_ulong p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

    /* Re-entered with an advanced 'start' after each node is processed. */
 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing (left) overlapping: create the remainder if needed. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node, since it may be modified or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Node begins below the range: keep its head unchanged. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: head, middle, tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                /* Range reaches or passes the node's end. */
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Continue with whatever follows this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Node begins at or above 'start'; fill any gap before it. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Node extends past the range: keep its tail unchanged. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node lies entirely within the range. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    /* Continue with whatever follows this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Extend the node downwards over the uncovered gap. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* The overlapping part is now gone; look for further nodes. */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node fully inside the range: discard it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
487 
488 void page_set_flags(target_ulong start, target_ulong last, int flags)
489 {
490     bool reset = false;
491     bool inval_tb = false;
492 
493     /* This function should never be called with addresses outside the
494        guest address space.  If this assert fires, it probably indicates
495        a missing call to h2g_valid.  */
496     assert(start <= last);
497     assert(last <= GUEST_ADDR_MAX);
498     /* Only set PAGE_ANON with new mappings. */
499     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
500     assert_memory_lock();
501 
502     start &= TARGET_PAGE_MASK;
503     last |= ~TARGET_PAGE_MASK;
504 
505     if (!(flags & PAGE_VALID)) {
506         flags = 0;
507     } else {
508         reset = flags & PAGE_RESET;
509         flags &= ~PAGE_RESET;
510         if (flags & PAGE_WRITE) {
511             flags |= PAGE_WRITE_ORG;
512         }
513     }
514 
515     if (!flags || reset) {
516         page_reset_target_data(start, last);
517         inval_tb |= pageflags_unset(start, last);
518     }
519     if (flags) {
520         inval_tb |= pageflags_set_clear(start, last, flags,
521                                         ~(reset ? 0 : PAGE_STICKY));
522     }
523     if (inval_tb) {
524         tb_invalidate_phys_range(start, last);
525     }
526 }
527 
528 bool page_check_range(target_ulong start, target_ulong len, int flags)
529 {
530     target_ulong last;
531     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
532     bool ret;
533 
534     if (len == 0) {
535         return true;  /* trivial length */
536     }
537 
538     last = start + len - 1;
539     if (last < start) {
540         return false; /* wrap around */
541     }
542 
543     locked = have_mmap_lock();
544     while (true) {
545         PageFlagsNode *p = pageflags_find(start, last);
546         int missing;
547 
548         if (!p) {
549             if (!locked) {
550                 /*
551                  * Lockless lookups have false negatives.
552                  * Retry with the lock held.
553                  */
554                 mmap_lock();
555                 locked = -1;
556                 p = pageflags_find(start, last);
557             }
558             if (!p) {
559                 ret = false; /* entire region invalid */
560                 break;
561             }
562         }
563         if (start < p->itree.start) {
564             ret = false; /* initial bytes invalid */
565             break;
566         }
567 
568         missing = flags & ~p->flags;
569         if (missing & ~PAGE_WRITE) {
570             ret = false; /* page doesn't match */
571             break;
572         }
573         if (missing & PAGE_WRITE) {
574             if (!(p->flags & PAGE_WRITE_ORG)) {
575                 ret = false; /* page not writable */
576                 break;
577             }
578             /* Asking about writable, but has been protected: undo. */
579             if (!page_unprotect(start, 0)) {
580                 ret = false;
581                 break;
582             }
583             /* TODO: page_unprotect should take a range, not a single page. */
584             if (last - start < TARGET_PAGE_SIZE) {
585                 ret = true; /* ok */
586                 break;
587             }
588             start += TARGET_PAGE_SIZE;
589             continue;
590         }
591 
592         if (last <= p->itree.last) {
593             ret = true; /* ok */
594             break;
595         }
596         start = p->itree.last + 1;
597     }
598 
599     /* Release the lock if acquired locally. */
600     if (locked < 0) {
601         mmap_unlock();
602     }
603     return ret;
604 }
605 
606 bool page_check_range_empty(target_ulong start, target_ulong last)
607 {
608     assert(last >= start);
609     assert_memory_lock();
610     return pageflags_find(start, last) == NULL;
611 }
612 
613 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
614                                    target_ulong len, target_ulong align)
615 {
616     target_ulong len_m1, align_m1;
617 
618     assert(min <= max);
619     assert(max <= GUEST_ADDR_MAX);
620     assert(len != 0);
621     assert(is_power_of_2(align));
622     assert_memory_lock();
623 
624     len_m1 = len - 1;
625     align_m1 = align - 1;
626 
627     /* Iteratively narrow the search region. */
628     while (1) {
629         PageFlagsNode *p;
630 
631         /* Align min and double-check there's enough space remaining. */
632         min = (min + align_m1) & ~align_m1;
633         if (min > max) {
634             return -1;
635         }
636         if (len_m1 > max - min) {
637             return -1;
638         }
639 
640         p = pageflags_find(min, min + len_m1);
641         if (p == NULL) {
642             /* Found! */
643             return min;
644         }
645         if (max <= p->itree.last) {
646             /* Existing allocation fills the remainder of the search region. */
647             return -1;
648         }
649         /* Skip across existing allocation. */
650         min = p->itree.last + 1;
651     }
652 }
653 
654 void page_protect(tb_page_addr_t address)
655 {
656     PageFlagsNode *p;
657     target_ulong start, last;
658     int host_page_size = qemu_real_host_page_size();
659     int prot;
660 
661     assert_memory_lock();
662 
663     if (host_page_size <= TARGET_PAGE_SIZE) {
664         start = address & TARGET_PAGE_MASK;
665         last = start + TARGET_PAGE_SIZE - 1;
666     } else {
667         start = address & -host_page_size;
668         last = start + host_page_size - 1;
669     }
670 
671     p = pageflags_find(start, last);
672     if (!p) {
673         return;
674     }
675     prot = p->flags;
676 
677     if (unlikely(p->itree.last < last)) {
678         /* More than one protection region covers the one host page. */
679         assert(TARGET_PAGE_SIZE < host_page_size);
680         while ((p = pageflags_next(p, start, last)) != NULL) {
681             prot |= p->flags;
682         }
683     }
684 
685     if (prot & PAGE_WRITE) {
686         pageflags_set_clear(start, last, 0, PAGE_WRITE);
687         mprotect(g2h_untagged(start), last - start + 1,
688                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
689     }
690 }
691 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address of the faulting write
 * @pc: host pc passed on to tcg_tb_lookup() and
 *      tb_invalidate_phys_page_unwind(); page_check_range() passes 0
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Still report whether the TB found for 'pc' has been invalidated. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        int host_page_size = qemu_real_host_page_size();
        target_ulong start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page: restore PAGE_WRITE and drop its TBs. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /*
             * The host page spans several target pages: handle each of
             * them, accumulating the union of their flags in 'prot'.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    /* Only originally-writable pages regain PAGE_WRITE. */
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* The host mapping gets read permission in place of exec. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
778 
779 static int probe_access_internal(CPUArchState *env, vaddr addr,
780                                  int fault_size, MMUAccessType access_type,
781                                  bool nonfault, uintptr_t ra)
782 {
783     int acc_flag;
784     bool maperr;
785 
786     switch (access_type) {
787     case MMU_DATA_STORE:
788         acc_flag = PAGE_WRITE_ORG;
789         break;
790     case MMU_DATA_LOAD:
791         acc_flag = PAGE_READ;
792         break;
793     case MMU_INST_FETCH:
794         acc_flag = PAGE_EXEC;
795         break;
796     default:
797         g_assert_not_reached();
798     }
799 
800     if (guest_addr_valid_untagged(addr)) {
801         int page_flags = page_get_flags(addr);
802         if (page_flags & acc_flag) {
803             if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE)
804                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
805                 return TLB_MMIO;
806             }
807             return 0; /* success */
808         }
809         maperr = !(page_flags & PAGE_VALID);
810     } else {
811         maperr = true;
812     }
813 
814     if (nonfault) {
815         return TLB_INVALID_MASK;
816     }
817 
818     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
819 }
820 
821 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
822                        MMUAccessType access_type, int mmu_idx,
823                        bool nonfault, void **phost, uintptr_t ra)
824 {
825     int flags;
826 
827     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
828     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
829     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
830     return flags;
831 }
832 
833 void *probe_access(CPUArchState *env, vaddr addr, int size,
834                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
835 {
836     int flags;
837 
838     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
839     flags = probe_access_internal(env, addr, size, access_type, false, ra);
840     g_assert((flags & ~TLB_MMIO) == 0);
841 
842     return size ? g2h(env_cpu(env), addr) : NULL;
843 }
844 
845 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
846                                         void **hostp)
847 {
848     int flags;
849 
850     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
851     g_assert(flags == 0);
852 
853     if (hostp) {
854         *hostp = g2h_untagged(addr);
855     }
856     return addr;
857 }
858 
859 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or ~31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES  64
/*
 * Mask applied to a guest address to obtain the base of the
 * TPD_PAGES-page region that one TargetPageDataNode covers.
 */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
867 
/*
 * One chunk of per-page target data, covering the guest address range
 * [itree.start, itree.last].  page_get_target_data() allocates each
 * chunk zeroed, with room for TPD_PAGES entries of TARGET_PAGE_DATA_SIZE
 * bytes.
 */
typedef struct TargetPageDataNode {
    /* Passed to g_free_rcu() when the chunk is released. */
    struct rcu_head rcu;
    /* Interval-tree linkage; holds the covered start/last bounds. */
    IntervalTreeNode itree;
    /* Per-page payload, indexed by page offset within the chunk. */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* Root of the interval tree that holds every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
875 
/*
 * Discard the per-page target data for the pages covering [start,last].
 *
 * Chunks lying entirely inside the (page-widened) range are removed from
 * the tree and freed via RCU; for chunks only partly inside, the entries
 * of the affected pages are zeroed.  The caller must hold the mmap lock.
 */
void page_reset_target_data(target_ulong start, target_ulong last)
{
    IntervalTreeNode *n, *next;

    assert_memory_lock();

    /* Widen the range to whole target pages. */
    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    /*
     * 'next' is fetched before the body runs, because the body may
     * remove and free 'n'.
     */
    for (n = interval_tree_iter_first(&targetdata_root, start, last),
         next = n ? interval_tree_iter_next(n, start, last) : NULL;
         n != NULL;
         n = next,
         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
        target_ulong n_start, n_last, p_ofs, p_len;
        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);

        /* Chunk entirely inside the range: drop it. */
        if (n->start >= start && n->last <= last) {
            interval_tree_remove(n, &targetdata_root);
            g_free_rcu(t, rcu);
            continue;
        }

        /* Partial overlap: find the first affected page of the chunk... */
        if (n->start < start) {
            n_start = start;
            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
        } else {
            n_start = n->start;
            p_ofs = 0;
        }
        /* ...and the number of affected pages. */
        n_last = MIN(last, n->last);
        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;

        /* Zero just those pages' entries. */
        memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
               p_len * TARGET_PAGE_DATA_SIZE);
    }
}
913 
914 void *page_get_target_data(target_ulong address)
915 {
916     IntervalTreeNode *n;
917     TargetPageDataNode *t;
918     target_ulong page, region, p_ofs;
919 
920     page = address & TARGET_PAGE_MASK;
921     region = address & TBD_MASK;
922 
923     n = interval_tree_iter_first(&targetdata_root, page, page);
924     if (!n) {
925         /*
926          * See util/interval-tree.c re lockless lookups: no false positives
927          * but there are false negatives.  If we find nothing, retry with
928          * the mmap lock acquired.  We also need the lock for the
929          * allocation + insert.
930          */
931         mmap_lock();
932         n = interval_tree_iter_first(&targetdata_root, page, page);
933         if (!n) {
934             t = g_malloc0(sizeof(TargetPageDataNode)
935                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
936             n = &t->itree;
937             n->start = region;
938             n->last = region | ~TBD_MASK;
939             interval_tree_insert(n, &targetdata_root);
940         }
941         mmap_unlock();
942     }
943 
944     t = container_of(n, TargetPageDataNode, itree);
945     p_ofs = (page - region) >> TARGET_PAGE_BITS;
946     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
947 }
948 #else
949 void page_reset_target_data(target_ulong start, target_ulong last) { }
950 #endif /* TARGET_PAGE_DATA_SIZE */
951 
952 /* The system-mode versions of these helpers are in cputlb.c.  */
953 
954 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
955                             MemOp mop, uintptr_t ra, MMUAccessType type)
956 {
957     int a_bits = memop_alignment_bits(mop);
958     void *ret;
959 
960     /* Enforce guest required alignment.  */
961     if (unlikely(addr & ((1 << a_bits) - 1))) {
962         cpu_loop_exit_sigbus(cpu, addr, type, ra);
963     }
964 
965     ret = g2h(cpu, addr);
966     set_helper_retaddr(ra);
967     return ret;
968 }
969 
970 #include "ldst_atomicity.c.inc"
971 
972 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
973                           uintptr_t ra, MMUAccessType access_type)
974 {
975     void *haddr;
976     uint8_t ret;
977 
978     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
979     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
980     ret = ldub_p(haddr);
981     clear_helper_retaddr();
982     return ret;
983 }
984 
985 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
986                            uintptr_t ra, MMUAccessType access_type)
987 {
988     void *haddr;
989     uint16_t ret;
990     MemOp mop = get_memop(oi);
991 
992     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
993     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
994     ret = load_atom_2(cpu, ra, haddr, mop);
995     clear_helper_retaddr();
996 
997     if (mop & MO_BSWAP) {
998         ret = bswap16(ret);
999     }
1000     return ret;
1001 }
1002 
1003 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1004                            uintptr_t ra, MMUAccessType access_type)
1005 {
1006     void *haddr;
1007     uint32_t ret;
1008     MemOp mop = get_memop(oi);
1009 
1010     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1011     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1012     ret = load_atom_4(cpu, ra, haddr, mop);
1013     clear_helper_retaddr();
1014 
1015     if (mop & MO_BSWAP) {
1016         ret = bswap32(ret);
1017     }
1018     return ret;
1019 }
1020 
1021 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1022                            uintptr_t ra, MMUAccessType access_type)
1023 {
1024     void *haddr;
1025     uint64_t ret;
1026     MemOp mop = get_memop(oi);
1027 
1028     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1029     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1030     ret = load_atom_8(cpu, ra, haddr, mop);
1031     clear_helper_retaddr();
1032 
1033     if (mop & MO_BSWAP) {
1034         ret = bswap64(ret);
1035     }
1036     return ret;
1037 }
1038 
1039 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1040                           MemOpIdx oi, uintptr_t ra)
1041 {
1042     void *haddr;
1043     Int128 ret;
1044     MemOp mop = get_memop(oi);
1045 
1046     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1047     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1048     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1049     ret = load_atom_16(cpu, ra, haddr, mop);
1050     clear_helper_retaddr();
1051 
1052     if (mop & MO_BSWAP) {
1053         ret = bswap128(ret);
1054     }
1055     return ret;
1056 }
1057 
1058 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1059                        MemOpIdx oi, uintptr_t ra)
1060 {
1061     void *haddr;
1062 
1063     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1064     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1065     stb_p(haddr, val);
1066     clear_helper_retaddr();
1067 }
1068 
1069 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1070                        MemOpIdx oi, uintptr_t ra)
1071 {
1072     void *haddr;
1073     MemOp mop = get_memop(oi);
1074 
1075     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1076     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1077 
1078     if (mop & MO_BSWAP) {
1079         val = bswap16(val);
1080     }
1081     store_atom_2(cpu, ra, haddr, mop, val);
1082     clear_helper_retaddr();
1083 }
1084 
1085 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1086                        MemOpIdx oi, uintptr_t ra)
1087 {
1088     void *haddr;
1089     MemOp mop = get_memop(oi);
1090 
1091     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1092     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1093 
1094     if (mop & MO_BSWAP) {
1095         val = bswap32(val);
1096     }
1097     store_atom_4(cpu, ra, haddr, mop, val);
1098     clear_helper_retaddr();
1099 }
1100 
1101 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1102                        MemOpIdx oi, uintptr_t ra)
1103 {
1104     void *haddr;
1105     MemOp mop = get_memop(oi);
1106 
1107     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1108     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1109 
1110     if (mop & MO_BSWAP) {
1111         val = bswap64(val);
1112     }
1113     store_atom_8(cpu, ra, haddr, mop, val);
1114     clear_helper_retaddr();
1115 }
1116 
1117 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1118                         MemOpIdx oi, uintptr_t ra)
1119 {
1120     void *haddr;
1121     MemOpIdx mop = get_memop(oi);
1122 
1123     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1124     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1125 
1126     if (mop & MO_BSWAP) {
1127         val = bswap128(val);
1128     }
1129     store_atom_16(cpu, ra, haddr, mop, val);
1130     clear_helper_retaddr();
1131 }
1132 
1133 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1134 {
1135     uint32_t ret;
1136 
1137     set_helper_retaddr(1);
1138     ret = ldub_p(g2h_untagged(ptr));
1139     clear_helper_retaddr();
1140     return ret;
1141 }
1142 
1143 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1144 {
1145     uint32_t ret;
1146 
1147     set_helper_retaddr(1);
1148     ret = lduw_p(g2h_untagged(ptr));
1149     clear_helper_retaddr();
1150     return ret;
1151 }
1152 
1153 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1154 {
1155     uint32_t ret;
1156 
1157     set_helper_retaddr(1);
1158     ret = ldl_p(g2h_untagged(ptr));
1159     clear_helper_retaddr();
1160     return ret;
1161 }
1162 
1163 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1164 {
1165     uint64_t ret;
1166 
1167     set_helper_retaddr(1);
1168     ret = ldq_p(g2h_untagged(ptr));
1169     clear_helper_retaddr();
1170     return ret;
1171 }
1172 
1173 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1174                          MemOpIdx oi, uintptr_t ra)
1175 {
1176     void *haddr;
1177     uint8_t ret;
1178 
1179     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1180     ret = ldub_p(haddr);
1181     clear_helper_retaddr();
1182     return ret;
1183 }
1184 
1185 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1186                           MemOpIdx oi, uintptr_t ra)
1187 {
1188     void *haddr;
1189     uint16_t ret;
1190 
1191     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1192     ret = lduw_p(haddr);
1193     clear_helper_retaddr();
1194     if (get_memop(oi) & MO_BSWAP) {
1195         ret = bswap16(ret);
1196     }
1197     return ret;
1198 }
1199 
1200 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1201                           MemOpIdx oi, uintptr_t ra)
1202 {
1203     void *haddr;
1204     uint32_t ret;
1205 
1206     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1207     ret = ldl_p(haddr);
1208     clear_helper_retaddr();
1209     if (get_memop(oi) & MO_BSWAP) {
1210         ret = bswap32(ret);
1211     }
1212     return ret;
1213 }
1214 
1215 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1216                           MemOpIdx oi, uintptr_t ra)
1217 {
1218     void *haddr;
1219     uint64_t ret;
1220 
1221     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1222     ret = ldq_p(haddr);
1223     clear_helper_retaddr();
1224     if (get_memop(oi) & MO_BSWAP) {
1225         ret = bswap64(ret);
1226     }
1227     return ret;
1228 }
1229 
1230 #include "ldst_common.c.inc"
1231 
1232 /*
1233  * Do not allow unaligned operations to proceed.  Return the host address.
1234  */
1235 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1236                                int size, uintptr_t retaddr)
1237 {
1238     MemOp mop = get_memop(oi);
1239     int a_bits = memop_alignment_bits(mop);
1240     void *ret;
1241 
1242     /* Enforce guest required alignment.  */
1243     if (unlikely(addr & ((1 << a_bits) - 1))) {
1244         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1245     }
1246 
1247     /* Enforce qemu required alignment.  */
1248     if (unlikely(addr & (size - 1))) {
1249         cpu_loop_exit_atomic(cpu, retaddr);
1250     }
1251 
1252     ret = g2h(cpu, addr);
1253     set_helper_retaddr(retaddr);
1254     return ret;
1255 }
1256 
1257 #include "atomic_common.c.inc"
1258 
1259 /*
1260  * First set of functions passes in OI and RETADDR.
1261  * This makes them callable from other helpers.
1262  */
1263 
/* Paste together the name cpu_atomic_<X><SUFFIX><END>_mmu. */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/* Cleanup hook for the template: clears the helper return address. */
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

/*
 * Include atomic_template.h once per DATA_SIZE value.
 * NOTE(review): DATA_SIZE is presumably the access size in bytes, and is
 * redefined before each include without an #undef here, so the template
 * presumably undefines it -- confirm against atomic_template.h.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* The DATA_SIZE 8 instance is only built when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/*
 * The DATA_SIZE 16 instance is only built when CONFIG_ATOMIC128 is defined
 * or HAVE_CMPXCHG128 is nonzero.
 */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1286