xref: /openbmc/qemu/accel/tcg/user-exec.c (revision 6a02465f)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translate-all.h"
28 #include "exec/helper-proto.h"
29 #include "qemu/atomic128.h"
30 #include "trace/trace-root.h"
31 #include "tcg/tcg-ldst.h"
32 #include "internal-common.h"
33 #include "internal-target.h"
34 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault
 * arrives:
 *   0     - the fault is taken as coming from generated code,
 *   1     - the fault happened during a host read for translation,
 *   other - the host return address of the helper doing the access.
 */
__thread uintptr_t helper_retaddr;
36 
37 //#define DEBUG_SIGNAL
38 
39 /*
40  * Adjust the pc to pass to cpu_restore_state; return the memop type.
41  */
42 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
43 {
44     switch (helper_retaddr) {
45     default:
46         /*
47          * Fault during host memory operation within a helper function.
48          * The helper's host return address, saved here, gives us a
49          * pointer into the generated code that will unwind to the
50          * correct guest pc.
51          */
52         *pc = helper_retaddr;
53         break;
54 
55     case 0:
56         /*
57          * Fault during host memory operation within generated code.
58          * (Or, a unrelated bug within qemu, but we can't tell from here).
59          *
60          * We take the host pc from the signal frame.  However, we cannot
61          * use that value directly.  Within cpu_restore_state_from_tb, we
62          * assume PC comes from GETPC(), as used by the helper functions,
63          * so we adjust the address by -GETPC_ADJ to form an address that
64          * is within the call insn, so that the address does not accidentally
65          * match the beginning of the next guest insn.  However, when the
66          * pc comes from the signal frame it points to the actual faulting
67          * host memory insn and not the return from a call insn.
68          *
69          * Therefore, adjust to compensate for what will be done later
70          * by cpu_restore_state_from_tb.
71          */
72         *pc += GETPC_ADJ;
73         break;
74 
75     case 1:
76         /*
77          * Fault during host read for translation, or loosely, "execution".
78          *
79          * The guest pc is already pointing to the start of the TB for which
80          * code is being generated.  If the guest translator manages the
81          * page crossings correctly, this is exactly the correct address
82          * (and if the translator doesn't handle page boundaries correctly
83          * there's little we can do about that here).  Therefore, do not
84          * trigger the unwinder.
85          */
86         *pc = 0;
87         return MMU_INST_FETCH;
88     }
89 
90     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
91 }
92 
93 /**
94  * handle_sigsegv_accerr_write:
95  * @cpu: the cpu context
96  * @old_set: the sigset_t from the signal ucontext_t
97  * @host_pc: the host pc, adjusted for the signal
98  * @guest_addr: the guest address of the fault
99  *
100  * Return true if the write fault has been handled, and should be re-tried.
101  *
102  * Note that it is important that we don't call page_unprotect() unless
103  * this is really a "write to nonwritable page" fault, because
104  * page_unprotect() assumes that if it is called for an access to
105  * a page that's writable this means we had two threads racing and
106  * another thread got there first and already made the page writable;
107  * so we will retry the access. If we were to call page_unprotect()
108  * for some other kind of fault that should really be passed to the
109  * guest, we'd end up in an infinite loop of retrying the faulting access.
110  */
111 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
112                                  uintptr_t host_pc, abi_ptr guest_addr)
113 {
114     switch (page_unprotect(guest_addr, host_pc)) {
115     case 0:
116         /*
117          * Fault not caused by a page marked unwritable to protect
118          * cached translations, must be the guest binary's problem.
119          */
120         return false;
121     case 1:
122         /*
123          * Fault caused by protection of cached translation; TBs
124          * invalidated, so resume execution.
125          */
126         return true;
127     case 2:
128         /*
129          * Fault caused by protection of cached translation, and the
130          * currently executing TB was modified and must be exited immediately.
131          */
132         sigprocmask(SIG_SETMASK, old_set, NULL);
133         cpu_loop_exit_noexc(cpu);
134         /* NORETURN */
135     default:
136         g_assert_not_reached();
137     }
138 }
139 
/*
 * One contiguous run of guest addresses that share the same page flags,
 * stored as a node of the pageflags_root interval tree.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is freed */
    IntervalTreeNode itree;     /* covered range [start, last], inclusive */
    int flags;                  /* PAGE_* bits applying to the whole range */
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
147 
148 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
149 {
150     IntervalTreeNode *n;
151 
152     n = interval_tree_iter_first(&pageflags_root, start, last);
153     return n ? container_of(n, PageFlagsNode, itree) : NULL;
154 }
155 
156 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
157                                      target_ulong last)
158 {
159     IntervalTreeNode *n;
160 
161     n = interval_tree_iter_next(&p->itree, start, last);
162     return n ? container_of(n, PageFlagsNode, itree) : NULL;
163 }
164 
165 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
166 {
167     IntervalTreeNode *n;
168     int rc = 0;
169 
170     mmap_lock();
171     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
172          n != NULL;
173          n = interval_tree_iter_next(n, 0, -1)) {
174         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
175 
176         rc = fn(priv, n->start, n->last + 1, p->flags);
177         if (rc != 0) {
178             break;
179         }
180     }
181     mmap_unlock();
182 
183     return rc;
184 }
185 
186 static int dump_region(void *priv, target_ulong start,
187                        target_ulong end, unsigned long prot)
188 {
189     FILE *f = (FILE *)priv;
190 
191     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
192             start, end, end - start,
193             ((prot & PAGE_READ) ? 'r' : '-'),
194             ((prot & PAGE_WRITE) ? 'w' : '-'),
195             ((prot & PAGE_EXEC) ? 'x' : '-'));
196     return 0;
197 }
198 
199 /* dump memory mappings */
200 void page_dump(FILE *f)
201 {
202     const int length = sizeof(target_ulong) * 2;
203 
204     fprintf(f, "%-*s %-*s %-*s %s\n",
205             length, "start", length, "end", length, "size", "prot");
206     walk_memory_regions(f, dump_region);
207 }
208 
209 int page_get_flags(target_ulong address)
210 {
211     PageFlagsNode *p = pageflags_find(address, address);
212 
213     /*
214      * See util/interval-tree.c re lockless lookups: no false positives but
215      * there are false negatives.  If we find nothing, retry with the mmap
216      * lock acquired.
217      */
218     if (p) {
219         return p->flags;
220     }
221     if (have_mmap_lock()) {
222         return 0;
223     }
224 
225     mmap_lock();
226     p = pageflags_find(address, address);
227     mmap_unlock();
228     return p ? p->flags : 0;
229 }
230 
231 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
232 static void pageflags_create(target_ulong start, target_ulong last, int flags)
233 {
234     PageFlagsNode *p = g_new(PageFlagsNode, 1);
235 
236     p->itree.start = start;
237     p->itree.last = last;
238     p->flags = flags;
239     interval_tree_insert(&p->itree, &pageflags_root);
240 }
241 
242 /* A subroutine of page_set_flags: remove everything in [start,last]. */
243 static bool pageflags_unset(target_ulong start, target_ulong last)
244 {
245     bool inval_tb = false;
246 
247     while (true) {
248         PageFlagsNode *p = pageflags_find(start, last);
249         target_ulong p_last;
250 
251         if (!p) {
252             break;
253         }
254 
255         if (p->flags & PAGE_EXEC) {
256             inval_tb = true;
257         }
258 
259         interval_tree_remove(&p->itree, &pageflags_root);
260         p_last = p->itree.last;
261 
262         if (p->itree.start < start) {
263             /* Truncate the node from the end, or split out the middle. */
264             p->itree.last = start - 1;
265             interval_tree_insert(&p->itree, &pageflags_root);
266             if (last < p_last) {
267                 pageflags_create(last + 1, p_last, p->flags);
268                 break;
269             }
270         } else if (p_last <= last) {
271             /* Range completely covers node -- remove it. */
272             g_free_rcu(p, rcu);
273         } else {
274             /* Truncate the node from the start. */
275             p->itree.start = last + 1;
276             interval_tree_insert(&p->itree, &pageflags_root);
277             break;
278         }
279     }
280 
281     return inval_tb;
282 }
283 
284 /*
285  * A subroutine of page_set_flags: nothing overlaps [start,last],
286  * but check adjacent mappings and maybe merge into a single range.
287  */
288 static void pageflags_create_merge(target_ulong start, target_ulong last,
289                                    int flags)
290 {
291     PageFlagsNode *next = NULL, *prev = NULL;
292 
293     if (start > 0) {
294         prev = pageflags_find(start - 1, start - 1);
295         if (prev) {
296             if (prev->flags == flags) {
297                 interval_tree_remove(&prev->itree, &pageflags_root);
298             } else {
299                 prev = NULL;
300             }
301         }
302     }
303     if (last + 1 != 0) {
304         next = pageflags_find(last + 1, last + 1);
305         if (next) {
306             if (next->flags == flags) {
307                 interval_tree_remove(&next->itree, &pageflags_root);
308             } else {
309                 next = NULL;
310             }
311         }
312     }
313 
314     if (prev) {
315         if (next) {
316             prev->itree.last = next->itree.last;
317             g_free_rcu(next, rcu);
318         } else {
319             prev->itree.last = last;
320         }
321         interval_tree_insert(&prev->itree, &pageflags_root);
322     } else if (next) {
323         next->itree.start = start;
324         interval_tree_insert(&next->itree, &pageflags_root);
325     } else {
326         pageflags_create(start, last, flags);
327     }
328 }
329 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 *
 * PAGE_STICKY collects the flag bits that page_set_flags() preserves on
 * an existing mapping unless the caller passes PAGE_RESET.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
338 
/*
 * A subroutine of page_set_flags: add flags to [start,last].
 *
 * For each existing node overlapping [start,last], the overlapped part
 * ends up with (old_flags & ~clear_flags) | set_flags; nodes that extend
 * beyond the range are trimmed or split so the rest keeps its old flags.
 * When nothing overlaps, a node with set_flags is created (possibly merged
 * with equal-flag neighbours) provided set_flags is non-zero.
 *
 * @start, @last: inclusive range to modify
 * @set_flags:    bits to turn on
 * @clear_flags:  bits to drop from the existing flags
 *
 * Returns true if an overlapping executable region lost PAGE_EXEC or
 * gained PAGE_WRITE; page_set_flags uses this to decide whether to call
 * tb_invalidate_phys_range.
 */
static bool pageflags_set_clear(target_ulong start, target_ulong last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    target_ulong p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

    /* Each pass handles one overlapping node, then rescans what is left. */
 restart:
    p = pageflags_find(start, last);
    if (!p) {
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node, since it may be resized or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Keep the head of the node, before start, with its old flags. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: middle + old tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Range continues past this node: process the rest. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* The gap before the node has no old flags: set_flags only. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Keep the tail of the node, after last, with old flags. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node lies wholly inside the range: update it in place. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Extend the node downward to cover the start of the range. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* More nodes may overlap the range; rescan before creating. */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node is swallowed by the range: drop it and rescan. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
478 
479 /*
480  * Modify the flags of a page and invalidate the code if necessary.
481  * The flag PAGE_WRITE_ORG is positioned automatically depending
482  * on PAGE_WRITE.  The mmap_lock should already be held.
483  */
484 void page_set_flags(target_ulong start, target_ulong last, int flags)
485 {
486     bool reset = false;
487     bool inval_tb = false;
488 
489     /* This function should never be called with addresses outside the
490        guest address space.  If this assert fires, it probably indicates
491        a missing call to h2g_valid.  */
492     assert(start <= last);
493     assert(last <= GUEST_ADDR_MAX);
494     /* Only set PAGE_ANON with new mappings. */
495     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
496     assert_memory_lock();
497 
498     start &= TARGET_PAGE_MASK;
499     last |= ~TARGET_PAGE_MASK;
500 
501     if (!(flags & PAGE_VALID)) {
502         flags = 0;
503     } else {
504         reset = flags & PAGE_RESET;
505         flags &= ~PAGE_RESET;
506         if (flags & PAGE_WRITE) {
507             flags |= PAGE_WRITE_ORG;
508         }
509     }
510 
511     if (!flags || reset) {
512         page_reset_target_data(start, last);
513         inval_tb |= pageflags_unset(start, last);
514     }
515     if (flags) {
516         inval_tb |= pageflags_set_clear(start, last, flags,
517                                         ~(reset ? 0 : PAGE_STICKY));
518     }
519     if (inval_tb) {
520         tb_invalidate_phys_range(start, last);
521     }
522 }
523 
/*
 * Check that every byte of [start, start + len) is covered by mappings
 * providing at least the permissions in @flags.
 *
 * Returns true when the whole range qualifies (trivially so for len == 0),
 * and false if the range wraps around, contains an unmapped gap, or lacks
 * a requested permission.  A region missing PAGE_WRITE but carrying
 * PAGE_WRITE_ORG is not a failure: it is made writable again through
 * page_unprotect(), one target page at a time.
 *
 * The mmap lock need not be held by the caller; it is taken here if a
 * lockless lookup finds nothing, and released again before returning.
 */
bool page_check_range(target_ulong start, target_ulong len, int flags)
{
    target_ulong last;
    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
    bool ret;

    if (len == 0) {
        return true;  /* trivial length */
    }

    last = start + len - 1;
    if (last < start) {
        return false; /* wrap around */
    }

    locked = have_mmap_lock();
    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        int missing;

        if (!p) {
            if (!locked) {
                /*
                 * Lockless lookups have false negatives.
                 * Retry with the lock held.
                 */
                mmap_lock();
                locked = -1;
                p = pageflags_find(start, last);
            }
            if (!p) {
                ret = false; /* entire region invalid */
                break;
            }
        }
        if (start < p->itree.start) {
            ret = false; /* initial bytes invalid */
            break;
        }

        /* Requested permissions that this region does not currently grant. */
        missing = flags & ~p->flags;
        if (missing & ~PAGE_WRITE) {
            ret = false; /* page doesn't match */
            break;
        }
        if (missing & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                ret = false; /* page not writable */
                break;
            }
            /* Asking about writable, but has been protected: undo. */
            if (!page_unprotect(start, 0)) {
                ret = false;
                break;
            }
            /* TODO: page_unprotect should take a range, not a single page. */
            if (last - start < TARGET_PAGE_SIZE) {
                ret = true; /* ok */
                break;
            }
            /* Advance one page and look the region up again. */
            start += TARGET_PAGE_SIZE;
            continue;
        }

        if (last <= p->itree.last) {
            ret = true; /* ok */
            break;
        }
        /* This region is fine; continue with whatever follows it. */
        start = p->itree.last + 1;
    }

    /* Release the lock if acquired locally. */
    if (locked < 0) {
        mmap_unlock();
    }
    return ret;
}
601 
602 bool page_check_range_empty(target_ulong start, target_ulong last)
603 {
604     assert(last >= start);
605     assert_memory_lock();
606     return pageflags_find(start, last) == NULL;
607 }
608 
609 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
610                                    target_ulong len, target_ulong align)
611 {
612     target_ulong len_m1, align_m1;
613 
614     assert(min <= max);
615     assert(max <= GUEST_ADDR_MAX);
616     assert(len != 0);
617     assert(is_power_of_2(align));
618     assert_memory_lock();
619 
620     len_m1 = len - 1;
621     align_m1 = align - 1;
622 
623     /* Iteratively narrow the search region. */
624     while (1) {
625         PageFlagsNode *p;
626 
627         /* Align min and double-check there's enough space remaining. */
628         min = (min + align_m1) & ~align_m1;
629         if (min > max) {
630             return -1;
631         }
632         if (len_m1 > max - min) {
633             return -1;
634         }
635 
636         p = pageflags_find(min, min + len_m1);
637         if (p == NULL) {
638             /* Found! */
639             return min;
640         }
641         if (max <= p->itree.last) {
642             /* Existing allocation fills the remainder of the search region. */
643             return -1;
644         }
645         /* Skip across existing allocation. */
646         min = p->itree.last + 1;
647     }
648 }
649 
650 void page_protect(tb_page_addr_t address)
651 {
652     PageFlagsNode *p;
653     target_ulong start, last;
654     int host_page_size = qemu_real_host_page_size();
655     int prot;
656 
657     assert_memory_lock();
658 
659     if (host_page_size <= TARGET_PAGE_SIZE) {
660         start = address & TARGET_PAGE_MASK;
661         last = start + TARGET_PAGE_SIZE - 1;
662     } else {
663         start = address & -host_page_size;
664         last = start + host_page_size - 1;
665     }
666 
667     p = pageflags_find(start, last);
668     if (!p) {
669         return;
670     }
671     prot = p->flags;
672 
673     if (unlikely(p->itree.last < last)) {
674         /* More than one protection region covers the one host page. */
675         assert(TARGET_PAGE_SIZE < host_page_size);
676         while ((p = pageflags_next(p, start, last)) != NULL) {
677             prot |= p->flags;
678         }
679     }
680 
681     if (prot & PAGE_WRITE) {
682         pageflags_set_clear(start, last, 0, PAGE_WRITE);
683         mprotect(g2h_untagged(start), last - start + 1,
684                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
685     }
686 }
687 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address whose page should become writable again
 * @pc:      host pc of the faulting access, passed on to
 *           tb_invalidate_phys_page_unwind(); page_check_range passes 0
 *
 * Only pages carrying PAGE_WRITE_ORG are eligible.  When the host page is
 * larger than the target page, every target page within the host page is
 * processed, because mprotect() acts on the whole host page.
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Still report whether the TB containing pc was invalidated. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        int host_page_size = qemu_real_host_page_size();
        target_ulong start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page is enough to cover the host page. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /* Accumulate the protection of every target page in the host page. */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* The host mapping gets read permission in place of exec. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_BITS);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
774 
775 static int probe_access_internal(CPUArchState *env, vaddr addr,
776                                  int fault_size, MMUAccessType access_type,
777                                  bool nonfault, uintptr_t ra)
778 {
779     int acc_flag;
780     bool maperr;
781 
782     switch (access_type) {
783     case MMU_DATA_STORE:
784         acc_flag = PAGE_WRITE_ORG;
785         break;
786     case MMU_DATA_LOAD:
787         acc_flag = PAGE_READ;
788         break;
789     case MMU_INST_FETCH:
790         acc_flag = PAGE_EXEC;
791         break;
792     default:
793         g_assert_not_reached();
794     }
795 
796     if (guest_addr_valid_untagged(addr)) {
797         int page_flags = page_get_flags(addr);
798         if (page_flags & acc_flag) {
799             if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE)
800                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
801                 return TLB_MMIO;
802             }
803             return 0; /* success */
804         }
805         maperr = !(page_flags & PAGE_VALID);
806     } else {
807         maperr = true;
808     }
809 
810     if (nonfault) {
811         return TLB_INVALID_MASK;
812     }
813 
814     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
815 }
816 
817 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
818                        MMUAccessType access_type, int mmu_idx,
819                        bool nonfault, void **phost, uintptr_t ra)
820 {
821     int flags;
822 
823     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
824     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
825     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
826     return flags;
827 }
828 
829 void *probe_access(CPUArchState *env, vaddr addr, int size,
830                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
831 {
832     int flags;
833 
834     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
835     flags = probe_access_internal(env, addr, size, access_type, false, ra);
836     g_assert((flags & ~TLB_MMIO) == 0);
837 
838     return size ? g2h(env_cpu(env), addr) : NULL;
839 }
840 
841 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
842                                         void **hostp)
843 {
844     int flags;
845 
846     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
847     g_assert(flags == 0);
848 
849     if (hostp) {
850         *hostp = g2h_untagged(addr);
851     }
852     return addr;
853 }
854 
855 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or about 31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES  64
/* Mask that rounds an address down to the start of its TPD_PAGES-page region. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
863 
/*
 * Per-page target data for one region of TPD_PAGES consecutive target
 * pages, stored as a node of the targetdata_root interval tree.
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is freed */
    IntervalTreeNode itree;     /* region covered, [start, last] inclusive */
    /* TARGET_PAGE_DATA_SIZE bytes per page, indexed by page within region. */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* Interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
871 
872 void page_reset_target_data(target_ulong start, target_ulong last)
873 {
874     IntervalTreeNode *n, *next;
875 
876     assert_memory_lock();
877 
878     start &= TARGET_PAGE_MASK;
879     last |= ~TARGET_PAGE_MASK;
880 
881     for (n = interval_tree_iter_first(&targetdata_root, start, last),
882          next = n ? interval_tree_iter_next(n, start, last) : NULL;
883          n != NULL;
884          n = next,
885          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
886         target_ulong n_start, n_last, p_ofs, p_len;
887         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
888 
889         if (n->start >= start && n->last <= last) {
890             interval_tree_remove(n, &targetdata_root);
891             g_free_rcu(t, rcu);
892             continue;
893         }
894 
895         if (n->start < start) {
896             n_start = start;
897             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
898         } else {
899             n_start = n->start;
900             p_ofs = 0;
901         }
902         n_last = MIN(last, n->last);
903         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
904 
905         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
906                p_len * TARGET_PAGE_DATA_SIZE);
907     }
908 }
909 
910 void *page_get_target_data(target_ulong address)
911 {
912     IntervalTreeNode *n;
913     TargetPageDataNode *t;
914     target_ulong page, region, p_ofs;
915 
916     page = address & TARGET_PAGE_MASK;
917     region = address & TBD_MASK;
918 
919     n = interval_tree_iter_first(&targetdata_root, page, page);
920     if (!n) {
921         /*
922          * See util/interval-tree.c re lockless lookups: no false positives
923          * but there are false negatives.  If we find nothing, retry with
924          * the mmap lock acquired.  We also need the lock for the
925          * allocation + insert.
926          */
927         mmap_lock();
928         n = interval_tree_iter_first(&targetdata_root, page, page);
929         if (!n) {
930             t = g_malloc0(sizeof(TargetPageDataNode)
931                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
932             n = &t->itree;
933             n->start = region;
934             n->last = region | ~TBD_MASK;
935             interval_tree_insert(n, &targetdata_root);
936         }
937         mmap_unlock();
938     }
939 
940     t = container_of(n, TargetPageDataNode, itree);
941     p_ofs = (page - region) >> TARGET_PAGE_BITS;
942     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
943 }
944 #else
945 void page_reset_target_data(target_ulong start, target_ulong last) { }
946 #endif /* TARGET_PAGE_DATA_SIZE */
947 
948 /* The system-mode versions of these helpers are in cputlb.c.  */
949 
950 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
951                             MemOp mop, uintptr_t ra, MMUAccessType type)
952 {
953     int a_bits = get_alignment_bits(mop);
954     void *ret;
955 
956     /* Enforce guest required alignment.  */
957     if (unlikely(addr & ((1 << a_bits) - 1))) {
958         cpu_loop_exit_sigbus(cpu, addr, type, ra);
959     }
960 
961     ret = g2h(cpu, addr);
962     set_helper_retaddr(ra);
963     return ret;
964 }
965 
966 #include "ldst_atomicity.c.inc"
967 
968 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
969                           uintptr_t ra, MMUAccessType access_type)
970 {
971     void *haddr;
972     uint8_t ret;
973 
974     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
975     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
976     ret = ldub_p(haddr);
977     clear_helper_retaddr();
978     return ret;
979 }
980 
981 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
982                            uintptr_t ra, MMUAccessType access_type)
983 {
984     void *haddr;
985     uint16_t ret;
986     MemOp mop = get_memop(oi);
987 
988     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
989     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
990     ret = load_atom_2(cpu, ra, haddr, mop);
991     clear_helper_retaddr();
992 
993     if (mop & MO_BSWAP) {
994         ret = bswap16(ret);
995     }
996     return ret;
997 }
998 
999 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1000                            uintptr_t ra, MMUAccessType access_type)
1001 {
1002     void *haddr;
1003     uint32_t ret;
1004     MemOp mop = get_memop(oi);
1005 
1006     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1007     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1008     ret = load_atom_4(cpu, ra, haddr, mop);
1009     clear_helper_retaddr();
1010 
1011     if (mop & MO_BSWAP) {
1012         ret = bswap32(ret);
1013     }
1014     return ret;
1015 }
1016 
1017 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1018                            uintptr_t ra, MMUAccessType access_type)
1019 {
1020     void *haddr;
1021     uint64_t ret;
1022     MemOp mop = get_memop(oi);
1023 
1024     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1025     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1026     ret = load_atom_8(cpu, ra, haddr, mop);
1027     clear_helper_retaddr();
1028 
1029     if (mop & MO_BSWAP) {
1030         ret = bswap64(ret);
1031     }
1032     return ret;
1033 }
1034 
1035 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1036                           MemOpIdx oi, uintptr_t ra)
1037 {
1038     void *haddr;
1039     Int128 ret;
1040     MemOp mop = get_memop(oi);
1041 
1042     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1043     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1044     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1045     ret = load_atom_16(cpu, ra, haddr, mop);
1046     clear_helper_retaddr();
1047 
1048     if (mop & MO_BSWAP) {
1049         ret = bswap128(ret);
1050     }
1051     return ret;
1052 }
1053 
1054 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1055                        MemOpIdx oi, uintptr_t ra)
1056 {
1057     void *haddr;
1058 
1059     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1060     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1061     stb_p(haddr, val);
1062     clear_helper_retaddr();
1063 }
1064 
1065 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1066                        MemOpIdx oi, uintptr_t ra)
1067 {
1068     void *haddr;
1069     MemOp mop = get_memop(oi);
1070 
1071     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1072     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1073 
1074     if (mop & MO_BSWAP) {
1075         val = bswap16(val);
1076     }
1077     store_atom_2(cpu, ra, haddr, mop, val);
1078     clear_helper_retaddr();
1079 }
1080 
1081 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1082                        MemOpIdx oi, uintptr_t ra)
1083 {
1084     void *haddr;
1085     MemOp mop = get_memop(oi);
1086 
1087     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1088     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1089 
1090     if (mop & MO_BSWAP) {
1091         val = bswap32(val);
1092     }
1093     store_atom_4(cpu, ra, haddr, mop, val);
1094     clear_helper_retaddr();
1095 }
1096 
1097 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1098                        MemOpIdx oi, uintptr_t ra)
1099 {
1100     void *haddr;
1101     MemOp mop = get_memop(oi);
1102 
1103     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1104     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1105 
1106     if (mop & MO_BSWAP) {
1107         val = bswap64(val);
1108     }
1109     store_atom_8(cpu, ra, haddr, mop, val);
1110     clear_helper_retaddr();
1111 }
1112 
1113 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1114                         MemOpIdx oi, uintptr_t ra)
1115 {
1116     void *haddr;
1117     MemOpIdx mop = get_memop(oi);
1118 
1119     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1120     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1121 
1122     if (mop & MO_BSWAP) {
1123         val = bswap128(val);
1124     }
1125     store_atom_16(cpu, ra, haddr, mop, val);
1126     clear_helper_retaddr();
1127 }
1128 
1129 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1130 {
1131     uint32_t ret;
1132 
1133     set_helper_retaddr(1);
1134     ret = ldub_p(g2h_untagged(ptr));
1135     clear_helper_retaddr();
1136     return ret;
1137 }
1138 
1139 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1140 {
1141     uint32_t ret;
1142 
1143     set_helper_retaddr(1);
1144     ret = lduw_p(g2h_untagged(ptr));
1145     clear_helper_retaddr();
1146     return ret;
1147 }
1148 
1149 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1150 {
1151     uint32_t ret;
1152 
1153     set_helper_retaddr(1);
1154     ret = ldl_p(g2h_untagged(ptr));
1155     clear_helper_retaddr();
1156     return ret;
1157 }
1158 
1159 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1160 {
1161     uint64_t ret;
1162 
1163     set_helper_retaddr(1);
1164     ret = ldq_p(g2h_untagged(ptr));
1165     clear_helper_retaddr();
1166     return ret;
1167 }
1168 
1169 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1170                          MemOpIdx oi, uintptr_t ra)
1171 {
1172     void *haddr;
1173     uint8_t ret;
1174 
1175     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1176     ret = ldub_p(haddr);
1177     clear_helper_retaddr();
1178     return ret;
1179 }
1180 
1181 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1182                           MemOpIdx oi, uintptr_t ra)
1183 {
1184     void *haddr;
1185     uint16_t ret;
1186 
1187     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1188     ret = lduw_p(haddr);
1189     clear_helper_retaddr();
1190     if (get_memop(oi) & MO_BSWAP) {
1191         ret = bswap16(ret);
1192     }
1193     return ret;
1194 }
1195 
1196 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1197                           MemOpIdx oi, uintptr_t ra)
1198 {
1199     void *haddr;
1200     uint32_t ret;
1201 
1202     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1203     ret = ldl_p(haddr);
1204     clear_helper_retaddr();
1205     if (get_memop(oi) & MO_BSWAP) {
1206         ret = bswap32(ret);
1207     }
1208     return ret;
1209 }
1210 
1211 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1212                           MemOpIdx oi, uintptr_t ra)
1213 {
1214     void *haddr;
1215     uint64_t ret;
1216 
1217     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1218     ret = ldq_p(haddr);
1219     clear_helper_retaddr();
1220     if (get_memop(oi) & MO_BSWAP) {
1221         ret = bswap64(ret);
1222     }
1223     return ret;
1224 }
1225 
1226 #include "ldst_common.c.inc"
1227 
1228 /*
1229  * Do not allow unaligned operations to proceed.  Return the host address.
1230  */
1231 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1232                                int size, uintptr_t retaddr)
1233 {
1234     MemOp mop = get_memop(oi);
1235     int a_bits = get_alignment_bits(mop);
1236     void *ret;
1237 
1238     /* Enforce guest required alignment.  */
1239     if (unlikely(addr & ((1 << a_bits) - 1))) {
1240         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1241     }
1242 
1243     /* Enforce qemu required alignment.  */
1244     if (unlikely(addr & (size - 1))) {
1245         cpu_loop_exit_atomic(cpu, retaddr);
1246     }
1247 
1248     ret = g2h(cpu, addr);
1249     set_helper_retaddr(retaddr);
1250     return ret;
1251 }
1252 
1253 #include "atomic_common.c.inc"
1254 
1255 /*
1256  * First set of functions passes in OI and RETADDR.
1257  * This makes them callable from other helpers.
1258  */
1259 
1260 #define ATOMIC_NAME(X) \
1261     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1262 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1263 
1264 #define DATA_SIZE 1
1265 #include "atomic_template.h"
1266 
1267 #define DATA_SIZE 2
1268 #include "atomic_template.h"
1269 
1270 #define DATA_SIZE 4
1271 #include "atomic_template.h"
1272 
1273 #ifdef CONFIG_ATOMIC64
1274 #define DATA_SIZE 8
1275 #include "atomic_template.h"
1276 #endif
1277 
1278 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1279 #define DATA_SIZE 16
1280 #include "atomic_template.h"
1281 #endif
1282