xref: /openbmc/qemu/accel/tcg/user-exec.c (revision 74781c0888e819552538593c0932d98ea16c766b)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translate-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/helper-proto.h"
30 #include "qemu/atomic128.h"
31 #include "trace/trace-root.h"
32 #include "tcg/tcg-ldst.h"
33 #include "internal-common.h"
34 #include "internal-target.h"
35 #include "user-retaddr.h"
36 
37 __thread uintptr_t helper_retaddr;
38 
39 //#define DEBUG_SIGNAL
40 
41 /*
42  * Adjust the pc to pass to cpu_restore_state; return the access type.
43  */
44 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
45 {
46     switch (helper_retaddr) {
47     default:
48         /*
49          * Fault during host memory operation within a helper function.
50          * The helper's host return address, saved here, gives us a
51          * pointer into the generated code that will unwind to the
52          * correct guest pc.
53          */
54         *pc = helper_retaddr;
55         break;
56 
57     case 0:
58         /*
59          * Fault during host memory operation within generated code.
60          * (Or, an unrelated bug within qemu, but we can't tell from here).
61          *
62          * We take the host pc from the signal frame.  However, we cannot
63          * use that value directly.  Within cpu_restore_state_from_tb, we
64          * assume PC comes from GETPC(), as used by the helper functions,
65          * so we adjust the address by -GETPC_ADJ to form an address that
66          * is within the call insn, so that the address does not accidentally
67          * match the beginning of the next guest insn.  However, when the
68          * pc comes from the signal frame it points to the actual faulting
69          * host memory insn and not the return from a call insn.
70          *
71          * Therefore, adjust to compensate for what will be done later
72          * by cpu_restore_state_from_tb.
73          */
74         *pc += GETPC_ADJ;
75         break;
76 
77     case 1:
78         /*
79          * Fault during host read for translation, or loosely, "execution".
80          *
81          * The guest pc is already pointing to the start of the TB for which
82          * code is being generated.  If the guest translator manages the
83          * page crossings correctly, this is exactly the correct address
84          * (and if the translator doesn't handle page boundaries correctly
85          * there's little we can do about that here).  Therefore, do not
86          * trigger the unwinder.
87          */
88         *pc = 0;
89         return MMU_INST_FETCH;
90     }
91 
92     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
93 }
94 
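/*
 * Illustration only (a sketch, not part of the build): a helper that
 * touches guest memory captures its own host return address with
 * GETPC(); that value reaches helper_retaddr via set_helper_retaddr()
 * in cpu_mmu_lookup() below, and so takes the "default" case above.
 * helper_example_ld() is hypothetical:
 *
 *     uint32_t helper_example_ld(CPUArchState *env, target_ulong addr)
 *     {
 *         return cpu_ldl_data_ra(env, addr, GETPC());
 *     }
 *
 * A fault in generated code itself, with no helper on the stack, leaves
 * helper_retaddr at 0 and takes "case 0" instead.
 */
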
95 /**
96  * handle_sigsegv_accerr_write:
97  * @cpu: the cpu context
98  * @old_set: the sigset_t from the signal ucontext_t
99  * @host_pc: the host pc, adjusted for the signal
100  * @guest_addr: the guest address of the fault
101  *
102  * Return true if the write fault was handled and the access should be retried.
103  *
104  * Note that it is important that we don't call page_unprotect() unless
105  * this is really a "write to nonwritable page" fault, because
106  * page_unprotect() assumes that if it is called for an access to
107  * a page that's writable this means we had two threads racing and
108  * another thread got there first and already made the page writable;
109  * so we will retry the access. If we were to call page_unprotect()
110  * for some other kind of fault that should really be passed to the
111  * guest, we'd end up in an infinite loop of retrying the faulting access.
112  */
113 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
114                                  uintptr_t host_pc, abi_ptr guest_addr)
115 {
116     switch (page_unprotect(guest_addr, host_pc)) {
117     case 0:
118         /*
119          * Fault not caused by a page marked unwritable to protect
120          * cached translations; it must be the guest binary's problem.
121          */
122         return false;
123     case 1:
124         /*
125          * Fault caused by protection of cached translation; TBs
126          * invalidated, so resume execution.
127          */
128         return true;
129     case 2:
130         /*
131          * Fault caused by protection of cached translation, and the
132          * currently executing TB was modified and must be exited immediately.
133          */
134         sigprocmask(SIG_SETMASK, old_set, NULL);
135         cpu_loop_exit_noexc(cpu);
136         /* NORETURN */
137     default:
138         g_assert_not_reached();
139     }
140 }
141 
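/*
 * A sketch of the caller's shape; the real caller lives in the per-host
 * signal handling code, and the locals here are hypothetical:
 *
 *     if (is_write && info->si_code == SEGV_ACCERR
 *         && handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask,
 *                                        host_pc, guest_addr)) {
 *         return;  // page unprotected, retry the faulting access
 *     }
 *     // otherwise deliver SIGSEGV to the guest
 */
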
142 typedef struct PageFlagsNode {
143     struct rcu_head rcu;      /* free deferred for lockless readers */
144     IntervalTreeNode itree;   /* the [start, last] range covered */
145     int flags;                /* PAGE_* bits, uniform across the range */
146 } PageFlagsNode;
147 
148 static IntervalTreeRoot pageflags_root;
149 
150 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
151 {
152     IntervalTreeNode *n;
153 
154     n = interval_tree_iter_first(&pageflags_root, start, last);
155     return n ? container_of(n, PageFlagsNode, itree) : NULL;
156 }
157 
158 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
159                                      target_ulong last)
160 {
161     IntervalTreeNode *n;
162 
163     n = interval_tree_iter_next(&p->itree, start, last);
164     return n ? container_of(n, PageFlagsNode, itree) : NULL;
165 }
166 
167 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
168 {
169     IntervalTreeNode *n;
170     int rc = 0;
171 
172     mmap_lock();
173     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
174          n != NULL;
175          n = interval_tree_iter_next(n, 0, -1)) {
176         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
177 
178         rc = fn(priv, n->start, n->last + 1, p->flags);
179         if (rc != 0) {
180             break;
181         }
182     }
183     mmap_unlock();
184 
185     return rc;
186 }
187 
188 static int dump_region(void *priv, target_ulong start,
189                        target_ulong end, unsigned long prot)
190 {
191     FILE *f = (FILE *)priv;
192 
193     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
194             start, end, end - start,
195             ((prot & PAGE_READ) ? 'r' : '-'),
196             ((prot & PAGE_WRITE) ? 'w' : '-'),
197             ((prot & PAGE_EXEC) ? 'x' : '-'));
198     return 0;
199 }
200 
201 /* dump memory mappings */
202 void page_dump(FILE *f)
203 {
204     const int length = sizeof(target_ulong) * 2;
205 
206     fprintf(f, "%-*s %-*s %-*s %s\n",
207             length, "start", length, "end", length, "size", "prot");
208     walk_memory_regions(f, dump_region);
209 }
210 
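/*
 * Example output, with illustrative addresses for a 64-bit guest:
 *
 *     start            end              size             prot
 *     0000000000400000-0000000000452000 0000000000052000 r-x
 *     0000000000652000-0000000000654000 0000000000002000 rw-
 */
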
211 int page_get_flags(target_ulong address)
212 {
213     PageFlagsNode *p = pageflags_find(address, address);
214 
215     /*
216      * See util/interval-tree.c re lockless lookups: no false positives but
217      * there are false negatives.  If we find nothing, retry with the mmap
218      * lock acquired.
219      */
220     if (p) {
221         return p->flags;
222     }
223     if (have_mmap_lock()) {
224         return 0;
225     }
226 
227     mmap_lock();
228     p = pageflags_find(address, address);
229     mmap_unlock();
230     return p ? p->flags : 0;
231 }
232 
233 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
234 static void pageflags_create(target_ulong start, target_ulong last, int flags)
235 {
236     PageFlagsNode *p = g_new(PageFlagsNode, 1);
237 
238     p->itree.start = start;
239     p->itree.last = last;
240     p->flags = flags;
241     interval_tree_insert(&p->itree, &pageflags_root);
242 }
243 
244 /* A subroutine of page_set_flags: remove everything in [start,last]. */
245 static bool pageflags_unset(target_ulong start, target_ulong last)
246 {
247     bool inval_tb = false;
248 
249     while (true) {
250         PageFlagsNode *p = pageflags_find(start, last);
251         target_ulong p_last;
252 
253         if (!p) {
254             break;
255         }
256 
257         if (p->flags & PAGE_EXEC) {
258             inval_tb = true;
259         }
260 
261         interval_tree_remove(&p->itree, &pageflags_root);
262         p_last = p->itree.last;
263 
264         if (p->itree.start < start) {
265             /* Truncate the node from the end, or split out the middle. */
266             p->itree.last = start - 1;
267             interval_tree_insert(&p->itree, &pageflags_root);
268             if (last < p_last) {
269                 pageflags_create(last + 1, p_last, p->flags);
270                 break;
271             }
272         } else if (p_last <= last) {
273             /* Range completely covers node -- remove it. */
274             g_free_rcu(p, rcu);
275         } else {
276             /* Truncate the node from the start. */
277             p->itree.start = last + 1;
278             interval_tree_insert(&p->itree, &pageflags_root);
279             break;
280         }
281     }
282 
283     return inval_tb;
284 }
285 
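/*
 * The cases above, pictorially.  Removing [start,last]:
 *
 *                    |-------- removed --------|
 *     node A:    |=====A=====|                          -> truncate tail
 *     node B:              |====B====|                  -> delete
 *     node C:                        |=====C=====|      -> truncate head
 *     node D:    |==============D===============|       -> split in two
 */
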
286 /*
287  * A subroutine of page_set_flags: nothing overlaps [start,last],
288  * but check adjacent mappings and maybe merge into a single range.
289  */
290 static void pageflags_create_merge(target_ulong start, target_ulong last,
291                                    int flags)
292 {
293     PageFlagsNode *next = NULL, *prev = NULL;
294 
295     if (start > 0) {
296         prev = pageflags_find(start - 1, start - 1);
297         if (prev) {
298             if (prev->flags == flags) {
299                 interval_tree_remove(&prev->itree, &pageflags_root);
300             } else {
301                 prev = NULL;
302             }
303         }
304     }
305     if (last + 1 != 0) {
306         next = pageflags_find(last + 1, last + 1);
307         if (next) {
308             if (next->flags == flags) {
309                 interval_tree_remove(&next->itree, &pageflags_root);
310             } else {
311                 next = NULL;
312             }
313         }
314     }
315 
316     if (prev) {
317         if (next) {
318             prev->itree.last = next->itree.last;
319             g_free_rcu(next, rcu);
320         } else {
321             prev->itree.last = last;
322         }
323         interval_tree_insert(&prev->itree, &pageflags_root);
324     } else if (next) {
325         next->itree.start = start;
326         interval_tree_insert(&next->itree, &pageflags_root);
327     } else {
328         pageflags_create(start, last, flags);
329     }
330 }
331 
332 /*
333  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
334  * By default, they are not kept.
335  */
336 #ifndef PAGE_TARGET_STICKY
337 #define PAGE_TARGET_STICKY  0
338 #endif
339 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
340 
341 /* A subroutine of page_set_flags: set and clear flags in [start,last]. */
342 static bool pageflags_set_clear(target_ulong start, target_ulong last,
343                                 int set_flags, int clear_flags)
344 {
345     PageFlagsNode *p;
346     target_ulong p_start, p_last;
347     int p_flags, merge_flags;
348     bool inval_tb = false;
349 
350  restart:
351     p = pageflags_find(start, last);
352     if (!p) {
353         if (set_flags) {
354             pageflags_create_merge(start, last, set_flags);
355         }
356         goto done;
357     }
358 
359     p_start = p->itree.start;
360     p_last = p->itree.last;
361     p_flags = p->flags;
362     /* Using mprotect on a page does not change sticky bits. */
363     merge_flags = (p_flags & ~clear_flags) | set_flags;
364 
365     /*
366      * Need to flush if an overlapping executable region
367      * removes exec, or adds write.
368      */
369     if ((p_flags & PAGE_EXEC)
370         && (!(merge_flags & PAGE_EXEC)
371             || (merge_flags & ~p_flags & PAGE_WRITE))) {
372         inval_tb = true;
373     }
374 
375     /*
376      * If there is an exact range match, update and return without
377      * attempting to merge with adjacent regions.
378      */
379     if (start == p_start && last == p_last) {
380         if (merge_flags) {
381             p->flags = merge_flags;
382         } else {
383             interval_tree_remove(&p->itree, &pageflags_root);
384             g_free_rcu(p, rcu);
385         }
386         goto done;
387     }
388 
389     /*
390      * If sticky bits affect the original mapping, then we must be more
391      * careful about the existing intervals and the separate flags.
392      */
393     if (set_flags != merge_flags) {
394         if (p_start < start) {
395             interval_tree_remove(&p->itree, &pageflags_root);
396             p->itree.last = start - 1;
397             interval_tree_insert(&p->itree, &pageflags_root);
398 
399             if (last < p_last) {
400                 if (merge_flags) {
401                     pageflags_create(start, last, merge_flags);
402                 }
403                 pageflags_create(last + 1, p_last, p_flags);
404             } else {
405                 if (merge_flags) {
406                     pageflags_create(start, p_last, merge_flags);
407                 }
408                 if (p_last < last) {
409                     start = p_last + 1;
410                     goto restart;
411                 }
412             }
413         } else {
414             if (start < p_start && set_flags) {
415                 pageflags_create(start, p_start - 1, set_flags);
416             }
417             if (last < p_last) {
418                 interval_tree_remove(&p->itree, &pageflags_root);
419                 p->itree.start = last + 1;
420                 interval_tree_insert(&p->itree, &pageflags_root);
421                 if (merge_flags) {
422                     pageflags_create(start, last, merge_flags);
423                 }
424             } else {
425                 if (merge_flags) {
426                     p->flags = merge_flags;
427                 } else {
428                     interval_tree_remove(&p->itree, &pageflags_root);
429                     g_free_rcu(p, rcu);
430                 }
431                 if (p_last < last) {
432                     start = p_last + 1;
433                     goto restart;
434                 }
435             }
436         }
437         goto done;
438     }
439 
440     /* If flags are not changing for this range, incorporate it. */
441     if (set_flags == p_flags) {
442         if (start < p_start) {
443             interval_tree_remove(&p->itree, &pageflags_root);
444             p->itree.start = start;
445             interval_tree_insert(&p->itree, &pageflags_root);
446         }
447         if (p_last < last) {
448             start = p_last + 1;
449             goto restart;
450         }
451         goto done;
452     }
453 
454     /* Maybe split out head and/or tail ranges with the original flags. */
455     interval_tree_remove(&p->itree, &pageflags_root);
456     if (p_start < start) {
457         p->itree.last = start - 1;
458         interval_tree_insert(&p->itree, &pageflags_root);
459 
460         if (p_last < last) {
461             goto restart;
462         }
463         if (last < p_last) {
464             pageflags_create(last + 1, p_last, p_flags);
465         }
466     } else if (last < p_last) {
467         p->itree.start = last + 1;
468         interval_tree_insert(&p->itree, &pageflags_root);
469     } else {
470         g_free_rcu(p, rcu);
471         goto restart;
472     }
473     if (set_flags) {
474         pageflags_create(start, last, set_flags);
475     }
476 
477  done:
478     return inval_tb;
479 }
480 
481 /*
482  * Modify the flags of a page and invalidate the code if necessary.
483  * The flag PAGE_WRITE_ORG is set automatically according to
484  * PAGE_WRITE.  The mmap_lock should already be held.
485  */
486 void page_set_flags(target_ulong start, target_ulong last, int flags)
487 {
488     bool reset = false;
489     bool inval_tb = false;
490 
491     /* This function should never be called with addresses outside the
492        guest address space.  If this assert fires, it probably indicates
493        a missing call to h2g_valid.  */
494     assert(start <= last);
495     assert(last <= GUEST_ADDR_MAX);
496     /* Only set PAGE_ANON with new mappings. */
497     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
498     assert_memory_lock();
499 
500     start &= TARGET_PAGE_MASK;
501     last |= ~TARGET_PAGE_MASK;
502 
503     if (!(flags & PAGE_VALID)) {
504         flags = 0;
505     } else {
506         reset = flags & PAGE_RESET;
507         flags &= ~PAGE_RESET;
508         if (flags & PAGE_WRITE) {
509             flags |= PAGE_WRITE_ORG;
510         }
511     }
512 
513     if (!flags || reset) {
514         page_reset_target_data(start, last);
515         inval_tb |= pageflags_unset(start, last);
516     }
517     if (flags) {
518         inval_tb |= pageflags_set_clear(start, last, flags,
519                                         ~(reset ? 0 : PAGE_STICKY));
520     }
521     if (inval_tb) {
522         tb_invalidate_phys_range(start, last);
523     }
524 }
525 
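/*
 * A minimal usage sketch with hypothetical values, mirroring what the
 * mmap emulation does after a successful host mmap: mark a fresh
 * anonymous mapping readable and writable, resetting per-page state:
 *
 *     page_set_flags(start, start + len - 1,
 *                    PAGE_VALID | PAGE_RESET | PAGE_ANON |
 *                    PAGE_READ | PAGE_WRITE);
 */
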
526 bool page_check_range(target_ulong start, target_ulong len, int flags)
527 {
528     target_ulong last;
529     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
530     bool ret;
531 
532     if (len == 0) {
533         return true;  /* trivial length */
534     }
535 
536     last = start + len - 1;
537     if (last < start) {
538         return false; /* wrap around */
539     }
540 
541     locked = have_mmap_lock();
542     while (true) {
543         PageFlagsNode *p = pageflags_find(start, last);
544         int missing;
545 
546         if (!p) {
547             if (!locked) {
548                 /*
549                  * Lockless lookups have false negatives.
550                  * Retry with the lock held.
551                  */
552                 mmap_lock();
553                 locked = -1;
554                 p = pageflags_find(start, last);
555             }
556             if (!p) {
557                 ret = false; /* entire region invalid */
558                 break;
559             }
560         }
561         if (start < p->itree.start) {
562             ret = false; /* initial bytes invalid */
563             break;
564         }
565 
566         missing = flags & ~p->flags;
567         if (missing & ~PAGE_WRITE) {
568             ret = false; /* page doesn't match */
569             break;
570         }
571         if (missing & PAGE_WRITE) {
572             if (!(p->flags & PAGE_WRITE_ORG)) {
573                 ret = false; /* page not writable */
574                 break;
575             }
576             /* Asking about writable, but has been protected: undo. */
577             if (!page_unprotect(start, 0)) {
578                 ret = false;
579                 break;
580             }
581             /* TODO: page_unprotect should take a range, not a single page. */
582             if (last - start < TARGET_PAGE_SIZE) {
583                 ret = true; /* ok */
584                 break;
585             }
586             start += TARGET_PAGE_SIZE;
587             continue;
588         }
589 
590         if (last <= p->itree.last) {
591             ret = true; /* ok */
592             break;
593         }
594         start = p->itree.last + 1;
595     }
596 
597     /* Release the lock if acquired locally. */
598     if (locked < 0) {
599         mmap_unlock();
600     }
601     return ret;
602 }
603 
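/*
 * Typical use, as a sketch: validate a guest buffer before touching it
 * directly through the host mapping (guest_buf and size are
 * hypothetical; TARGET_EFAULT as in linux-user):
 *
 *     if (!page_check_range(guest_buf, size, PAGE_READ | PAGE_WRITE)) {
 *         return -TARGET_EFAULT;
 *     }
 */
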
604 bool page_check_range_empty(target_ulong start, target_ulong last)
605 {
606     assert(last >= start);
607     assert_memory_lock();
608     return pageflags_find(start, last) == NULL;
609 }
610 
611 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
612                                    target_ulong len, target_ulong align)
613 {
614     target_ulong len_m1, align_m1;
615 
616     assert(min <= max);
617     assert(max <= GUEST_ADDR_MAX);
618     assert(len != 0);
619     assert(is_power_of_2(align));
620     assert_memory_lock();
621 
622     len_m1 = len - 1;
623     align_m1 = align - 1;
624 
625     /* Iteratively narrow the search region. */
626     while (1) {
627         PageFlagsNode *p;
628 
629         /* Align min and double-check there's enough space remaining. */
630         min = (min + align_m1) & ~align_m1;
631         if (min > max) {
632             return -1;
633         }
634         if (len_m1 > max - min) {
635             return -1;
636         }
637 
638         p = pageflags_find(min, min + len_m1);
639         if (p == NULL) {
640             /* Found! */
641             return min;
642         }
643         if (max <= p->itree.last) {
644             /* Existing allocation fills the remainder of the search region. */
645             return -1;
646         }
647         /* Skip across existing allocation. */
648         min = p->itree.last + 1;
649     }
650 }
651 
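/*
 * Example, as a sketch: find a page-aligned, 16-page hole, much as the
 * mmap address-space search does (min_addr is hypothetical):
 *
 *     target_ulong addr = page_find_range_empty(min_addr, GUEST_ADDR_MAX,
 *                                               16 * TARGET_PAGE_SIZE,
 *                                               TARGET_PAGE_SIZE);
 *     if (addr == (target_ulong)-1) {
 *         // no hole large enough
 *     }
 */
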
652 void page_protect(tb_page_addr_t address)
653 {
654     PageFlagsNode *p;
655     target_ulong start, last;
656     int host_page_size = qemu_real_host_page_size();
657     int prot;
658 
659     assert_memory_lock();
660 
661     if (host_page_size <= TARGET_PAGE_SIZE) {
662         start = address & TARGET_PAGE_MASK;
663         last = start + TARGET_PAGE_SIZE - 1;
664     } else {
665         start = address & -host_page_size;
666         last = start + host_page_size - 1;
667     }
668 
669     p = pageflags_find(start, last);
670     if (!p) {
671         return;
672     }
673     prot = p->flags;
674 
675     if (unlikely(p->itree.last < last)) {
676         /* More than one protection region covers the one host page. */
677         assert(TARGET_PAGE_SIZE < host_page_size);
678         while ((p = pageflags_next(p, start, last)) != NULL) {
679             prot |= p->flags;
680         }
681     }
682 
683     if (prot & PAGE_WRITE) {
684         pageflags_set_clear(start, last, 0, PAGE_WRITE);
685         mprotect(g2h_untagged(start), last - start + 1,
686                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
687     }
688 }
689 
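/*
 * Note the pairing with page_unprotect() below: page_protect() removes
 * host write permission from pages that contain cached translations, so
 * that a later guest store faults; the SIGSEGV path then invalidates
 * the stale TBs and restores write access via page_unprotect().
 */
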
690 /*
691  * Called from signal handler: invalidate the code and unprotect the
692  * page. Return 0 if the fault was not handled, 1 if it was handled,
693  * and 2 if it was handled but the caller must cause the TB to be
694  * immediately exited. (We can only return 2 if the 'pc' argument is
695  * non-zero.)
696  */
697 int page_unprotect(target_ulong address, uintptr_t pc)
698 {
699     PageFlagsNode *p;
700     bool current_tb_invalidated;
701 
702     /*
703      * Technically this isn't safe inside a signal handler.  However we
704      * know this only ever happens in a synchronous SEGV handler, so in
705      * practice it seems to be ok.
706      */
707     mmap_lock();
708 
709     p = pageflags_find(address, address);
710 
711     /* If this address was not really writable, nothing to do. */
712     if (!p || !(p->flags & PAGE_WRITE_ORG)) {
713         mmap_unlock();
714         return 0;
715     }
716 
717     current_tb_invalidated = false;
718     if (p->flags & PAGE_WRITE) {
719         /*
720          * If the page is actually marked WRITE then assume this is because
721          * this thread raced with another one which got here first and
722          * set the page to PAGE_WRITE and did the TB invalidate for us.
723          */
724 #ifdef TARGET_HAS_PRECISE_SMC
725         TranslationBlock *current_tb = tcg_tb_lookup(pc);
726         if (current_tb) {
727             current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
728         }
729 #endif
730     } else {
731         int host_page_size = qemu_real_host_page_size();
732         target_ulong start, len, i;
733         int prot;
734 
735         if (host_page_size <= TARGET_PAGE_SIZE) {
736             start = address & TARGET_PAGE_MASK;
737             len = TARGET_PAGE_SIZE;
738             prot = p->flags | PAGE_WRITE;
739             pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
740             current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
741         } else {
742             start = address & -host_page_size;
743             len = host_page_size;
744             prot = 0;
745 
746             for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
747                 target_ulong addr = start + i;
748 
749                 p = pageflags_find(addr, addr);
750                 if (p) {
751                     prot |= p->flags;
752                     if (p->flags & PAGE_WRITE_ORG) {
753                         prot |= PAGE_WRITE;
754                         pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
755                                             PAGE_WRITE, 0);
756                     }
757                 }
758                 /*
759                  * Since the content will be modified, we must invalidate
760                  * the corresponding translated code.
761                  */
762                 current_tb_invalidated |=
763                     tb_invalidate_phys_page_unwind(addr, pc);
764             }
765         }
766         if (prot & PAGE_EXEC) {
767             prot = (prot & ~PAGE_EXEC) | PAGE_READ;
768         }
769         mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
770     }
771     mmap_unlock();
772 
773     /* If the current TB was invalidated, return to the main loop. */
774     return current_tb_invalidated ? 2 : 1;
775 }
776 
777 static int probe_access_internal(CPUArchState *env, vaddr addr,
778                                  int fault_size, MMUAccessType access_type,
779                                  bool nonfault, uintptr_t ra)
780 {
781     int acc_flag;
782     bool maperr;
783 
784     switch (access_type) {
785     case MMU_DATA_STORE:
786         acc_flag = PAGE_WRITE_ORG;
787         break;
788     case MMU_DATA_LOAD:
789         acc_flag = PAGE_READ;
790         break;
791     case MMU_INST_FETCH:
792         acc_flag = PAGE_EXEC;
793         break;
794     default:
795         g_assert_not_reached();
796     }
797 
798     if (guest_addr_valid_untagged(addr)) {
799         int page_flags = page_get_flags(addr);
800         if (page_flags & acc_flag) {
801         if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE_ORG)
802                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
803                 return TLB_MMIO;
804             }
805             return 0; /* success */
806         }
807         maperr = !(page_flags & PAGE_VALID);
808     } else {
809         maperr = true;
810     }
811 
812     if (nonfault) {
813         return TLB_INVALID_MASK;
814     }
815 
816     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
817 }
818 
819 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
820                        MMUAccessType access_type, int mmu_idx,
821                        bool nonfault, void **phost, uintptr_t ra)
822 {
823     int flags;
824 
825     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
826     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
827     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
828     return flags;
829 }
830 
831 void *probe_access(CPUArchState *env, vaddr addr, int size,
832                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
833 {
834     int flags;
835 
836     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
837     flags = probe_access_internal(env, addr, size, access_type, false, ra);
838     g_assert((flags & ~TLB_MMIO) == 0);
839 
840     return size ? g2h(env_cpu(env), addr) : NULL;
841 }
842 
843 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
844                                         void **hostp)
845 {
846     int flags;
847 
848     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
849     g_assert(flags == 0);
850 
851     if (hostp) {
852         *hostp = g2h_untagged(addr);
853     }
854     return addr;
855 }
856 
857 #ifdef TARGET_PAGE_DATA_SIZE
858 /*
859  * Allocate chunks of target data together.  For the only current user,
860  * if we allocate one chunk per page, we have overhead of 40/128, or
861  * roughly 31%.  Allocating for 64 pages at a time keeps overhead < 1%.
862  */
863 #define TPD_PAGES  64
864 #define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
865 
866 typedef struct TargetPageDataNode {
867     struct rcu_head rcu;
868     IntervalTreeNode itree;
869     char data[] __attribute__((aligned));
870 } TargetPageDataNode;
871 
872 static IntervalTreeRoot targetdata_root;
873 
874 void page_reset_target_data(target_ulong start, target_ulong last)
875 {
876     IntervalTreeNode *n, *next;
877 
878     assert_memory_lock();
879 
880     start &= TARGET_PAGE_MASK;
881     last |= ~TARGET_PAGE_MASK;
882 
883     for (n = interval_tree_iter_first(&targetdata_root, start, last),
884          next = n ? interval_tree_iter_next(n, start, last) : NULL;
885          n != NULL;
886          n = next,
887          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
888         target_ulong n_start, n_last, p_ofs, p_len;
889         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
890 
891         if (n->start >= start && n->last <= last) {
892             interval_tree_remove(n, &targetdata_root);
893             g_free_rcu(t, rcu);
894             continue;
895         }
896 
897         if (n->start < start) {
898             n_start = start;
899             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
900         } else {
901             n_start = n->start;
902             p_ofs = 0;
903         }
904         n_last = MIN(last, n->last);
905         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
906 
907         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
908                p_len * TARGET_PAGE_DATA_SIZE);
909     }
910 }
911 
912 void *page_get_target_data(target_ulong address)
913 {
914     IntervalTreeNode *n;
915     TargetPageDataNode *t;
916     target_ulong page, region, p_ofs;
917 
918     page = address & TARGET_PAGE_MASK;
919     region = address & TBD_MASK;
920 
921     n = interval_tree_iter_first(&targetdata_root, page, page);
922     if (!n) {
923         /*
924          * See util/interval-tree.c re lockless lookups: no false positives
925          * but there are false negatives.  If we find nothing, retry with
926          * the mmap lock acquired.  We also need the lock for the
927          * allocation + insert.
928          */
929         mmap_lock();
930         n = interval_tree_iter_first(&targetdata_root, page, page);
931         if (!n) {
932             t = g_malloc0(sizeof(TargetPageDataNode)
933                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
934             n = &t->itree;
935             n->start = region;
936             n->last = region | ~TBD_MASK;
937             interval_tree_insert(n, &targetdata_root);
938         }
939         mmap_unlock();
940     }
941 
942     t = container_of(n, TargetPageDataNode, itree);
943     p_ofs = (page - region) >> TARGET_PAGE_BITS;
944     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
945 }
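
/*
 * Usage sketch, with a hypothetical offset; modelled on the current
 * user, Arm MTE allocation tags: each guest page has
 * TARGET_PAGE_DATA_SIZE bytes of metadata, zeroed on first use.
 *
 *     uint8_t *tags = page_get_target_data(guest_addr);
 *     tags[tag_offset] = tag;
 */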
946 #else
947 void page_reset_target_data(target_ulong start, target_ulong last) { }
948 #endif /* TARGET_PAGE_DATA_SIZE */
949 
950 /* The system-mode versions of these helpers are in cputlb.c.  */
951 
952 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
953                             MemOp mop, uintptr_t ra, MMUAccessType type)
954 {
955     int a_bits = get_alignment_bits(mop);
956     void *ret;
957 
958     /* Enforce guest required alignment.  */
959     if (unlikely(addr & ((1 << a_bits) - 1))) {
960         cpu_loop_exit_sigbus(cpu, addr, type, ra);
961     }
962 
963     ret = g2h(cpu, addr);
964     set_helper_retaddr(ra);
965     return ret;
966 }
967 
968 #include "ldst_atomicity.c.inc"
969 
970 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
971                           uintptr_t ra, MMUAccessType access_type)
972 {
973     void *haddr;
974     uint8_t ret;
975 
976     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
977     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
978     ret = ldub_p(haddr);
979     clear_helper_retaddr();
980     return ret;
981 }
982 
983 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
984                            uintptr_t ra, MMUAccessType access_type)
985 {
986     void *haddr;
987     uint16_t ret;
988     MemOp mop = get_memop(oi);
989 
990     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
991     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
992     ret = load_atom_2(cpu, ra, haddr, mop);
993     clear_helper_retaddr();
994 
995     if (mop & MO_BSWAP) {
996         ret = bswap16(ret);
997     }
998     return ret;
999 }
1000 
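/*
 * Note on the byte-swap pattern used by all of the loads and stores
 * below: load_atom_* and the ld*_p/st*_p accessors work in host byte
 * order, and MO_BSWAP is set in the MemOp when the access's endianness
 * differs from the host's, hence the conditional swaps.
 */
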
1001 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1002                            uintptr_t ra, MMUAccessType access_type)
1003 {
1004     void *haddr;
1005     uint32_t ret;
1006     MemOp mop = get_memop(oi);
1007 
1008     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1009     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1010     ret = load_atom_4(cpu, ra, haddr, mop);
1011     clear_helper_retaddr();
1012 
1013     if (mop & MO_BSWAP) {
1014         ret = bswap32(ret);
1015     }
1016     return ret;
1017 }
1018 
1019 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1020                            uintptr_t ra, MMUAccessType access_type)
1021 {
1022     void *haddr;
1023     uint64_t ret;
1024     MemOp mop = get_memop(oi);
1025 
1026     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1027     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1028     ret = load_atom_8(cpu, ra, haddr, mop);
1029     clear_helper_retaddr();
1030 
1031     if (mop & MO_BSWAP) {
1032         ret = bswap64(ret);
1033     }
1034     return ret;
1035 }
1036 
1037 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1038                           MemOpIdx oi, uintptr_t ra)
1039 {
1040     void *haddr;
1041     Int128 ret;
1042     MemOp mop = get_memop(oi);
1043 
1044     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1045     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1046     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1047     ret = load_atom_16(cpu, ra, haddr, mop);
1048     clear_helper_retaddr();
1049 
1050     if (mop & MO_BSWAP) {
1051         ret = bswap128(ret);
1052     }
1053     return ret;
1054 }
1055 
1056 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1057                        MemOpIdx oi, uintptr_t ra)
1058 {
1059     void *haddr;
1060 
1061     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1062     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1063     stb_p(haddr, val);
1064     clear_helper_retaddr();
1065 }
1066 
1067 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1068                        MemOpIdx oi, uintptr_t ra)
1069 {
1070     void *haddr;
1071     MemOp mop = get_memop(oi);
1072 
1073     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1074     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1075 
1076     if (mop & MO_BSWAP) {
1077         val = bswap16(val);
1078     }
1079     store_atom_2(cpu, ra, haddr, mop, val);
1080     clear_helper_retaddr();
1081 }
1082 
1083 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1084                        MemOpIdx oi, uintptr_t ra)
1085 {
1086     void *haddr;
1087     MemOp mop = get_memop(oi);
1088 
1089     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1090     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1091 
1092     if (mop & MO_BSWAP) {
1093         val = bswap32(val);
1094     }
1095     store_atom_4(cpu, ra, haddr, mop, val);
1096     clear_helper_retaddr();
1097 }
1098 
1099 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1100                        MemOpIdx oi, uintptr_t ra)
1101 {
1102     void *haddr;
1103     MemOp mop = get_memop(oi);
1104 
1105     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1106     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1107 
1108     if (mop & MO_BSWAP) {
1109         val = bswap64(val);
1110     }
1111     store_atom_8(cpu, ra, haddr, mop, val);
1112     clear_helper_retaddr();
1113 }
1114 
1115 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1116                         MemOpIdx oi, uintptr_t ra)
1117 {
1118     void *haddr;
1119     MemOp mop = get_memop(oi);
1120 
1121     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1122     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1123 
1124     if (mop & MO_BSWAP) {
1125         val = bswap128(val);
1126     }
1127     store_atom_16(cpu, ra, haddr, mop, val);
1128     clear_helper_retaddr();
1129 }
1130 
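/*
 * The cpu_ld*_code helpers set helper_retaddr to 1: see "case 1"
 * (fault during a read for translation) in adjust_signal_pc() above.
 */
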
1131 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1132 {
1133     uint32_t ret;
1134 
1135     set_helper_retaddr(1);
1136     ret = ldub_p(g2h_untagged(ptr));
1137     clear_helper_retaddr();
1138     return ret;
1139 }
1140 
1141 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1142 {
1143     uint32_t ret;
1144 
1145     set_helper_retaddr(1);
1146     ret = lduw_p(g2h_untagged(ptr));
1147     clear_helper_retaddr();
1148     return ret;
1149 }
1150 
1151 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1152 {
1153     uint32_t ret;
1154 
1155     set_helper_retaddr(1);
1156     ret = ldl_p(g2h_untagged(ptr));
1157     clear_helper_retaddr();
1158     return ret;
1159 }
1160 
1161 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1162 {
1163     uint64_t ret;
1164 
1165     set_helper_retaddr(1);
1166     ret = ldq_p(g2h_untagged(ptr));
1167     clear_helper_retaddr();
1168     return ret;
1169 }
1170 
1171 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1172                          MemOpIdx oi, uintptr_t ra)
1173 {
1174     void *haddr;
1175     uint8_t ret;
1176 
1177     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1178     ret = ldub_p(haddr);
1179     clear_helper_retaddr();
1180     return ret;
1181 }
1182 
1183 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1184                           MemOpIdx oi, uintptr_t ra)
1185 {
1186     void *haddr;
1187     uint16_t ret;
1188 
1189     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1190     ret = lduw_p(haddr);
1191     clear_helper_retaddr();
1192     if (get_memop(oi) & MO_BSWAP) {
1193         ret = bswap16(ret);
1194     }
1195     return ret;
1196 }
1197 
1198 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1199                           MemOpIdx oi, uintptr_t ra)
1200 {
1201     void *haddr;
1202     uint32_t ret;
1203 
1204     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1205     ret = ldl_p(haddr);
1206     clear_helper_retaddr();
1207     if (get_memop(oi) & MO_BSWAP) {
1208         ret = bswap32(ret);
1209     }
1210     return ret;
1211 }
1212 
1213 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1214                           MemOpIdx oi, uintptr_t ra)
1215 {
1216     void *haddr;
1217     uint64_t ret;
1218 
1219     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1220     ret = ldq_p(haddr);
1221     clear_helper_retaddr();
1222     if (get_memop(oi) & MO_BSWAP) {
1223         ret = bswap64(ret);
1224     }
1225     return ret;
1226 }
1227 
1228 #include "ldst_common.c.inc"
1229 
1230 /*
1231  * Do not allow unaligned operations to proceed.  Return the host address.
1232  */
1233 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1234                                int size, uintptr_t retaddr)
1235 {
1236     MemOp mop = get_memop(oi);
1237     int a_bits = get_alignment_bits(mop);
1238     void *ret;
1239 
1240     /* Enforce guest required alignment.  */
1241     if (unlikely(addr & ((1 << a_bits) - 1))) {
1242         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1243     }
1244 
1245     /* Enforce qemu required alignment.  */
1246     if (unlikely(addr & (size - 1))) {
1247         cpu_loop_exit_atomic(cpu, retaddr);
1248     }
1249 
1250     ret = g2h(cpu, addr);
1251     set_helper_retaddr(retaddr);
1252     return ret;
1253 }
1254 
1255 #include "atomic_common.c.inc"
1256 
1257 /*
1258  * First set of functions passes in OI and RETADDR.
1259  * This makes them callable from other helpers.
1260  */
1261 
1262 #define ATOMIC_NAME(X) \
1263     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1264 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1265 
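/*
 * For example, a sketch of the expansion: with DATA_SIZE 4 the template
 * emits functions such as
 *
 *     uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, abi_ptr addr,
 *                                         uint32_t cmpv, uint32_t newv,
 *                                         MemOpIdx oi, uintptr_t retaddr);
 *
 * i.e. ATOMIC_NAME(cmpxchg) with SUFFIX "l" and END "_le".
 */
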
1266 #define DATA_SIZE 1
1267 #include "atomic_template.h"
1268 
1269 #define DATA_SIZE 2
1270 #include "atomic_template.h"
1271 
1272 #define DATA_SIZE 4
1273 #include "atomic_template.h"
1274 
1275 #ifdef CONFIG_ATOMIC64
1276 #define DATA_SIZE 8
1277 #include "atomic_template.h"
1278 #endif
1279 
1280 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1281 #define DATA_SIZE 16
1282 #include "atomic_template.h"
1283 #endif
1284