xref: /openbmc/qemu/accel/tcg/user-exec.c (revision 1bbbe7cf2df11a1bc334489a3b87ee23e13c3c29)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "accel/tcg/cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/vaddr.h"
23 #include "exec/tlb-flags.h"
24 #include "tcg/tcg.h"
25 #include "qemu/bitops.h"
26 #include "qemu/rcu.h"
27 #include "accel/tcg/cpu-ldst-common.h"
28 #include "accel/tcg/helper-retaddr.h"
29 #include "accel/tcg/probe.h"
30 #include "user/cpu_loop.h"
31 #include "user/guest-host.h"
32 #include "qemu/main-loop.h"
33 #include "user/page-protection.h"
34 #include "exec/page-protection.h"
35 #include "exec/helper-proto-common.h"
36 #include "qemu/atomic128.h"
37 #include "qemu/bswap.h"
38 #include "qemu/int128.h"
39 #include "trace.h"
40 #include "tcg/tcg-ldst.h"
41 #include "backend-ldst.h"
42 #include "internal-common.h"
43 #include "tb-internal.h"
44 
/*
 * Per-thread marker inspected by adjust_signal_pc() when a host fault
 * is being processed:
 *   0     - the fault is attributed to generated code;
 *   1     - the fault is attributed to a host read done for translation;
 *   other - treated as the host return address of a helper function.
 * NOTE(review): presumably written via set_helper_retaddr() (see
 * cpu_mmu_lookup) -- confirm against accel/tcg/helper-retaddr.h.
 */
__thread uintptr_t helper_retaddr;
46 
47 //#define DEBUG_SIGNAL
48 
49 void cpu_interrupt(CPUState *cpu, int mask)
50 {
51     g_assert(bql_locked());
52     cpu->interrupt_request |= mask;
53     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
54 }
55 
56 /*
57  * Adjust the pc to pass to cpu_restore_state; return the memop type.
58  */
59 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
60 {
61     switch (helper_retaddr) {
62     default:
63         /*
64          * Fault during host memory operation within a helper function.
65          * The helper's host return address, saved here, gives us a
66          * pointer into the generated code that will unwind to the
67          * correct guest pc.
68          */
69         *pc = helper_retaddr;
70         break;
71 
72     case 0:
73         /*
74          * Fault during host memory operation within generated code.
75          * (Or, a unrelated bug within qemu, but we can't tell from here).
76          *
77          * We take the host pc from the signal frame.  However, we cannot
78          * use that value directly.  Within cpu_restore_state_from_tb, we
79          * assume PC comes from GETPC(), as used by the helper functions,
80          * so we adjust the address by -GETPC_ADJ to form an address that
81          * is within the call insn, so that the address does not accidentally
82          * match the beginning of the next guest insn.  However, when the
83          * pc comes from the signal frame it points to the actual faulting
84          * host memory insn and not the return from a call insn.
85          *
86          * Therefore, adjust to compensate for what will be done later
87          * by cpu_restore_state_from_tb.
88          */
89         *pc += GETPC_ADJ;
90         break;
91 
92     case 1:
93         /*
94          * Fault during host read for translation, or loosely, "execution".
95          *
96          * The guest pc is already pointing to the start of the TB for which
97          * code is being generated.  If the guest translator manages the
98          * page crossings correctly, this is exactly the correct address
99          * (and if the translator doesn't handle page boundaries correctly
100          * there's little we can do about that here).  Therefore, do not
101          * trigger the unwinder.
102          */
103         *pc = 0;
104         return MMU_INST_FETCH;
105     }
106 
107     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
108 }
109 
110 /**
111  * handle_sigsegv_accerr_write:
112  * @cpu: the cpu context
113  * @old_set: the sigset_t from the signal ucontext_t
114  * @host_pc: the host pc, adjusted for the signal
115  * @guest_addr: the guest address of the fault
116  *
117  * Return true if the write fault has been handled, and should be re-tried.
118  *
119  * Note that it is important that we don't call page_unprotect() unless
120  * this is really a "write to nonwritable page" fault, because
121  * page_unprotect() assumes that if it is called for an access to
122  * a page that's writable this means we had two threads racing and
123  * another thread got there first and already made the page writable;
124  * so we will retry the access. If we were to call page_unprotect()
125  * for some other kind of fault that should really be passed to the
126  * guest, we'd end up in an infinite loop of retrying the faulting access.
127  */
128 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
129                                  uintptr_t host_pc, vaddr guest_addr)
130 {
131     switch (page_unprotect(cpu, guest_addr, host_pc)) {
132     case 0:
133         /*
134          * Fault not caused by a page marked unwritable to protect
135          * cached translations, must be the guest binary's problem.
136          */
137         return false;
138     case 1:
139         /*
140          * Fault caused by protection of cached translation; TBs
141          * invalidated, so resume execution.
142          */
143         return true;
144     case 2:
145         /*
146          * Fault caused by protection of cached translation, and the
147          * currently executing TB was modified and must be exited immediately.
148          */
149         sigprocmask(SIG_SETMASK, old_set, NULL);
150         cpu_loop_exit_noexc(cpu);
151         /* NORETURN */
152     default:
153         g_assert_not_reached();
154     }
155 }
156 
/*
 * One contiguous guest address range [itree.start, itree.last] whose
 * pages all share the same PAGE_* flags.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when the node dies */
    IntervalTreeNode itree;     /* linkage and bounds within pageflags_root */
    int flags;                  /* PAGE_* bits for the whole range */
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
164 
165 static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
166 {
167     IntervalTreeNode *n;
168 
169     n = interval_tree_iter_first(&pageflags_root, start, last);
170     return n ? container_of(n, PageFlagsNode, itree) : NULL;
171 }
172 
173 static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
174 {
175     IntervalTreeNode *n;
176 
177     n = interval_tree_iter_next(&p->itree, start, last);
178     return n ? container_of(n, PageFlagsNode, itree) : NULL;
179 }
180 
181 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
182 {
183     IntervalTreeNode *n;
184     int rc = 0;
185 
186     mmap_lock();
187     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
188          n != NULL;
189          n = interval_tree_iter_next(n, 0, -1)) {
190         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
191 
192         rc = fn(priv, n->start, n->last + 1, p->flags);
193         if (rc != 0) {
194             break;
195         }
196     }
197     mmap_unlock();
198 
199     return rc;
200 }
201 
202 static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
203 {
204     FILE *f = opaque;
205     uint64_t mask;
206     int width;
207 
208     if (guest_addr_max <= UINT32_MAX) {
209         mask = UINT32_MAX, width = 8;
210     } else {
211         mask = UINT64_MAX, width = 16;
212     }
213 
214     fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n",
215             width, start & mask,
216             width, end & mask,
217             width, (end - start) & mask,
218             ((prot & PAGE_READ) ? 'r' : '-'),
219             ((prot & PAGE_WRITE) ? 'w' : '-'),
220             ((prot & PAGE_EXEC) ? 'x' : '-'));
221     return 0;
222 }
223 
224 /* dump memory mappings */
225 void page_dump(FILE *f)
226 {
227     int width = guest_addr_max <= UINT32_MAX ? 8 : 16;
228 
229     fprintf(f, "%-*s %-*s %-*s %s\n",
230             width, "start", width, "end", width, "size", "prot");
231     walk_memory_regions(f, dump_region);
232 }
233 
234 int page_get_flags(vaddr address)
235 {
236     PageFlagsNode *p = pageflags_find(address, address);
237 
238     /*
239      * See util/interval-tree.c re lockless lookups: no false positives but
240      * there are false negatives.  If we find nothing, retry with the mmap
241      * lock acquired.
242      */
243     if (p) {
244         return p->flags;
245     }
246     if (have_mmap_lock()) {
247         return 0;
248     }
249 
250     mmap_lock();
251     p = pageflags_find(address, address);
252     mmap_unlock();
253     return p ? p->flags : 0;
254 }
255 
256 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
257 static void pageflags_create(vaddr start, vaddr last, int flags)
258 {
259     PageFlagsNode *p = g_new(PageFlagsNode, 1);
260 
261     p->itree.start = start;
262     p->itree.last = last;
263     p->flags = flags;
264     interval_tree_insert(&p->itree, &pageflags_root);
265 }
266 
/*
 * A subroutine of page_set_flags: remove everything in [start,last].
 *
 * Nodes lying entirely inside the range are freed.  Nodes that only
 * partly overlap are trimmed, or split in two, so that the parts outside
 * [start,last] keep their original flags.
 *
 * Returns true if any node overlapping the range had PAGE_EXEC set;
 * page_set_flags uses this to decide whether to invalidate TBs.
 */
static bool pageflags_unset(vaddr start, vaddr last)
{
    bool inval_tb = false;

    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        vaddr p_last;

        if (!p) {
            /* Nothing (left) overlapping the range. */
            break;
        }

        if (p->flags & PAGE_EXEC) {
            inval_tb = true;
        }

        /* Take the node out of the tree before editing its bounds. */
        interval_tree_remove(&p->itree, &pageflags_root);
        p_last = p->itree.last;

        if (p->itree.start < start) {
            /* Truncate the node from the end, or split out the middle. */
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            if (last < p_last) {
                /*
                 * The node extended past the range on both sides:
                 * re-create the tail, and nothing else can overlap.
                 */
                pageflags_create(last + 1, p_last, p->flags);
                break;
            }
        } else if (p_last <= last) {
            /* Range completely covers node -- remove it. */
            g_free_rcu(p, rcu);
        } else {
            /* Truncate the node from the start. */
            p->itree.start = last + 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            break;
        }
    }

    return inval_tb;
}
308 
/*
 * A subroutine of page_set_flags: nothing overlaps [start,last],
 * but check adjacent mappings and maybe merge into a single range.
 *
 * A neighbour is merged only when its flags are exactly equal to @flags.
 * If neither neighbour qualifies, a fresh node is created.
 */
static void pageflags_create_merge(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *next = NULL, *prev = NULL;

    /* Look for a mergeable node ending at start - 1 (if that exists). */
    if (start > 0) {
        prev = pageflags_find(start - 1, start - 1);
        if (prev) {
            if (prev->flags == flags) {
                /* Detach now; it is re-inserted with new bounds below. */
                interval_tree_remove(&prev->itree, &pageflags_root);
            } else {
                prev = NULL;
            }
        }
    }
    /* Look for a mergeable node starting at last + 1, unless that wraps. */
    if (last + 1 != 0) {
        next = pageflags_find(last + 1, last + 1);
        if (next) {
            if (next->flags == flags) {
                /* Detach now; it is re-inserted or freed below. */
                interval_tree_remove(&next->itree, &pageflags_root);
            } else {
                next = NULL;
            }
        }
    }

    if (prev) {
        if (next) {
            /* Both sides match: prev absorbs the new range and next. */
            prev->itree.last = next->itree.last;
            g_free_rcu(next, rcu);
        } else {
            /* Only prev matches: extend it forward over the new range. */
            prev->itree.last = last;
        }
        interval_tree_insert(&prev->itree, &pageflags_root);
    } else if (next) {
        /* Only next matches: extend it backward over the new range. */
        next->itree.start = start;
        interval_tree_insert(&next->itree, &pageflags_root);
    } else {
        pageflags_create(start, last, flags);
    }
}
353 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags leaves untouched on existing mappings
 * unless the caller requested a reset (PAGE_RESET).
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
362 
/*
 * A subroutine of page_set_flags: add flags to [start,last].
 *
 * For every existing node overlapping the range, the new flags are
 * (old_flags & ~clear_flags) | set_flags.  Parts of the range not covered
 * by any node get @set_flags (and no node at all if @set_flags is 0).
 * Nodes are split as needed so that flags outside [start,last] are
 * unchanged; a node whose resulting flags are 0 is removed.
 *
 * Returns true if an overlapping node had PAGE_EXEC and either loses
 * PAGE_EXEC or gains PAGE_WRITE.  page_set_flags uses a true result to
 * trigger TB invalidation.
 */
static bool pageflags_set_clear(vaddr start, vaddr last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    vaddr p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

    /* Re-entered with @start advanced, or after the tree was modified. */
 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing overlaps what remains of the range. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node, since it may be modified or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Keep the head [p_start, start - 1] with the old flags. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: middle plus tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                /* Range reaches or passes the node end. */
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Continue with the part after this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* The part of the range before the node had no mapping. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Keep the tail [last + 1, p_last] with the old flags. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* The whole node lies inside the range. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    /* Continue with the part after this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Grow the node backwards to cover the start of the range. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /*
             * The trimmed node no longer overlaps [start,last]; look up
             * whatever overlaps next.
             */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node entirely inside the range: drop it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
502 
503 void page_set_flags(vaddr start, vaddr last, int flags)
504 {
505     bool reset = false;
506     bool inval_tb = false;
507 
508     /* This function should never be called with addresses outside the
509        guest address space.  If this assert fires, it probably indicates
510        a missing call to h2g_valid.  */
511     assert(start <= last);
512     assert(last <= guest_addr_max);
513     /* Only set PAGE_ANON with new mappings. */
514     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
515     assert_memory_lock();
516 
517     start &= TARGET_PAGE_MASK;
518     last |= ~TARGET_PAGE_MASK;
519 
520     if (!(flags & PAGE_VALID)) {
521         flags = 0;
522     } else {
523         reset = flags & PAGE_RESET;
524         flags &= ~PAGE_RESET;
525         if (flags & PAGE_WRITE) {
526             flags |= PAGE_WRITE_ORG;
527         }
528     }
529 
530     if (!flags || reset) {
531         page_reset_target_data(start, last);
532         inval_tb |= pageflags_unset(start, last);
533     }
534     if (flags) {
535         inval_tb |= pageflags_set_clear(start, last, flags,
536                                         ~(reset ? 0 : PAGE_STICKY));
537     }
538     if (inval_tb) {
539         tb_invalidate_phys_range(NULL, start, last);
540     }
541 }
542 
/*
 * Check that every address in [start, start + len) is covered by a
 * recorded region whose flags include all bits of @flags.
 *
 * A zero @len is trivially true; a range that wraps around is false.
 * PAGE_WRITE is special: if it is the only missing bit and the region has
 * PAGE_WRITE_ORG, page_unprotect() is called on the page and the check
 * continues page by page.
 *
 * Lookups are attempted without the mmap lock first; the lock is taken
 * locally on the first miss and released before returning.
 */
bool page_check_range(vaddr start, vaddr len, int flags)
{
    vaddr last;
    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
    bool ret;

    if (len == 0) {
        return true;  /* trivial length */
    }

    last = start + len - 1;
    if (last < start) {
        return false; /* wrap around */
    }

    locked = have_mmap_lock();
    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        int missing;

        if (!p) {
            if (!locked) {
                /*
                 * Lockless lookups have false negatives.
                 * Retry with the lock held.
                 */
                mmap_lock();
                locked = -1;
                p = pageflags_find(start, last);
            }
            if (!p) {
                ret = false; /* entire region invalid */
                break;
            }
        }
        if (start < p->itree.start) {
            ret = false; /* initial bytes invalid */
            break;
        }

        /* Requested bits that this region does not have. */
        missing = flags & ~p->flags;
        if (missing & ~PAGE_WRITE) {
            ret = false; /* page doesn't match */
            break;
        }
        if (missing & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                ret = false; /* page not writable */
                break;
            }
            /* Asking about writable, but has been protected: undo. */
            if (!page_unprotect(NULL, start, 0)) {
                ret = false;
                break;
            }
            /* TODO: page_unprotect should take a range, not a single page. */
            if (last - start < TARGET_PAGE_SIZE) {
                ret = true; /* ok */
                break;
            }
            /* Advance one page and look the region up again. */
            start += TARGET_PAGE_SIZE;
            continue;
        }

        if (last <= p->itree.last) {
            ret = true; /* ok */
            break;
        }
        /* This region is fine; continue right after it. */
        start = p->itree.last + 1;
    }

    /* Release the lock if acquired locally. */
    if (locked < 0) {
        mmap_unlock();
    }
    return ret;
}
620 
621 bool page_check_range_empty(vaddr start, vaddr last)
622 {
623     assert(last >= start);
624     assert_memory_lock();
625     return pageflags_find(start, last) == NULL;
626 }
627 
628 vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
629 {
630     vaddr len_m1, align_m1;
631 
632     assert(min <= max);
633     assert(max <= guest_addr_max);
634     assert(len != 0);
635     assert(is_power_of_2(align));
636     assert_memory_lock();
637 
638     len_m1 = len - 1;
639     align_m1 = align - 1;
640 
641     /* Iteratively narrow the search region. */
642     while (1) {
643         PageFlagsNode *p;
644 
645         /* Align min and double-check there's enough space remaining. */
646         min = (min + align_m1) & ~align_m1;
647         if (min > max) {
648             return -1;
649         }
650         if (len_m1 > max - min) {
651             return -1;
652         }
653 
654         p = pageflags_find(min, min + len_m1);
655         if (p == NULL) {
656             /* Found! */
657             return min;
658         }
659         if (max <= p->itree.last) {
660             /* Existing allocation fills the remainder of the search region. */
661             return -1;
662         }
663         /* Skip across existing allocation. */
664         min = p->itree.last + 1;
665     }
666 }
667 
668 void tb_lock_page0(tb_page_addr_t address)
669 {
670     PageFlagsNode *p;
671     vaddr start, last;
672     int host_page_size = qemu_real_host_page_size();
673     int prot;
674 
675     assert_memory_lock();
676 
677     if (host_page_size <= TARGET_PAGE_SIZE) {
678         start = address & TARGET_PAGE_MASK;
679         last = start + TARGET_PAGE_SIZE - 1;
680     } else {
681         start = address & -host_page_size;
682         last = start + host_page_size - 1;
683     }
684 
685     p = pageflags_find(start, last);
686     if (!p) {
687         return;
688     }
689     prot = p->flags;
690 
691     if (unlikely(p->itree.last < last)) {
692         /* More than one protection region covers the one host page. */
693         assert(TARGET_PAGE_SIZE < host_page_size);
694         while ((p = pageflags_next(p, start, last)) != NULL) {
695             prot |= p->flags;
696         }
697     }
698 
699     if (prot & PAGE_WRITE) {
700         pageflags_set_clear(start, last, 0, PAGE_WRITE);
701         mprotect(g2h_untagged(start), last - start + 1,
702                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
703     }
704 }
705 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @cpu and @pc must be given together: either both are NULL/0 or
 * neither is (asserted).
 */
int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    assert((cpu == NULL) == (pc == 0));

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
        if (pc && cpu->cc->tcg_ops->precise_smc) {
            /* Report whether the TB containing @pc was invalidated. */
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
        }
    } else {
        int host_page_size = qemu_real_host_page_size();
        vaddr start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page is the unit: restore PAGE_WRITE on it. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated =
                tb_invalidate_phys_page_unwind(cpu, start, pc);
        } else {
            /*
             * The host page spans several target pages: visit each one,
             * accumulating the union of their flags in prot.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                vaddr addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(cpu, addr, pc);
            }
        }
        /* The host mapping gets read instead of exec permission. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
795 
796 static int probe_access_internal(CPUArchState *env, vaddr addr,
797                                  int fault_size, MMUAccessType access_type,
798                                  bool nonfault, uintptr_t ra)
799 {
800     int acc_flag;
801     bool maperr;
802 
803     switch (access_type) {
804     case MMU_DATA_STORE:
805         acc_flag = PAGE_WRITE_ORG;
806         break;
807     case MMU_DATA_LOAD:
808         acc_flag = PAGE_READ;
809         break;
810     case MMU_INST_FETCH:
811         acc_flag = PAGE_EXEC;
812         break;
813     default:
814         g_assert_not_reached();
815     }
816 
817     if (guest_addr_valid_untagged(addr)) {
818         int page_flags = page_get_flags(addr);
819         if (page_flags & acc_flag) {
820             if (access_type != MMU_INST_FETCH
821                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
822                 return TLB_MMIO;
823             }
824             return 0; /* success */
825         }
826         maperr = !(page_flags & PAGE_VALID);
827     } else {
828         maperr = true;
829     }
830 
831     if (nonfault) {
832         return TLB_INVALID_MASK;
833     }
834 
835     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
836 }
837 
838 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
839                        MMUAccessType access_type, int mmu_idx,
840                        bool nonfault, void **phost, uintptr_t ra)
841 {
842     int flags;
843 
844     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
845     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
846     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
847     return flags;
848 }
849 
850 void *probe_access(CPUArchState *env, vaddr addr, int size,
851                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
852 {
853     int flags;
854 
855     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
856     flags = probe_access_internal(env, addr, size, access_type, false, ra);
857     g_assert((flags & ~TLB_MMIO) == 0);
858 
859     return size ? g2h(env_cpu(env), addr) : NULL;
860 }
861 
862 void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
863                         MMUAccessType access_type, int mmu_idx)
864 {
865     return g2h(env_cpu(env), addr);
866 }
867 
868 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
869                                         void **hostp)
870 {
871     int flags;
872 
873     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
874     g_assert(flags == 0);
875 
876     if (hostp) {
877         *hostp = g2h_untagged(addr);
878     }
879     return addr;
880 }
881 
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or 40%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 *
 * NOTE(review): 40/128 is roughly 31%, not 40% -- confirm which figure
 * the original author intended.
 */
#define TPD_PAGES  64
/*
 * Address mask selecting the TPD_PAGES-page region that contains a page.
 * NOTE(review): spelled TBD_ rather than TPD_; possibly a typo, but the
 * name is used by page_get_target_data.
 */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/* Per-page target data for one region of TPD_PAGES pages. */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when the node dies */
    IntervalTreeNode itree;     /* linkage and bounds within targetdata_root */
    char data[] __attribute__((aligned));   /* TPD_PAGES slots of per-page data */
} TargetPageDataNode;

/* Interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
/* Per-page data size, set by the first page_get_target_data() call. */
static size_t target_page_data_size;
898 
/*
 * Discard the per-page target data for the pages in [start,last].
 *
 * The range is widened to whole target pages.  Data nodes entirely
 * inside the range are removed and freed; for partly covered nodes only
 * the slots of the affected pages are zeroed.  Does nothing when no
 * target data size has been registered yet.  Otherwise the memory lock
 * must be held (asserted).
 */
void page_reset_target_data(vaddr start, vaddr last)
{
    IntervalTreeNode *n, *next;
    size_t size = target_page_data_size;

    /* No user has ever requested target data: nothing can exist. */
    if (likely(size == 0)) {
        return;
    }

    assert_memory_lock();

    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    /*
     * The successor is fetched before the body runs, because the body
     * may remove and free the current node.
     */
    for (n = interval_tree_iter_first(&targetdata_root, start, last),
         next = n ? interval_tree_iter_next(n, start, last) : NULL;
         n != NULL;
         n = next,
         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
        vaddr n_start, n_last, p_ofs, p_len;
        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);

        /* Node fully covered by the range: drop it entirely. */
        if (n->start >= start && n->last <= last) {
            interval_tree_remove(n, &targetdata_root);
            g_free_rcu(t, rcu);
            continue;
        }

        /* Partial overlap: compute the first affected slot ... */
        if (n->start < start) {
            n_start = start;
            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
        } else {
            n_start = n->start;
            p_ofs = 0;
        }
        /* ... and the number of affected slots. */
        n_last = MIN(last, n->last);
        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;

        memset(t->data + p_ofs * size, 0, p_len * size);
    }
}
940 
941 void *page_get_target_data(vaddr address, size_t size)
942 {
943     IntervalTreeNode *n;
944     TargetPageDataNode *t;
945     vaddr page, region, p_ofs;
946 
947     /* Remember the size from the first call, and it should be constant. */
948     if (unlikely(target_page_data_size != size)) {
949         assert(target_page_data_size == 0);
950         target_page_data_size = size;
951     }
952 
953     page = address & TARGET_PAGE_MASK;
954     region = address & TBD_MASK;
955 
956     n = interval_tree_iter_first(&targetdata_root, page, page);
957     if (!n) {
958         /*
959          * See util/interval-tree.c re lockless lookups: no false positives
960          * but there are false negatives.  If we find nothing, retry with
961          * the mmap lock acquired.  We also need the lock for the
962          * allocation + insert.
963          */
964         mmap_lock();
965         n = interval_tree_iter_first(&targetdata_root, page, page);
966         if (!n) {
967             t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size);
968             n = &t->itree;
969             n->start = region;
970             n->last = region | ~TBD_MASK;
971             interval_tree_insert(n, &targetdata_root);
972         }
973         mmap_unlock();
974     }
975 
976     t = container_of(n, TargetPageDataNode, itree);
977     p_ofs = (page - region) >> TARGET_PAGE_BITS;
978     return t->data + p_ofs * size;
979 }
980 
981 /* The system-mode versions of these helpers are in cputlb.c.  */
982 
983 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
984                             MemOp mop, uintptr_t ra, MMUAccessType type)
985 {
986     int a_bits = memop_alignment_bits(mop);
987     void *ret;
988 
989     /* Enforce guest required alignment.  */
990     if (unlikely(addr & ((1 << a_bits) - 1))) {
991         cpu_loop_exit_sigbus(cpu, addr, type, ra);
992     }
993 
994     ret = g2h(cpu, addr);
995     set_helper_retaddr(ra);
996     return ret;
997 }
998 
999 /* physical memory access (slow version, mainly for debug) */
1000 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
1001                         void *ptr, size_t len, bool is_write)
1002 {
1003     int flags;
1004     vaddr l, page;
1005     uint8_t *buf = ptr;
1006     ssize_t written;
1007     int ret = -1;
1008     int fd = -1;
1009 
1010     mmap_lock();
1011 
1012     while (len > 0) {
1013         page = addr & TARGET_PAGE_MASK;
1014         l = (page + TARGET_PAGE_SIZE) - addr;
1015         if (l > len) {
1016             l = len;
1017         }
1018         flags = page_get_flags(page);
1019         if (!(flags & PAGE_VALID)) {
1020             goto out_close;
1021         }
1022         if (is_write) {
1023             if (flags & PAGE_WRITE) {
1024                 memcpy(g2h(cpu, addr), buf, l);
1025             } else {
1026                 /* Bypass the host page protection using ptrace. */
1027                 if (fd == -1) {
1028                     fd = open("/proc/self/mem", O_WRONLY);
1029                     if (fd == -1) {
1030                         goto out;
1031                     }
1032                 }
1033                 /*
1034                  * If there is a TranslationBlock and we weren't bypassing the
1035                  * host page protection, the memcpy() above would SEGV,
1036                  * ultimately leading to page_unprotect(). So invalidate the
1037                  * translations manually. Both invalidation and pwrite() must
1038                  * be under mmap_lock() in order to prevent the creation of
1039                  * another TranslationBlock in between.
1040                  */
1041                 tb_invalidate_phys_range(NULL, addr, addr + l - 1);
1042                 written = pwrite(fd, buf, l,
1043                                  (off_t)(uintptr_t)g2h_untagged(addr));
1044                 if (written != l) {
1045                     goto out_close;
1046                 }
1047             }
1048         } else if (flags & PAGE_READ) {
1049             memcpy(buf, g2h(cpu, addr), l);
1050         } else {
1051             /* Bypass the host page protection using ptrace. */
1052             if (fd == -1) {
1053                 fd = open("/proc/self/mem", O_RDONLY);
1054                 if (fd == -1) {
1055                     goto out;
1056                 }
1057             }
1058             if (pread(fd, buf, l,
1059                       (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
1060                 goto out_close;
1061             }
1062         }
1063         len -= l;
1064         buf += l;
1065         addr += l;
1066     }
1067     ret = 0;
1068 out_close:
1069     if (fd != -1) {
1070         close(fd);
1071     }
1072 out:
1073     mmap_unlock();
1074 
1075     return ret;
1076 }
1077 
1078 #include "ldst_atomicity.c.inc"
1079 
1080 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1081                           uintptr_t ra, MMUAccessType access_type)
1082 {
1083     void *haddr;
1084     uint8_t ret;
1085 
1086     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1087     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
1088     ret = ldub_p(haddr);
1089     clear_helper_retaddr();
1090     return ret;
1091 }
1092 
1093 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1094                            uintptr_t ra, MMUAccessType access_type)
1095 {
1096     void *haddr;
1097     uint16_t ret;
1098     MemOp mop = get_memop(oi);
1099 
1100     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1101     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1102     ret = load_atom_2(cpu, ra, haddr, mop);
1103     clear_helper_retaddr();
1104 
1105     if (mop & MO_BSWAP) {
1106         ret = bswap16(ret);
1107     }
1108     return ret;
1109 }
1110 
1111 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1112                            uintptr_t ra, MMUAccessType access_type)
1113 {
1114     void *haddr;
1115     uint32_t ret;
1116     MemOp mop = get_memop(oi);
1117 
1118     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1119     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1120     ret = load_atom_4(cpu, ra, haddr, mop);
1121     clear_helper_retaddr();
1122 
1123     if (mop & MO_BSWAP) {
1124         ret = bswap32(ret);
1125     }
1126     return ret;
1127 }
1128 
1129 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1130                            uintptr_t ra, MMUAccessType access_type)
1131 {
1132     void *haddr;
1133     uint64_t ret;
1134     MemOp mop = get_memop(oi);
1135 
1136     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1137     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1138     ret = load_atom_8(cpu, ra, haddr, mop);
1139     clear_helper_retaddr();
1140 
1141     if (mop & MO_BSWAP) {
1142         ret = bswap64(ret);
1143     }
1144     return ret;
1145 }
1146 
1147 static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
1148                           MemOpIdx oi, uintptr_t ra)
1149 {
1150     void *haddr;
1151     Int128 ret;
1152     MemOp mop = get_memop(oi);
1153 
1154     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1155     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1156     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1157     ret = load_atom_16(cpu, ra, haddr, mop);
1158     clear_helper_retaddr();
1159 
1160     if (mop & MO_BSWAP) {
1161         ret = bswap128(ret);
1162     }
1163     return ret;
1164 }
1165 
1166 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1167                        MemOpIdx oi, uintptr_t ra)
1168 {
1169     void *haddr;
1170 
1171     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1172     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1173     stb_p(haddr, val);
1174     clear_helper_retaddr();
1175 }
1176 
1177 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1178                        MemOpIdx oi, uintptr_t ra)
1179 {
1180     void *haddr;
1181     MemOp mop = get_memop(oi);
1182 
1183     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1184     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1185 
1186     if (mop & MO_BSWAP) {
1187         val = bswap16(val);
1188     }
1189     store_atom_2(cpu, ra, haddr, mop, val);
1190     clear_helper_retaddr();
1191 }
1192 
1193 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1194                        MemOpIdx oi, uintptr_t ra)
1195 {
1196     void *haddr;
1197     MemOp mop = get_memop(oi);
1198 
1199     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1200     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1201 
1202     if (mop & MO_BSWAP) {
1203         val = bswap32(val);
1204     }
1205     store_atom_4(cpu, ra, haddr, mop, val);
1206     clear_helper_retaddr();
1207 }
1208 
1209 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1210                        MemOpIdx oi, uintptr_t ra)
1211 {
1212     void *haddr;
1213     MemOp mop = get_memop(oi);
1214 
1215     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1216     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1217 
1218     if (mop & MO_BSWAP) {
1219         val = bswap64(val);
1220     }
1221     store_atom_8(cpu, ra, haddr, mop, val);
1222     clear_helper_retaddr();
1223 }
1224 
1225 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1226                         MemOpIdx oi, uintptr_t ra)
1227 {
1228     void *haddr;
1229     MemOpIdx mop = get_memop(oi);
1230 
1231     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1232     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1233 
1234     if (mop & MO_BSWAP) {
1235         val = bswap128(val);
1236     }
1237     store_atom_16(cpu, ra, haddr, mop, val);
1238     clear_helper_retaddr();
1239 }
1240 
1241 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
1242                          MemOpIdx oi, uintptr_t ra)
1243 {
1244     return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1245 }
1246 
1247 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
1248                           MemOpIdx oi, uintptr_t ra)
1249 {
1250     return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1251 }
1252 
1253 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
1254                           MemOpIdx oi, uintptr_t ra)
1255 {
1256     return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1257 }
1258 
1259 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
1260                           MemOpIdx oi, uintptr_t ra)
1261 {
1262     return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1263 }
1264 
1265 #include "ldst_common.c.inc"
1266 
1267 /*
1268  * Do not allow unaligned operations to proceed.  Return the host address.
1269  */
1270 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1271                                int size, uintptr_t retaddr)
1272 {
1273     MemOp mop = get_memop(oi);
1274     int a_bits = memop_alignment_bits(mop);
1275     void *ret;
1276 
1277     /* Enforce guest required alignment.  */
1278     if (unlikely(addr & ((1 << a_bits) - 1))) {
1279         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1280     }
1281 
1282     /* Enforce qemu required alignment.  */
1283     if (unlikely(addr & (size - 1))) {
1284         cpu_loop_exit_atomic(cpu, retaddr);
1285     }
1286 
1287     ret = g2h(cpu, addr);
1288     set_helper_retaddr(retaddr);
1289     return ret;
1290 }
1291 
1292 #include "atomic_common.c.inc"
1293 
1294 /*
1295  * First set of functions passes in OI and RETADDR.
1296  * This makes them callable from other helpers.
1297  */
1298 
/*
 * Build the helper name by token pasting: cpu_atomic_<X><SUFFIX><END>_mmu.
 * NOTE(review): SUFFIX and END are presumably supplied by
 * atomic_template.h for each DATA_SIZE -- confirm there.
 */
1299 #define ATOMIC_NAME(X) \
1300     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/* Clear the helper return address (atomic_mmu_lookup() sets it). */
1301 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1302 
/*
 * Include the template once per operand size.
 * NOTE(review): DATA_SIZE is redefined before every include, so the
 * template presumably #undefs it at its end -- confirm.
 */
1303 #define DATA_SIZE 1
1304 #include "atomic_template.h"
1305 
1306 #define DATA_SIZE 2
1307 #include "atomic_template.h"
1308 
1309 #define DATA_SIZE 4
1310 #include "atomic_template.h"
1311 
/* The 8-byte variants are built only when CONFIG_ATOMIC64 is defined. */
1312 #ifdef CONFIG_ATOMIC64
1313 #define DATA_SIZE 8
1314 #include "atomic_template.h"
1315 #endif
1316 
/*
 * The 16-byte variants are built when CONFIG_ATOMIC128 is defined or
 * HAVE_CMPXCHG128 is non-zero.
 */
1317 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1318 #define DATA_SIZE 16
1319 #include "atomic_template.h"
1320 #endif
1321