xref: /openbmc/qemu/accel/tcg/cputlb.c (revision eabfeb0c)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
42 
43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
44 /* #define DEBUG_TLB */
45 /* #define DEBUG_TLB_LOG */
46 
/*
 * Turn the optional DEBUG_TLB / DEBUG_TLB_LOG defines into 0/1 constants
 * that can be tested with an ordinary `if`.  Note that DEBUG_TLB_LOG is
 * only honoured when DEBUG_TLB is defined as well.
 */
#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * tlb_debug(fmt, ...): printf-style debug message, prefixed with the name
 * of the calling function.  With DEBUG_TLB_LOG_GATE set the message goes
 * through qemu_log_mask(CPU_LOG_MMU, ...); otherwise, with DEBUG_TLB_GATE
 * set, it is written to stderr.  With both gates at 0 neither branch is
 * taken, but the arguments are still seen by the compiler.
 */
#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

/*
 * assert_cpu_is_self(cpu): when DEBUG_TLB_GATE is set, assert that @cpu is
 * either not yet created or is the cpu of the calling thread.
 */
#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)
73 
/*
 * run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds.  The check below compares against
 * the size of the whole run_on_cpu_data object.
 */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/*
 * We currently can't handle more than 16 bits in the MMUIDX bitmask:
 * the idxmap arguments in this file are uint16_t.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
/* Bitmask with one bit set for every mmu_idx in [0, NB_MMU_MODES). */
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
82 
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
84 {
85     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
86 }
87 
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
89 {
90     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
91 }
92 
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94                              size_t max_entries)
95 {
96     desc->window_begin_ns = ns;
97     desc->window_max_entries = max_entries;
98 }
99 
100 /**
101  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
102  * @desc: The CPUTLBDesc portion of the TLB
103  * @fast: The CPUTLBDescFast portion of the same TLB
104  *
105  * Called with tlb_lock_held.
106  *
107  * We have two main constraints when resizing a TLB: (1) we only resize it
108  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
109  * the array or unnecessarily flushing it), which means we do not control how
110  * frequently the resizing can occur; (2) we don't have access to the guest's
111  * future scheduling decisions, and therefore have to decide the magnitude of
112  * the resize based on past observations.
113  *
114  * In general, a memory-hungry process can benefit greatly from an appropriately
115  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
116  * we just have to make the TLB as large as possible; while an oversized TLB
117  * results in minimal TLB miss rates, it also takes longer to be flushed
118  * (flushes can be _very_ frequent), and the reduced locality can also hurt
119  * performance.
120  *
121  * To achieve near-optimal performance for all kinds of workloads, we:
122  *
123  * 1. Aggressively increase the size of the TLB when the use rate of the
124  * TLB being flushed is high, since it is likely that in the near future this
125  * memory-hungry process will execute again, and its memory hungriness will
126  * probably be similar.
127  *
128  * 2. Slowly reduce the size of the TLB as the use rate declines over a
129  * reasonably large time window. The rationale is that if in such a time window
130  * we have not observed a high TLB use rate, it is likely that we won't observe
131  * it in the near future. In that case, once a time window expires we downsize
132  * the TLB to match the maximum use rate observed in the window.
133  *
134  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
135  * since in that range performance is likely near-optimal. Recall that the TLB
136  * is direct mapped, so we want the use rate to be low (or at least not too
137  * high), since otherwise we are likely to have a significant amount of
138  * conflict misses.
139  */
140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
141                                   int64_t now)
142 {
143     size_t old_size = tlb_n_entries(fast);
144     size_t rate;
145     size_t new_size = old_size;
146     int64_t window_len_ms = 100;
147     int64_t window_len_ns = window_len_ms * 1000 * 1000;
148     bool window_expired = now > desc->window_begin_ns + window_len_ns;
149 
150     if (desc->n_used_entries > desc->window_max_entries) {
151         desc->window_max_entries = desc->n_used_entries;
152     }
153     rate = desc->window_max_entries * 100 / old_size;
154 
155     if (rate > 70) {
156         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
157     } else if (rate < 30 && window_expired) {
158         size_t ceil = pow2ceil(desc->window_max_entries);
159         size_t expected_rate = desc->window_max_entries * 100 / ceil;
160 
161         /*
162          * Avoid undersizing when the max number of entries seen is just below
163          * a pow2. For instance, if max_entries == 1025, the expected use rate
164          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
165          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
166          * later. Thus, make sure that the expected use rate remains below 70%.
167          * (and since we double the size, that means the lowest rate we'd
168          * expect to get is 35%, which is still in the 30-70% range where
169          * we consider that the size is appropriate.)
170          */
171         if (expected_rate > 70) {
172             ceil *= 2;
173         }
174         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
175     }
176 
177     if (new_size == old_size) {
178         if (window_expired) {
179             tlb_window_reset(desc, now, desc->n_used_entries);
180         }
181         return;
182     }
183 
184     g_free(fast->table);
185     g_free(desc->iotlb);
186 
187     tlb_window_reset(desc, now, 0);
188     /* desc->n_used_entries is cleared by the caller */
189     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
190     fast->table = g_try_new(CPUTLBEntry, new_size);
191     desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
192 
193     /*
194      * If the allocations fail, try smaller sizes. We just freed some
195      * memory, so going back to half of new_size has a good chance of working.
196      * Increased memory pressure elsewhere in the system might cause the
197      * allocations to fail though, so we progressively reduce the allocation
198      * size, aborting if we cannot even allocate the smallest TLB we support.
199      */
200     while (fast->table == NULL || desc->iotlb == NULL) {
201         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
202             error_report("%s: %s", __func__, strerror(errno));
203             abort();
204         }
205         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
206         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
207 
208         g_free(fast->table);
209         g_free(desc->iotlb);
210         fast->table = g_try_new(CPUTLBEntry, new_size);
211         desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
212     }
213 }
214 
215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
216 {
217     desc->n_used_entries = 0;
218     desc->large_page_addr = -1;
219     desc->large_page_mask = -1;
220     desc->vindex = 0;
221     memset(fast->table, -1, sizeof_tlb(fast));
222     memset(desc->vtable, -1, sizeof(desc->vtable));
223 }
224 
225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
226                                         int64_t now)
227 {
228     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
229     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
230 
231     tlb_mmu_resize_locked(desc, fast, now);
232     tlb_mmu_flush_locked(desc, fast);
233 }
234 
235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
236 {
237     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
238 
239     tlb_window_reset(desc, now, 0);
240     desc->n_used_entries = 0;
241     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
242     fast->table = g_new(CPUTLBEntry, n_entries);
243     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
244     tlb_mmu_flush_locked(desc, fast);
245 }
246 
247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
248 {
249     env_tlb(env)->d[mmu_idx].n_used_entries++;
250 }
251 
252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
253 {
254     env_tlb(env)->d[mmu_idx].n_used_entries--;
255 }
256 
257 void tlb_init(CPUState *cpu)
258 {
259     CPUArchState *env = cpu->env_ptr;
260     int64_t now = get_clock_realtime();
261     int i;
262 
263     qemu_spin_init(&env_tlb(env)->c.lock);
264 
265     /* All tlbs are initialized flushed. */
266     env_tlb(env)->c.dirty = 0;
267 
268     for (i = 0; i < NB_MMU_MODES; i++) {
269         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
270     }
271 }
272 
273 void tlb_destroy(CPUState *cpu)
274 {
275     CPUArchState *env = cpu->env_ptr;
276     int i;
277 
278     qemu_spin_destroy(&env_tlb(env)->c.lock);
279     for (i = 0; i < NB_MMU_MODES; i++) {
280         CPUTLBDesc *desc = &env_tlb(env)->d[i];
281         CPUTLBDescFast *fast = &env_tlb(env)->f[i];
282 
283         g_free(fast->table);
284         g_free(desc->iotlb);
285     }
286 }
287 
288 /* flush_all_helper: run fn across all cpus
289  *
290  * If the wait flag is set then the src cpu's helper will be queued as
291  * "safe" work and the loop exited creating a synchronisation point
292  * where all queued work will be finished before execution starts
293  * again.
294  */
295 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
296                              run_on_cpu_data d)
297 {
298     CPUState *cpu;
299 
300     CPU_FOREACH(cpu) {
301         if (cpu != src) {
302             async_run_on_cpu(cpu, fn, d);
303         }
304     }
305 }
306 
307 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
308 {
309     CPUState *cpu;
310     size_t full = 0, part = 0, elide = 0;
311 
312     CPU_FOREACH(cpu) {
313         CPUArchState *env = cpu->env_ptr;
314 
315         full += atomic_read(&env_tlb(env)->c.full_flush_count);
316         part += atomic_read(&env_tlb(env)->c.part_flush_count);
317         elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
318     }
319     *pfull = full;
320     *ppart = part;
321     *pelide = elide;
322 }
323 
324 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
325 {
326     CPUArchState *env = cpu->env_ptr;
327     uint16_t asked = data.host_int;
328     uint16_t all_dirty, work, to_clean;
329     int64_t now = get_clock_realtime();
330 
331     assert_cpu_is_self(cpu);
332 
333     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
334 
335     qemu_spin_lock(&env_tlb(env)->c.lock);
336 
337     all_dirty = env_tlb(env)->c.dirty;
338     to_clean = asked & all_dirty;
339     all_dirty &= ~to_clean;
340     env_tlb(env)->c.dirty = all_dirty;
341 
342     for (work = to_clean; work != 0; work &= work - 1) {
343         int mmu_idx = ctz32(work);
344         tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
345     }
346 
347     qemu_spin_unlock(&env_tlb(env)->c.lock);
348 
349     cpu_tb_jmp_cache_clear(cpu);
350 
351     if (to_clean == ALL_MMUIDX_BITS) {
352         atomic_set(&env_tlb(env)->c.full_flush_count,
353                    env_tlb(env)->c.full_flush_count + 1);
354     } else {
355         atomic_set(&env_tlb(env)->c.part_flush_count,
356                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
357         if (to_clean != asked) {
358             atomic_set(&env_tlb(env)->c.elide_flush_count,
359                        env_tlb(env)->c.elide_flush_count +
360                        ctpop16(asked & ~to_clean));
361         }
362     }
363 }
364 
365 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
366 {
367     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
368 
369     if (cpu->created && !qemu_cpu_is_self(cpu)) {
370         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
371                          RUN_ON_CPU_HOST_INT(idxmap));
372     } else {
373         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
374     }
375 }
376 
377 void tlb_flush(CPUState *cpu)
378 {
379     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
380 }
381 
382 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
383 {
384     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
385 
386     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
387 
388     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
389     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
390 }
391 
392 void tlb_flush_all_cpus(CPUState *src_cpu)
393 {
394     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
395 }
396 
397 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
398 {
399     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
400 
401     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
402 
403     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
404     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
405 }
406 
407 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
408 {
409     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
410 }
411 
412 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
413                                         target_ulong page)
414 {
415     return tlb_hit_page(tlb_entry->addr_read, page) ||
416            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
417            tlb_hit_page(tlb_entry->addr_code, page);
418 }
419 
420 /**
421  * tlb_entry_is_empty - return true if the entry is not in use
422  * @te: pointer to CPUTLBEntry
423  */
424 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
425 {
426     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
427 }
428 
429 /* Called with tlb_c.lock held */
430 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
431                                           target_ulong page)
432 {
433     if (tlb_hit_page_anyprot(tlb_entry, page)) {
434         memset(tlb_entry, -1, sizeof(*tlb_entry));
435         return true;
436     }
437     return false;
438 }
439 
440 /* Called with tlb_c.lock held */
441 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
442                                               target_ulong page)
443 {
444     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
445     int k;
446 
447     assert_cpu_is_self(env_cpu(env));
448     for (k = 0; k < CPU_VTLB_SIZE; k++) {
449         if (tlb_flush_entry_locked(&d->vtable[k], page)) {
450             tlb_n_used_entries_dec(env, mmu_idx);
451         }
452     }
453 }
454 
455 static void tlb_flush_page_locked(CPUArchState *env, int midx,
456                                   target_ulong page)
457 {
458     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
459     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
460 
461     /* Check if we need to flush due to large pages.  */
462     if ((page & lp_mask) == lp_addr) {
463         tlb_debug("forcing full flush midx %d ("
464                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
465                   midx, lp_addr, lp_mask);
466         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
467     } else {
468         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
469             tlb_n_used_entries_dec(env, midx);
470         }
471         tlb_flush_vtlb_page_locked(env, midx, page);
472     }
473 }
474 
475 /**
476  * tlb_flush_page_by_mmuidx_async_0:
477  * @cpu: cpu on which to flush
478  * @addr: page of virtual address to flush
479  * @idxmap: set of mmu_idx to flush
480  *
481  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
482  * at @addr from the tlbs indicated by @idxmap from @cpu.
483  */
484 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
485                                              target_ulong addr,
486                                              uint16_t idxmap)
487 {
488     CPUArchState *env = cpu->env_ptr;
489     int mmu_idx;
490 
491     assert_cpu_is_self(cpu);
492 
493     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
494 
495     qemu_spin_lock(&env_tlb(env)->c.lock);
496     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
497         if ((idxmap >> mmu_idx) & 1) {
498             tlb_flush_page_locked(env, mmu_idx, addr);
499         }
500     }
501     qemu_spin_unlock(&env_tlb(env)->c.lock);
502 
503     tb_flush_jmp_cache(cpu, addr);
504 }
505 
506 /**
507  * tlb_flush_page_by_mmuidx_async_1:
508  * @cpu: cpu on which to flush
509  * @data: encoded addr + idxmap
510  *
511  * Helper for tlb_flush_page_by_mmuidx and friends, called through
512  * async_run_on_cpu.  The idxmap parameter is encoded in the page
513  * offset of the target_ptr field.  This limits the set of mmu_idx
514  * that can be passed via this method.
515  */
516 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
517                                              run_on_cpu_data data)
518 {
519     target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
520     target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
521     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
522 
523     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
524 }
525 
/*
 * Heap-allocated argument block for tlb_flush_page_by_mmuidx_async_2,
 * used when idxmap does not fit in the page-offset bits of the address.
 */
typedef struct {
    target_ulong addr;  /* page address to flush */
    uint16_t idxmap;    /* set of mmu_idx to flush */
} TLBFlushPageByMMUIdxData;
530 
531 /**
532  * tlb_flush_page_by_mmuidx_async_2:
533  * @cpu: cpu on which to flush
534  * @data: allocated addr + idxmap
535  *
536  * Helper for tlb_flush_page_by_mmuidx and friends, called through
537  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
538  * TLBFlushPageByMMUIdxData structure that has been allocated
539  * specifically for this helper.  Free the structure when done.
540  */
541 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
542                                              run_on_cpu_data data)
543 {
544     TLBFlushPageByMMUIdxData *d = data.host_ptr;
545 
546     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
547     g_free(d);
548 }
549 
550 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
551 {
552     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
553 
554     /* This should already be page aligned */
555     addr &= TARGET_PAGE_MASK;
556 
557     if (qemu_cpu_is_self(cpu)) {
558         tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
559     } else if (idxmap < TARGET_PAGE_SIZE) {
560         /*
561          * Most targets have only a few mmu_idx.  In the case where
562          * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
563          * allocating memory for this operation.
564          */
565         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
566                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
567     } else {
568         TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
569 
570         /* Otherwise allocate a structure, freed by the worker.  */
571         d->addr = addr;
572         d->idxmap = idxmap;
573         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
574                          RUN_ON_CPU_HOST_PTR(d));
575     }
576 }
577 
578 void tlb_flush_page(CPUState *cpu, target_ulong addr)
579 {
580     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
581 }
582 
583 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
584                                        uint16_t idxmap)
585 {
586     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
587 
588     /* This should already be page aligned */
589     addr &= TARGET_PAGE_MASK;
590 
591     /*
592      * Allocate memory to hold addr+idxmap only when needed.
593      * See tlb_flush_page_by_mmuidx for details.
594      */
595     if (idxmap < TARGET_PAGE_SIZE) {
596         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
597                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
598     } else {
599         CPUState *dst_cpu;
600 
601         /* Allocate a separate data block for each destination cpu.  */
602         CPU_FOREACH(dst_cpu) {
603             if (dst_cpu != src_cpu) {
604                 TLBFlushPageByMMUIdxData *d
605                     = g_new(TLBFlushPageByMMUIdxData, 1);
606 
607                 d->addr = addr;
608                 d->idxmap = idxmap;
609                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
610                                  RUN_ON_CPU_HOST_PTR(d));
611             }
612         }
613     }
614 
615     tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
616 }
617 
618 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
619 {
620     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
621 }
622 
623 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
624                                               target_ulong addr,
625                                               uint16_t idxmap)
626 {
627     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
628 
629     /* This should already be page aligned */
630     addr &= TARGET_PAGE_MASK;
631 
632     /*
633      * Allocate memory to hold addr+idxmap only when needed.
634      * See tlb_flush_page_by_mmuidx for details.
635      */
636     if (idxmap < TARGET_PAGE_SIZE) {
637         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
638                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
639         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
640                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
641     } else {
642         CPUState *dst_cpu;
643         TLBFlushPageByMMUIdxData *d;
644 
645         /* Allocate a separate data block for each destination cpu.  */
646         CPU_FOREACH(dst_cpu) {
647             if (dst_cpu != src_cpu) {
648                 d = g_new(TLBFlushPageByMMUIdxData, 1);
649                 d->addr = addr;
650                 d->idxmap = idxmap;
651                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
652                                  RUN_ON_CPU_HOST_PTR(d));
653             }
654         }
655 
656         d = g_new(TLBFlushPageByMMUIdxData, 1);
657         d->addr = addr;
658         d->idxmap = idxmap;
659         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
660                               RUN_ON_CPU_HOST_PTR(d));
661     }
662 }
663 
664 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
665 {
666     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
667 }
668 
669 /* update the TLBs so that writes to code in the virtual page 'addr'
670    can be detected */
671 void tlb_protect_code(ram_addr_t ram_addr)
672 {
673     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
674                                              DIRTY_MEMORY_CODE);
675 }
676 
677 /* update the TLB so that writes in physical page 'phys_addr' are no longer
678    tested for self modifying code */
679 void tlb_unprotect_code(ram_addr_t ram_addr)
680 {
681     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
682 }
683 
684 
685 /*
686  * Dirty write flag handling
687  *
688  * When the TCG code writes to a location it looks up the address in
689  * the TLB and uses that data to compute the final address. If any of
690  * the lower bits of the address are set then the slow path is forced.
691  * There are a number of reasons to do this but for normal RAM the
692  * most usual is detecting writes to code regions which may invalidate
693  * generated code.
694  *
695  * Other vCPUs might be reading their TLBs during guest execution, so we update
696  * te->addr_write with atomic_set. We don't need to worry about this for
697  * oversized guests as MTTCG is disabled for them.
698  *
699  * Called with tlb_c.lock held.
700  */
701 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
702                                          uintptr_t start, uintptr_t length)
703 {
704     uintptr_t addr = tlb_entry->addr_write;
705 
706     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
707                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
708         addr &= TARGET_PAGE_MASK;
709         addr += tlb_entry->addend;
710         if ((addr - start) < length) {
711 #if TCG_OVERSIZED_GUEST
712             tlb_entry->addr_write |= TLB_NOTDIRTY;
713 #else
714             atomic_set(&tlb_entry->addr_write,
715                        tlb_entry->addr_write | TLB_NOTDIRTY);
716 #endif
717         }
718     }
719 }
720 
721 /*
722  * Called with tlb_c.lock held.
723  * Called only from the vCPU context, i.e. the TLB's owner thread.
724  */
725 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
726 {
727     *d = *s;
728 }
729 
730 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
731  * the target vCPU).
732  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
733  * thing actually updated is the target TLB entry ->addr_write flags.
734  */
735 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
736 {
737     CPUArchState *env;
738 
739     int mmu_idx;
740 
741     env = cpu->env_ptr;
742     qemu_spin_lock(&env_tlb(env)->c.lock);
743     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
744         unsigned int i;
745         unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
746 
747         for (i = 0; i < n; i++) {
748             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
749                                          start1, length);
750         }
751 
752         for (i = 0; i < CPU_VTLB_SIZE; i++) {
753             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
754                                          start1, length);
755         }
756     }
757     qemu_spin_unlock(&env_tlb(env)->c.lock);
758 }
759 
760 /* Called with tlb_c.lock held */
761 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
762                                          target_ulong vaddr)
763 {
764     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
765         tlb_entry->addr_write = vaddr;
766     }
767 }
768 
769 /* update the TLB corresponding to virtual page vaddr
770    so that it is no longer dirty */
771 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
772 {
773     CPUArchState *env = cpu->env_ptr;
774     int mmu_idx;
775 
776     assert_cpu_is_self(cpu);
777 
778     vaddr &= TARGET_PAGE_MASK;
779     qemu_spin_lock(&env_tlb(env)->c.lock);
780     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
781         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
782     }
783 
784     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
785         int k;
786         for (k = 0; k < CPU_VTLB_SIZE; k++) {
787             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
788         }
789     }
790     qemu_spin_unlock(&env_tlb(env)->c.lock);
791 }
792 
793 /* Our TLB does not support large pages, so remember the area covered by
794    large pages and trigger a full TLB flush if these are invalidated.  */
795 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
796                                target_ulong vaddr, target_ulong size)
797 {
798     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
799     target_ulong lp_mask = ~(size - 1);
800 
801     if (lp_addr == (target_ulong)-1) {
802         /* No previous large page.  */
803         lp_addr = vaddr;
804     } else {
805         /* Extend the existing region to include the new page.
806            This is a compromise between unnecessary flushes and
807            the cost of maintaining a full variable size TLB.  */
808         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
809         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
810             lp_mask <<= 1;
811         }
812     }
813     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
814     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
815 }
816 
817 /* Add a new TLB entry. At most one entry for a given virtual address
818  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
819  * supplied size is only used by tlb_flush_page.
820  *
821  * Called from TCG-generated code, which is under an RCU read-side
822  * critical section.
823  */
824 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
825                              hwaddr paddr, MemTxAttrs attrs, int prot,
826                              int mmu_idx, target_ulong size)
827 {
828     CPUArchState *env = cpu->env_ptr;
829     CPUTLB *tlb = env_tlb(env);
830     CPUTLBDesc *desc = &tlb->d[mmu_idx];
831     MemoryRegionSection *section;
832     unsigned int index;
833     target_ulong address;
834     target_ulong write_address;
835     uintptr_t addend;
836     CPUTLBEntry *te, tn;
837     hwaddr iotlb, xlat, sz, paddr_page;
838     target_ulong vaddr_page;
839     int asidx = cpu_asidx_from_attrs(cpu, attrs);
840     int wp_flags;
841     bool is_ram, is_romd;
842 
843     assert_cpu_is_self(cpu);
844 
845     if (size <= TARGET_PAGE_SIZE) {
846         sz = TARGET_PAGE_SIZE;
847     } else {
848         tlb_add_large_page(env, mmu_idx, vaddr, size);
849         sz = size;
850     }
851     vaddr_page = vaddr & TARGET_PAGE_MASK;
852     paddr_page = paddr & TARGET_PAGE_MASK;
853 
854     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
855                                                 &xlat, &sz, attrs, &prot);
856     assert(sz >= TARGET_PAGE_SIZE);
857 
858     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
859               " prot=%x idx=%d\n",
860               vaddr, paddr, prot, mmu_idx);
861 
862     address = vaddr_page;
863     if (size < TARGET_PAGE_SIZE) {
864         /* Repeat the MMU check and TLB fill on every access.  */
865         address |= TLB_INVALID_MASK;
866     }
867     if (attrs.byte_swap) {
868         address |= TLB_BSWAP;
869     }
870 
871     is_ram = memory_region_is_ram(section->mr);
872     is_romd = memory_region_is_romd(section->mr);
873 
874     if (is_ram || is_romd) {
875         /* RAM and ROMD both have associated host memory. */
876         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
877     } else {
878         /* I/O does not; force the host address to NULL. */
879         addend = 0;
880     }
881 
882     write_address = address;
883     if (is_ram) {
884         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
885         /*
886          * Computing is_clean is expensive; avoid all that unless
887          * the page is actually writable.
888          */
889         if (prot & PAGE_WRITE) {
890             if (section->readonly) {
891                 write_address |= TLB_DISCARD_WRITE;
892             } else if (cpu_physical_memory_is_clean(iotlb)) {
893                 write_address |= TLB_NOTDIRTY;
894             }
895         }
896     } else {
897         /* I/O or ROMD */
898         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
899         /*
900          * Writes to romd devices must go through MMIO to enable write.
901          * Reads to romd devices go through the ram_ptr found above,
902          * but of course reads to I/O must go through MMIO.
903          */
904         write_address |= TLB_MMIO;
905         if (!is_romd) {
906             address = write_address;
907         }
908     }
909 
910     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
911                                               TARGET_PAGE_SIZE);
912 
913     index = tlb_index(env, mmu_idx, vaddr_page);
914     te = tlb_entry(env, mmu_idx, vaddr_page);
915 
916     /*
917      * Hold the TLB lock for the rest of the function. We could acquire/release
918      * the lock several times in the function, but it is faster to amortize the
919      * acquisition cost by acquiring it just once. Note that this leads to
920      * a longer critical section, but this is not a concern since the TLB lock
921      * is unlikely to be contended.
922      */
923     qemu_spin_lock(&tlb->c.lock);
924 
925     /* Note that the tlb is no longer clean.  */
926     tlb->c.dirty |= 1 << mmu_idx;
927 
928     /* Make sure there's no cached translation for the new page.  */
929     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
930 
931     /*
932      * Only evict the old entry to the victim tlb if it's for a
933      * different page; otherwise just overwrite the stale data.
934      */
935     if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
936         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
937         CPUTLBEntry *tv = &desc->vtable[vidx];
938 
939         /* Evict the old entry into the victim tlb.  */
940         copy_tlb_helper_locked(tv, te);
941         desc->viotlb[vidx] = desc->iotlb[index];
942         tlb_n_used_entries_dec(env, mmu_idx);
943     }
944 
945     /* refill the tlb */
946     /*
947      * At this point iotlb contains a physical section number in the lower
948      * TARGET_PAGE_BITS, and either
949      *  + the ram_addr_t of the page base of the target RAM (RAM)
950      *  + the offset within section->mr of the page base (I/O, ROMD)
951      * We subtract the vaddr_page (which is page aligned and thus won't
952      * disturb the low bits) to give an offset which can be added to the
953      * (non-page-aligned) vaddr of the eventual memory access to get
954      * the MemoryRegion offset for the access. Note that the vaddr we
955      * subtract here is that of the page base, and not the same as the
956      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
957      */
958     desc->iotlb[index].addr = iotlb - vaddr_page;
959     desc->iotlb[index].attrs = attrs;
960 
961     /* Now calculate the new entry */
962     tn.addend = addend - vaddr_page;
963     if (prot & PAGE_READ) {
964         tn.addr_read = address;
965         if (wp_flags & BP_MEM_READ) {
966             tn.addr_read |= TLB_WATCHPOINT;
967         }
968     } else {
969         tn.addr_read = -1;
970     }
971 
972     if (prot & PAGE_EXEC) {
973         tn.addr_code = address;
974     } else {
975         tn.addr_code = -1;
976     }
977 
978     tn.addr_write = -1;
979     if (prot & PAGE_WRITE) {
980         tn.addr_write = write_address;
981         if (prot & PAGE_WRITE_INV) {
982             tn.addr_write |= TLB_INVALID_MASK;
983         }
984         if (wp_flags & BP_MEM_WRITE) {
985             tn.addr_write |= TLB_WATCHPOINT;
986         }
987     }
988 
989     copy_tlb_helper_locked(te, &tn);
990     tlb_n_used_entries_inc(env, mmu_idx);
991     qemu_spin_unlock(&tlb->c.lock);
992 }
993 
994 /* Add a new TLB entry, but without specifying the memory
995  * transaction attributes to be used.
996  */
997 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
998                   hwaddr paddr, int prot,
999                   int mmu_idx, target_ulong size)
1000 {
1001     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1002                             prot, mmu_idx, size);
1003 }
1004 
1005 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1006 {
1007     ram_addr_t ram_addr;
1008 
1009     ram_addr = qemu_ram_addr_from_host(ptr);
1010     if (ram_addr == RAM_ADDR_INVALID) {
1011         error_report("Bad ram pointer %p", ptr);
1012         abort();
1013     }
1014     return ram_addr;
1015 }
1016 
1017 /*
1018  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1019  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1020  * be discarded and looked up again (e.g. via tlb_entry()).
1021  */
1022 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1023                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1024 {
1025     CPUClass *cc = CPU_GET_CLASS(cpu);
1026     bool ok;
1027 
1028     /*
1029      * This is not a probe, so only valid return is success; failure
1030      * should result in exception + longjmp to the cpu loop.
1031      */
1032     ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1033     assert(ok);
1034 }
1035 
1036 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1037                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
1038                          MMUAccessType access_type, MemOp op)
1039 {
1040     CPUState *cpu = env_cpu(env);
1041     hwaddr mr_offset;
1042     MemoryRegionSection *section;
1043     MemoryRegion *mr;
1044     uint64_t val;
1045     bool locked = false;
1046     MemTxResult r;
1047 
1048     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1049     mr = section->mr;
1050     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1051     cpu->mem_io_pc = retaddr;
1052     if (!cpu->can_do_io) {
1053         cpu_io_recompile(cpu, retaddr);
1054     }
1055 
1056     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1057         qemu_mutex_lock_iothread();
1058         locked = true;
1059     }
1060     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1061     if (r != MEMTX_OK) {
1062         hwaddr physaddr = mr_offset +
1063             section->offset_within_address_space -
1064             section->offset_within_region;
1065 
1066         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1067                                mmu_idx, iotlbentry->attrs, r, retaddr);
1068     }
1069     if (locked) {
1070         qemu_mutex_unlock_iothread();
1071     }
1072 
1073     return val;
1074 }
1075 
1076 /*
1077  * Save a potentially trashed IOTLB entry for later lookup by plugin.
1078  *
1079  * We also need to track the thread storage address because the RCU
1080  * cleanup that runs when we leave the critical region (the current
1081  * execution) is actually in a different thread.
1082  */
1083 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1084                             MemoryRegionSection *section, hwaddr mr_offset)
1085 {
1086 #ifdef CONFIG_PLUGIN
1087     SavedIOTLB *saved = &cs->saved_iotlb;
1088     saved->addr = addr;
1089     saved->section = section;
1090     saved->mr_offset = mr_offset;
1091 #endif
1092 }
1093 
1094 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1095                       int mmu_idx, uint64_t val, target_ulong addr,
1096                       uintptr_t retaddr, MemOp op)
1097 {
1098     CPUState *cpu = env_cpu(env);
1099     hwaddr mr_offset;
1100     MemoryRegionSection *section;
1101     MemoryRegion *mr;
1102     bool locked = false;
1103     MemTxResult r;
1104 
1105     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1106     mr = section->mr;
1107     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1108     if (!cpu->can_do_io) {
1109         cpu_io_recompile(cpu, retaddr);
1110     }
1111     cpu->mem_io_pc = retaddr;
1112 
1113     /*
1114      * The memory_region_dispatch may trigger a flush/resize
1115      * so for plugins we save the iotlb_data just in case.
1116      */
1117     save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1118 
1119     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1120         qemu_mutex_lock_iothread();
1121         locked = true;
1122     }
1123     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1124     if (r != MEMTX_OK) {
1125         hwaddr physaddr = mr_offset +
1126             section->offset_within_address_space -
1127             section->offset_within_region;
1128 
1129         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1130                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1131                                retaddr);
1132     }
1133     if (locked) {
1134         qemu_mutex_unlock_iothread();
1135     }
1136 }
1137 
1138 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1139 {
1140 #if TCG_OVERSIZED_GUEST
1141     return *(target_ulong *)((uintptr_t)entry + ofs);
1142 #else
1143     /* ofs might correspond to .addr_write, so use atomic_read */
1144     return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1145 #endif
1146 }
1147 
1148 /* Return true if ADDR is present in the victim tlb, and has been copied
1149    back to the main tlb.  */
1150 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1151                            size_t elt_ofs, target_ulong page)
1152 {
1153     size_t vidx;
1154 
1155     assert_cpu_is_self(env_cpu(env));
1156     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1157         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1158         target_ulong cmp;
1159 
1160         /* elt_ofs might correspond to .addr_write, so use atomic_read */
1161 #if TCG_OVERSIZED_GUEST
1162         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1163 #else
1164         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1165 #endif
1166 
1167         if (cmp == page) {
1168             /* Found entry in victim tlb, swap tlb and iotlb.  */
1169             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1170 
1171             qemu_spin_lock(&env_tlb(env)->c.lock);
1172             copy_tlb_helper_locked(&tmptlb, tlb);
1173             copy_tlb_helper_locked(tlb, vtlb);
1174             copy_tlb_helper_locked(vtlb, &tmptlb);
1175             qemu_spin_unlock(&env_tlb(env)->c.lock);
1176 
1177             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1178             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1179             tmpio = *io; *io = *vio; *vio = tmpio;
1180             return true;
1181         }
1182     }
1183     return false;
1184 }
1185 
/*
 * Shorthand for victim_tlb_hit() that picks up env, mmu_idx and index
 * from the scope in which it is used.  TY names the CPUTLBEntry
 * comparator field; ADDR is masked down to its page address.
 */
#define VICTIM_TLB_HIT(TY, ADDR)                            \
    victim_tlb_hit(env, mmu_idx, index,                     \
                   offsetof(CPUTLBEntry, TY),               \
                   (ADDR) & TARGET_PAGE_MASK)
1190 
1191 /*
1192  * Return a ram_addr_t for the virtual address for execution.
1193  *
1194  * Return -1 if we can't translate and execute from an entire page
1195  * of RAM.  This will force us to execute by loading and translating
1196  * one insn at a time, without caching.
1197  *
1198  * NOTE: This function will trigger an exception if the page is
1199  * not executable.
1200  */
1201 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1202                                         void **hostp)
1203 {
1204     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1205     uintptr_t index = tlb_index(env, mmu_idx, addr);
1206     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1207     void *p;
1208 
1209     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1210         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1211             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1212             index = tlb_index(env, mmu_idx, addr);
1213             entry = tlb_entry(env, mmu_idx, addr);
1214 
1215             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1216                 /*
1217                  * The MMU protection covers a smaller range than a target
1218                  * page, so we must redo the MMU check for every insn.
1219                  */
1220                 return -1;
1221             }
1222         }
1223         assert(tlb_hit(entry->addr_code, addr));
1224     }
1225 
1226     if (unlikely(entry->addr_code & TLB_MMIO)) {
1227         /* The region is not backed by RAM.  */
1228         if (hostp) {
1229             *hostp = NULL;
1230         }
1231         return -1;
1232     }
1233 
1234     p = (void *)((uintptr_t)addr + entry->addend);
1235     if (hostp) {
1236         *hostp = p;
1237     }
1238     return qemu_ram_addr_from_host_nofail(p);
1239 }
1240 
1241 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1242 {
1243     return get_page_addr_code_hostp(env, addr, NULL);
1244 }
1245 
1246 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1247                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1248 {
1249     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1250 
1251     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1252 
1253     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1254         struct page_collection *pages
1255             = page_collection_lock(ram_addr, ram_addr + size);
1256         tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1257         page_collection_unlock(pages);
1258     }
1259 
1260     /*
1261      * Set both VGA and migration bits for simplicity and to remove
1262      * the notdirty callback faster.
1263      */
1264     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1265 
1266     /* We remove the notdirty callback only if the code has been flushed. */
1267     if (!cpu_physical_memory_is_clean(ram_addr)) {
1268         trace_memory_notdirty_set_dirty(mem_vaddr);
1269         tlb_set_dirty(cpu, mem_vaddr);
1270     }
1271 }
1272 
1273 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1274                                  int fault_size, MMUAccessType access_type,
1275                                  int mmu_idx, bool nonfault,
1276                                  void **phost, uintptr_t retaddr)
1277 {
1278     uintptr_t index = tlb_index(env, mmu_idx, addr);
1279     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1280     target_ulong tlb_addr, page_addr;
1281     size_t elt_ofs;
1282     int flags;
1283 
1284     switch (access_type) {
1285     case MMU_DATA_LOAD:
1286         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1287         break;
1288     case MMU_DATA_STORE:
1289         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1290         break;
1291     case MMU_INST_FETCH:
1292         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1293         break;
1294     default:
1295         g_assert_not_reached();
1296     }
1297     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1298 
1299     page_addr = addr & TARGET_PAGE_MASK;
1300     if (!tlb_hit_page(tlb_addr, page_addr)) {
1301         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1302             CPUState *cs = env_cpu(env);
1303             CPUClass *cc = CPU_GET_CLASS(cs);
1304 
1305             if (!cc->tlb_fill(cs, addr, fault_size, access_type,
1306                               mmu_idx, nonfault, retaddr)) {
1307                 /* Non-faulting page table read failed.  */
1308                 *phost = NULL;
1309                 return TLB_INVALID_MASK;
1310             }
1311 
1312             /* TLB resize via tlb_fill may have moved the entry.  */
1313             entry = tlb_entry(env, mmu_idx, addr);
1314         }
1315         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1316     }
1317     flags = tlb_addr & TLB_FLAGS_MASK;
1318 
1319     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1320     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1321         *phost = NULL;
1322         return TLB_MMIO;
1323     }
1324 
1325     /* Everything else is RAM. */
1326     *phost = (void *)((uintptr_t)addr + entry->addend);
1327     return flags;
1328 }
1329 
1330 int probe_access_flags(CPUArchState *env, target_ulong addr,
1331                        MMUAccessType access_type, int mmu_idx,
1332                        bool nonfault, void **phost, uintptr_t retaddr)
1333 {
1334     int flags;
1335 
1336     flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1337                                   nonfault, phost, retaddr);
1338 
1339     /* Handle clean RAM pages.  */
1340     if (unlikely(flags & TLB_NOTDIRTY)) {
1341         uintptr_t index = tlb_index(env, mmu_idx, addr);
1342         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1343 
1344         notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1345         flags &= ~TLB_NOTDIRTY;
1346     }
1347 
1348     return flags;
1349 }
1350 
1351 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1352                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1353 {
1354     void *host;
1355     int flags;
1356 
1357     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1358 
1359     flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1360                                   false, &host, retaddr);
1361 
1362     /* Per the interface, size == 0 merely faults the access. */
1363     if (size == 0) {
1364         return NULL;
1365     }
1366 
1367     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1368         uintptr_t index = tlb_index(env, mmu_idx, addr);
1369         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1370 
1371         /* Handle watchpoints.  */
1372         if (flags & TLB_WATCHPOINT) {
1373             int wp_access = (access_type == MMU_DATA_STORE
1374                              ? BP_MEM_WRITE : BP_MEM_READ);
1375             cpu_check_watchpoint(env_cpu(env), addr, size,
1376                                  iotlbentry->attrs, wp_access, retaddr);
1377         }
1378 
1379         /* Handle clean RAM pages.  */
1380         if (flags & TLB_NOTDIRTY) {
1381             notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1382         }
1383     }
1384 
1385     return host;
1386 }
1387 
1388 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1389                         MMUAccessType access_type, int mmu_idx)
1390 {
1391     void *host;
1392     int flags;
1393 
1394     flags = probe_access_internal(env, addr, 0, access_type,
1395                                   mmu_idx, true, &host, 0);
1396 
1397     /* No combination of flags are expected by the caller. */
1398     return flags ? NULL : host;
1399 }
1400 
1401 #ifdef CONFIG_PLUGIN
1402 /*
1403  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1404  * This should be a hot path as we will have just looked this path up
1405  * in the softmmu lookup code (or helper). We don't handle re-fills or
1406  * checking the victim table. This is purely informational.
1407  *
1408  * This almost never fails as the memory access being instrumented
1409  * should have just filled the TLB. The one corner case is io_writex
1410  * which can cause TLB flushes and potential resizing of the TLBs
1411  * loosing the information we need. In those cases we need to recover
1412  * data from a copy of the io_tlb entry.
1413  */
1414 
1415 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1416                        bool is_store, struct qemu_plugin_hwaddr *data)
1417 {
1418     CPUArchState *env = cpu->env_ptr;
1419     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1420     uintptr_t index = tlb_index(env, mmu_idx, addr);
1421     target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1422 
1423     if (likely(tlb_hit(tlb_addr, addr))) {
1424         /* We must have an iotlb entry for MMIO */
1425         if (tlb_addr & TLB_MMIO) {
1426             CPUIOTLBEntry *iotlbentry;
1427             iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1428             data->is_io = true;
1429             data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1430             data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1431         } else {
1432             data->is_io = false;
1433             data->v.ram.hostaddr = addr + tlbe->addend;
1434         }
1435         return true;
1436     } else {
1437         SavedIOTLB *saved = &cpu->saved_iotlb;
1438         data->is_io = true;
1439         data->v.io.section = saved->section;
1440         data->v.io.offset = saved->mr_offset;
1441         return true;
1442     }
1443 }
1444 
1445 #endif
1446 
1447 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1448  * operations, or io operations to proceed.  Return the host address.  */
1449 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1450                                TCGMemOpIdx oi, uintptr_t retaddr)
1451 {
1452     size_t mmu_idx = get_mmuidx(oi);
1453     uintptr_t index = tlb_index(env, mmu_idx, addr);
1454     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1455     target_ulong tlb_addr = tlb_addr_write(tlbe);
1456     MemOp mop = get_memop(oi);
1457     int a_bits = get_alignment_bits(mop);
1458     int s_bits = mop & MO_SIZE;
1459     void *hostaddr;
1460 
1461     /* Adjust the given return address.  */
1462     retaddr -= GETPC_ADJ;
1463 
1464     /* Enforce guest required alignment.  */
1465     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1466         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1467         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1468                              mmu_idx, retaddr);
1469     }
1470 
1471     /* Enforce qemu required alignment.  */
1472     if (unlikely(addr & ((1 << s_bits) - 1))) {
1473         /* We get here if guest alignment was not requested,
1474            or was not enforced by cpu_unaligned_access above.
1475            We might widen the access and emulate, but for now
1476            mark an exception and exit the cpu loop.  */
1477         goto stop_the_world;
1478     }
1479 
1480     /* Check TLB entry and enforce page permissions.  */
1481     if (!tlb_hit(tlb_addr, addr)) {
1482         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1483             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1484                      mmu_idx, retaddr);
1485             index = tlb_index(env, mmu_idx, addr);
1486             tlbe = tlb_entry(env, mmu_idx, addr);
1487         }
1488         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1489     }
1490 
1491     /* Notice an IO access or a needs-MMU-lookup access */
1492     if (unlikely(tlb_addr & TLB_MMIO)) {
1493         /* There's really nothing that can be done to
1494            support this apart from stop-the-world.  */
1495         goto stop_the_world;
1496     }
1497 
1498     /* Let the guest notice RMW on a write-only page.  */
1499     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1500         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1501                  mmu_idx, retaddr);
1502         /* Since we don't support reads and writes to different addresses,
1503            and we do have the proper page loaded for write, this shouldn't
1504            ever return.  But just in case, handle via stop-the-world.  */
1505         goto stop_the_world;
1506     }
1507 
1508     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1509 
1510     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1511         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1512                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1513     }
1514 
1515     return hostaddr;
1516 
1517  stop_the_world:
1518     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1519 }
1520 
1521 /*
1522  * Load Helpers
1523  *
1524  * We support two different access types. SOFTMMU_CODE_ACCESS is
1525  * specifically for reading instructions from system memory. It is
1526  * called by the translation loop and in some helpers where the code
1527  * is disassembled. It shouldn't be called directly by guest code.
1528  */
1529 
1530 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1531                                 TCGMemOpIdx oi, uintptr_t retaddr);
1532 
1533 static inline uint64_t QEMU_ALWAYS_INLINE
1534 load_memop(const void *haddr, MemOp op)
1535 {
1536     switch (op) {
1537     case MO_UB:
1538         return ldub_p(haddr);
1539     case MO_BEUW:
1540         return lduw_be_p(haddr);
1541     case MO_LEUW:
1542         return lduw_le_p(haddr);
1543     case MO_BEUL:
1544         return (uint32_t)ldl_be_p(haddr);
1545     case MO_LEUL:
1546         return (uint32_t)ldl_le_p(haddr);
1547     case MO_BEQ:
1548         return ldq_be_p(haddr);
1549     case MO_LEQ:
1550         return ldq_le_p(haddr);
1551     default:
1552         qemu_build_not_reached();
1553     }
1554 }
1555 
1556 static inline uint64_t QEMU_ALWAYS_INLINE
1557 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1558             uintptr_t retaddr, MemOp op, bool code_read,
1559             FullLoadHelper *full_load)
1560 {
1561     uintptr_t mmu_idx = get_mmuidx(oi);
1562     uintptr_t index = tlb_index(env, mmu_idx, addr);
1563     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1564     target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1565     const size_t tlb_off = code_read ?
1566         offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1567     const MMUAccessType access_type =
1568         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1569     unsigned a_bits = get_alignment_bits(get_memop(oi));
1570     void *haddr;
1571     uint64_t res;
1572     size_t size = memop_size(op);
1573 
1574     /* Handle CPU specific unaligned behaviour */
1575     if (addr & ((1 << a_bits) - 1)) {
1576         cpu_unaligned_access(env_cpu(env), addr, access_type,
1577                              mmu_idx, retaddr);
1578     }
1579 
1580     /* If the TLB entry is for a different page, reload and try again.  */
1581     if (!tlb_hit(tlb_addr, addr)) {
1582         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1583                             addr & TARGET_PAGE_MASK)) {
1584             tlb_fill(env_cpu(env), addr, size,
1585                      access_type, mmu_idx, retaddr);
1586             index = tlb_index(env, mmu_idx, addr);
1587             entry = tlb_entry(env, mmu_idx, addr);
1588         }
1589         tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1590         tlb_addr &= ~TLB_INVALID_MASK;
1591     }
1592 
1593     /* Handle anything that isn't just a straight memory access.  */
1594     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1595         CPUIOTLBEntry *iotlbentry;
1596         bool need_swap;
1597 
1598         /* For anything that is unaligned, recurse through full_load.  */
1599         if ((addr & (size - 1)) != 0) {
1600             goto do_unaligned_access;
1601         }
1602 
1603         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1604 
1605         /* Handle watchpoints.  */
1606         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1607             /* On watchpoint hit, this will longjmp out.  */
1608             cpu_check_watchpoint(env_cpu(env), addr, size,
1609                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
1610         }
1611 
1612         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1613 
1614         /* Handle I/O access.  */
1615         if (likely(tlb_addr & TLB_MMIO)) {
1616             return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1617                             access_type, op ^ (need_swap * MO_BSWAP));
1618         }
1619 
1620         haddr = (void *)((uintptr_t)addr + entry->addend);
1621 
1622         /*
1623          * Keep these two load_memop separate to ensure that the compiler
1624          * is able to fold the entire function to a single instruction.
1625          * There is a build-time assert inside to remind you of this.  ;-)
1626          */
1627         if (unlikely(need_swap)) {
1628             return load_memop(haddr, op ^ MO_BSWAP);
1629         }
1630         return load_memop(haddr, op);
1631     }
1632 
1633     /* Handle slow unaligned access (it spans two pages or IO).  */
1634     if (size > 1
1635         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1636                     >= TARGET_PAGE_SIZE)) {
1637         target_ulong addr1, addr2;
1638         uint64_t r1, r2;
1639         unsigned shift;
1640     do_unaligned_access:
1641         addr1 = addr & ~((target_ulong)size - 1);
1642         addr2 = addr1 + size;
1643         r1 = full_load(env, addr1, oi, retaddr);
1644         r2 = full_load(env, addr2, oi, retaddr);
1645         shift = (addr & (size - 1)) * 8;
1646 
1647         if (memop_big_endian(op)) {
1648             /* Big-endian combine.  */
1649             res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1650         } else {
1651             /* Little-endian combine.  */
1652             res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1653         }
1654         return res & MAKE_64BIT_MASK(0, size * 8);
1655     }
1656 
1657     haddr = (void *)((uintptr_t)addr + entry->addend);
1658     return load_memop(haddr, op);
1659 }
1660 
1661 /*
1662  * For the benefit of TCG generated code, we want to avoid the
1663  * complication of ABI-specific return type promotion and always
1664  * return a value extended to the register size of the host. This is
1665  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1666  * data, and for that we always have uint64_t.
1667  *
1668  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1669  */
1670 
1671 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1672                               TCGMemOpIdx oi, uintptr_t retaddr)
1673 {
1674     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1675 }
1676 
1677 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1678                                      TCGMemOpIdx oi, uintptr_t retaddr)
1679 {
1680     return full_ldub_mmu(env, addr, oi, retaddr);
1681 }
1682 
1683 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1684                                  TCGMemOpIdx oi, uintptr_t retaddr)
1685 {
1686     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1687                        full_le_lduw_mmu);
1688 }
1689 
1690 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1691                                     TCGMemOpIdx oi, uintptr_t retaddr)
1692 {
1693     return full_le_lduw_mmu(env, addr, oi, retaddr);
1694 }
1695 
1696 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1697                                  TCGMemOpIdx oi, uintptr_t retaddr)
1698 {
1699     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1700                        full_be_lduw_mmu);
1701 }
1702 
1703 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1704                                     TCGMemOpIdx oi, uintptr_t retaddr)
1705 {
1706     return full_be_lduw_mmu(env, addr, oi, retaddr);
1707 }
1708 
1709 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1710                                  TCGMemOpIdx oi, uintptr_t retaddr)
1711 {
1712     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1713                        full_le_ldul_mmu);
1714 }
1715 
1716 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1717                                     TCGMemOpIdx oi, uintptr_t retaddr)
1718 {
1719     return full_le_ldul_mmu(env, addr, oi, retaddr);
1720 }
1721 
1722 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1723                                  TCGMemOpIdx oi, uintptr_t retaddr)
1724 {
1725     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1726                        full_be_ldul_mmu);
1727 }
1728 
1729 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1730                                     TCGMemOpIdx oi, uintptr_t retaddr)
1731 {
1732     return full_be_ldul_mmu(env, addr, oi, retaddr);
1733 }
1734 
1735 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1736                            TCGMemOpIdx oi, uintptr_t retaddr)
1737 {
1738     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1739                        helper_le_ldq_mmu);
1740 }
1741 
1742 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1743                            TCGMemOpIdx oi, uintptr_t retaddr)
1744 {
1745     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1746                        helper_be_ldq_mmu);
1747 }
1748 
1749 /*
1750  * Provide signed versions of the load routines as well.  We can of course
1751  * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1752  */
1753 
1754 
1755 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1756                                      TCGMemOpIdx oi, uintptr_t retaddr)
1757 {
1758     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1759 }
1760 
1761 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1762                                     TCGMemOpIdx oi, uintptr_t retaddr)
1763 {
1764     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1765 }
1766 
1767 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1768                                     TCGMemOpIdx oi, uintptr_t retaddr)
1769 {
1770     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1771 }
1772 
1773 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1774                                     TCGMemOpIdx oi, uintptr_t retaddr)
1775 {
1776     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1777 }
1778 
1779 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1780                                     TCGMemOpIdx oi, uintptr_t retaddr)
1781 {
1782     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1783 }
1784 
1785 /*
1786  * Load helpers for cpu_ldst.h.
1787  */
1788 
1789 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1790                                        int mmu_idx, uintptr_t retaddr,
1791                                        MemOp op, FullLoadHelper *full_load)
1792 {
1793     uint16_t meminfo;
1794     TCGMemOpIdx oi;
1795     uint64_t ret;
1796 
1797     meminfo = trace_mem_get_info(op, mmu_idx, false);
1798     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1799 
1800     op &= ~MO_SIGN;
1801     oi = make_memop_idx(op, mmu_idx);
1802     ret = full_load(env, addr, oi, retaddr);
1803 
1804     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1805 
1806     return ret;
1807 }
1808 
1809 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1810                             int mmu_idx, uintptr_t ra)
1811 {
1812     return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1813 }
1814 
1815 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1816                        int mmu_idx, uintptr_t ra)
1817 {
1818     return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1819                                    full_ldub_mmu);
1820 }
1821 
1822 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1823                                int mmu_idx, uintptr_t ra)
1824 {
1825     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
1826 }
1827 
1828 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1829                           int mmu_idx, uintptr_t ra)
1830 {
1831     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
1832                                     full_be_lduw_mmu);
1833 }
1834 
1835 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1836                               int mmu_idx, uintptr_t ra)
1837 {
1838     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
1839 }
1840 
1841 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1842                               int mmu_idx, uintptr_t ra)
1843 {
1844     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
1845 }
1846 
1847 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1848                                int mmu_idx, uintptr_t ra)
1849 {
1850     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
1851 }
1852 
1853 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1854                           int mmu_idx, uintptr_t ra)
1855 {
1856     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
1857                                     full_le_lduw_mmu);
1858 }
1859 
1860 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1861                               int mmu_idx, uintptr_t ra)
1862 {
1863     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
1864 }
1865 
1866 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1867                               int mmu_idx, uintptr_t ra)
1868 {
1869     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
1870 }
1871 
1872 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1873                           uintptr_t retaddr)
1874 {
1875     return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1876 }
1877 
1878 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1879 {
1880     return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1881 }
1882 
1883 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
1884                              uintptr_t retaddr)
1885 {
1886     return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1887 }
1888 
1889 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1890 {
1891     return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1892 }
1893 
1894 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
1895                             uintptr_t retaddr)
1896 {
1897     return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1898 }
1899 
1900 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
1901                             uintptr_t retaddr)
1902 {
1903     return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1904 }
1905 
1906 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
1907                              uintptr_t retaddr)
1908 {
1909     return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1910 }
1911 
1912 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1913 {
1914     return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1915 }
1916 
1917 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
1918                             uintptr_t retaddr)
1919 {
1920     return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1921 }
1922 
1923 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
1924                             uintptr_t retaddr)
1925 {
1926     return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1927 }
1928 
1929 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1930 {
1931     return cpu_ldub_data_ra(env, ptr, 0);
1932 }
1933 
1934 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1935 {
1936     return cpu_ldsb_data_ra(env, ptr, 0);
1937 }
1938 
1939 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
1940 {
1941     return cpu_lduw_be_data_ra(env, ptr, 0);
1942 }
1943 
1944 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
1945 {
1946     return cpu_ldsw_be_data_ra(env, ptr, 0);
1947 }
1948 
1949 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
1950 {
1951     return cpu_ldl_be_data_ra(env, ptr, 0);
1952 }
1953 
1954 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
1955 {
1956     return cpu_ldq_be_data_ra(env, ptr, 0);
1957 }
1958 
1959 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
1960 {
1961     return cpu_lduw_le_data_ra(env, ptr, 0);
1962 }
1963 
1964 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
1965 {
1966     return cpu_ldsw_le_data_ra(env, ptr, 0);
1967 }
1968 
1969 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
1970 {
1971     return cpu_ldl_le_data_ra(env, ptr, 0);
1972 }
1973 
1974 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
1975 {
1976     return cpu_ldq_le_data_ra(env, ptr, 0);
1977 }
1978 
1979 /*
1980  * Store Helpers
1981  */
1982 
1983 static inline void QEMU_ALWAYS_INLINE
1984 store_memop(void *haddr, uint64_t val, MemOp op)
1985 {
1986     switch (op) {
1987     case MO_UB:
1988         stb_p(haddr, val);
1989         break;
1990     case MO_BEUW:
1991         stw_be_p(haddr, val);
1992         break;
1993     case MO_LEUW:
1994         stw_le_p(haddr, val);
1995         break;
1996     case MO_BEUL:
1997         stl_be_p(haddr, val);
1998         break;
1999     case MO_LEUL:
2000         stl_le_p(haddr, val);
2001         break;
2002     case MO_BEQ:
2003         stq_be_p(haddr, val);
2004         break;
2005     case MO_LEQ:
2006         stq_le_p(haddr, val);
2007         break;
2008     default:
2009         qemu_build_not_reached();
2010     }
2011 }
2012 
2013 static inline void QEMU_ALWAYS_INLINE
2014 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2015              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
2016 {
2017     uintptr_t mmu_idx = get_mmuidx(oi);
2018     uintptr_t index = tlb_index(env, mmu_idx, addr);
2019     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
2020     target_ulong tlb_addr = tlb_addr_write(entry);
2021     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2022     unsigned a_bits = get_alignment_bits(get_memop(oi));
2023     void *haddr;
2024     size_t size = memop_size(op);
2025 
2026     /* Handle CPU specific unaligned behaviour */
2027     if (addr & ((1 << a_bits) - 1)) {
2028         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
2029                              mmu_idx, retaddr);
2030     }
2031 
2032     /* If the TLB entry is for a different page, reload and try again.  */
2033     if (!tlb_hit(tlb_addr, addr)) {
2034         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
2035             addr & TARGET_PAGE_MASK)) {
2036             tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
2037                      mmu_idx, retaddr);
2038             index = tlb_index(env, mmu_idx, addr);
2039             entry = tlb_entry(env, mmu_idx, addr);
2040         }
2041         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
2042     }
2043 
2044     /* Handle anything that isn't just a straight memory access.  */
2045     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
2046         CPUIOTLBEntry *iotlbentry;
2047         bool need_swap;
2048 
2049         /* For anything that is unaligned, recurse through byte stores.  */
2050         if ((addr & (size - 1)) != 0) {
2051             goto do_unaligned_access;
2052         }
2053 
2054         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
2055 
2056         /* Handle watchpoints.  */
2057         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2058             /* On watchpoint hit, this will longjmp out.  */
2059             cpu_check_watchpoint(env_cpu(env), addr, size,
2060                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
2061         }
2062 
2063         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
2064 
2065         /* Handle I/O access.  */
2066         if (tlb_addr & TLB_MMIO) {
2067             io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
2068                       op ^ (need_swap * MO_BSWAP));
2069             return;
2070         }
2071 
2072         /* Ignore writes to ROM.  */
2073         if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
2074             return;
2075         }
2076 
2077         /* Handle clean RAM pages.  */
2078         if (tlb_addr & TLB_NOTDIRTY) {
2079             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
2080         }
2081 
2082         haddr = (void *)((uintptr_t)addr + entry->addend);
2083 
2084         /*
2085          * Keep these two store_memop separate to ensure that the compiler
2086          * is able to fold the entire function to a single instruction.
2087          * There is a build-time assert inside to remind you of this.  ;-)
2088          */
2089         if (unlikely(need_swap)) {
2090             store_memop(haddr, val, op ^ MO_BSWAP);
2091         } else {
2092             store_memop(haddr, val, op);
2093         }
2094         return;
2095     }
2096 
2097     /* Handle slow unaligned access (it spans two pages or IO).  */
2098     if (size > 1
2099         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
2100                      >= TARGET_PAGE_SIZE)) {
2101         int i;
2102         uintptr_t index2;
2103         CPUTLBEntry *entry2;
2104         target_ulong page2, tlb_addr2;
2105         size_t size2;
2106 
2107     do_unaligned_access:
2108         /*
2109          * Ensure the second page is in the TLB.  Note that the first page
2110          * is already guaranteed to be filled, and that the second page
2111          * cannot evict the first.
2112          */
2113         page2 = (addr + size) & TARGET_PAGE_MASK;
2114         size2 = (addr + size) & ~TARGET_PAGE_MASK;
2115         index2 = tlb_index(env, mmu_idx, page2);
2116         entry2 = tlb_entry(env, mmu_idx, page2);
2117         tlb_addr2 = tlb_addr_write(entry2);
2118         if (!tlb_hit_page(tlb_addr2, page2)) {
2119             if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2120                 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2121                          mmu_idx, retaddr);
2122                 index2 = tlb_index(env, mmu_idx, page2);
2123                 entry2 = tlb_entry(env, mmu_idx, page2);
2124             }
2125             tlb_addr2 = tlb_addr_write(entry2);
2126         }
2127 
2128         /*
2129          * Handle watchpoints.  Since this may trap, all checks
2130          * must happen before any store.
2131          */
2132         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2133             cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2134                                  env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2135                                  BP_MEM_WRITE, retaddr);
2136         }
2137         if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2138             cpu_check_watchpoint(env_cpu(env), page2, size2,
2139                                  env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2140                                  BP_MEM_WRITE, retaddr);
2141         }
2142 
2143         /*
2144          * XXX: not efficient, but simple.
2145          * This loop must go in the forward direction to avoid issues
2146          * with self-modifying code in Windows 64-bit.
2147          */
2148         for (i = 0; i < size; ++i) {
2149             uint8_t val8;
2150             if (memop_big_endian(op)) {
2151                 /* Big-endian extract.  */
2152                 val8 = val >> (((size - 1) * 8) - (i * 8));
2153             } else {
2154                 /* Little-endian extract.  */
2155                 val8 = val >> (i * 8);
2156             }
2157             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2158         }
2159         return;
2160     }
2161 
2162     haddr = (void *)((uintptr_t)addr + entry->addend);
2163     store_memop(haddr, val, op);
2164 }
2165 
2166 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2167                         TCGMemOpIdx oi, uintptr_t retaddr)
2168 {
2169     store_helper(env, addr, val, oi, retaddr, MO_UB);
2170 }
2171 
2172 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2173                        TCGMemOpIdx oi, uintptr_t retaddr)
2174 {
2175     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2176 }
2177 
2178 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2179                        TCGMemOpIdx oi, uintptr_t retaddr)
2180 {
2181     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2182 }
2183 
2184 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2185                        TCGMemOpIdx oi, uintptr_t retaddr)
2186 {
2187     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2188 }
2189 
2190 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2191                        TCGMemOpIdx oi, uintptr_t retaddr)
2192 {
2193     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2194 }
2195 
2196 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2197                        TCGMemOpIdx oi, uintptr_t retaddr)
2198 {
2199     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2200 }
2201 
2202 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2203                        TCGMemOpIdx oi, uintptr_t retaddr)
2204 {
2205     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2206 }
2207 
2208 /*
2209  * Store Helpers for cpu_ldst.h
2210  */
2211 
2212 static inline void QEMU_ALWAYS_INLINE
2213 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2214                  int mmu_idx, uintptr_t retaddr, MemOp op)
2215 {
2216     TCGMemOpIdx oi;
2217     uint16_t meminfo;
2218 
2219     meminfo = trace_mem_get_info(op, mmu_idx, true);
2220     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2221 
2222     oi = make_memop_idx(op, mmu_idx);
2223     store_helper(env, addr, val, oi, retaddr, op);
2224 
2225     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2226 }
2227 
2228 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2229                        int mmu_idx, uintptr_t retaddr)
2230 {
2231     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2232 }
2233 
2234 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2235                           int mmu_idx, uintptr_t retaddr)
2236 {
2237     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2238 }
2239 
2240 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2241                           int mmu_idx, uintptr_t retaddr)
2242 {
2243     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2244 }
2245 
2246 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2247                           int mmu_idx, uintptr_t retaddr)
2248 {
2249     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2250 }
2251 
2252 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2253                           int mmu_idx, uintptr_t retaddr)
2254 {
2255     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2256 }
2257 
2258 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2259                           int mmu_idx, uintptr_t retaddr)
2260 {
2261     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2262 }
2263 
2264 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2265                           int mmu_idx, uintptr_t retaddr)
2266 {
2267     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2268 }
2269 
2270 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2271                      uint32_t val, uintptr_t retaddr)
2272 {
2273     cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2274 }
2275 
2276 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2277                         uint32_t val, uintptr_t retaddr)
2278 {
2279     cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2280 }
2281 
2282 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2283                         uint32_t val, uintptr_t retaddr)
2284 {
2285     cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2286 }
2287 
2288 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2289                         uint64_t val, uintptr_t retaddr)
2290 {
2291     cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2292 }
2293 
2294 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2295                         uint32_t val, uintptr_t retaddr)
2296 {
2297     cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2298 }
2299 
2300 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2301                         uint32_t val, uintptr_t retaddr)
2302 {
2303     cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2304 }
2305 
2306 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2307                         uint64_t val, uintptr_t retaddr)
2308 {
2309     cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2310 }
2311 
2312 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2313 {
2314     cpu_stb_data_ra(env, ptr, val, 0);
2315 }
2316 
2317 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2318 {
2319     cpu_stw_be_data_ra(env, ptr, val, 0);
2320 }
2321 
2322 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2323 {
2324     cpu_stl_be_data_ra(env, ptr, val, 0);
2325 }
2326 
2327 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2328 {
2329     cpu_stq_be_data_ra(env, ptr, val, 0);
2330 }
2331 
2332 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2333 {
2334     cpu_stw_le_data_ra(env, ptr, val, 0);
2335 }
2336 
2337 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2338 {
2339     cpu_stl_le_data_ra(env, ptr, val, 0);
2340 }
2341 
2342 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2343 {
2344     cpu_stq_le_data_ra(env, ptr, val, 0);
2345 }
2346 
/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

/*
 * Parameters consumed by atomic_template.h.  In this first pass every
 * generated helper takes explicit oi/retaddr arguments and its name ends
 * in "_mmu".
 */
#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
/* Both of these expand to nothing in this file. */
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX   get_mmuidx(oi)

#include "atomic_common.inc.c"

/*
 * The template is included once per access size.
 * NOTE(review): DATA_SIZE is redefined before each include without an
 * #undef here, so atomic_template.h presumably undefines it itself --
 * confirm against the template.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte helpers only when the build defines CONFIG_ATOMIC64. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* 16-byte helpers exist only in this first (_mmu) set. */
#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

/*
 * Second pass: same template, but the helpers take only oi and obtain
 * the return address from GETPC().  ATOMIC_MMU_DECLS, ATOMIC_MMU_CLEANUP
 * and ATOMIC_MMU_IDX keep their definitions from the first pass.
 */
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
/* Only ATOMIC_MMU_IDX is undefined here; the other macros stay defined. */
#undef ATOMIC_MMU_IDX
2402 
2403 /* Code access functions.  */
2404 
2405 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2406                                TCGMemOpIdx oi, uintptr_t retaddr)
2407 {
2408     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2409 }
2410 
2411 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2412 {
2413     TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2414     return full_ldub_code(env, addr, oi, 0);
2415 }
2416 
2417 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2418                                TCGMemOpIdx oi, uintptr_t retaddr)
2419 {
2420     return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2421 }
2422 
2423 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2424 {
2425     TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2426     return full_lduw_code(env, addr, oi, 0);
2427 }
2428 
2429 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2430                               TCGMemOpIdx oi, uintptr_t retaddr)
2431 {
2432     return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2433 }
2434 
2435 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2436 {
2437     TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2438     return full_ldl_code(env, addr, oi, 0);
2439 }
2440 
2441 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2442                               TCGMemOpIdx oi, uintptr_t retaddr)
2443 {
2444     return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2445 }
2446 
2447 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2448 {
2449     TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2450     return full_ldq_code(env, addr, oi, 0);
2451 }
2452