1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "hw/core/tcg-cpu-ops.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/cputlb.h"
27 #include "exec/tb-hash.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "exec/translate-all.h"
37 #include "trace/trace-root.h"
38 #include "trace/mem.h"
39 #include "internal.h"
40 #ifdef CONFIG_PLUGIN
41 #include "qemu/plugin-memory.h"
42 #endif
43 
44 /* DEBUG defines; enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
45 /* #define DEBUG_TLB */
46 /* #define DEBUG_TLB_LOG */
47 
48 #ifdef DEBUG_TLB
49 # define DEBUG_TLB_GATE 1
50 # ifdef DEBUG_TLB_LOG
51 #  define DEBUG_TLB_LOG_GATE 1
52 # else
53 #  define DEBUG_TLB_LOG_GATE 0
54 # endif
55 #else
56 # define DEBUG_TLB_GATE 0
57 # define DEBUG_TLB_LOG_GATE 0
58 #endif
59 
60 #define tlb_debug(fmt, ...) do { \
61     if (DEBUG_TLB_LOG_GATE) { \
62         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
63                       ## __VA_ARGS__); \
64     } else if (DEBUG_TLB_GATE) { \
65         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
66     } \
67 } while (0)
68 
69 #define assert_cpu_is_self(cpu) do {                              \
70         if (DEBUG_TLB_GATE) {                                     \
71             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
72         }                                                         \
73     } while (0)
74 
75 /* run_on_cpu_data.target_ptr should always be big enough for a
76  * target_ulong even on 32-bit builds */
77 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
78 
79 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
80  */
81 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
82 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
83 
84 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
85 {
86     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
87 }
88 
89 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
90 {
91     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
92 }
93 
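/*
 * Worked example (illustrative, not from the original source): with
 * CPU_TLB_ENTRY_BITS == 5 (a 32-byte CPUTLBEntry) and a 256-entry TLB,
 * fast->mask == (256 - 1) << 5 == 0x1fe0, so
 *
 *     tlb_n_entries(fast) == (0x1fe0 >> 5) + 1 == 256
 *     sizeof_tlb(fast)    == 0x1fe0 + (1 << 5) == 8192 bytes
 *
 * i.e. mask is the byte offset of the last entry and doubles as an
 * encoding of the table size.
 */
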
94 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
95                              size_t max_entries)
96 {
97     desc->window_begin_ns = ns;
98     desc->window_max_entries = max_entries;
99 }
100 
101 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
102 {
103     unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
104 
105     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
106         qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
107     }
108 }
109 
110 static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
111 {
112     /* Discard jump cache entries for any tb which might overlap the
113        flushed page.  */
114     tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
115     tb_jmp_cache_clear_page(cpu, addr);
116 }
117 
118 /**
119  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
120  * @desc: The CPUTLBDesc portion of the TLB
121  * @fast: The CPUTLBDescFast portion of the same TLB
122  *
123  * Called with tlb_lock held.
124  *
125  * We have two main constraints when resizing a TLB: (1) we only resize it
126  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
127  * the array or unnecessarily flushing it), which means we do not control how
128  * frequently the resizing can occur; (2) we don't have access to the guest's
129  * future scheduling decisions, and therefore have to decide the magnitude of
130  * the resize based on past observations.
131  *
132  * In general, a memory-hungry process can benefit greatly from an appropriately
133  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
134  * we just have to make the TLB as large as possible; while an oversized TLB
135  * results in minimal TLB miss rates, it also takes longer to be flushed
136  * (flushes can be _very_ frequent), and the reduced locality can also hurt
137  * performance.
138  *
139  * To achieve near-optimal performance for all kinds of workloads, we:
140  *
141  * 1. Aggressively increase the size of the TLB when the use rate of the
142  * TLB being flushed is high, since it is likely that in the near future this
143  * memory-hungry process will execute again, and its memory hungriness will
144  * probably be similar.
145  *
146  * 2. Slowly reduce the size of the TLB as the use rate declines over a
147  * reasonably large time window. The rationale is that if in such a time window
148  * we have not observed a high TLB use rate, it is likely that we won't observe
149  * it in the near future. In that case, once a time window expires we downsize
150  * the TLB to match the maximum use rate observed in the window.
151  *
152  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
153  * since in that range performance is likely near-optimal. Recall that the TLB
154  * is direct mapped, so we want the use rate to be low (or at least not too
155  * high), since otherwise we are likely to have a significant amount of
156  * conflict misses.
157  */
158 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
159                                   int64_t now)
160 {
161     size_t old_size = tlb_n_entries(fast);
162     size_t rate;
163     size_t new_size = old_size;
164     int64_t window_len_ms = 100;
165     int64_t window_len_ns = window_len_ms * 1000 * 1000;
166     bool window_expired = now > desc->window_begin_ns + window_len_ns;
167 
168     if (desc->n_used_entries > desc->window_max_entries) {
169         desc->window_max_entries = desc->n_used_entries;
170     }
171     rate = desc->window_max_entries * 100 / old_size;
172 
173     if (rate > 70) {
174         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
175     } else if (rate < 30 && window_expired) {
176         size_t ceil = pow2ceil(desc->window_max_entries);
177         size_t expected_rate = desc->window_max_entries * 100 / ceil;
178 
179         /*
180          * Avoid undersizing when the max number of entries seen is just below
181          * a pow2. For instance, if max_entries == 1025, the expected use rate
182          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
183          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
184          * later. Thus, make sure that the expected use rate remains below 70%
185          * (and since we double the size, that means the lowest rate we'd
186          * expect to get is 35%, which is still in the 30-70% range where
187          * we consider that the size is appropriate).
188          */
189         if (expected_rate > 70) {
190             ceil *= 2;
191         }
192         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
193     }
194 
195     if (new_size == old_size) {
196         if (window_expired) {
197             tlb_window_reset(desc, now, desc->n_used_entries);
198         }
199         return;
200     }
201 
202     g_free(fast->table);
203     g_free(desc->iotlb);
204 
205     tlb_window_reset(desc, now, 0);
206     /* desc->n_used_entries is cleared by the caller */
207     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
208     fast->table = g_try_new(CPUTLBEntry, new_size);
209     desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
210 
211     /*
212      * If the allocations fail, try smaller sizes. We just freed some
213      * memory, so going back to half of new_size has a good chance of working.
214      * Increased memory pressure elsewhere in the system might cause the
215      * allocations to fail though, so we progressively reduce the allocation
216      * size, aborting if we cannot even allocate the smallest TLB we support.
217      */
218     while (fast->table == NULL || desc->iotlb == NULL) {
219         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
220             error_report("%s: %s", __func__, strerror(errno));
221             abort();
222         }
223         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
224         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
225 
226         g_free(fast->table);
227         g_free(desc->iotlb);
228         fast->table = g_try_new(CPUTLBEntry, new_size);
229         desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
230     }
231 }
232 
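/*
 * Worked example of the heuristic above (illustrative numbers only):
 * suppose old_size == 1024.  If the window records
 * window_max_entries == 800, then rate == 800 * 100 / 1024 == 78 > 70,
 * so the table doubles to 2048 entries (capped at 1 << CPU_TLB_DYN_MAX_BITS).
 * If instead the window expires with window_max_entries == 200,
 * rate == 19 < 30, so we shrink: pow2ceil(200) == 256, but the expected
 * rate 200 * 100 / 256 == 78 is above 70, so ceil is doubled to 512,
 * giving an expected use rate of ~39% -- inside the 30-70% target band.
 */
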
233 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
234 {
235     desc->n_used_entries = 0;
236     desc->large_page_addr = -1;
237     desc->large_page_mask = -1;
238     desc->vindex = 0;
239     memset(fast->table, -1, sizeof_tlb(fast));
240     memset(desc->vtable, -1, sizeof(desc->vtable));
241 }
242 
243 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
244                                         int64_t now)
245 {
246     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
247     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
248 
249     tlb_mmu_resize_locked(desc, fast, now);
250     tlb_mmu_flush_locked(desc, fast);
251 }
252 
253 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
254 {
255     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
256 
257     tlb_window_reset(desc, now, 0);
258     desc->n_used_entries = 0;
259     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
260     fast->table = g_new(CPUTLBEntry, n_entries);
261     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
262     tlb_mmu_flush_locked(desc, fast);
263 }
264 
265 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
266 {
267     env_tlb(env)->d[mmu_idx].n_used_entries++;
268 }
269 
270 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
271 {
272     env_tlb(env)->d[mmu_idx].n_used_entries--;
273 }
274 
275 void tlb_init(CPUState *cpu)
276 {
277     CPUArchState *env = cpu->env_ptr;
278     int64_t now = get_clock_realtime();
279     int i;
280 
281     qemu_spin_init(&env_tlb(env)->c.lock);
282 
283     /* All tlbs are initialized flushed. */
284     env_tlb(env)->c.dirty = 0;
285 
286     for (i = 0; i < NB_MMU_MODES; i++) {
287         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
288     }
289 }
290 
291 void tlb_destroy(CPUState *cpu)
292 {
293     CPUArchState *env = cpu->env_ptr;
294     int i;
295 
296     qemu_spin_destroy(&env_tlb(env)->c.lock);
297     for (i = 0; i < NB_MMU_MODES; i++) {
298         CPUTLBDesc *desc = &env_tlb(env)->d[i];
299         CPUTLBDescFast *fast = &env_tlb(env)->f[i];
300 
301         g_free(fast->table);
302         g_free(desc->iotlb);
303     }
304 }
305 
306 /* flush_all_helper: run fn across all cpus
307  *
308  * This only queues fn on every cpu other than @src; the caller is
309  * responsible for running (or queueing) fn on @src itself.  The *_synced
310  * variants queue it there as "safe" work, creating a synchronisation point
311  * where all queued work is finished before execution starts again.
312  */
313 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
314                              run_on_cpu_data d)
315 {
316     CPUState *cpu;
317 
318     CPU_FOREACH(cpu) {
319         if (cpu != src) {
320             async_run_on_cpu(cpu, fn, d);
321         }
322     }
323 }
324 
325 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
326 {
327     CPUState *cpu;
328     size_t full = 0, part = 0, elide = 0;
329 
330     CPU_FOREACH(cpu) {
331         CPUArchState *env = cpu->env_ptr;
332 
333         full += qatomic_read(&env_tlb(env)->c.full_flush_count);
334         part += qatomic_read(&env_tlb(env)->c.part_flush_count);
335         elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
336     }
337     *pfull = full;
338     *ppart = part;
339     *pelide = elide;
340 }
341 
342 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
343 {
344     CPUArchState *env = cpu->env_ptr;
345     uint16_t asked = data.host_int;
346     uint16_t all_dirty, work, to_clean;
347     int64_t now = get_clock_realtime();
348 
349     assert_cpu_is_self(cpu);
350 
351     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
352 
353     qemu_spin_lock(&env_tlb(env)->c.lock);
354 
355     all_dirty = env_tlb(env)->c.dirty;
356     to_clean = asked & all_dirty;
357     all_dirty &= ~to_clean;
358     env_tlb(env)->c.dirty = all_dirty;
359 
360     for (work = to_clean; work != 0; work &= work - 1) {
361         int mmu_idx = ctz32(work);
362         tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
363     }
364 
365     qemu_spin_unlock(&env_tlb(env)->c.lock);
366 
367     cpu_tb_jmp_cache_clear(cpu);
368 
369     if (to_clean == ALL_MMUIDX_BITS) {
370         qatomic_set(&env_tlb(env)->c.full_flush_count,
371                    env_tlb(env)->c.full_flush_count + 1);
372     } else {
373         qatomic_set(&env_tlb(env)->c.part_flush_count,
374                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
375         if (to_clean != asked) {
376             qatomic_set(&env_tlb(env)->c.elide_flush_count,
377                        env_tlb(env)->c.elide_flush_count +
378                        ctpop16(asked & ~to_clean));
379         }
380     }
381 }
382 
383 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
384 {
385     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
386 
387     if (cpu->created && !qemu_cpu_is_self(cpu)) {
388         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
389                          RUN_ON_CPU_HOST_INT(idxmap));
390     } else {
391         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
392     }
393 }
394 
395 void tlb_flush(CPUState *cpu)
396 {
397     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
398 }
399 
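/*
 * Usage sketch (hypothetical target code, not part of this file): a target
 * that switches translation regimes usually flushes only the affected MMU
 * indexes rather than the whole TLB, e.g.
 *
 *     // cs is the target's CPUState; MY_MMU_IDX_* are illustrative names
 *     tlb_flush_by_mmuidx(cs, (1 << MY_MMU_IDX_KERNEL) | (1 << MY_MMU_IDX_USER));
 *
 * tlb_flush() above is simply the ALL_MMUIDX_BITS case of the same call.
 */
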
400 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
401 {
402     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
403 
404     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
405 
406     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
407     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
408 }
409 
410 void tlb_flush_all_cpus(CPUState *src_cpu)
411 {
412     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
413 }
414 
415 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
416 {
417     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
418 
419     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
420 
421     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
422     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
423 }
424 
425 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
426 {
427     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
428 }
429 
430 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
431                                       target_ulong page, target_ulong mask)
432 {
433     page &= mask;
434     mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
435 
436     return (page == (tlb_entry->addr_read & mask) ||
437             page == (tlb_addr_write(tlb_entry) & mask) ||
438             page == (tlb_entry->addr_code & mask));
439 }
440 
441 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
442                                         target_ulong page)
443 {
444     return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
445 }
446 
447 /**
448  * tlb_entry_is_empty - return true if the entry is not in use
449  * @te: pointer to CPUTLBEntry
450  */
451 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
452 {
453     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
454 }
455 
456 /* Called with tlb_c.lock held */
457 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
458                                         target_ulong page,
459                                         target_ulong mask)
460 {
461     if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
462         memset(tlb_entry, -1, sizeof(*tlb_entry));
463         return true;
464     }
465     return false;
466 }
467 
468 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
469                                           target_ulong page)
470 {
471     return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
472 }
473 
474 /* Called with tlb_c.lock held */
475 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
476                                             target_ulong page,
477                                             target_ulong mask)
478 {
479     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
480     int k;
481 
482     assert_cpu_is_self(env_cpu(env));
483     for (k = 0; k < CPU_VTLB_SIZE; k++) {
484         if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
485             tlb_n_used_entries_dec(env, mmu_idx);
486         }
487     }
488 }
489 
490 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
491                                               target_ulong page)
492 {
493     tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
494 }
495 
496 static void tlb_flush_page_locked(CPUArchState *env, int midx,
497                                   target_ulong page)
498 {
499     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
500     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
501 
502     /* Check if we need to flush due to large pages.  */
503     if ((page & lp_mask) == lp_addr) {
504         tlb_debug("forcing full flush midx %d ("
505                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
506                   midx, lp_addr, lp_mask);
507         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
508     } else {
509         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
510             tlb_n_used_entries_dec(env, midx);
511         }
512         tlb_flush_vtlb_page_locked(env, midx, page);
513     }
514 }
515 
516 /**
517  * tlb_flush_page_by_mmuidx_async_0:
518  * @cpu: cpu on which to flush
519  * @addr: page of virtual address to flush
520  * @idxmap: set of mmu_idx to flush
521  *
522  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
523  * at @addr from the tlbs indicated by @idxmap from @cpu.
524  */
525 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
526                                              target_ulong addr,
527                                              uint16_t idxmap)
528 {
529     CPUArchState *env = cpu->env_ptr;
530     int mmu_idx;
531 
532     assert_cpu_is_self(cpu);
533 
534     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
535 
536     qemu_spin_lock(&env_tlb(env)->c.lock);
537     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
538         if ((idxmap >> mmu_idx) & 1) {
539             tlb_flush_page_locked(env, mmu_idx, addr);
540         }
541     }
542     qemu_spin_unlock(&env_tlb(env)->c.lock);
543 
544     tb_flush_jmp_cache(cpu, addr);
545 }
546 
547 /**
548  * tlb_flush_page_by_mmuidx_async_1:
549  * @cpu: cpu on which to flush
550  * @data: encoded addr + idxmap
551  *
552  * Helper for tlb_flush_page_by_mmuidx and friends, called through
553  * async_run_on_cpu.  The idxmap parameter is encoded in the page
554  * offset of the target_ptr field.  This limits the set of mmu_idx
555  * that can be passed via this method.
556  */
557 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
558                                              run_on_cpu_data data)
559 {
560     target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
561     target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
562     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
563 
564     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
565 }
566 
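/*
 * Packing sketch for the async_1 path above: because @addr is page aligned
 * and @idxmap < TARGET_PAGE_SIZE, both fit in a single target_ulong:
 *
 *     run_on_cpu_data data = RUN_ON_CPU_TARGET_PTR(addr | idxmap);
 *     ...
 *     target_ulong a  = (target_ulong)data.target_ptr & TARGET_PAGE_MASK;
 *     uint16_t     ix = (target_ulong)data.target_ptr & ~TARGET_PAGE_MASK;
 *
 * which is exactly the encoding built by tlb_flush_page_by_mmuidx() below.
 */
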
567 typedef struct {
568     target_ulong addr;
569     uint16_t idxmap;
570 } TLBFlushPageByMMUIdxData;
571 
572 /**
573  * tlb_flush_page_by_mmuidx_async_2:
574  * @cpu: cpu on which to flush
575  * @data: allocated addr + idxmap
576  *
577  * Helper for tlb_flush_page_by_mmuidx and friends, called through
578  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
579  * TLBFlushPageByMMUIdxData structure that has been allocated
580  * specifically for this helper.  Free the structure when done.
581  */
582 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
583                                              run_on_cpu_data data)
584 {
585     TLBFlushPageByMMUIdxData *d = data.host_ptr;
586 
587     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
588     g_free(d);
589 }
590 
591 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
592 {
593     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
594 
595     /* This should already be page aligned */
596     addr &= TARGET_PAGE_MASK;
597 
598     if (qemu_cpu_is_self(cpu)) {
599         tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
600     } else if (idxmap < TARGET_PAGE_SIZE) {
601         /*
602          * Most targets have only a few mmu_idx.  In the case where
603          * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
604          * allocating memory for this operation.
605          */
606         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
607                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
608     } else {
609         TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
610 
611         /* Otherwise allocate a structure, freed by the worker.  */
612         d->addr = addr;
613         d->idxmap = idxmap;
614         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
615                          RUN_ON_CPU_HOST_PTR(d));
616     }
617 }
618 
619 void tlb_flush_page(CPUState *cpu, target_ulong addr)
620 {
621     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
622 }
623 
624 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
625                                        uint16_t idxmap)
626 {
627     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
628 
629     /* This should already be page aligned */
630     addr &= TARGET_PAGE_MASK;
631 
632     /*
633      * Allocate memory to hold addr+idxmap only when needed.
634      * See tlb_flush_page_by_mmuidx for details.
635      */
636     if (idxmap < TARGET_PAGE_SIZE) {
637         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
638                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
639     } else {
640         CPUState *dst_cpu;
641 
642         /* Allocate a separate data block for each destination cpu.  */
643         CPU_FOREACH(dst_cpu) {
644             if (dst_cpu != src_cpu) {
645                 TLBFlushPageByMMUIdxData *d
646                     = g_new(TLBFlushPageByMMUIdxData, 1);
647 
648                 d->addr = addr;
649                 d->idxmap = idxmap;
650                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
651                                  RUN_ON_CPU_HOST_PTR(d));
652             }
653         }
654     }
655 
656     tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
657 }
658 
659 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
660 {
661     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
662 }
663 
664 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
665                                               target_ulong addr,
666                                               uint16_t idxmap)
667 {
668     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
669 
670     /* This should already be page aligned */
671     addr &= TARGET_PAGE_MASK;
672 
673     /*
674      * Allocate memory to hold addr+idxmap only when needed.
675      * See tlb_flush_page_by_mmuidx for details.
676      */
677     if (idxmap < TARGET_PAGE_SIZE) {
678         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
679                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
680         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
681                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
682     } else {
683         CPUState *dst_cpu;
684         TLBFlushPageByMMUIdxData *d;
685 
686         /* Allocate a separate data block for each destination cpu.  */
687         CPU_FOREACH(dst_cpu) {
688             if (dst_cpu != src_cpu) {
689                 d = g_new(TLBFlushPageByMMUIdxData, 1);
690                 d->addr = addr;
691                 d->idxmap = idxmap;
692                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
693                                  RUN_ON_CPU_HOST_PTR(d));
694             }
695         }
696 
697         d = g_new(TLBFlushPageByMMUIdxData, 1);
698         d->addr = addr;
699         d->idxmap = idxmap;
700         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
701                               RUN_ON_CPU_HOST_PTR(d));
702     }
703 }
704 
705 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
706 {
707     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
708 }
709 
710 static void tlb_flush_range_locked(CPUArchState *env, int midx,
711                                    target_ulong addr, target_ulong len,
712                                    unsigned bits)
713 {
714     CPUTLBDesc *d = &env_tlb(env)->d[midx];
715     CPUTLBDescFast *f = &env_tlb(env)->f[midx];
716     target_ulong mask = MAKE_64BIT_MASK(0, bits);
717 
718     /*
719      * If @bits is smaller than the tlb size, there may be multiple entries
720      * within the TLB; otherwise all addresses that match under @mask hit
721      * the same TLB entry.
722      * TODO: Perhaps allow bits to be a few bits less than the size.
723      * For now, just flush the entire TLB.
724      *
725      * If @len is larger than the tlb size, then it will take longer to
726      * test all of the entries in the TLB than it will to flush it all.
727      */
728     if (mask < f->mask || len > f->mask) {
729         tlb_debug("forcing full flush midx %d ("
730                   TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
731                   midx, addr, mask, len);
732         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
733         return;
734     }
735 
736     /*
737      * Check if we need to flush due to large pages.
738      * Because large_page_mask contains all 1's from the msb,
739      * we only need to test the end of the range.
740      */
741     if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
742         tlb_debug("forcing full flush midx %d ("
743                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
744                   midx, d->large_page_addr, d->large_page_mask);
745         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
746         return;
747     }
748 
749     for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
750         target_ulong page = addr + i;
751         CPUTLBEntry *entry = tlb_entry(env, midx, page);
752 
753         if (tlb_flush_entry_mask_locked(entry, page, mask)) {
754             tlb_n_used_entries_dec(env, midx);
755         }
756         tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
757     }
758 }
759 
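/*
 * Worked example for the two early-outs above (illustrative numbers,
 * assuming 4 KiB target pages, a 256-entry TLB and CPU_TLB_ENTRY_BITS == 5,
 * i.e. f->mask == 0x1fe0):
 *
 *   - bits == 12 gives mask == 0xfff < f->mask: addresses equal under @mask
 *     can still index different TLB slots, so checking only tlb_entry(page)
 *     for each page could miss entries -> full flush.
 *
 *   - len == 1 MiB gives len > f->mask: the per-page loop would do more
 *     work than simply flushing the whole table -> full flush.
 */
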
760 typedef struct {
761     target_ulong addr;
762     target_ulong len;
763     uint16_t idxmap;
764     uint16_t bits;
765 } TLBFlushRangeData;
766 
767 static void
768 tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
769                                       TLBFlushRangeData d)
770 {
771     CPUArchState *env = cpu->env_ptr;
772     int mmu_idx;
773 
774     assert_cpu_is_self(cpu);
775 
776     tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
777               d.addr, d.bits, d.len, d.idxmap);
778 
779     qemu_spin_lock(&env_tlb(env)->c.lock);
780     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
781         if ((d.idxmap >> mmu_idx) & 1) {
782             tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits);
783         }
784     }
785     qemu_spin_unlock(&env_tlb(env)->c.lock);
786 
787     for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
788         tb_flush_jmp_cache(cpu, d.addr + i);
789     }
790 }
791 
792 static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
793                                                   run_on_cpu_data data)
794 {
795     TLBFlushRangeData *d = data.host_ptr;
796     tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
797     g_free(d);
798 }
799 
800 void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
801                                target_ulong len, uint16_t idxmap,
802                                unsigned bits)
803 {
804     TLBFlushRangeData d;
805 
806     /*
807      * If all bits are significant, and len is small,
808      * this devolves to tlb_flush_page.
809      */
810     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
811         tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
812         return;
813     }
814     /* If no page bits are significant, this devolves to tlb_flush. */
815     if (bits < TARGET_PAGE_BITS) {
816         tlb_flush_by_mmuidx(cpu, idxmap);
817         return;
818     }
819 
820     /* This should already be page aligned */
821     d.addr = addr & TARGET_PAGE_MASK;
822     d.len = len;
823     d.idxmap = idxmap;
824     d.bits = bits;
825 
826     if (qemu_cpu_is_self(cpu)) {
827         tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
828     } else {
829         /* Otherwise allocate a structure, freed by the worker.  */
830         TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
831         async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2,
832                          RUN_ON_CPU_HOST_PTR(p));
833     }
834 }
835 
836 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
837                                    uint16_t idxmap, unsigned bits)
838 {
839     tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
840 }
841 
842 void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
843                                         target_ulong addr, target_ulong len,
844                                         uint16_t idxmap, unsigned bits)
845 {
846     TLBFlushRangeData d;
847     CPUState *dst_cpu;
848 
849     /*
850      * If all bits are significant, and len is small,
851      * this devolves to tlb_flush_page.
852      */
853     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
854         tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
855         return;
856     }
857     /* If no page bits are significant, this devolves to tlb_flush. */
858     if (bits < TARGET_PAGE_BITS) {
859         tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
860         return;
861     }
862 
863     /* This should already be page aligned */
864     d.addr = addr & TARGET_PAGE_MASK;
865     d.len = len;
866     d.idxmap = idxmap;
867     d.bits = bits;
868 
869     /* Allocate a separate data block for each destination cpu.  */
870     CPU_FOREACH(dst_cpu) {
871         if (dst_cpu != src_cpu) {
872             TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
873             async_run_on_cpu(dst_cpu,
874                              tlb_flush_page_bits_by_mmuidx_async_2,
875                              RUN_ON_CPU_HOST_PTR(p));
876         }
877     }
878 
879     tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d);
880 }
881 
882 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
883                                             target_ulong addr,
884                                             uint16_t idxmap, unsigned bits)
885 {
886     tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
887                                        idxmap, bits);
888 }
889 
890 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
891                                                    target_ulong addr,
892                                                    uint16_t idxmap,
893                                                    unsigned bits)
894 {
895     TLBFlushRangeData d, *p;
896     CPUState *dst_cpu;
897 
898     /* If all bits are significant, this devolves to tlb_flush_page. */
899     if (bits >= TARGET_LONG_BITS) {
900         tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
901         return;
902     }
903     /* If no page bits are significant, this devolves to tlb_flush. */
904     if (bits < TARGET_PAGE_BITS) {
905         tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
906         return;
907     }
908 
909     /* This should already be page aligned */
910     d.addr = addr & TARGET_PAGE_MASK;
911     d.len = TARGET_PAGE_SIZE;
912     d.idxmap = idxmap;
913     d.bits = bits;
914 
915     /* Allocate a separate data block for each destination cpu.  */
916     CPU_FOREACH(dst_cpu) {
917         if (dst_cpu != src_cpu) {
918             p = g_memdup(&d, sizeof(d));
919             async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
920                              RUN_ON_CPU_HOST_PTR(p));
921         }
922     }
923 
924     p = g_memdup(&d, sizeof(d));
925     async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
926                           RUN_ON_CPU_HOST_PTR(p));
927 }
928 
929 /* update the TLBs so that writes to code in the RAM page 'ram_addr'
930    can be detected */
931 void tlb_protect_code(ram_addr_t ram_addr)
932 {
933     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
934                                              DIRTY_MEMORY_CODE);
935 }
936 
937 /* update the TLB so that writes in the RAM page 'ram_addr' are no longer
938    tested for self-modifying code */
939 void tlb_unprotect_code(ram_addr_t ram_addr)
940 {
941     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
942 }
943 
944 
945 /*
946  * Dirty write flag handling
947  *
948  * When the TCG code writes to a location it looks up the address in
949  * the TLB and uses that data to compute the final address. If any of
950  * the lower bits of the address are set then the slow path is forced.
951  * There are a number of reasons to do this but for normal RAM the
952  * most usual is detecting writes to code regions which may invalidate
953  * generated code.
954  *
955  * Other vCPUs might be reading their TLBs during guest execution, so we update
956  * te->addr_write with qatomic_set. We don't need to worry about this for
957  * oversized guests as MTTCG is disabled for them.
958  *
959  * Called with tlb_c.lock held.
960  */
961 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
962                                          uintptr_t start, uintptr_t length)
963 {
964     uintptr_t addr = tlb_entry->addr_write;
965 
966     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
967                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
968         addr &= TARGET_PAGE_MASK;
969         addr += tlb_entry->addend;
970         if ((addr - start) < length) {
971 #if TCG_OVERSIZED_GUEST
972             tlb_entry->addr_write |= TLB_NOTDIRTY;
973 #else
974             qatomic_set(&tlb_entry->addr_write,
975                        tlb_entry->addr_write | TLB_NOTDIRTY);
976 #endif
977         }
978     }
979 }
980 
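/*
 * Why setting TLB_NOTDIRTY above is sufficient (sketch, not verbatim from
 * the store path later in this file): the generated fast path compares the
 * access address against the whole addr_write word,
 *
 *     if ((addr & (TARGET_PAGE_MASK | (size - 1))) != tlb_addr) {
 *         // any low flag bit, e.g. TLB_NOTDIRTY, forces the slow path
 *     }
 *
 * and the slow path then does
 *
 *     if (tlb_addr & TLB_NOTDIRTY) {
 *         notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
 *     }
 *
 * re-enabling dirty tracking and SMC detection for that page.
 */
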
981 /*
982  * Called with tlb_c.lock held.
983  * Called only from the vCPU context, i.e. the TLB's owner thread.
984  */
985 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
986 {
987     *d = *s;
988 }
989 
990 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
991  * the target vCPU).
992  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
993  * thing actually updated is the target TLB entry ->addr_write flags.
994  */
995 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
996 {
997     CPUArchState *env;
998 
999     int mmu_idx;
1000 
1001     env = cpu->env_ptr;
1002     qemu_spin_lock(&env_tlb(env)->c.lock);
1003     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1004         unsigned int i;
1005         unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
1006 
1007         for (i = 0; i < n; i++) {
1008             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
1009                                          start1, length);
1010         }
1011 
1012         for (i = 0; i < CPU_VTLB_SIZE; i++) {
1013             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
1014                                          start1, length);
1015         }
1016     }
1017     qemu_spin_unlock(&env_tlb(env)->c.lock);
1018 }
1019 
1020 /* Called with tlb_c.lock held */
1021 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
1022                                          target_ulong vaddr)
1023 {
1024     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
1025         tlb_entry->addr_write = vaddr;
1026     }
1027 }
1028 
1029 /* update the TLB corresponding to virtual page vaddr so that writes to the
1030    now-dirty RAM page no longer take the notdirty slow path */
1031 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
1032 {
1033     CPUArchState *env = cpu->env_ptr;
1034     int mmu_idx;
1035 
1036     assert_cpu_is_self(cpu);
1037 
1038     vaddr &= TARGET_PAGE_MASK;
1039     qemu_spin_lock(&env_tlb(env)->c.lock);
1040     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1041         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
1042     }
1043 
1044     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1045         int k;
1046         for (k = 0; k < CPU_VTLB_SIZE; k++) {
1047             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
1048         }
1049     }
1050     qemu_spin_unlock(&env_tlb(env)->c.lock);
1051 }
1052 
1053 /* Our TLB does not support large pages, so remember the area covered by
1054    large pages and trigger a full TLB flush if these are invalidated.  */
1055 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
1056                                target_ulong vaddr, target_ulong size)
1057 {
1058     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
1059     target_ulong lp_mask = ~(size - 1);
1060 
1061     if (lp_addr == (target_ulong)-1) {
1062         /* No previous large page.  */
1063         lp_addr = vaddr;
1064     } else {
1065         /* Extend the existing region to include the new page.
1066            This is a compromise between unnecessary flushes and
1067            the cost of maintaining a full variable size TLB.  */
1068         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
1069         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
1070             lp_mask <<= 1;
1071         }
1072     }
1073     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
1074     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
1075 }
1076 
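/*
 * Worked example (illustrative, assuming 32-bit target addresses): a 2 MiB
 * page at 0x00400000 is recorded as lp_addr == 0x00400000,
 * lp_mask == 0xffe00000.  Adding another 2 MiB page at 0x00a00000 gives
 * lp_addr ^ vaddr == 0x00e00000, so lp_mask is widened
 * 0xffe00000 -> 0xffc00000 -> 0xff800000 -> 0xff000000 until it no longer
 * covers any differing bit, and the tracked region becomes the 16 MiB at
 * 0x00000000.  A tlb_flush_page() anywhere inside that region now triggers
 * a full flush of this mmu_idx (see tlb_flush_page_locked above).
 */
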
1077 /* Add a new TLB entry. At most one entry for a given virtual address
1078  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
1079  * supplied size is only used by tlb_flush_page.
1080  *
1081  * Called from TCG-generated code, which is under an RCU read-side
1082  * critical section.
1083  */
1084 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
1085                              hwaddr paddr, MemTxAttrs attrs, int prot,
1086                              int mmu_idx, target_ulong size)
1087 {
1088     CPUArchState *env = cpu->env_ptr;
1089     CPUTLB *tlb = env_tlb(env);
1090     CPUTLBDesc *desc = &tlb->d[mmu_idx];
1091     MemoryRegionSection *section;
1092     unsigned int index;
1093     target_ulong address;
1094     target_ulong write_address;
1095     uintptr_t addend;
1096     CPUTLBEntry *te, tn;
1097     hwaddr iotlb, xlat, sz, paddr_page;
1098     target_ulong vaddr_page;
1099     int asidx = cpu_asidx_from_attrs(cpu, attrs);
1100     int wp_flags;
1101     bool is_ram, is_romd;
1102 
1103     assert_cpu_is_self(cpu);
1104 
1105     if (size <= TARGET_PAGE_SIZE) {
1106         sz = TARGET_PAGE_SIZE;
1107     } else {
1108         tlb_add_large_page(env, mmu_idx, vaddr, size);
1109         sz = size;
1110     }
1111     vaddr_page = vaddr & TARGET_PAGE_MASK;
1112     paddr_page = paddr & TARGET_PAGE_MASK;
1113 
1114     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
1115                                                 &xlat, &sz, attrs, &prot);
1116     assert(sz >= TARGET_PAGE_SIZE);
1117 
1118     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
1119               " prot=%x idx=%d\n",
1120               vaddr, paddr, prot, mmu_idx);
1121 
1122     address = vaddr_page;
1123     if (size < TARGET_PAGE_SIZE) {
1124         /* Repeat the MMU check and TLB fill on every access.  */
1125         address |= TLB_INVALID_MASK;
1126     }
1127     if (attrs.byte_swap) {
1128         address |= TLB_BSWAP;
1129     }
1130 
1131     is_ram = memory_region_is_ram(section->mr);
1132     is_romd = memory_region_is_romd(section->mr);
1133 
1134     if (is_ram || is_romd) {
1135         /* RAM and ROMD both have associated host memory. */
1136         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
1137     } else {
1138         /* I/O does not; force the host address to NULL. */
1139         addend = 0;
1140     }
1141 
1142     write_address = address;
1143     if (is_ram) {
1144         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1145         /*
1146          * Computing is_clean is expensive; avoid all that unless
1147          * the page is actually writable.
1148          */
1149         if (prot & PAGE_WRITE) {
1150             if (section->readonly) {
1151                 write_address |= TLB_DISCARD_WRITE;
1152             } else if (cpu_physical_memory_is_clean(iotlb)) {
1153                 write_address |= TLB_NOTDIRTY;
1154             }
1155         }
1156     } else {
1157         /* I/O or ROMD */
1158         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
1159         /*
1160          * Writes to romd devices must go through MMIO to enable write.
1161          * Reads from romd devices go through the ram_ptr found above,
1162          * but of course reads from I/O must go through MMIO.
1163          */
1164         write_address |= TLB_MMIO;
1165         if (!is_romd) {
1166             address = write_address;
1167         }
1168     }
1169 
1170     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
1171                                               TARGET_PAGE_SIZE);
1172 
1173     index = tlb_index(env, mmu_idx, vaddr_page);
1174     te = tlb_entry(env, mmu_idx, vaddr_page);
1175 
1176     /*
1177      * Hold the TLB lock for the rest of the function. We could acquire/release
1178      * the lock several times in the function, but it is faster to amortize the
1179      * acquisition cost by acquiring it just once. Note that this leads to
1180      * a longer critical section, but this is not a concern since the TLB lock
1181      * is unlikely to be contended.
1182      */
1183     qemu_spin_lock(&tlb->c.lock);
1184 
1185     /* Note that the tlb is no longer clean.  */
1186     tlb->c.dirty |= 1 << mmu_idx;
1187 
1188     /* Make sure there's no cached translation for the new page.  */
1189     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
1190 
1191     /*
1192      * Only evict the old entry to the victim tlb if it's for a
1193      * different page; otherwise just overwrite the stale data.
1194      */
1195     if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
1196         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
1197         CPUTLBEntry *tv = &desc->vtable[vidx];
1198 
1199         /* Evict the old entry into the victim tlb.  */
1200         copy_tlb_helper_locked(tv, te);
1201         desc->viotlb[vidx] = desc->iotlb[index];
1202         tlb_n_used_entries_dec(env, mmu_idx);
1203     }
1204 
1205     /* refill the tlb */
1206     /*
1207      * At this point iotlb contains a physical section number in the lower
1208      * TARGET_PAGE_BITS, and either
1209      *  + the ram_addr_t of the page base of the target RAM (RAM)
1210      *  + the offset within section->mr of the page base (I/O, ROMD)
1211      * We subtract the vaddr_page (which is page aligned and thus won't
1212      * disturb the low bits) to give an offset which can be added to the
1213      * (non-page-aligned) vaddr of the eventual memory access to get
1214      * the MemoryRegion offset for the access. Note that the vaddr we
1215      * subtract here is that of the page base, and not the same as the
1216      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
1217      */
1218     desc->iotlb[index].addr = iotlb - vaddr_page;
1219     desc->iotlb[index].attrs = attrs;
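    /*
     * Illustrative arithmetic for the offset stored above (example values,
     * not from the source): with 4 KiB pages, section number 5 and the page
     * base at offset 0x3000 within section->mr, iotlb == 0x3005.  For
     * vaddr_page == 0x40002000 we store 0x3005 - 0x40002000; io_readx()
     * later computes
     *     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr
     * so an access at vaddr 0x40002084 yields
     * 0x3000 - 0x40002000 + 0x40002084 == 0x3084, the MemoryRegion offset
     * of the access.
     */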
1220 
1221     /* Now calculate the new entry */
1222     tn.addend = addend - vaddr_page;
1223     if (prot & PAGE_READ) {
1224         tn.addr_read = address;
1225         if (wp_flags & BP_MEM_READ) {
1226             tn.addr_read |= TLB_WATCHPOINT;
1227         }
1228     } else {
1229         tn.addr_read = -1;
1230     }
1231 
1232     if (prot & PAGE_EXEC) {
1233         tn.addr_code = address;
1234     } else {
1235         tn.addr_code = -1;
1236     }
1237 
1238     tn.addr_write = -1;
1239     if (prot & PAGE_WRITE) {
1240         tn.addr_write = write_address;
1241         if (prot & PAGE_WRITE_INV) {
1242             tn.addr_write |= TLB_INVALID_MASK;
1243         }
1244         if (wp_flags & BP_MEM_WRITE) {
1245             tn.addr_write |= TLB_WATCHPOINT;
1246         }
1247     }
1248 
1249     copy_tlb_helper_locked(te, &tn);
1250     tlb_n_used_entries_inc(env, mmu_idx);
1251     qemu_spin_unlock(&tlb->c.lock);
1252 }
1253 
1254 /* Add a new TLB entry, but without specifying the memory
1255  * transaction attributes to be used.
1256  */
1257 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
1258                   hwaddr paddr, int prot,
1259                   int mmu_idx, target_ulong size)
1260 {
1261     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1262                             prot, mmu_idx, size);
1263 }
1264 
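/*
 * Usage sketch (hypothetical target code, not part of this file): a
 * target's tcg_ops->tlb_fill hook typically ends up here after walking its
 * page tables, e.g.
 *
 *     // my_mmu_translate() and its outputs are illustrative only
 *     if (my_mmu_translate(env, addr, access_type, &phys, &prot, &page_size)) {
 *         tlb_set_page_with_attrs(cs, addr, phys & TARGET_PAGE_MASK,
 *                                 MEMTXATTRS_UNSPECIFIED, prot,
 *                                 mmu_idx, page_size);
 *         return true;
 *     }
 *
 * with page_size > TARGET_PAGE_SIZE triggering the large-page tracking in
 * tlb_add_large_page() above.
 */
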
1265 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1266 {
1267     ram_addr_t ram_addr;
1268 
1269     ram_addr = qemu_ram_addr_from_host(ptr);
1270     if (ram_addr == RAM_ADDR_INVALID) {
1271         error_report("Bad ram pointer %p", ptr);
1272         abort();
1273     }
1274     return ram_addr;
1275 }
1276 
1277 /*
1278  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1279  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1280  * be discarded and looked up again (e.g. via tlb_entry()).
1281  */
1282 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1283                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1284 {
1285     CPUClass *cc = CPU_GET_CLASS(cpu);
1286     bool ok;
1287 
1288     /*
1289      * This is not a probe, so the only valid return is success; failure
1290      * should result in exception + longjmp to the cpu loop.
1291      */
1292     ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
1293                                access_type, mmu_idx, false, retaddr);
1294     assert(ok);
1295 }
1296 
1297 static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
1298                                         MMUAccessType access_type,
1299                                         int mmu_idx, uintptr_t retaddr)
1300 {
1301     CPUClass *cc = CPU_GET_CLASS(cpu);
1302 
1303     cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
1304 }
1305 
1306 static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
1307                                           vaddr addr, unsigned size,
1308                                           MMUAccessType access_type,
1309                                           int mmu_idx, MemTxAttrs attrs,
1310                                           MemTxResult response,
1311                                           uintptr_t retaddr)
1312 {
1313     CPUClass *cc = CPU_GET_CLASS(cpu);
1314 
1315     if (!cpu->ignore_memory_transaction_failures &&
1316         cc->tcg_ops->do_transaction_failed) {
1317         cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
1318                                            access_type, mmu_idx, attrs,
1319                                            response, retaddr);
1320     }
1321 }
1322 
1323 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1324                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
1325                          MMUAccessType access_type, MemOp op)
1326 {
1327     CPUState *cpu = env_cpu(env);
1328     hwaddr mr_offset;
1329     MemoryRegionSection *section;
1330     MemoryRegion *mr;
1331     uint64_t val;
1332     bool locked = false;
1333     MemTxResult r;
1334 
1335     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1336     mr = section->mr;
1337     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1338     cpu->mem_io_pc = retaddr;
1339     if (!cpu->can_do_io) {
1340         cpu_io_recompile(cpu, retaddr);
1341     }
1342 
1343     if (!qemu_mutex_iothread_locked()) {
1344         qemu_mutex_lock_iothread();
1345         locked = true;
1346     }
1347     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1348     if (r != MEMTX_OK) {
1349         hwaddr physaddr = mr_offset +
1350             section->offset_within_address_space -
1351             section->offset_within_region;
1352 
1353         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1354                                mmu_idx, iotlbentry->attrs, r, retaddr);
1355     }
1356     if (locked) {
1357         qemu_mutex_unlock_iothread();
1358     }
1359 
1360     return val;
1361 }
1362 
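/*
 * Illustrative arithmetic for the physaddr reported on failure above
 * (example values only): if the section is mapped at system address
 * 0x10000000 (offset_within_address_space == 0x10000000) and starts at
 * offset 0x1000 within its MemoryRegion (offset_within_region == 0x1000),
 * a failing access at mr_offset == 0x1024 is reported as
 * physaddr == 0x1024 + 0x10000000 - 0x1000 == 0x10000024.
 * The same arithmetic is used by io_writex() below.
 */
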
1363 /*
1364  * Save a potentially trashed IOTLB entry for later lookup by plugin.
1365  * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
1366  * because of the side effect of io_writex changing memory layout.
1367  */
1368 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1369                             MemoryRegionSection *section, hwaddr mr_offset)
1370 {
1371 #ifdef CONFIG_PLUGIN
1372     SavedIOTLB *saved = &cs->saved_iotlb;
1373     saved->addr = addr;
1374     saved->section = section;
1375     saved->mr_offset = mr_offset;
1376 #endif
1377 }
1378 
1379 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1380                       int mmu_idx, uint64_t val, target_ulong addr,
1381                       uintptr_t retaddr, MemOp op)
1382 {
1383     CPUState *cpu = env_cpu(env);
1384     hwaddr mr_offset;
1385     MemoryRegionSection *section;
1386     MemoryRegion *mr;
1387     bool locked = false;
1388     MemTxResult r;
1389 
1390     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1391     mr = section->mr;
1392     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1393     if (!cpu->can_do_io) {
1394         cpu_io_recompile(cpu, retaddr);
1395     }
1396     cpu->mem_io_pc = retaddr;
1397 
1398     /*
1399      * The memory_region_dispatch_write call below may trigger a flush/resize,
1400      * so for plugins we save the iotlb data just in case.
1401      */
1402     save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1403 
1404     if (!qemu_mutex_iothread_locked()) {
1405         qemu_mutex_lock_iothread();
1406         locked = true;
1407     }
1408     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1409     if (r != MEMTX_OK) {
1410         hwaddr physaddr = mr_offset +
1411             section->offset_within_address_space -
1412             section->offset_within_region;
1413 
1414         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1415                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1416                                retaddr);
1417     }
1418     if (locked) {
1419         qemu_mutex_unlock_iothread();
1420     }
1421 }
1422 
1423 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1424 {
1425 #if TCG_OVERSIZED_GUEST
1426     return *(target_ulong *)((uintptr_t)entry + ofs);
1427 #else
1428     /* ofs might correspond to .addr_write, so use qatomic_read */
1429     return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
1430 #endif
1431 }
1432 
1433 /* Return true if ADDR is present in the victim tlb, and has been copied
1434    back to the main tlb.  */
1435 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1436                            size_t elt_ofs, target_ulong page)
1437 {
1438     size_t vidx;
1439 
1440     assert_cpu_is_self(env_cpu(env));
1441     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1442         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1443         target_ulong cmp;
1444 
1445         /* elt_ofs might correspond to .addr_write, so use qatomic_read */
1446 #if TCG_OVERSIZED_GUEST
1447         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1448 #else
1449         cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1450 #endif
1451 
1452         if (cmp == page) {
1453             /* Found entry in victim tlb, swap tlb and iotlb.  */
1454             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1455 
1456             qemu_spin_lock(&env_tlb(env)->c.lock);
1457             copy_tlb_helper_locked(&tmptlb, tlb);
1458             copy_tlb_helper_locked(tlb, vtlb);
1459             copy_tlb_helper_locked(vtlb, &tmptlb);
1460             qemu_spin_unlock(&env_tlb(env)->c.lock);
1461 
1462             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1463             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1464             tmpio = *io; *io = *vio; *vio = tmpio;
1465             return true;
1466         }
1467     }
1468     return false;
1469 }
1470 
1471 /* Macro to call the above, with local variables from the use context.  */
1472 #define VICTIM_TLB_HIT(TY, ADDR) \
1473   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1474                  (ADDR) & TARGET_PAGE_MASK)
1475 
1476 /*
1477  * Return a ram_addr_t for the virtual address for execution.
1478  *
1479  * Return -1 if we can't translate and execute from an entire page
1480  * of RAM.  This will force us to execute by loading and translating
1481  * one insn at a time, without caching.
1482  *
1483  * NOTE: This function will trigger an exception if the page is
1484  * not executable.
1485  */
1486 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1487                                         void **hostp)
1488 {
1489     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1490     uintptr_t index = tlb_index(env, mmu_idx, addr);
1491     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1492     void *p;
1493 
1494     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1495         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1496             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1497             index = tlb_index(env, mmu_idx, addr);
1498             entry = tlb_entry(env, mmu_idx, addr);
1499 
1500             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1501                 /*
1502                  * The MMU protection covers a smaller range than a target
1503                  * page, so we must redo the MMU check for every insn.
1504                  */
1505                 return -1;
1506             }
1507         }
1508         assert(tlb_hit(entry->addr_code, addr));
1509     }
1510 
1511     if (unlikely(entry->addr_code & TLB_MMIO)) {
1512         /* The region is not backed by RAM.  */
1513         if (hostp) {
1514             *hostp = NULL;
1515         }
1516         return -1;
1517     }
1518 
1519     p = (void *)((uintptr_t)addr + entry->addend);
1520     if (hostp) {
1521         *hostp = p;
1522     }
1523     return qemu_ram_addr_from_host_nofail(p);
1524 }
1525 
1526 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1527 {
1528     return get_page_addr_code_hostp(env, addr, NULL);
1529 }
1530 
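/*
 * Handle a write to a page whose TLB entry has TLB_NOTDIRTY set:
 * invalidate any TBs translated from the affected range, then mark the
 * page dirty for the VGA and migration clients.  The notdirty slow path
 * is removed (via tlb_set_dirty) once the page no longer needs write
 * tracking.
 */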
1531 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1532                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1533 {
1534     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1535 
1536     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1537 
1538     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1539         struct page_collection *pages
1540             = page_collection_lock(ram_addr, ram_addr + size);
1541         tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1542         page_collection_unlock(pages);
1543     }
1544 
1545     /*
1546      * Set both VGA and migration bits for simplicity and to remove
1547      * the notdirty callback faster.
1548      */
1549     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1550 
1551     /* We remove the notdirty callback only if the code has been flushed. */
1552     if (!cpu_physical_memory_is_clean(ram_addr)) {
1553         trace_memory_notdirty_set_dirty(mem_vaddr);
1554         tlb_set_dirty(cpu, mem_vaddr);
1555     }
1556 }
1557 
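/*
 * Look up ADDR for ACCESS_TYPE without performing the access.  On
 * success *PHOST is set to the host address and the return value is
 * the set of TLB_* flags for the page (possibly 0).  A failed
 * non-faulting fill returns TLB_INVALID_MASK, and anything not backed
 * by plain RAM returns TLB_MMIO; in both cases *PHOST is NULL.
 */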
1558 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1559                                  int fault_size, MMUAccessType access_type,
1560                                  int mmu_idx, bool nonfault,
1561                                  void **phost, uintptr_t retaddr)
1562 {
1563     uintptr_t index = tlb_index(env, mmu_idx, addr);
1564     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1565     target_ulong tlb_addr, page_addr;
1566     size_t elt_ofs;
1567     int flags;
1568 
1569     switch (access_type) {
1570     case MMU_DATA_LOAD:
1571         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1572         break;
1573     case MMU_DATA_STORE:
1574         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1575         break;
1576     case MMU_INST_FETCH:
1577         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1578         break;
1579     default:
1580         g_assert_not_reached();
1581     }
1582     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1583 
1584     page_addr = addr & TARGET_PAGE_MASK;
1585     if (!tlb_hit_page(tlb_addr, page_addr)) {
1586         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1587             CPUState *cs = env_cpu(env);
1588             CPUClass *cc = CPU_GET_CLASS(cs);
1589 
1590             if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
1591                                        mmu_idx, nonfault, retaddr)) {
1592                 /* Non-faulting page table read failed.  */
1593                 *phost = NULL;
1594                 return TLB_INVALID_MASK;
1595             }
1596 
1597             /* TLB resize via tlb_fill may have moved the entry.  */
1598             entry = tlb_entry(env, mmu_idx, addr);
1599         }
1600         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1601     }
1602     flags = tlb_addr & TLB_FLAGS_MASK;
1603 
1604     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1605     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1606         *phost = NULL;
1607         return TLB_MMIO;
1608     }
1609 
1610     /* Everything else is RAM. */
1611     *phost = (void *)((uintptr_t)addr + entry->addend);
1612     return flags;
1613 }
1614 
1615 int probe_access_flags(CPUArchState *env, target_ulong addr,
1616                        MMUAccessType access_type, int mmu_idx,
1617                        bool nonfault, void **phost, uintptr_t retaddr)
1618 {
1619     int flags;
1620 
1621     flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1622                                   nonfault, phost, retaddr);
1623 
1624     /* Handle clean RAM pages.  */
1625     if (unlikely(flags & TLB_NOTDIRTY)) {
1626         uintptr_t index = tlb_index(env, mmu_idx, addr);
1627         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1628 
1629         notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1630         flags &= ~TLB_NOTDIRTY;
1631     }
1632 
1633     return flags;
1634 }
1635 
1636 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1637                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1638 {
1639     void *host;
1640     int flags;
1641 
1642     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1643 
1644     flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1645                                   false, &host, retaddr);
1646 
1647     /* Per the interface, size == 0 merely faults the access. */
1648     if (size == 0) {
1649         return NULL;
1650     }
1651 
1652     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1653         uintptr_t index = tlb_index(env, mmu_idx, addr);
1654         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1655 
1656         /* Handle watchpoints.  */
1657         if (flags & TLB_WATCHPOINT) {
1658             int wp_access = (access_type == MMU_DATA_STORE
1659                              ? BP_MEM_WRITE : BP_MEM_READ);
1660             cpu_check_watchpoint(env_cpu(env), addr, size,
1661                                  iotlbentry->attrs, wp_access, retaddr);
1662         }
1663 
1664         /* Handle clean RAM pages.  */
1665         if (flags & TLB_NOTDIRTY) {
1666             notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1667         }
1668     }
1669 
1670     return host;
1671 }
1672 
1673 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1674                         MMUAccessType access_type, int mmu_idx)
1675 {
1676     void *host;
1677     int flags;
1678 
1679     flags = probe_access_internal(env, addr, 0, access_type,
1680                                   mmu_idx, true, &host, 0);
1681 
1682     /* No combination of flags is expected by the caller. */
1683     return flags ? NULL : host;
1684 }
1685 
1686 #ifdef CONFIG_PLUGIN
1687 /*
1688  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1689  * This should be a hot path, as we will have just looked this address
1690  * up in the softmmu lookup code (or helper). We do not handle re-fills
1691  * or check the victim table; this is purely informational.
1692  *
1693  * This almost never fails, as the memory access being instrumented
1694  * will have just filled the TLB. The one corner case is io_writex,
1695  * which can cause TLB flushes and potential resizing of the TLBs,
1696  * losing the information we need. In those cases we recover the data
1697  * from a copy of the iotlbentry. As long as this always occurs on the
1698  * same thread (which it will be for a mem callback), this is safe.
1699  */
1700 
1701 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1702                        bool is_store, struct qemu_plugin_hwaddr *data)
1703 {
1704     CPUArchState *env = cpu->env_ptr;
1705     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1706     uintptr_t index = tlb_index(env, mmu_idx, addr);
1707     target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1708 
1709     if (likely(tlb_hit(tlb_addr, addr))) {
1710         /* We must have an iotlb entry for MMIO */
1711         if (tlb_addr & TLB_MMIO) {
1712             CPUIOTLBEntry *iotlbentry;
1713             iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1714             data->is_io = true;
1715             data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1716             data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1717         } else {
1718             data->is_io = false;
1719             data->v.ram.hostaddr = addr + tlbe->addend;
1720         }
1721         return true;
1722     } else {
1723         SavedIOTLB *saved = &cpu->saved_iotlb;
1724         data->is_io = true;
1725         data->v.io.section = saved->section;
1726         data->v.io.offset = saved->mr_offset;
1727         return true;
1728     }
1729 }
1730 
1731 #endif
1732 
1733 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1734  * or I/O operations to proceed.  Return the host address.  */
1735 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1736                                TCGMemOpIdx oi, uintptr_t retaddr)
1737 {
1738     size_t mmu_idx = get_mmuidx(oi);
1739     uintptr_t index = tlb_index(env, mmu_idx, addr);
1740     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1741     target_ulong tlb_addr = tlb_addr_write(tlbe);
1742     MemOp mop = get_memop(oi);
1743     int a_bits = get_alignment_bits(mop);
1744     int s_bits = mop & MO_SIZE;
1745     void *hostaddr;
1746 
1747     /* Adjust the given return address.  */
1748     retaddr -= GETPC_ADJ;
1749 
1750     /* Enforce guest required alignment.  */
1751     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1752         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1753         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1754                              mmu_idx, retaddr);
1755     }
1756 
1757     /* Enforce qemu required alignment.  */
1758     if (unlikely(addr & ((1 << s_bits) - 1))) {
1759         /* We get here if guest alignment was not requested,
1760            or was not enforced by cpu_unaligned_access above.
1761            We might widen the access and emulate, but for now
1762            mark an exception and exit the cpu loop.  */
1763         goto stop_the_world;
1764     }
1765 
1766     /* Check TLB entry and enforce page permissions.  */
1767     if (!tlb_hit(tlb_addr, addr)) {
1768         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1769             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1770                      mmu_idx, retaddr);
1771             index = tlb_index(env, mmu_idx, addr);
1772             tlbe = tlb_entry(env, mmu_idx, addr);
1773         }
1774         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1775     }
1776 
1777     /* Notice an IO access or a needs-MMU-lookup access */
1778     if (unlikely(tlb_addr & TLB_MMIO)) {
1779         /* There's really nothing that can be done to
1780            support this apart from stop-the-world.  */
1781         goto stop_the_world;
1782     }
1783 
1784     /* Let the guest notice RMW on a write-only page.  */
1785     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1786         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1787                  mmu_idx, retaddr);
1788         /* Since we don't support reads and writes to different addresses,
1789            and we do have the proper page loaded for write, this shouldn't
1790            ever return.  But just in case, handle via stop-the-world.  */
1791         goto stop_the_world;
1792     }
1793 
1794     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1795 
1796     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1797         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1798                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1799     }
1800 
1801     return hostaddr;
1802 
1803  stop_the_world:
1804     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1805 }
1806 
1807 /*
1808  * Load Helpers
1809  *
1810  * We support two different access types. SOFTMMU_CODE_ACCESS is
1811  * specifically for reading instructions from system memory. It is
1812  * called by the translation loop and in some helpers where the code
1813  * is disassembled. It shouldn't be called directly by guest code.
1814  */
1815 
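/*
 * A FullLoadHelper is the fully-specialized form of a load helper.
 * load_helper() takes a pointer to the enclosing helper so that its
 * unaligned slow path can recurse into it for each half of the access.
 */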
1816 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1817                                 TCGMemOpIdx oi, uintptr_t retaddr);
1818 
1819 static inline uint64_t QEMU_ALWAYS_INLINE
1820 load_memop(const void *haddr, MemOp op)
1821 {
1822     switch (op) {
1823     case MO_UB:
1824         return ldub_p(haddr);
1825     case MO_BEUW:
1826         return lduw_be_p(haddr);
1827     case MO_LEUW:
1828         return lduw_le_p(haddr);
1829     case MO_BEUL:
1830         return (uint32_t)ldl_be_p(haddr);
1831     case MO_LEUL:
1832         return (uint32_t)ldl_le_p(haddr);
1833     case MO_BEQ:
1834         return ldq_be_p(haddr);
1835     case MO_LEQ:
1836         return ldq_le_p(haddr);
1837     default:
1838         qemu_build_not_reached();
1839     }
1840 }
1841 
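/*
 * Common code for all full load helpers.  OP gives the size and
 * endianness, CODE_READ selects .addr_code/MMU_INST_FETCH instead of
 * .addr_read/MMU_DATA_LOAD, and FULL_LOAD must be the enclosing helper
 * itself so that the unaligned path can recurse with the same memop
 * and mmu index.
 */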
1842 static inline uint64_t QEMU_ALWAYS_INLINE
1843 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1844             uintptr_t retaddr, MemOp op, bool code_read,
1845             FullLoadHelper *full_load)
1846 {
1847     uintptr_t mmu_idx = get_mmuidx(oi);
1848     uintptr_t index = tlb_index(env, mmu_idx, addr);
1849     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1850     target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1851     const size_t tlb_off = code_read ?
1852         offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1853     const MMUAccessType access_type =
1854         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1855     unsigned a_bits = get_alignment_bits(get_memop(oi));
1856     void *haddr;
1857     uint64_t res;
1858     size_t size = memop_size(op);
1859 
1860     /* Handle CPU specific unaligned behaviour */
1861     if (addr & ((1 << a_bits) - 1)) {
1862         cpu_unaligned_access(env_cpu(env), addr, access_type,
1863                              mmu_idx, retaddr);
1864     }
1865 
1866     /* If the TLB entry is for a different page, reload and try again.  */
1867     if (!tlb_hit(tlb_addr, addr)) {
1868         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1869                             addr & TARGET_PAGE_MASK)) {
1870             tlb_fill(env_cpu(env), addr, size,
1871                      access_type, mmu_idx, retaddr);
1872             index = tlb_index(env, mmu_idx, addr);
1873             entry = tlb_entry(env, mmu_idx, addr);
1874         }
1875         tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1876         tlb_addr &= ~TLB_INVALID_MASK;
1877     }
1878 
1879     /* Handle anything that isn't just a straight memory access.  */
1880     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1881         CPUIOTLBEntry *iotlbentry;
1882         bool need_swap;
1883 
1884         /* For anything that is unaligned, recurse through full_load.  */
1885         if ((addr & (size - 1)) != 0) {
1886             goto do_unaligned_access;
1887         }
1888 
1889         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1890 
1891         /* Handle watchpoints.  */
1892         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1893             /* On watchpoint hit, this will longjmp out.  */
1894             cpu_check_watchpoint(env_cpu(env), addr, size,
1895                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
1896         }
1897 
1898         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1899 
1900         /* Handle I/O access.  */
1901         if (likely(tlb_addr & TLB_MMIO)) {
1902             return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1903                             access_type, op ^ (need_swap * MO_BSWAP));
1904         }
1905 
1906         haddr = (void *)((uintptr_t)addr + entry->addend);
1907 
1908         /*
1909          * Keep these two load_memop separate to ensure that the compiler
1910          * is able to fold the entire function to a single instruction.
1911          * There is a build-time assert inside to remind you of this.  ;-)
1912          */
1913         if (unlikely(need_swap)) {
1914             return load_memop(haddr, op ^ MO_BSWAP);
1915         }
1916         return load_memop(haddr, op);
1917     }
1918 
1919     /* Handle slow unaligned access (it spans two pages or IO).  */
1920     if (size > 1
1921         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1922                     >= TARGET_PAGE_SIZE)) {
1923         target_ulong addr1, addr2;
1924         uint64_t r1, r2;
1925         unsigned shift;
1926     do_unaligned_access:
1927         addr1 = addr & ~((target_ulong)size - 1);
1928         addr2 = addr1 + size;
1929         r1 = full_load(env, addr1, oi, retaddr);
1930         r2 = full_load(env, addr2, oi, retaddr);
1931         shift = (addr & (size - 1)) * 8;
1932 
1933         if (memop_big_endian(op)) {
1934             /* Big-endian combine.  */
1935             res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1936         } else {
1937             /* Little-endian combine.  */
1938             res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1939         }
1940         return res & MAKE_64BIT_MASK(0, size * 8);
1941     }
1942 
1943     haddr = (void *)((uintptr_t)addr + entry->addend);
1944     return load_memop(haddr, op);
1945 }
1946 
1947 /*
1948  * For the benefit of TCG generated code, we want to avoid the
1949  * complication of ABI-specific return type promotion and always
1950  * return a value extended to the register size of the host. This is
1951  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1952  * data, and for that we always have uint64_t.
1953  *
1954  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1955  */
1956 
1957 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1958                               TCGMemOpIdx oi, uintptr_t retaddr)
1959 {
1960     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1961 }
1962 
1963 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1964                                      TCGMemOpIdx oi, uintptr_t retaddr)
1965 {
1966     return full_ldub_mmu(env, addr, oi, retaddr);
1967 }
1968 
1969 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1970                                  TCGMemOpIdx oi, uintptr_t retaddr)
1971 {
1972     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1973                        full_le_lduw_mmu);
1974 }
1975 
1976 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1977                                     TCGMemOpIdx oi, uintptr_t retaddr)
1978 {
1979     return full_le_lduw_mmu(env, addr, oi, retaddr);
1980 }
1981 
1982 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1983                                  TCGMemOpIdx oi, uintptr_t retaddr)
1984 {
1985     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1986                        full_be_lduw_mmu);
1987 }
1988 
1989 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1990                                     TCGMemOpIdx oi, uintptr_t retaddr)
1991 {
1992     return full_be_lduw_mmu(env, addr, oi, retaddr);
1993 }
1994 
1995 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1996                                  TCGMemOpIdx oi, uintptr_t retaddr)
1997 {
1998     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1999                        full_le_ldul_mmu);
2000 }
2001 
2002 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
2003                                     TCGMemOpIdx oi, uintptr_t retaddr)
2004 {
2005     return full_le_ldul_mmu(env, addr, oi, retaddr);
2006 }
2007 
2008 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
2009                                  TCGMemOpIdx oi, uintptr_t retaddr)
2010 {
2011     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
2012                        full_be_ldul_mmu);
2013 }
2014 
2015 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
2016                                     TCGMemOpIdx oi, uintptr_t retaddr)
2017 {
2018     return full_be_ldul_mmu(env, addr, oi, retaddr);
2019 }
2020 
2021 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
2022                            TCGMemOpIdx oi, uintptr_t retaddr)
2023 {
2024     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
2025                        helper_le_ldq_mmu);
2026 }
2027 
2028 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
2029                            TCGMemOpIdx oi, uintptr_t retaddr)
2030 {
2031     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
2032                        helper_be_ldq_mmu);
2033 }
2034 
2035 /*
2036  * Provide signed versions of the load routines as well.  We can of course
2037  * avoid this for 64-bit data, or for 32-bit data on a 32-bit host.
2038  */
2039 
2040 
2041 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
2042                                      TCGMemOpIdx oi, uintptr_t retaddr)
2043 {
2044     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
2045 }
2046 
2047 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
2048                                     TCGMemOpIdx oi, uintptr_t retaddr)
2049 {
2050     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
2051 }
2052 
2053 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
2054                                     TCGMemOpIdx oi, uintptr_t retaddr)
2055 {
2056     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
2057 }
2058 
2059 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
2060                                     TCGMemOpIdx oi, uintptr_t retaddr)
2061 {
2062     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
2063 }
2064 
2065 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
2066                                     TCGMemOpIdx oi, uintptr_t retaddr)
2067 {
2068     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
2069 }
2070 
2071 /*
2072  * Load helpers for cpu_ldst.h.
2073  */
2074 
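/*
 * cpu_load_helper() wraps a full load helper with the tracing and
 * plugin callbacks issued by the cpu_ld*_mmuidx_ra() entry points
 * below.  MO_SIGN is stripped before building the memop index; sign
 * extension is applied by the callers via a cast.
 */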
2075 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
2076                                        int mmu_idx, uintptr_t retaddr,
2077                                        MemOp op, FullLoadHelper *full_load)
2078 {
2079     uint16_t meminfo;
2080     TCGMemOpIdx oi;
2081     uint64_t ret;
2082 
2083     meminfo = trace_mem_get_info(op, mmu_idx, false);
2084     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2085 
2086     op &= ~MO_SIGN;
2087     oi = make_memop_idx(op, mmu_idx);
2088     ret = full_load(env, addr, oi, retaddr);
2089 
2090     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2091 
2092     return ret;
2093 }
2094 
2095 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2096                             int mmu_idx, uintptr_t ra)
2097 {
2098     return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
2099 }
2100 
2101 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2102                        int mmu_idx, uintptr_t ra)
2103 {
2104     return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
2105                                    full_ldub_mmu);
2106 }
2107 
2108 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2109                                int mmu_idx, uintptr_t ra)
2110 {
2111     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
2112 }
2113 
2114 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2115                           int mmu_idx, uintptr_t ra)
2116 {
2117     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
2118                                     full_be_lduw_mmu);
2119 }
2120 
2121 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2122                               int mmu_idx, uintptr_t ra)
2123 {
2124     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
2125 }
2126 
2127 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2128                               int mmu_idx, uintptr_t ra)
2129 {
2130     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
2131 }
2132 
2133 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2134                                int mmu_idx, uintptr_t ra)
2135 {
2136     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
2137 }
2138 
2139 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2140                           int mmu_idx, uintptr_t ra)
2141 {
2142     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
2143                                     full_le_lduw_mmu);
2144 }
2145 
2146 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2147                               int mmu_idx, uintptr_t ra)
2148 {
2149     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
2150 }
2151 
2152 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2153                               int mmu_idx, uintptr_t ra)
2154 {
2155     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
2156 }
2157 
2158 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
2159                           uintptr_t retaddr)
2160 {
2161     return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2162 }
2163 
2164 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2165 {
2166     return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2167 }
2168 
2169 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
2170                              uintptr_t retaddr)
2171 {
2172     return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2173 }
2174 
2175 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2176 {
2177     return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2178 }
2179 
2180 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
2181                             uintptr_t retaddr)
2182 {
2183     return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2184 }
2185 
2186 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
2187                             uintptr_t retaddr)
2188 {
2189     return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2190 }
2191 
2192 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
2193                              uintptr_t retaddr)
2194 {
2195     return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2196 }
2197 
2198 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2199 {
2200     return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2201 }
2202 
2203 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
2204                             uintptr_t retaddr)
2205 {
2206     return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2207 }
2208 
2209 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
2210                             uintptr_t retaddr)
2211 {
2212     return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2213 }
2214 
2215 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
2216 {
2217     return cpu_ldub_data_ra(env, ptr, 0);
2218 }
2219 
2220 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
2221 {
2222     return cpu_ldsb_data_ra(env, ptr, 0);
2223 }
2224 
2225 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
2226 {
2227     return cpu_lduw_be_data_ra(env, ptr, 0);
2228 }
2229 
2230 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
2231 {
2232     return cpu_ldsw_be_data_ra(env, ptr, 0);
2233 }
2234 
2235 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
2236 {
2237     return cpu_ldl_be_data_ra(env, ptr, 0);
2238 }
2239 
2240 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
2241 {
2242     return cpu_ldq_be_data_ra(env, ptr, 0);
2243 }
2244 
2245 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
2246 {
2247     return cpu_lduw_le_data_ra(env, ptr, 0);
2248 }
2249 
2250 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
2251 {
2252     return cpu_ldsw_le_data_ra(env, ptr, 0);
2253 }
2254 
2255 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
2256 {
2257     return cpu_ldl_le_data_ra(env, ptr, 0);
2258 }
2259 
2260 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
2261 {
2262     return cpu_ldq_le_data_ra(env, ptr, 0);
2263 }
2264 
2265 /*
2266  * Store Helpers
2267  */
2268 
2269 static inline void QEMU_ALWAYS_INLINE
2270 store_memop(void *haddr, uint64_t val, MemOp op)
2271 {
2272     switch (op) {
2273     case MO_UB:
2274         stb_p(haddr, val);
2275         break;
2276     case MO_BEUW:
2277         stw_be_p(haddr, val);
2278         break;
2279     case MO_LEUW:
2280         stw_le_p(haddr, val);
2281         break;
2282     case MO_BEUL:
2283         stl_be_p(haddr, val);
2284         break;
2285     case MO_LEUL:
2286         stl_le_p(haddr, val);
2287         break;
2288     case MO_BEQ:
2289         stq_be_p(haddr, val);
2290         break;
2291     case MO_LEQ:
2292         stq_le_p(haddr, val);
2293         break;
2294     default:
2295         qemu_build_not_reached();
2296     }
2297 }
2298 
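/*
 * Slow path for a store that crosses a page boundary: ensure both
 * pages are present in the TLB, check watchpoints on both halves
 * before any byte is written, then store byte-by-byte via
 * helper_ret_stb_mmu().
 */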
2299 static void __attribute__((noinline))
2300 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
2301                        uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
2302                        bool big_endian)
2303 {
2304     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2305     uintptr_t index, index2;
2306     CPUTLBEntry *entry, *entry2;
2307     target_ulong page2, tlb_addr, tlb_addr2;
2308     TCGMemOpIdx oi;
2309     size_t size2;
2310     int i;
2311 
2312     /*
2313      * Ensure the second page is in the TLB.  Note that the first page
2314      * is already guaranteed to be filled, and that the second page
2315      * cannot evict the first.
2316      */
2317     page2 = (addr + size) & TARGET_PAGE_MASK;
2318     size2 = (addr + size) & ~TARGET_PAGE_MASK;
2319     index2 = tlb_index(env, mmu_idx, page2);
2320     entry2 = tlb_entry(env, mmu_idx, page2);
2321 
2322     tlb_addr2 = tlb_addr_write(entry2);
2323     if (!tlb_hit_page(tlb_addr2, page2)) {
2324         if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2325             tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2326                      mmu_idx, retaddr);
2327             index2 = tlb_index(env, mmu_idx, page2);
2328             entry2 = tlb_entry(env, mmu_idx, page2);
2329         }
2330         tlb_addr2 = tlb_addr_write(entry2);
2331     }
2332 
2333     index = tlb_index(env, mmu_idx, addr);
2334     entry = tlb_entry(env, mmu_idx, addr);
2335     tlb_addr = tlb_addr_write(entry);
2336 
2337     /*
2338      * Handle watchpoints.  Since this may trap, all checks
2339      * must happen before any store.
2340      */
2341     if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2342         cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2343                              env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2344                              BP_MEM_WRITE, retaddr);
2345     }
2346     if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2347         cpu_check_watchpoint(env_cpu(env), page2, size2,
2348                              env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2349                              BP_MEM_WRITE, retaddr);
2350     }
2351 
2352     /*
2353      * XXX: not efficient, but simple.
2354      * This loop must go in the forward direction to avoid issues
2355      * with self-modifying code running on 64-bit Windows guests.
2356      */
2357     oi = make_memop_idx(MO_UB, mmu_idx);
2358     if (big_endian) {
2359         for (i = 0; i < size; ++i) {
2360             /* Big-endian extract.  */
2361             uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
2362             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2363         }
2364     } else {
2365         for (i = 0; i < size; ++i) {
2366             /* Little-endian extract.  */
2367             uint8_t val8 = val >> (i * 8);
2368             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2369         }
2370     }
2371 }
2372 
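/*
 * Common code for all full store helpers; the structure mirrors
 * load_helper() above, with the page-crossing case delegated to
 * store_helper_unaligned().
 */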
2373 static inline void QEMU_ALWAYS_INLINE
2374 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2375              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
2376 {
2377     uintptr_t mmu_idx = get_mmuidx(oi);
2378     uintptr_t index = tlb_index(env, mmu_idx, addr);
2379     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
2380     target_ulong tlb_addr = tlb_addr_write(entry);
2381     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2382     unsigned a_bits = get_alignment_bits(get_memop(oi));
2383     void *haddr;
2384     size_t size = memop_size(op);
2385 
2386     /* Handle CPU specific unaligned behaviour */
2387     if (addr & ((1 << a_bits) - 1)) {
2388         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
2389                              mmu_idx, retaddr);
2390     }
2391 
2392     /* If the TLB entry is for a different page, reload and try again.  */
2393     if (!tlb_hit(tlb_addr, addr)) {
2394         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
2395             addr & TARGET_PAGE_MASK)) {
2396             tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
2397                      mmu_idx, retaddr);
2398             index = tlb_index(env, mmu_idx, addr);
2399             entry = tlb_entry(env, mmu_idx, addr);
2400         }
2401         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
2402     }
2403 
2404     /* Handle anything that isn't just a straight memory access.  */
2405     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
2406         CPUIOTLBEntry *iotlbentry;
2407         bool need_swap;
2408 
2409         /* For anything that is unaligned, recurse through byte stores.  */
2410         if ((addr & (size - 1)) != 0) {
2411             goto do_unaligned_access;
2412         }
2413 
2414         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
2415 
2416         /* Handle watchpoints.  */
2417         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2418             /* On watchpoint hit, this will longjmp out.  */
2419             cpu_check_watchpoint(env_cpu(env), addr, size,
2420                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
2421         }
2422 
2423         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
2424 
2425         /* Handle I/O access.  */
2426         if (tlb_addr & TLB_MMIO) {
2427             io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
2428                       op ^ (need_swap * MO_BSWAP));
2429             return;
2430         }
2431 
2432         /* Ignore writes to ROM.  */
2433         if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
2434             return;
2435         }
2436 
2437         /* Handle clean RAM pages.  */
2438         if (tlb_addr & TLB_NOTDIRTY) {
2439             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
2440         }
2441 
2442         haddr = (void *)((uintptr_t)addr + entry->addend);
2443 
2444         /*
2445          * Keep these two store_memop separate to ensure that the compiler
2446          * is able to fold the entire function to a single instruction.
2447          * There is a build-time assert inside to remind you of this.  ;-)
2448          */
2449         if (unlikely(need_swap)) {
2450             store_memop(haddr, val, op ^ MO_BSWAP);
2451         } else {
2452             store_memop(haddr, val, op);
2453         }
2454         return;
2455     }
2456 
2457     /* Handle slow unaligned access (it spans two pages or IO).  */
2458     if (size > 1
2459         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
2460                      >= TARGET_PAGE_SIZE)) {
2461     do_unaligned_access:
2462         store_helper_unaligned(env, addr, val, retaddr, size,
2463                                mmu_idx, memop_big_endian(op));
2464         return;
2465     }
2466 
2467     haddr = (void *)((uintptr_t)addr + entry->addend);
2468     store_memop(haddr, val, op);
2469 }
2470 
2471 void __attribute__((noinline))
2472 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2473                    TCGMemOpIdx oi, uintptr_t retaddr)
2474 {
2475     store_helper(env, addr, val, oi, retaddr, MO_UB);
2476 }
2477 
2478 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2479                        TCGMemOpIdx oi, uintptr_t retaddr)
2480 {
2481     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2482 }
2483 
2484 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2485                        TCGMemOpIdx oi, uintptr_t retaddr)
2486 {
2487     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2488 }
2489 
2490 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2491                        TCGMemOpIdx oi, uintptr_t retaddr)
2492 {
2493     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2494 }
2495 
2496 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2497                        TCGMemOpIdx oi, uintptr_t retaddr)
2498 {
2499     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2500 }
2501 
2502 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2503                        TCGMemOpIdx oi, uintptr_t retaddr)
2504 {
2505     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2506 }
2507 
2508 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2509                        TCGMemOpIdx oi, uintptr_t retaddr)
2510 {
2511     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2512 }
2513 
2514 /*
2515  * Store Helpers for cpu_ldst.h
2516  */
2517 
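/*
 * As with cpu_load_helper() above, cpu_store_helper() wraps
 * store_helper() with the tracing and plugin callbacks issued by the
 * cpu_st*_mmuidx_ra() entry points below.
 */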
2518 static inline void QEMU_ALWAYS_INLINE
2519 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2520                  int mmu_idx, uintptr_t retaddr, MemOp op)
2521 {
2522     TCGMemOpIdx oi;
2523     uint16_t meminfo;
2524 
2525     meminfo = trace_mem_get_info(op, mmu_idx, true);
2526     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2527 
2528     oi = make_memop_idx(op, mmu_idx);
2529     store_helper(env, addr, val, oi, retaddr, op);
2530 
2531     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2532 }
2533 
2534 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2535                        int mmu_idx, uintptr_t retaddr)
2536 {
2537     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2538 }
2539 
2540 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2541                           int mmu_idx, uintptr_t retaddr)
2542 {
2543     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2544 }
2545 
2546 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2547                           int mmu_idx, uintptr_t retaddr)
2548 {
2549     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2550 }
2551 
2552 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2553                           int mmu_idx, uintptr_t retaddr)
2554 {
2555     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2556 }
2557 
2558 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2559                           int mmu_idx, uintptr_t retaddr)
2560 {
2561     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2562 }
2563 
2564 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2565                           int mmu_idx, uintptr_t retaddr)
2566 {
2567     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2568 }
2569 
2570 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2571                           int mmu_idx, uintptr_t retaddr)
2572 {
2573     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2574 }
2575 
2576 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2577                      uint32_t val, uintptr_t retaddr)
2578 {
2579     cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2580 }
2581 
2582 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2583                         uint32_t val, uintptr_t retaddr)
2584 {
2585     cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2586 }
2587 
2588 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2589                         uint32_t val, uintptr_t retaddr)
2590 {
2591     cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2592 }
2593 
2594 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2595                         uint64_t val, uintptr_t retaddr)
2596 {
2597     cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2598 }
2599 
2600 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2601                         uint32_t val, uintptr_t retaddr)
2602 {
2603     cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2604 }
2605 
2606 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2607                         uint32_t val, uintptr_t retaddr)
2608 {
2609     cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2610 }
2611 
2612 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2613                         uint64_t val, uintptr_t retaddr)
2614 {
2615     cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2616 }
2617 
2618 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2619 {
2620     cpu_stb_data_ra(env, ptr, val, 0);
2621 }
2622 
2623 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2624 {
2625     cpu_stw_be_data_ra(env, ptr, val, 0);
2626 }
2627 
2628 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2629 {
2630     cpu_stl_be_data_ra(env, ptr, val, 0);
2631 }
2632 
2633 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2634 {
2635     cpu_stq_be_data_ra(env, ptr, val, 0);
2636 }
2637 
2638 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2639 {
2640     cpu_stw_le_data_ra(env, ptr, val, 0);
2641 }
2642 
2643 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2644 {
2645     cpu_stl_le_data_ra(env, ptr, val, 0);
2646 }
2647 
2648 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2649 {
2650     cpu_stq_le_data_ra(env, ptr, val, 0);
2651 }
2652 
2653 /* The first set of helpers allows OI and RETADDR to be passed in, which
2654    makes them callable from other helpers.  */
2655 
2656 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
2657 #define ATOMIC_NAME(X) \
2658     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
2659 #define ATOMIC_MMU_DECLS
2660 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
2661 #define ATOMIC_MMU_CLEANUP
2662 #define ATOMIC_MMU_IDX   get_mmuidx(oi)
2663 
2664 #include "atomic_common.c.inc"
2665 
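/*
 * Each inclusion of atomic_template.h below instantiates the
 * cmpxchg/xchg/fetch-op helpers for one access size, all of which use
 * atomic_mmu_lookup() above to obtain the host address.
 */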
2666 #define DATA_SIZE 1
2667 #include "atomic_template.h"
2668 
2669 #define DATA_SIZE 2
2670 #include "atomic_template.h"
2671 
2672 #define DATA_SIZE 4
2673 #include "atomic_template.h"
2674 
2675 #ifdef CONFIG_ATOMIC64
2676 #define DATA_SIZE 8
2677 #include "atomic_template.h"
2678 #endif
2679 
2680 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2681 #define DATA_SIZE 16
2682 #include "atomic_template.h"
2683 #endif
2684 
2685 /* Second set of helpers are directly callable from TCG as helpers.  */
2686 
2687 #undef EXTRA_ARGS
2688 #undef ATOMIC_NAME
2689 #undef ATOMIC_MMU_LOOKUP
2690 #define EXTRA_ARGS         , TCGMemOpIdx oi
2691 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2692 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
2693 
2694 #define DATA_SIZE 1
2695 #include "atomic_template.h"
2696 
2697 #define DATA_SIZE 2
2698 #include "atomic_template.h"
2699 
2700 #define DATA_SIZE 4
2701 #include "atomic_template.h"
2702 
2703 #ifdef CONFIG_ATOMIC64
2704 #define DATA_SIZE 8
2705 #include "atomic_template.h"
2706 #endif
2707 #undef ATOMIC_MMU_IDX
2708 
2709 /* Code access functions.  */
2710 
2711 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
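/*
 * These call load_helper() with code_read set, so the lookup uses
 * .addr_code and fills the TLB with MMU_INST_FETCH.  retaddr is 0
 * because these are not reached from translated code.
 */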
2712                                TCGMemOpIdx oi, uintptr_t retaddr)
2713 {
2714     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2715 }
2716 
2717 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2718 {
2719     TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2720     return full_ldub_code(env, addr, oi, 0);
2721 }
2722 
2723 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2724                                TCGMemOpIdx oi, uintptr_t retaddr)
2725 {
2726     return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2727 }
2728 
2729 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2730 {
2731     TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2732     return full_lduw_code(env, addr, oi, 0);
2733 }
2734 
2735 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2736                               TCGMemOpIdx oi, uintptr_t retaddr)
2737 {
2738     return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2739 }
2740 
2741 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2742 {
2743     TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2744     return full_ldl_code(env, addr, oi, 0);
2745 }
2746 
2747 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2748                               TCGMemOpIdx oi, uintptr_t retaddr)
2749 {
2750     return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2751 }
2752 
2753 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2754 {
2755     TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2756     return full_ldq_code(env, addr, oi, 0);
2757 }
2758