xref: /openbmc/qemu/accel/tcg/cputlb.c (revision b56668bbe15cb094b4d928870635d079825f3731)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 
38 /* DEBUG defines; enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
39 /* #define DEBUG_TLB */
40 /* #define DEBUG_TLB_LOG */
41 
42 #ifdef DEBUG_TLB
43 # define DEBUG_TLB_GATE 1
44 # ifdef DEBUG_TLB_LOG
45 #  define DEBUG_TLB_LOG_GATE 1
46 # else
47 #  define DEBUG_TLB_LOG_GATE 0
48 # endif
49 #else
50 # define DEBUG_TLB_GATE 0
51 # define DEBUG_TLB_LOG_GATE 0
52 #endif
53 
54 #define tlb_debug(fmt, ...) do { \
55     if (DEBUG_TLB_LOG_GATE) { \
56         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
57                       ## __VA_ARGS__); \
58     } else if (DEBUG_TLB_GATE) { \
59         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
60     } \
61 } while (0)
62 
63 #define assert_cpu_is_self(cpu) do {                              \
64         if (DEBUG_TLB_GATE) {                                     \
65             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
66         }                                                         \
67     } while (0)
68 
69 /* run_on_cpu_data.target_ptr should always be big enough for a
70  * target_ulong even on 32-bit builds */
71 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
72 
73 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
74  */
75 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
76 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
77 
78 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
79 {
80     return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
81 }
82 
83 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
84                              size_t max_entries)
85 {
86     desc->window_begin_ns = ns;
87     desc->window_max_entries = max_entries;
88 }
89 
90 static void tlb_dyn_init(CPUArchState *env)
91 {
92     int i;
93 
94     for (i = 0; i < NB_MMU_MODES; i++) {
95         CPUTLBDesc *desc = &env_tlb(env)->d[i];
96         size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
97 
98         tlb_window_reset(desc, get_clock_realtime(), 0);
99         desc->n_used_entries = 0;
100         env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
101         env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
102         env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
103     }
104 }
105 
106 /**
107  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
108  * @env: CPU that owns the TLB
109  * @mmu_idx: MMU index of the TLB
110  *
111  * Called with tlb_lock held.
112  *
113  * We have two main constraints when resizing a TLB: (1) we only resize it
114  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
115  * the array or unnecessarily flushing it), which means we do not control how
116  * frequently the resizing can occur; (2) we don't have access to the guest's
117  * future scheduling decisions, and therefore have to decide the magnitude of
118  * the resize based on past observations.
119  *
120  * In general, a memory-hungry process can benefit greatly from an appropriately
121  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
122  * we just have to make the TLB as large as possible; while an oversized TLB
123  * results in minimal TLB miss rates, it also takes longer to be flushed
124  * (flushes can be _very_ frequent), and the reduced locality can also hurt
125  * performance.
126  *
127  * To achieve near-optimal performance for all kinds of workloads, we:
128  *
129  * 1. Aggressively increase the size of the TLB when the use rate of the
130  * TLB being flushed is high, since it is likely that in the near future this
131  * memory-hungry process will execute again, and its memory hungriness will
132  * probably be similar.
133  *
134  * 2. Slowly reduce the size of the TLB as the use rate declines over a
135  * reasonably large time window. The rationale is that if in such a time window
136  * we have not observed a high TLB use rate, it is likely that we won't observe
137  * it in the near future. In that case, once a time window expires we downsize
138  * the TLB to match the maximum use rate observed in the window.
139  *
140  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
141  * since in that range performance is likely near-optimal. Recall that the TLB
142  * is direct mapped, so we want the use rate to be low (or at least not too
143  * high), since otherwise we are likely to have a significant amount of
144  * conflict misses.
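 *
 * Worked example (illustrative numbers only): with old_size == 1024 entries
 * and window_max_entries == 800, the use rate is 800 * 100 / 1024 == 78%, so
 * the table is doubled (capped at CPU_TLB_DYN_MAX_BITS). If instead a window
 * expires having seen at most 200 used entries (a 19% rate), pow2ceil(200)
 * == 256 would give an expected rate of 78%, which exceeds 70%, so the new
 * size becomes 512 entries for an expected use rate of about 39%.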
145  */
146 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
147 {
148     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
149     size_t old_size = tlb_n_entries(env, mmu_idx);
150     size_t rate;
151     size_t new_size = old_size;
152     int64_t now = get_clock_realtime();
153     int64_t window_len_ms = 100;
154     int64_t window_len_ns = window_len_ms * 1000 * 1000;
155     bool window_expired = now > desc->window_begin_ns + window_len_ns;
156 
157     if (desc->n_used_entries > desc->window_max_entries) {
158         desc->window_max_entries = desc->n_used_entries;
159     }
160     rate = desc->window_max_entries * 100 / old_size;
161 
162     if (rate > 70) {
163         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
164     } else if (rate < 30 && window_expired) {
165         size_t ceil = pow2ceil(desc->window_max_entries);
166         size_t expected_rate = desc->window_max_entries * 100 / ceil;
167 
168         /*
169          * Avoid undersizing when the max number of entries seen is just below
170          * a pow2. For instance, if max_entries == 1025, the expected use rate
171          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
172          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
173          * later. Thus, make sure that the expected use rate remains below 70%.
174          * (and since we double the size, that means the lowest rate we'd
175          * expect to get is 35%, which is still in the 30-70% range where
176          * we consider that the size is appropriate.)
177          */
178         if (expected_rate > 70) {
179             ceil *= 2;
180         }
181         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
182     }
183 
184     if (new_size == old_size) {
185         if (window_expired) {
186             tlb_window_reset(desc, now, desc->n_used_entries);
187         }
188         return;
189     }
190 
191     g_free(env_tlb(env)->f[mmu_idx].table);
192     g_free(env_tlb(env)->d[mmu_idx].iotlb);
193 
194     tlb_window_reset(desc, now, 0);
195     /* desc->n_used_entries is cleared by the caller */
196     env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
197     env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
198     env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
199     /*
200      * If the allocations fail, try smaller sizes. We just freed some
201      * memory, so going back to half of new_size has a good chance of working.
202      * Increased memory pressure elsewhere in the system might cause the
203      * allocations to fail though, so we progressively reduce the allocation
204      * size, aborting if we cannot even allocate the smallest TLB we support.
205      */
206     while (env_tlb(env)->f[mmu_idx].table == NULL ||
207            env_tlb(env)->d[mmu_idx].iotlb == NULL) {
208         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
209             error_report("%s: %s", __func__, strerror(errno));
210             abort();
211         }
212         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
213         env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
214 
215         g_free(env_tlb(env)->f[mmu_idx].table);
216         g_free(env_tlb(env)->d[mmu_idx].iotlb);
217         env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
218         env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
219     }
220 }
221 
222 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
223 {
224     tlb_mmu_resize_locked(env, mmu_idx);
225     memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
226     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
227 }
228 
229 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
230 {
231     env_tlb(env)->d[mmu_idx].n_used_entries++;
232 }
233 
234 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
235 {
236     env_tlb(env)->d[mmu_idx].n_used_entries--;
237 }
238 
239 void tlb_init(CPUState *cpu)
240 {
241     CPUArchState *env = cpu->env_ptr;
242 
243     qemu_spin_init(&env_tlb(env)->c.lock);
244 
245     /* Ensure that cpu_reset performs a full flush.  */
246     env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
247 
248     tlb_dyn_init(env);
249 }
250 
251 /* flush_all_helper: run fn across all cpus
252  *
253  * Queues fn as asynchronous work on every cpu other than src. Callers
254  * that need a synchronisation point also queue the src cpu's helper as
255  * "safe" work (async_safe_run_on_cpu), so that all queued work finishes
256  * before execution starts again.
257  */
258 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
259                              run_on_cpu_data d)
260 {
261     CPUState *cpu;
262 
263     CPU_FOREACH(cpu) {
264         if (cpu != src) {
265             async_run_on_cpu(cpu, fn, d);
266         }
267     }
268 }
269 
270 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
271 {
272     CPUState *cpu;
273     size_t full = 0, part = 0, elide = 0;
274 
275     CPU_FOREACH(cpu) {
276         CPUArchState *env = cpu->env_ptr;
277 
278         full += atomic_read(&env_tlb(env)->c.full_flush_count);
279         part += atomic_read(&env_tlb(env)->c.part_flush_count);
280         elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
281     }
282     *pfull = full;
283     *ppart = part;
284     *pelide = elide;
285 }
286 
287 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
288 {
289     tlb_table_flush_by_mmuidx(env, mmu_idx);
290     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
291     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
292     env_tlb(env)->d[mmu_idx].vindex = 0;
293     memset(env_tlb(env)->d[mmu_idx].vtable, -1,
294            sizeof(env_tlb(env)->d[0].vtable));
295 }
296 
297 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
298 {
299     CPUArchState *env = cpu->env_ptr;
300     uint16_t asked = data.host_int;
301     uint16_t all_dirty, work, to_clean;
302 
303     assert_cpu_is_self(cpu);
304 
305     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
306 
307     qemu_spin_lock(&env_tlb(env)->c.lock);
308 
309     all_dirty = env_tlb(env)->c.dirty;
310     to_clean = asked & all_dirty;
311     all_dirty &= ~to_clean;
312     env_tlb(env)->c.dirty = all_dirty;
313 
314     for (work = to_clean; work != 0; work &= work - 1) {
315         int mmu_idx = ctz32(work);
316         tlb_flush_one_mmuidx_locked(env, mmu_idx);
317     }
318 
319     qemu_spin_unlock(&env_tlb(env)->c.lock);
320 
321     cpu_tb_jmp_cache_clear(cpu);
322 
323     if (to_clean == ALL_MMUIDX_BITS) {
324         atomic_set(&env_tlb(env)->c.full_flush_count,
325                    env_tlb(env)->c.full_flush_count + 1);
326     } else {
327         atomic_set(&env_tlb(env)->c.part_flush_count,
328                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
329         if (to_clean != asked) {
330             atomic_set(&env_tlb(env)->c.elide_flush_count,
331                        env_tlb(env)->c.elide_flush_count +
332                        ctpop16(asked & ~to_clean));
333         }
334     }
335 }
336 
337 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
338 {
339     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
340 
341     if (cpu->created && !qemu_cpu_is_self(cpu)) {
342         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
343                          RUN_ON_CPU_HOST_INT(idxmap));
344     } else {
345         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
346     }
347 }
348 
349 void tlb_flush(CPUState *cpu)
350 {
351     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
352 }
353 
354 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
355 {
356     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
357 
358     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
359 
360     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
361     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
362 }
363 
364 void tlb_flush_all_cpus(CPUState *src_cpu)
365 {
366     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
367 }
368 
369 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
370 {
371     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
372 
373     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
374 
375     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
376     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
377 }
378 
379 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
380 {
381     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
382 }
383 
384 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
385                                         target_ulong page)
386 {
387     return tlb_hit_page(tlb_entry->addr_read, page) ||
388            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
389            tlb_hit_page(tlb_entry->addr_code, page);
390 }
391 
392 /**
393  * tlb_entry_is_empty - return true if the entry is not in use
394  * @te: pointer to CPUTLBEntry
395  */
396 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
397 {
398     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
399 }
400 
401 /* Called with tlb_c.lock held */
402 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
403                                           target_ulong page)
404 {
405     if (tlb_hit_page_anyprot(tlb_entry, page)) {
406         memset(tlb_entry, -1, sizeof(*tlb_entry));
407         return true;
408     }
409     return false;
410 }
411 
412 /* Called with tlb_c.lock held */
413 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
414                                               target_ulong page)
415 {
416     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
417     int k;
418 
419     assert_cpu_is_self(env_cpu(env));
420     for (k = 0; k < CPU_VTLB_SIZE; k++) {
421         if (tlb_flush_entry_locked(&d->vtable[k], page)) {
422             tlb_n_used_entries_dec(env, mmu_idx);
423         }
424     }
425 }
426 
427 static void tlb_flush_page_locked(CPUArchState *env, int midx,
428                                   target_ulong page)
429 {
430     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
431     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
432 
433     /* Check if we need to flush due to large pages.  */
434     if ((page & lp_mask) == lp_addr) {
435         tlb_debug("forcing full flush midx %d ("
436                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
437                   midx, lp_addr, lp_mask);
438         tlb_flush_one_mmuidx_locked(env, midx);
439     } else {
440         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
441             tlb_n_used_entries_dec(env, midx);
442         }
443         tlb_flush_vtlb_page_locked(env, midx, page);
444     }
445 }
446 
447 /* As we are going to hijack the bottom bits of the page address for an
448  * mmuidx bit mask, fail the build if we can't do that.
449  */
450 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
451 
452 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
453                                                 run_on_cpu_data data)
454 {
455     CPUArchState *env = cpu->env_ptr;
456     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
457     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
458     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
459     int mmu_idx;
460 
461     assert_cpu_is_self(cpu);
462 
463     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
464               addr, mmu_idx_bitmap);
465 
466     qemu_spin_lock(&env_tlb(env)->c.lock);
467     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
468         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
469             tlb_flush_page_locked(env, mmu_idx, addr);
470         }
471     }
472     qemu_spin_unlock(&env_tlb(env)->c.lock);
473 
474     tb_flush_jmp_cache(cpu, addr);
475 }
476 
477 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
478 {
479     target_ulong addr_and_mmu_idx;
480 
481     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
482 
483     /* This should already be page aligned */
484     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
485     addr_and_mmu_idx |= idxmap;
486 
487     if (!qemu_cpu_is_self(cpu)) {
488         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
489                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
490     } else {
491         tlb_flush_page_by_mmuidx_async_work(
492             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
493     }
494 }
495 
496 void tlb_flush_page(CPUState *cpu, target_ulong addr)
497 {
498     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
499 }
500 
501 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
502                                        uint16_t idxmap)
503 {
504     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
505     target_ulong addr_and_mmu_idx;
506 
507     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
508 
509     /* This should already be page aligned */
510     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
511     addr_and_mmu_idx |= idxmap;
512 
513     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
514     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
515 }
516 
517 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
518 {
519     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
520 }
521 
522 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
523                                               target_ulong addr,
524                                               uint16_t idxmap)
525 {
526     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
527     target_ulong addr_and_mmu_idx;
528 
529     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
530 
531     /* This should already be page aligned */
532     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
533     addr_and_mmu_idx |= idxmap;
534 
535     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
536     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
537 }
538 
539 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
540 {
541     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
542 }
543 
544 /* update the TLBs so that writes to code in the RAM page 'ram_addr'
545    can be detected */
546 void tlb_protect_code(ram_addr_t ram_addr)
547 {
548     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
549                                              DIRTY_MEMORY_CODE);
550 }
551 
552 /* update the TLB so that writes in the RAM page 'ram_addr' are no longer
553    tested for self-modifying code */
554 void tlb_unprotect_code(ram_addr_t ram_addr)
555 {
556     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
557 }
558 
559 
560 /*
561  * Dirty write flag handling
562  *
563  * When the TCG code writes to a location it looks up the address in
564  * the TLB and uses that data to compute the final address. If any of
565  * the lower bits of the address are set then the slow path is forced.
566  * There are a number of reasons to do this but for normal RAM the
567  * most usual is detecting writes to code regions which may invalidate
568  * generated code.
569  *
570  * Other vCPUs might be reading their TLBs during guest execution, so we update
571  * te->addr_write with atomic_set. We don't need to worry about this for
572  * oversized guests as MTTCG is disabled for them.
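 *
 * For illustration: a store to a RAM page that is still clean for
 * DIRTY_MEMORY_CODE sees TLB_NOTDIRTY in its addr_write entry, is forced
 * onto the slow path, and notdirty_write() can then invalidate any TBs on
 * that page before the data lands and the page is marked dirty.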
573  *
574  * Called with tlb_c.lock held.
575  */
576 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
577                                          uintptr_t start, uintptr_t length)
578 {
579     uintptr_t addr = tlb_entry->addr_write;
580 
581     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
582                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
583         addr &= TARGET_PAGE_MASK;
584         addr += tlb_entry->addend;
585         if ((addr - start) < length) {
586 #if TCG_OVERSIZED_GUEST
587             tlb_entry->addr_write |= TLB_NOTDIRTY;
588 #else
589             atomic_set(&tlb_entry->addr_write,
590                        tlb_entry->addr_write | TLB_NOTDIRTY);
591 #endif
592         }
593     }
594 }
595 
596 /*
597  * Called with tlb_c.lock held.
598  * Called only from the vCPU context, i.e. the TLB's owner thread.
599  */
600 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
601 {
602     *d = *s;
603 }
604 
605 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
606  * the target vCPU).
607  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
608  * thing actually updated is the target TLB entry ->addr_write flags.
609  */
610 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
611 {
612     CPUArchState *env;
613 
614     int mmu_idx;
615 
616     env = cpu->env_ptr;
617     qemu_spin_lock(&env_tlb(env)->c.lock);
618     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
619         unsigned int i;
620         unsigned int n = tlb_n_entries(env, mmu_idx);
621 
622         for (i = 0; i < n; i++) {
623             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
624                                          start1, length);
625         }
626 
627         for (i = 0; i < CPU_VTLB_SIZE; i++) {
628             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
629                                          start1, length);
630         }
631     }
632     qemu_spin_unlock(&env_tlb(env)->c.lock);
633 }
634 
635 /* Called with tlb_c.lock held */
636 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
637                                          target_ulong vaddr)
638 {
639     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
640         tlb_entry->addr_write = vaddr;
641     }
642 }
643 
644 /* update the TLB corresponding to virtual page vaddr now that the
645    underlying RAM page is dirty, dropping TLB_NOTDIRTY from its entries */
646 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
647 {
648     CPUArchState *env = cpu->env_ptr;
649     int mmu_idx;
650 
651     assert_cpu_is_self(cpu);
652 
653     vaddr &= TARGET_PAGE_MASK;
654     qemu_spin_lock(&env_tlb(env)->c.lock);
655     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
656         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
657     }
658 
659     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
660         int k;
661         for (k = 0; k < CPU_VTLB_SIZE; k++) {
662             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
663         }
664     }
665     qemu_spin_unlock(&env_tlb(env)->c.lock);
666 }
667 
668 /* Our TLB does not support large pages, so remember the area covered by
669    large pages and flush the whole mmu index if a page in it is invalidated.  */
670 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
671                                target_ulong vaddr, target_ulong size)
672 {
673     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
674     target_ulong lp_mask = ~(size - 1);
675 
676     if (lp_addr == (target_ulong)-1) {
677         /* No previous large page.  */
678         lp_addr = vaddr;
679     } else {
680         /* Extend the existing region to include the new page.
681            This is a compromise between unnecessary flushes and
682            the cost of maintaining a full variable size TLB.  */
683         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
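        /*
         * Illustration: with an existing 2MB region at 0x40000000 and a new
         * large page at 0x40400000, the loop below widens lp_mask one bit at
         * a time until both addresses fall in the same naturally aligned
         * block (8MB here), trading a larger flushed area for fewer flushes.
         */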
684         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
685             lp_mask <<= 1;
686         }
687     }
688     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
689     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
690 }
691 
692 /* Add a new TLB entry. At most one entry for a given virtual address
693  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
694  * supplied size is only used by tlb_flush_page.
695  *
696  * Called from TCG-generated code, which is under an RCU read-side
697  * critical section.
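 *
 * Illustrative call from a target's tlb_fill hook after a successful page
 * table walk (the variable names here are hypothetical):
 *
 *     tlb_set_page_with_attrs(cs, vaddr, paddr, attrs, prot,
 *                             mmu_idx, page_size);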
698  */
699 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
700                              hwaddr paddr, MemTxAttrs attrs, int prot,
701                              int mmu_idx, target_ulong size)
702 {
703     CPUArchState *env = cpu->env_ptr;
704     CPUTLB *tlb = env_tlb(env);
705     CPUTLBDesc *desc = &tlb->d[mmu_idx];
706     MemoryRegionSection *section;
707     unsigned int index;
708     target_ulong address;
709     target_ulong write_address;
710     uintptr_t addend;
711     CPUTLBEntry *te, tn;
712     hwaddr iotlb, xlat, sz, paddr_page;
713     target_ulong vaddr_page;
714     int asidx = cpu_asidx_from_attrs(cpu, attrs);
715     int wp_flags;
716     bool is_ram, is_romd;
717 
718     assert_cpu_is_self(cpu);
719 
720     if (size <= TARGET_PAGE_SIZE) {
721         sz = TARGET_PAGE_SIZE;
722     } else {
723         tlb_add_large_page(env, mmu_idx, vaddr, size);
724         sz = size;
725     }
726     vaddr_page = vaddr & TARGET_PAGE_MASK;
727     paddr_page = paddr & TARGET_PAGE_MASK;
728 
729     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
730                                                 &xlat, &sz, attrs, &prot);
731     assert(sz >= TARGET_PAGE_SIZE);
732 
733     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
734               " prot=%x idx=%d\n",
735               vaddr, paddr, prot, mmu_idx);
736 
737     address = vaddr_page;
738     if (size < TARGET_PAGE_SIZE) {
739         /* Repeat the MMU check and TLB fill on every access.  */
740         address |= TLB_INVALID_MASK;
741     }
742     if (attrs.byte_swap) {
743         address |= TLB_BSWAP;
744     }
745 
746     is_ram = memory_region_is_ram(section->mr);
747     is_romd = memory_region_is_romd(section->mr);
748 
749     if (is_ram || is_romd) {
750         /* RAM and ROMD both have associated host memory. */
751         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
752     } else {
753         /* I/O does not; force the host address to NULL. */
754         addend = 0;
755     }
756 
757     write_address = address;
758     if (is_ram) {
759         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
760         /*
761          * Computing is_clean is expensive; avoid all that unless
762          * the page is actually writable.
763          */
764         if (prot & PAGE_WRITE) {
765             if (section->readonly) {
766                 write_address |= TLB_DISCARD_WRITE;
767             } else if (cpu_physical_memory_is_clean(iotlb)) {
768                 write_address |= TLB_NOTDIRTY;
769             }
770         }
771     } else {
772         /* I/O or ROMD */
773         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
774         /*
775          * Writes to romd devices must go through MMIO to enable write.
776          * Reads to romd devices go through the ram_ptr found above,
777          * but of course reads to I/O must go through MMIO.
778          */
779         write_address |= TLB_MMIO;
780         if (!is_romd) {
781             address = write_address;
782         }
783     }
784 
785     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
786                                               TARGET_PAGE_SIZE);
787 
788     index = tlb_index(env, mmu_idx, vaddr_page);
789     te = tlb_entry(env, mmu_idx, vaddr_page);
790 
791     /*
792      * Hold the TLB lock for the rest of the function. We could acquire/release
793      * the lock several times in the function, but it is faster to amortize the
794      * acquisition cost by acquiring it just once. Note that this leads to
795      * a longer critical section, but this is not a concern since the TLB lock
796      * is unlikely to be contended.
797      */
798     qemu_spin_lock(&tlb->c.lock);
799 
800     /* Note that the tlb is no longer clean.  */
801     tlb->c.dirty |= 1 << mmu_idx;
802 
803     /* Make sure there's no cached translation for the new page.  */
804     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
805 
806     /*
807      * Only evict the old entry to the victim tlb if it's for a
808      * different page; otherwise just overwrite the stale data.
809      */
810     if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
811         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
812         CPUTLBEntry *tv = &desc->vtable[vidx];
813 
814         /* Evict the old entry into the victim tlb.  */
815         copy_tlb_helper_locked(tv, te);
816         desc->viotlb[vidx] = desc->iotlb[index];
817         tlb_n_used_entries_dec(env, mmu_idx);
818     }
819 
820     /* refill the tlb */
821     /*
822      * At this point iotlb contains a physical section number in the lower
823      * TARGET_PAGE_BITS, and either
824      *  + the ram_addr_t of the page base of the target RAM (RAM)
825      *  + the offset within section->mr of the page base (I/O, ROMD)
826      * We subtract the vaddr_page (which is page aligned and thus won't
827      * disturb the low bits) to give an offset which can be added to the
828      * (non-page-aligned) vaddr of the eventual memory access to get
829      * the MemoryRegion offset for the access. Note that the vaddr we
830      * subtract here is that of the page base, and not the same as the
831      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
832      */
833     desc->iotlb[index].addr = iotlb - vaddr_page;
834     desc->iotlb[index].attrs = attrs;
835 
836     /* Now calculate the new entry */
837     tn.addend = addend - vaddr_page;
838     if (prot & PAGE_READ) {
839         tn.addr_read = address;
840         if (wp_flags & BP_MEM_READ) {
841             tn.addr_read |= TLB_WATCHPOINT;
842         }
843     } else {
844         tn.addr_read = -1;
845     }
846 
847     if (prot & PAGE_EXEC) {
848         tn.addr_code = address;
849     } else {
850         tn.addr_code = -1;
851     }
852 
853     tn.addr_write = -1;
854     if (prot & PAGE_WRITE) {
855         tn.addr_write = write_address;
856         if (prot & PAGE_WRITE_INV) {
857             tn.addr_write |= TLB_INVALID_MASK;
858         }
859         if (wp_flags & BP_MEM_WRITE) {
860             tn.addr_write |= TLB_WATCHPOINT;
861         }
862     }
863 
864     copy_tlb_helper_locked(te, &tn);
865     tlb_n_used_entries_inc(env, mmu_idx);
866     qemu_spin_unlock(&tlb->c.lock);
867 }
868 
869 /* Add a new TLB entry, but without specifying the memory
870  * transaction attributes to be used.
871  */
872 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
873                   hwaddr paddr, int prot,
874                   int mmu_idx, target_ulong size)
875 {
876     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
877                             prot, mmu_idx, size);
878 }
879 
880 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
881 {
882     ram_addr_t ram_addr;
883 
884     ram_addr = qemu_ram_addr_from_host(ptr);
885     if (ram_addr == RAM_ADDR_INVALID) {
886         error_report("Bad ram pointer %p", ptr);
887         abort();
888     }
889     return ram_addr;
890 }
891 
892 /*
893  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
894  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
895  * be discarded and looked up again (e.g. via tlb_entry()).
896  */
897 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
898                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
899 {
900     CPUClass *cc = CPU_GET_CLASS(cpu);
901     bool ok;
902 
903     /*
904      * This is not a probe, so the only valid return is success; failure
905      * should result in exception + longjmp to the cpu loop.
906      */
907     ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
908     assert(ok);
909 }
910 
911 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
912                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
913                          MMUAccessType access_type, MemOp op)
914 {
915     CPUState *cpu = env_cpu(env);
916     hwaddr mr_offset;
917     MemoryRegionSection *section;
918     MemoryRegion *mr;
919     uint64_t val;
920     bool locked = false;
921     MemTxResult r;
922 
923     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
924     mr = section->mr;
925     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
926     cpu->mem_io_pc = retaddr;
927     if (!cpu->can_do_io) {
928         cpu_io_recompile(cpu, retaddr);
929     }
930 
931     cpu->mem_io_access_type = access_type;
932 
933     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
934         qemu_mutex_lock_iothread();
935         locked = true;
936     }
937     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
938     if (r != MEMTX_OK) {
939         hwaddr physaddr = mr_offset +
940             section->offset_within_address_space -
941             section->offset_within_region;
942 
943         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
944                                mmu_idx, iotlbentry->attrs, r, retaddr);
945     }
946     if (locked) {
947         qemu_mutex_unlock_iothread();
948     }
949 
950     return val;
951 }
952 
953 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
954                       int mmu_idx, uint64_t val, target_ulong addr,
955                       uintptr_t retaddr, MemOp op)
956 {
957     CPUState *cpu = env_cpu(env);
958     hwaddr mr_offset;
959     MemoryRegionSection *section;
960     MemoryRegion *mr;
961     bool locked = false;
962     MemTxResult r;
963 
964     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
965     mr = section->mr;
966     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
967     if (!cpu->can_do_io) {
968         cpu_io_recompile(cpu, retaddr);
969     }
970     cpu->mem_io_pc = retaddr;
971 
972     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
973         qemu_mutex_lock_iothread();
974         locked = true;
975     }
976     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
977     if (r != MEMTX_OK) {
978         hwaddr physaddr = mr_offset +
979             section->offset_within_address_space -
980             section->offset_within_region;
981 
982         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
983                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
984                                retaddr);
985     }
986     if (locked) {
987         qemu_mutex_unlock_iothread();
988     }
989 }
990 
991 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
992 {
993 #if TCG_OVERSIZED_GUEST
994     return *(target_ulong *)((uintptr_t)entry + ofs);
995 #else
996     /* ofs might correspond to .addr_write, so use atomic_read */
997     return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
998 #endif
999 }
1000 
1001 /* Return true if ADDR is present in the victim tlb, and has been copied
1002    back to the main tlb.  */
1003 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1004                            size_t elt_ofs, target_ulong page)
1005 {
1006     size_t vidx;
1007 
1008     assert_cpu_is_self(env_cpu(env));
1009     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1010         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1011         target_ulong cmp;
1012 
1013         /* elt_ofs might correspond to .addr_write, so use atomic_read */
1014 #if TCG_OVERSIZED_GUEST
1015         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1016 #else
1017         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1018 #endif
1019 
1020         if (cmp == page) {
1021             /* Found entry in victim tlb, swap tlb and iotlb.  */
1022             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1023 
1024             qemu_spin_lock(&env_tlb(env)->c.lock);
1025             copy_tlb_helper_locked(&tmptlb, tlb);
1026             copy_tlb_helper_locked(tlb, vtlb);
1027             copy_tlb_helper_locked(vtlb, &tmptlb);
1028             qemu_spin_unlock(&env_tlb(env)->c.lock);
1029 
1030             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1031             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1032             tmpio = *io; *io = *vio; *vio = tmpio;
1033             return true;
1034         }
1035     }
1036     return false;
1037 }
1038 
1039 /* Macro to call the above, with local variables from the use context.  */
1040 #define VICTIM_TLB_HIT(TY, ADDR) \
1041   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1042                  (ADDR) & TARGET_PAGE_MASK)
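/*
 * For example, VICTIM_TLB_HIT(addr_write, addr) expands to
 *     victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, addr_write),
 *                    addr & TARGET_PAGE_MASK)
 * and picks up env, mmu_idx and index from the caller's scope.
 */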
1043 
1044 /*
1045  * Return a ram_addr_t for the virtual address for execution.
1046  *
1047  * Return -1 if we can't translate and execute from an entire page
1048  * of RAM.  This will force us to execute by loading and translating
1049  * one insn at a time, without caching.
1050  *
1051  * NOTE: This function will trigger an exception if the page is
1052  * not executable.
1053  */
1054 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1055 {
1056     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1057     uintptr_t index = tlb_index(env, mmu_idx, addr);
1058     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1059     void *p;
1060 
1061     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1062         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1063             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1064             index = tlb_index(env, mmu_idx, addr);
1065             entry = tlb_entry(env, mmu_idx, addr);
1066 
1067             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1068                 /*
1069                  * The MMU protection covers a smaller range than a target
1070                  * page, so we must redo the MMU check for every insn.
1071                  */
1072                 return -1;
1073             }
1074         }
1075         assert(tlb_hit(entry->addr_code, addr));
1076     }
1077 
1078     if (unlikely(entry->addr_code & TLB_MMIO)) {
1079         /* The region is not backed by RAM.  */
1080         return -1;
1081     }
1082 
1083     p = (void *)((uintptr_t)addr + entry->addend);
1084     return qemu_ram_addr_from_host_nofail(p);
1085 }
1086 
1087 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1088                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1089 {
1090     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1091 
1092     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1093 
1094     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1095         struct page_collection *pages
1096             = page_collection_lock(ram_addr, ram_addr + size);
1097 
1098         /* We require mem_io_pc in tb_invalidate_phys_page_range.  */
1099         cpu->mem_io_pc = retaddr;
1100 
1101         tb_invalidate_phys_page_fast(pages, ram_addr, size);
1102         page_collection_unlock(pages);
1103     }
1104 
1105     /*
1106      * Set both VGA and migration bits for simplicity and to remove
1107      * the notdirty callback faster.
1108      */
1109     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1110 
1111     /* We remove the notdirty callback only if the code has been flushed. */
1112     if (!cpu_physical_memory_is_clean(ram_addr)) {
1113         trace_memory_notdirty_set_dirty(mem_vaddr);
1114         tlb_set_dirty(cpu, mem_vaddr);
1115     }
1116 }
1117 
1118 /*
1119  * Probe for whether the specified guest access is permitted. If it is not
1120  * permitted then an exception will be taken in the same way as if this
1121  * were a real access (and we will not return).
1122  * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1123  * returns the address of the host page similar to tlb_vaddr_to_host().
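 *
 * Illustrative use from a target helper that must either write a whole
 * block or fault up front (the surrounding names are hypothetical):
 *
 *     void *host = probe_access(env, addr, len, MMU_DATA_STORE,
 *                               cpu_mmu_index(env, false), GETPC());
 *     if (host) {
 *         memset(host, 0, len);   // fast path: backed by host RAM
 *     }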
1124  */
1125 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1126                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1127 {
1128     uintptr_t index = tlb_index(env, mmu_idx, addr);
1129     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1130     target_ulong tlb_addr;
1131     size_t elt_ofs;
1132     int wp_access;
1133 
1134     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1135 
1136     switch (access_type) {
1137     case MMU_DATA_LOAD:
1138         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1139         wp_access = BP_MEM_READ;
1140         break;
1141     case MMU_DATA_STORE:
1142         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1143         wp_access = BP_MEM_WRITE;
1144         break;
1145     case MMU_INST_FETCH:
1146         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1147         wp_access = BP_MEM_READ;
1148         break;
1149     default:
1150         g_assert_not_reached();
1151     }
1152     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1153 
1154     if (unlikely(!tlb_hit(tlb_addr, addr))) {
1155         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1156                             addr & TARGET_PAGE_MASK)) {
1157             tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1158             /* TLB resize via tlb_fill may have moved the entry. */
1159             index = tlb_index(env, mmu_idx, addr);
1160             entry = tlb_entry(env, mmu_idx, addr);
1161         }
1162         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1163     }
1164 
1165     if (!size) {
1166         return NULL;
1167     }
1168 
1169     if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1170         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1171 
1172         /* Reject I/O access, or other required slow-path.  */
1173         if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1174             return NULL;
1175         }
1176 
1177         /* Handle watchpoints.  */
1178         if (tlb_addr & TLB_WATCHPOINT) {
1179             cpu_check_watchpoint(env_cpu(env), addr, size,
1180                                  iotlbentry->attrs, wp_access, retaddr);
1181         }
1182 
1183         /* Handle clean RAM pages.  */
1184         if (tlb_addr & TLB_NOTDIRTY) {
1185             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1186         }
1187     }
1188 
1189     return (void *)((uintptr_t)addr + entry->addend);
1190 }
1191 
1192 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1193                         MMUAccessType access_type, int mmu_idx)
1194 {
1195     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1196     uintptr_t tlb_addr, page;
1197     size_t elt_ofs;
1198 
1199     switch (access_type) {
1200     case MMU_DATA_LOAD:
1201         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1202         break;
1203     case MMU_DATA_STORE:
1204         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1205         break;
1206     case MMU_INST_FETCH:
1207         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1208         break;
1209     default:
1210         g_assert_not_reached();
1211     }
1212 
1213     page = addr & TARGET_PAGE_MASK;
1214     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1215 
1216     if (!tlb_hit_page(tlb_addr, page)) {
1217         uintptr_t index = tlb_index(env, mmu_idx, addr);
1218 
1219         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1220             CPUState *cs = env_cpu(env);
1221             CPUClass *cc = CPU_GET_CLASS(cs);
1222 
1223             if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1224                 /* Non-faulting page table read failed.  */
1225                 return NULL;
1226             }
1227 
1228             /* TLB resize via tlb_fill may have moved the entry.  */
1229             entry = tlb_entry(env, mmu_idx, addr);
1230         }
1231         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1232     }
1233 
1234     if (tlb_addr & ~TARGET_PAGE_MASK) {
1235         /* IO access */
1236         return NULL;
1237     }
1238 
1239     return (void *)((uintptr_t)addr + entry->addend);
1240 }
1241 
1242 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1243  * or I/O operations to proceed.  Return the host address.  */
1244 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1245                                TCGMemOpIdx oi, uintptr_t retaddr)
1246 {
1247     size_t mmu_idx = get_mmuidx(oi);
1248     uintptr_t index = tlb_index(env, mmu_idx, addr);
1249     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1250     target_ulong tlb_addr = tlb_addr_write(tlbe);
1251     MemOp mop = get_memop(oi);
1252     int a_bits = get_alignment_bits(mop);
1253     int s_bits = mop & MO_SIZE;
1254     void *hostaddr;
1255 
1256     /* Adjust the given return address.  */
1257     retaddr -= GETPC_ADJ;
1258 
1259     /* Enforce guest required alignment.  */
1260     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1261         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1262         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1263                              mmu_idx, retaddr);
1264     }
1265 
1266     /* Enforce qemu required alignment.  */
1267     if (unlikely(addr & ((1 << s_bits) - 1))) {
1268         /* We get here if guest alignment was not requested,
1269            or was not enforced by cpu_unaligned_access above.
1270            We might widen the access and emulate, but for now
1271            mark an exception and exit the cpu loop.  */
1272         goto stop_the_world;
1273     }
1274 
1275     /* Check TLB entry and enforce page permissions.  */
1276     if (!tlb_hit(tlb_addr, addr)) {
1277         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1278             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1279                      mmu_idx, retaddr);
1280             index = tlb_index(env, mmu_idx, addr);
1281             tlbe = tlb_entry(env, mmu_idx, addr);
1282         }
1283         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1284     }
1285 
1286     /* Notice an IO access or a needs-MMU-lookup access */
1287     if (unlikely(tlb_addr & TLB_MMIO)) {
1288         /* There's really nothing that can be done to
1289            support this apart from stop-the-world.  */
1290         goto stop_the_world;
1291     }
1292 
1293     /* Let the guest notice RMW on a write-only page.  */
1294     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1295         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1296                  mmu_idx, retaddr);
1297         /* Since we don't support reads and writes to different addresses,
1298            and we do have the proper page loaded for write, this shouldn't
1299            ever return.  But just in case, handle via stop-the-world.  */
1300         goto stop_the_world;
1301     }
1302 
1303     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1304 
1305     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1306         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1307                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1308     }
1309 
1310     return hostaddr;
1311 
1312  stop_the_world:
1313     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1314 }
1315 
1316 /*
1317  * Load Helpers
1318  *
1319  * We support two different access types. SOFTMMU_CODE_ACCESS is
1320  * specifically for reading instructions from system memory. It is
1321  * called by the translation loop and in some helpers where the code
1322  * is disassembled. It shouldn't be called directly by guest code.
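 *
 * Illustrative data-side call, e.g. from a target helper (addr is a
 * hypothetical guest virtual address):
 *
 *     TCGMemOpIdx oi = make_memop_idx(MO_LEUL, cpu_mmu_index(env, false));
 *     uint32_t val = helper_le_ldul_mmu(env, addr, oi, GETPC());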
1323  */
1324 
1325 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1326                                 TCGMemOpIdx oi, uintptr_t retaddr);
1327 
1328 static inline uint64_t QEMU_ALWAYS_INLINE
1329 load_memop(const void *haddr, MemOp op)
1330 {
1331     switch (op) {
1332     case MO_UB:
1333         return ldub_p(haddr);
1334     case MO_BEUW:
1335         return lduw_be_p(haddr);
1336     case MO_LEUW:
1337         return lduw_le_p(haddr);
1338     case MO_BEUL:
1339         return (uint32_t)ldl_be_p(haddr);
1340     case MO_LEUL:
1341         return (uint32_t)ldl_le_p(haddr);
1342     case MO_BEQ:
1343         return ldq_be_p(haddr);
1344     case MO_LEQ:
1345         return ldq_le_p(haddr);
1346     default:
1347         qemu_build_not_reached();
1348     }
1349 }
1350 
1351 static inline uint64_t QEMU_ALWAYS_INLINE
1352 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1353             uintptr_t retaddr, MemOp op, bool code_read,
1354             FullLoadHelper *full_load)
1355 {
1356     uintptr_t mmu_idx = get_mmuidx(oi);
1357     uintptr_t index = tlb_index(env, mmu_idx, addr);
1358     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1359     target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1360     const size_t tlb_off = code_read ?
1361         offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1362     const MMUAccessType access_type =
1363         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1364     unsigned a_bits = get_alignment_bits(get_memop(oi));
1365     void *haddr;
1366     uint64_t res;
1367     size_t size = memop_size(op);
1368 
1369     /* Handle CPU specific unaligned behaviour */
1370     if (addr & ((1 << a_bits) - 1)) {
1371         cpu_unaligned_access(env_cpu(env), addr, access_type,
1372                              mmu_idx, retaddr);
1373     }
1374 
1375     /* If the TLB entry is for a different page, reload and try again.  */
1376     if (!tlb_hit(tlb_addr, addr)) {
1377         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1378                             addr & TARGET_PAGE_MASK)) {
1379             tlb_fill(env_cpu(env), addr, size,
1380                      access_type, mmu_idx, retaddr);
1381             index = tlb_index(env, mmu_idx, addr);
1382             entry = tlb_entry(env, mmu_idx, addr);
1383         }
1384         tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1385         tlb_addr &= ~TLB_INVALID_MASK;
1386     }
1387 
1388     /* Handle anything that isn't just a straight memory access.  */
1389     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1390         CPUIOTLBEntry *iotlbentry;
1391         bool need_swap;
1392 
1393         /* For anything that is unaligned, recurse through full_load.  */
1394         if ((addr & (size - 1)) != 0) {
1395             goto do_unaligned_access;
1396         }
1397 
1398         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1399 
1400         /* Handle watchpoints.  */
1401         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1402             /* On watchpoint hit, this will longjmp out.  */
1403             cpu_check_watchpoint(env_cpu(env), addr, size,
1404                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
1405         }
1406 
1407         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1408 
1409         /* Handle I/O access.  */
1410         if (likely(tlb_addr & TLB_MMIO)) {
1411             return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1412                             access_type, op ^ (need_swap * MO_BSWAP));
1413         }
1414 
1415         haddr = (void *)((uintptr_t)addr + entry->addend);
1416 
1417         /*
1418          * Keep these two load_memop separate to ensure that the compiler
1419          * is able to fold the entire function to a single instruction.
1420          * There is a build-time assert inside to remind you of this.  ;-)
1421          */
1422         if (unlikely(need_swap)) {
1423             return load_memop(haddr, op ^ MO_BSWAP);
1424         }
1425         return load_memop(haddr, op);
1426     }
1427 
1428     /* Handle slow unaligned access (it spans two pages or IO).  */
1429     if (size > 1
1430         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1431                     >= TARGET_PAGE_SIZE)) {
1432         target_ulong addr1, addr2;
1433         uint64_t r1, r2;
1434         unsigned shift;
1435     do_unaligned_access:
1436         addr1 = addr & ~((target_ulong)size - 1);
1437         addr2 = addr1 + size;
1438         r1 = full_load(env, addr1, oi, retaddr);
1439         r2 = full_load(env, addr2, oi, retaddr);
1440         shift = (addr & (size - 1)) * 8;
1441 
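        /*
         * Illustration: for a 4-byte little-endian load with (addr & 3) == 2,
         * addr1/addr2 are the two aligned 4-byte blocks covering the access,
         * shift == 16, and the result is (r1 >> 16) | (r2 << 16), masked to
         * 32 bits below.
         */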
1442         if (memop_big_endian(op)) {
1443             /* Big-endian combine.  */
1444             res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1445         } else {
1446             /* Little-endian combine.  */
1447             res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1448         }
1449         return res & MAKE_64BIT_MASK(0, size * 8);
1450     }
1451 
1452     haddr = (void *)((uintptr_t)addr + entry->addend);
1453     return load_memop(haddr, op);
1454 }
1455 
1456 /*
1457  * For the benefit of TCG generated code, we want to avoid the
1458  * complication of ABI-specific return type promotion and always
1459  * return a value extended to the register size of the host. This is
1460  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1461  * data, and for that we always have uint64_t.
1462  *
1463  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1464  */
1465 
1466 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1467                               TCGMemOpIdx oi, uintptr_t retaddr)
1468 {
1469     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1470 }
1471 
1472 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1473                                      TCGMemOpIdx oi, uintptr_t retaddr)
1474 {
1475     return full_ldub_mmu(env, addr, oi, retaddr);
1476 }
1477 
1478 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1479                                  TCGMemOpIdx oi, uintptr_t retaddr)
1480 {
1481     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1482                        full_le_lduw_mmu);
1483 }
1484 
1485 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1486                                     TCGMemOpIdx oi, uintptr_t retaddr)
1487 {
1488     return full_le_lduw_mmu(env, addr, oi, retaddr);
1489 }
1490 
1491 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1492                                  TCGMemOpIdx oi, uintptr_t retaddr)
1493 {
1494     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1495                        full_be_lduw_mmu);
1496 }
1497 
1498 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1499                                     TCGMemOpIdx oi, uintptr_t retaddr)
1500 {
1501     return full_be_lduw_mmu(env, addr, oi, retaddr);
1502 }
1503 
1504 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1505                                  TCGMemOpIdx oi, uintptr_t retaddr)
1506 {
1507     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1508                        full_le_ldul_mmu);
1509 }
1510 
1511 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1512                                     TCGMemOpIdx oi, uintptr_t retaddr)
1513 {
1514     return full_le_ldul_mmu(env, addr, oi, retaddr);
1515 }
1516 
1517 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1518                                  TCGMemOpIdx oi, uintptr_t retaddr)
1519 {
1520     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1521                        full_be_ldul_mmu);
1522 }
1523 
1524 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1525                                     TCGMemOpIdx oi, uintptr_t retaddr)
1526 {
1527     return full_be_ldul_mmu(env, addr, oi, retaddr);
1528 }
1529 
1530 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1531                            TCGMemOpIdx oi, uintptr_t retaddr)
1532 {
1533     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1534                        helper_le_ldq_mmu);
1535 }
1536 
1537 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1538                            TCGMemOpIdx oi, uintptr_t retaddr)
1539 {
1540     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1541                        helper_be_ldq_mmu);
1542 }
1543 
1544 /*
1545  * Provide signed versions of the load routines as well.  We can of course
1546  * avoid this for 64-bit data, or for 32-bit data on a 32-bit host.
1547  */
1548 
1549 
1550 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1551                                      TCGMemOpIdx oi, uintptr_t retaddr)
1552 {
1553     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1554 }
1555 
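/*
 * Illustrative sketch (hypothetical function): the (int8_t) cast above relies
 * on the implicit conversion back to tcg_target_ulong to perform the sign
 * extension, so a loaded byte of 0xff is returned as an all-ones register
 * value.
 */
static inline tcg_target_ulong example_sext_byte(uint8_t loaded)
{
    /* Hypothetical illustration only: 0xff -> (int8_t)-1 -> all ones. */
    return (tcg_target_ulong)(int8_t)loaded;
}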
1556 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1557                                     TCGMemOpIdx oi, uintptr_t retaddr)
1558 {
1559     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1560 }
1561 
1562 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1563                                     TCGMemOpIdx oi, uintptr_t retaddr)
1564 {
1565     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1566 }
1567 
1568 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1569                                     TCGMemOpIdx oi, uintptr_t retaddr)
1570 {
1571     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1572 }
1573 
1574 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1575                                     TCGMemOpIdx oi, uintptr_t retaddr)
1576 {
1577     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1578 }
1579 
1580 /*
1581  * Store Helpers
1582  */
1583 
1584 static inline void QEMU_ALWAYS_INLINE
1585 store_memop(void *haddr, uint64_t val, MemOp op)
1586 {
1587     switch (op) {
1588     case MO_UB:
1589         stb_p(haddr, val);
1590         break;
1591     case MO_BEUW:
1592         stw_be_p(haddr, val);
1593         break;
1594     case MO_LEUW:
1595         stw_le_p(haddr, val);
1596         break;
1597     case MO_BEUL:
1598         stl_be_p(haddr, val);
1599         break;
1600     case MO_LEUL:
1601         stl_le_p(haddr, val);
1602         break;
1603     case MO_BEQ:
1604         stq_be_p(haddr, val);
1605         break;
1606     case MO_LEQ:
1607         stq_le_p(haddr, val);
1608         break;
1609     default:
1610         qemu_build_not_reached();
1611     }
1612 }
1613 
1614 static inline void QEMU_ALWAYS_INLINE
1615 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1616              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1617 {
1618     uintptr_t mmu_idx = get_mmuidx(oi);
1619     uintptr_t index = tlb_index(env, mmu_idx, addr);
1620     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1621     target_ulong tlb_addr = tlb_addr_write(entry);
1622     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1623     unsigned a_bits = get_alignment_bits(get_memop(oi));
1624     void *haddr;
1625     size_t size = memop_size(op);
1626 
1627     /* Handle CPU specific unaligned behaviour */
1628     if (addr & ((1 << a_bits) - 1)) {
1629         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1630                              mmu_idx, retaddr);
1631     }
1632 
1633     /* If the TLB entry is for a different page, reload and try again.  */
1634     if (!tlb_hit(tlb_addr, addr)) {
1635         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1636             addr & TARGET_PAGE_MASK)) {
1637             tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1638                      mmu_idx, retaddr);
1639             index = tlb_index(env, mmu_idx, addr);
1640             entry = tlb_entry(env, mmu_idx, addr);
1641         }
1642         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1643     }
1644 
1645     /* Handle anything that isn't just a straight memory access.  */
1646     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1647         CPUIOTLBEntry *iotlbentry;
1648         bool need_swap;
1649 
1650         /* For anything that is unaligned, recurse through byte stores.  */
1651         if ((addr & (size - 1)) != 0) {
1652             goto do_unaligned_access;
1653         }
1654 
1655         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1656 
1657         /* Handle watchpoints.  */
1658         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1659             /* On watchpoint hit, this will longjmp out.  */
1660             cpu_check_watchpoint(env_cpu(env), addr, size,
1661                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1662         }
1663 
1664         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1665 
1666         /* Handle I/O access.  */
1667         if (tlb_addr & TLB_MMIO) {
1668             io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1669                       op ^ (need_swap * MO_BSWAP));
1670             return;
1671         }
1672 
1673         /* Ignore writes to ROM.  */
1674         if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1675             return;
1676         }
1677 
1678         /* Handle clean RAM pages.  */
1679         if (tlb_addr & TLB_NOTDIRTY) {
1680             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1681         }
1682 
1683         haddr = (void *)((uintptr_t)addr + entry->addend);
1684 
1685         /*
1686          * Keep these two store_memop separate to ensure that the compiler
1687          * is able to fold the entire function to a single instruction.
1688          * There is a build-time assert inside to remind you of this.  ;-)
1689          */
1690         if (unlikely(need_swap)) {
1691             store_memop(haddr, val, op ^ MO_BSWAP);
1692         } else {
1693             store_memop(haddr, val, op);
1694         }
1695         return;
1696     }
1697 
1698     /* Handle slow unaligned access (it spans two pages or IO).  */
1699     if (size > 1
1700         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1701                      >= TARGET_PAGE_SIZE)) {
1702         int i;
1703         uintptr_t index2;
1704         CPUTLBEntry *entry2;
1705         target_ulong page2, tlb_addr2;
1706         size_t size2;
1707 
1708     do_unaligned_access:
1709         /*
1710          * Ensure the second page is in the TLB.  Note that the first page
1711          * is already guaranteed to be filled, and that the second page
1712          * cannot evict the first.
1713          */
1714         page2 = (addr + size) & TARGET_PAGE_MASK;
1715         size2 = (addr + size) & ~TARGET_PAGE_MASK;
1716         index2 = tlb_index(env, mmu_idx, page2);
1717         entry2 = tlb_entry(env, mmu_idx, page2);
1718         tlb_addr2 = tlb_addr_write(entry2);
1719         if (!tlb_hit_page(tlb_addr2, page2)) {
1720             if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
1721                 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
1722                          mmu_idx, retaddr);
1723                 index2 = tlb_index(env, mmu_idx, page2);
1724                 entry2 = tlb_entry(env, mmu_idx, page2);
1725             }
1726             tlb_addr2 = tlb_addr_write(entry2);
1727         }
1728 
1729         /*
1730          * Handle watchpoints.  Since this may trap, all checks
1731          * must happen before any store.
1732          */
1733         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1734             cpu_check_watchpoint(env_cpu(env), addr, size - size2,
1735                                  env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
1736                                  BP_MEM_WRITE, retaddr);
1737         }
1738         if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
1739             cpu_check_watchpoint(env_cpu(env), page2, size2,
1740                                  env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
1741                                  BP_MEM_WRITE, retaddr);
1742         }
1743 
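        /*
         * Worked example (illustrative, assuming 4KiB target pages): for an
         * 8-byte store at addr = page_base + 0xffa, page2 = page_base + 0x1000
         * and size2 = (addr + 8) & 0xfff = 2, so the checks above cover
         * size - size2 = 6 bytes on the first page and 2 on the second.
         */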
1744         /*
1745          * XXX: not efficient, but simple.
1746          * This loop must go in the forward direction to avoid issues
1747          * with self-modifying code in 64-bit Windows guests.
1748          */
1749         for (i = 0; i < size; ++i) {
1750             uint8_t val8;
1751             if (memop_big_endian(op)) {
1752                 /* Big-endian extract.  */
1753                 val8 = val >> (((size - 1) * 8) - (i * 8));
1754             } else {
1755                 /* Little-endian extract.  */
1756                 val8 = val >> (i * 8);
1757             }
1758             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
1759         }
1760         return;
1761     }
1762 
1763     haddr = (void *)((uintptr_t)addr + entry->addend);
1764     store_memop(haddr, val, op);
1765 }
1766 
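/*
 * Illustrative sketch (hypothetical helper): the byte extract used by the
 * unaligned store loop above.  For a 4-byte big-endian store of 0xAABBCCDD
 * the shifts for i = 0..3 are 24, 16, 8, 0, so bytes 0xAA, 0xBB, 0xCC, 0xDD
 * are written at addr+0..addr+3; the little-endian case shifts by i * 8.
 */
static inline uint8_t example_be_extract_byte(uint64_t val, unsigned size,
                                              unsigned i)
{
    /* Hypothetical illustration only; mirrors the big-endian extract above. */
    return val >> (((size - 1) * 8) - (i * 8));
}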
1767 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
1768                         TCGMemOpIdx oi, uintptr_t retaddr)
1769 {
1770     store_helper(env, addr, val, oi, retaddr, MO_UB);
1771 }
1772 
1773 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1774                        TCGMemOpIdx oi, uintptr_t retaddr)
1775 {
1776     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
1777 }
1778 
1779 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1780                        TCGMemOpIdx oi, uintptr_t retaddr)
1781 {
1782     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
1783 }
1784 
1785 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1786                        TCGMemOpIdx oi, uintptr_t retaddr)
1787 {
1788     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
1789 }
1790 
1791 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1792                        TCGMemOpIdx oi, uintptr_t retaddr)
1793 {
1794     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
1795 }
1796 
1797 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1798                        TCGMemOpIdx oi, uintptr_t retaddr)
1799 {
1800     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
1801 }
1802 
1803 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1804                        TCGMemOpIdx oi, uintptr_t retaddr)
1805 {
1806     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
1807 }
1808 
1809 /* The first set of helpers takes OI and RETADDR as explicit arguments,
1810    which makes them callable from other helpers.  */
1811 
1812 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1813 #define ATOMIC_NAME(X) \
1814     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1815 #define ATOMIC_MMU_DECLS
1816 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
1817 #define ATOMIC_MMU_CLEANUP
1818 
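/*
 * Illustrative expansion (an assumption about atomic_template.h, shown only
 * as an example): for DATA_SIZE 4 the template uses SUFFIX 'l' and END
 * '_le'/'_be', so ATOMIC_NAME(cmpxchg) becomes roughly
 *
 *   uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env,
 *                                          target_ulong addr,
 *                                          uint32_t cmpv, uint32_t newv,
 *                                          TCGMemOpIdx oi, uintptr_t retaddr);
 *
 * with the trailing oi/retaddr parameters coming from EXTRA_ARGS above.
 */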
1819 #define DATA_SIZE 1
1820 #include "atomic_template.h"
1821 
1822 #define DATA_SIZE 2
1823 #include "atomic_template.h"
1824 
1825 #define DATA_SIZE 4
1826 #include "atomic_template.h"
1827 
1828 #ifdef CONFIG_ATOMIC64
1829 #define DATA_SIZE 8
1830 #include "atomic_template.h"
1831 #endif
1832 
1833 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1834 #define DATA_SIZE 16
1835 #include "atomic_template.h"
1836 #endif
1837 
1838 /* The second set of helpers is callable directly from TCG-generated code.  */
1839 
1840 #undef EXTRA_ARGS
1841 #undef ATOMIC_NAME
1842 #undef ATOMIC_MMU_LOOKUP
1843 #define EXTRA_ARGS         , TCGMemOpIdx oi
1844 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1845 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
1846 
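/*
 * Note: the only difference from the first set is that the host return
 * address is obtained with GETPC() inside the helper rather than passed in,
 * so TCG-generated code can call, for example (assuming the same template
 * naming as above), helper_atomic_cmpxchgl_le(env, addr, cmpv, newv, oi)
 * directly.
 */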
1847 #define DATA_SIZE 1
1848 #include "atomic_template.h"
1849 
1850 #define DATA_SIZE 2
1851 #include "atomic_template.h"
1852 
1853 #define DATA_SIZE 4
1854 #include "atomic_template.h"
1855 
1856 #ifdef CONFIG_ATOMIC64
1857 #define DATA_SIZE 8
1858 #include "atomic_template.h"
1859 #endif
1860 
1861 /* Code access functions.  */
1862 
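/*
 * These mirror the data-load helpers above, differing in the true passed for
 * load_helper's code_read argument (instruction fetch rather than data load)
 * and in returning the narrower, non-widened types noted earlier for
 * SOFTMMU_CODE_ACCESS.
 */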
1863 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
1864                                TCGMemOpIdx oi, uintptr_t retaddr)
1865 {
1866     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
1867 }
1868 
1869 uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
1870                             TCGMemOpIdx oi, uintptr_t retaddr)
1871 {
1872     return full_ldub_cmmu(env, addr, oi, retaddr);
1873 }
1874 
1875 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
1876                                   TCGMemOpIdx oi, uintptr_t retaddr)
1877 {
1878     return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
1879                        full_le_lduw_cmmu);
1880 }
1881 
1882 uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
1883                             TCGMemOpIdx oi, uintptr_t retaddr)
1884 {
1885     return full_le_lduw_cmmu(env, addr, oi, retaddr);
1886 }
1887 
1888 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
1889                                   TCGMemOpIdx oi, uintptr_t retaddr)
1890 {
1891     return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
1892                        full_be_lduw_cmmu);
1893 }
1894 
1895 uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
1896                             TCGMemOpIdx oi, uintptr_t retaddr)
1897 {
1898     return full_be_lduw_cmmu(env, addr, oi, retaddr);
1899 }
1900 
1901 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
1902                                   TCGMemOpIdx oi, uintptr_t retaddr)
1903 {
1904     return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
1905                        full_le_ldul_cmmu);
1906 }
1907 
1908 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
1909                             TCGMemOpIdx oi, uintptr_t retaddr)
1910 {
1911     return full_le_ldul_cmmu(env, addr, oi, retaddr);
1912 }
1913 
1914 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
1915                                   TCGMemOpIdx oi, uintptr_t retaddr)
1916 {
1917     return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
1918                        full_be_ldul_cmmu);
1919 }
1920 
1921 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
1922                             TCGMemOpIdx oi, uintptr_t retaddr)
1923 {
1924     return full_be_ldul_cmmu(env, addr, oi, retaddr);
1925 }
1926 
1927 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
1928                             TCGMemOpIdx oi, uintptr_t retaddr)
1929 {
1930     return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
1931                        helper_le_ldq_cmmu);
1932 }
1933 
1934 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
1935                             TCGMemOpIdx oi, uintptr_t retaddr)
1936 {
1937     return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
1938                        helper_be_ldq_cmmu);
1939 }
1940