xref: /openbmc/qemu/accel/tcg/cputlb.c (revision 0221d73ce6a8e075adaa0a35a6ef853d2652b855)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #ifdef CONFIG_PLUGIN
38 #include "qemu/plugin-memory.h"
39 #endif
40 
41 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
42 /* #define DEBUG_TLB */
43 /* #define DEBUG_TLB_LOG */
44 
/*
 * Turn the optional DEBUG_TLB / DEBUG_TLB_LOG switches into 0/1 constants.
 * DEBUG_TLB_LOG_GATE is only 1 when both DEBUG_TLB and DEBUG_TLB_LOG are
 * defined; DEBUG_TLB_GATE follows DEBUG_TLB alone.
 */
#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
#else
# define DEBUG_TLB_GATE 0
#endif

#if defined(DEBUG_TLB) && defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_LOG_GATE 0
#endif
56 
/*
 * tlb_debug() - printf-style trace message prefixed with the calling
 * function's name.
 *
 * With DEBUG_TLB_LOG_GATE set the message goes to the CPU_LOG_MMU log
 * target; otherwise, with DEBUG_TLB_GATE set, it goes to stderr. With
 * neither gate set nothing is printed.
 */
#define tlb_debug(fmt, ...)                                               \
    do {                                                                  \
        if (DEBUG_TLB_LOG_GATE) {                                         \
            qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__,              \
                          ## __VA_ARGS__);                                \
        } else if (DEBUG_TLB_GATE) {                                      \
            fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__);        \
        }                                                                 \
    } while (0)
65 
/*
 * assert_cpu_is_self() - debug-only check (active when DEBUG_TLB_GATE is
 * set) that @cpu is either not yet created or is the CPU of the calling
 * thread.
 */
#define assert_cpu_is_self(cpu)                                           \
    do {                                                                  \
        if (DEBUG_TLB_GATE) {                                             \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));           \
        }                                                                 \
    } while (0)
71 
/*
 * Flush requests below pass a target_ulong through
 * run_on_cpu_data.target_ptr, so run_on_cpu_data must be at least as
 * large as a target_ulong, even on 32 bit builds. Fail the build
 * otherwise.
 */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
75 
/*
 * We currently can't handle more than 16 bits in the MMUIDX bitmask
 * (the flush functions in this file take it as a uint16_t).
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
/* Bitmask with one bit set for every MMU index. */
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
80 
81 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
82 {
83     return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
84 }
85 
86 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
87                              size_t max_entries)
88 {
89     desc->window_begin_ns = ns;
90     desc->window_max_entries = max_entries;
91 }
92 
93 static void tlb_dyn_init(CPUArchState *env)
94 {
95     int i;
96 
97     for (i = 0; i < NB_MMU_MODES; i++) {
98         CPUTLBDesc *desc = &env_tlb(env)->d[i];
99         size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
100 
101         tlb_window_reset(desc, get_clock_realtime(), 0);
102         desc->n_used_entries = 0;
103         env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
104         env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
105         env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
106     }
107 }
108 
109 /**
110  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
111  * @env: CPU that owns the TLB
112  * @mmu_idx: MMU index of the TLB
113  *
114  * Called with tlb_lock_held.
115  *
116  * We have two main constraints when resizing a TLB: (1) we only resize it
117  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
118  * the array or unnecessarily flushing it), which means we do not control how
119  * frequently the resizing can occur; (2) we don't have access to the guest's
120  * future scheduling decisions, and therefore have to decide the magnitude of
121  * the resize based on past observations.
122  *
123  * In general, a memory-hungry process can benefit greatly from an appropriately
124  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
125  * we just have to make the TLB as large as possible; while an oversized TLB
126  * results in minimal TLB miss rates, it also takes longer to be flushed
127  * (flushes can be _very_ frequent), and the reduced locality can also hurt
128  * performance.
129  *
130  * To achieve near-optimal performance for all kinds of workloads, we:
131  *
132  * 1. Aggressively increase the size of the TLB when the use rate of the
133  * TLB being flushed is high, since it is likely that in the near future this
134  * memory-hungry process will execute again, and its memory hungriness will
135  * probably be similar.
136  *
137  * 2. Slowly reduce the size of the TLB as the use rate declines over a
138  * reasonably large time window. The rationale is that if in such a time window
139  * we have not observed a high TLB use rate, it is likely that we won't observe
140  * it in the near future. In that case, once a time window expires we downsize
141  * the TLB to match the maximum use rate observed in the window.
142  *
143  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
144  * since in that range performance is likely near-optimal. Recall that the TLB
145  * is direct mapped, so we want the use rate to be low (or at least not too
146  * high), since otherwise we are likely to have a significant amount of
147  * conflict misses.
148  */
149 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
150 {
151     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
152     size_t old_size = tlb_n_entries(env, mmu_idx);
153     size_t rate;
154     size_t new_size = old_size;
155     int64_t now = get_clock_realtime();
156     int64_t window_len_ms = 100;
157     int64_t window_len_ns = window_len_ms * 1000 * 1000;
158     bool window_expired = now > desc->window_begin_ns + window_len_ns;
159 
160     if (desc->n_used_entries > desc->window_max_entries) {
161         desc->window_max_entries = desc->n_used_entries;
162     }
163     rate = desc->window_max_entries * 100 / old_size;
164 
165     if (rate > 70) {
166         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
167     } else if (rate < 30 && window_expired) {
168         size_t ceil = pow2ceil(desc->window_max_entries);
169         size_t expected_rate = desc->window_max_entries * 100 / ceil;
170 
171         /*
172          * Avoid undersizing when the max number of entries seen is just below
173          * a pow2. For instance, if max_entries == 1025, the expected use rate
174          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
175          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
176          * later. Thus, make sure that the expected use rate remains below 70%.
177          * (and since we double the size, that means the lowest rate we'd
178          * expect to get is 35%, which is still in the 30-70% range where
179          * we consider that the size is appropriate.)
180          */
181         if (expected_rate > 70) {
182             ceil *= 2;
183         }
184         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
185     }
186 
187     if (new_size == old_size) {
188         if (window_expired) {
189             tlb_window_reset(desc, now, desc->n_used_entries);
190         }
191         return;
192     }
193 
194     g_free(env_tlb(env)->f[mmu_idx].table);
195     g_free(env_tlb(env)->d[mmu_idx].iotlb);
196 
197     tlb_window_reset(desc, now, 0);
198     /* desc->n_used_entries is cleared by the caller */
199     env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
200     env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
201     env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
202     /*
203      * If the allocations fail, try smaller sizes. We just freed some
204      * memory, so going back to half of new_size has a good chance of working.
205      * Increased memory pressure elsewhere in the system might cause the
206      * allocations to fail though, so we progressively reduce the allocation
207      * size, aborting if we cannot even allocate the smallest TLB we support.
208      */
209     while (env_tlb(env)->f[mmu_idx].table == NULL ||
210            env_tlb(env)->d[mmu_idx].iotlb == NULL) {
211         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
212             error_report("%s: %s", __func__, strerror(errno));
213             abort();
214         }
215         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
216         env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
217 
218         g_free(env_tlb(env)->f[mmu_idx].table);
219         g_free(env_tlb(env)->d[mmu_idx].iotlb);
220         env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
221         env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
222     }
223 }
224 
225 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
226 {
227     tlb_mmu_resize_locked(env, mmu_idx);
228     memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
229     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
230 }
231 
232 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
233 {
234     env_tlb(env)->d[mmu_idx].n_used_entries++;
235 }
236 
237 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
238 {
239     env_tlb(env)->d[mmu_idx].n_used_entries--;
240 }
241 
242 void tlb_init(CPUState *cpu)
243 {
244     CPUArchState *env = cpu->env_ptr;
245 
246     qemu_spin_init(&env_tlb(env)->c.lock);
247 
248     /* Ensure that cpu_reset performs a full flush.  */
249     env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
250 
251     tlb_dyn_init(env);
252 }
253 
254 /* flush_all_helper: run fn across all cpus
255  *
256  * If the wait flag is set then the src cpu's helper will be queued as
257  * "safe" work and the loop exited creating a synchronisation point
258  * where all queued work will be finished before execution starts
259  * again.
260  */
261 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
262                              run_on_cpu_data d)
263 {
264     CPUState *cpu;
265 
266     CPU_FOREACH(cpu) {
267         if (cpu != src) {
268             async_run_on_cpu(cpu, fn, d);
269         }
270     }
271 }
272 
273 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
274 {
275     CPUState *cpu;
276     size_t full = 0, part = 0, elide = 0;
277 
278     CPU_FOREACH(cpu) {
279         CPUArchState *env = cpu->env_ptr;
280 
281         full += atomic_read(&env_tlb(env)->c.full_flush_count);
282         part += atomic_read(&env_tlb(env)->c.part_flush_count);
283         elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
284     }
285     *pfull = full;
286     *ppart = part;
287     *pelide = elide;
288 }
289 
290 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
291 {
292     tlb_table_flush_by_mmuidx(env, mmu_idx);
293     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
294     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
295     env_tlb(env)->d[mmu_idx].vindex = 0;
296     memset(env_tlb(env)->d[mmu_idx].vtable, -1,
297            sizeof(env_tlb(env)->d[0].vtable));
298 }
299 
300 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
301 {
302     CPUArchState *env = cpu->env_ptr;
303     uint16_t asked = data.host_int;
304     uint16_t all_dirty, work, to_clean;
305 
306     assert_cpu_is_self(cpu);
307 
308     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
309 
310     qemu_spin_lock(&env_tlb(env)->c.lock);
311 
312     all_dirty = env_tlb(env)->c.dirty;
313     to_clean = asked & all_dirty;
314     all_dirty &= ~to_clean;
315     env_tlb(env)->c.dirty = all_dirty;
316 
317     for (work = to_clean; work != 0; work &= work - 1) {
318         int mmu_idx = ctz32(work);
319         tlb_flush_one_mmuidx_locked(env, mmu_idx);
320     }
321 
322     qemu_spin_unlock(&env_tlb(env)->c.lock);
323 
324     cpu_tb_jmp_cache_clear(cpu);
325 
326     if (to_clean == ALL_MMUIDX_BITS) {
327         atomic_set(&env_tlb(env)->c.full_flush_count,
328                    env_tlb(env)->c.full_flush_count + 1);
329     } else {
330         atomic_set(&env_tlb(env)->c.part_flush_count,
331                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
332         if (to_clean != asked) {
333             atomic_set(&env_tlb(env)->c.elide_flush_count,
334                        env_tlb(env)->c.elide_flush_count +
335                        ctpop16(asked & ~to_clean));
336         }
337     }
338 }
339 
340 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
341 {
342     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
343 
344     if (cpu->created && !qemu_cpu_is_self(cpu)) {
345         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
346                          RUN_ON_CPU_HOST_INT(idxmap));
347     } else {
348         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
349     }
350 }
351 
352 void tlb_flush(CPUState *cpu)
353 {
354     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
355 }
356 
357 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
358 {
359     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
360 
361     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
362 
363     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
364     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
365 }
366 
367 void tlb_flush_all_cpus(CPUState *src_cpu)
368 {
369     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
370 }
371 
372 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
373 {
374     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
375 
376     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
377 
378     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
379     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
380 }
381 
382 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
383 {
384     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
385 }
386 
387 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
388                                         target_ulong page)
389 {
390     return tlb_hit_page(tlb_entry->addr_read, page) ||
391            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
392            tlb_hit_page(tlb_entry->addr_code, page);
393 }
394 
395 /**
396  * tlb_entry_is_empty - return true if the entry is not in use
397  * @te: pointer to CPUTLBEntry
398  */
399 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
400 {
401     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
402 }
403 
404 /* Called with tlb_c.lock held */
405 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
406                                           target_ulong page)
407 {
408     if (tlb_hit_page_anyprot(tlb_entry, page)) {
409         memset(tlb_entry, -1, sizeof(*tlb_entry));
410         return true;
411     }
412     return false;
413 }
414 
415 /* Called with tlb_c.lock held */
416 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
417                                               target_ulong page)
418 {
419     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
420     int k;
421 
422     assert_cpu_is_self(env_cpu(env));
423     for (k = 0; k < CPU_VTLB_SIZE; k++) {
424         if (tlb_flush_entry_locked(&d->vtable[k], page)) {
425             tlb_n_used_entries_dec(env, mmu_idx);
426         }
427     }
428 }
429 
430 static void tlb_flush_page_locked(CPUArchState *env, int midx,
431                                   target_ulong page)
432 {
433     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
434     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
435 
436     /* Check if we need to flush due to large pages.  */
437     if ((page & lp_mask) == lp_addr) {
438         tlb_debug("forcing full flush midx %d ("
439                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
440                   midx, lp_addr, lp_mask);
441         tlb_flush_one_mmuidx_locked(env, midx);
442     } else {
443         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
444             tlb_n_used_entries_dec(env, midx);
445         }
446         tlb_flush_vtlb_page_locked(env, midx, page);
447     }
448 }
449 
/* The per-page flush functions below pack the mmuidx bit mask into the
 * bottom bits of the page address, so the mask must fit in the bits that
 * are always zero in a page-aligned address. Fail to build if it can't.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
454 
455 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
456                                                 run_on_cpu_data data)
457 {
458     CPUArchState *env = cpu->env_ptr;
459     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
460     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
461     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
462     int mmu_idx;
463 
464     assert_cpu_is_self(cpu);
465 
466     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
467               addr, mmu_idx_bitmap);
468 
469     qemu_spin_lock(&env_tlb(env)->c.lock);
470     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
471         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
472             tlb_flush_page_locked(env, mmu_idx, addr);
473         }
474     }
475     qemu_spin_unlock(&env_tlb(env)->c.lock);
476 
477     tb_flush_jmp_cache(cpu, addr);
478 }
479 
480 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
481 {
482     target_ulong addr_and_mmu_idx;
483 
484     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
485 
486     /* This should already be page aligned */
487     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
488     addr_and_mmu_idx |= idxmap;
489 
490     if (!qemu_cpu_is_self(cpu)) {
491         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
492                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
493     } else {
494         tlb_flush_page_by_mmuidx_async_work(
495             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
496     }
497 }
498 
499 void tlb_flush_page(CPUState *cpu, target_ulong addr)
500 {
501     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
502 }
503 
504 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
505                                        uint16_t idxmap)
506 {
507     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
508     target_ulong addr_and_mmu_idx;
509 
510     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
511 
512     /* This should already be page aligned */
513     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
514     addr_and_mmu_idx |= idxmap;
515 
516     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
517     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
518 }
519 
520 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
521 {
522     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
523 }
524 
525 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
526                                               target_ulong addr,
527                                               uint16_t idxmap)
528 {
529     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
530     target_ulong addr_and_mmu_idx;
531 
532     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
533 
534     /* This should already be page aligned */
535     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
536     addr_and_mmu_idx |= idxmap;
537 
538     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
539     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
540 }
541 
542 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
543 {
544     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
545 }
546 
/* Update the dirty tracking so that writes to code in the page at RAM
   address 'ram_addr' can be detected: the DIRTY_MEMORY_CODE flag is
   test-and-cleared for TARGET_PAGE_SIZE bytes starting at 'ram_addr'. */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}
554 
/* Set the DIRTY_MEMORY_CODE flag for the page at RAM address 'ram_addr'
   so that writes to it are no longer tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
561 
562 
563 /*
564  * Dirty write flag handling
565  *
566  * When the TCG code writes to a location it looks up the address in
567  * the TLB and uses that data to compute the final address. If any of
568  * the lower bits of the address are set then the slow path is forced.
569  * There are a number of reasons to do this but for normal RAM the
570  * most usual is detecting writes to code regions which may invalidate
571  * generated code.
572  *
573  * Other vCPUs might be reading their TLBs during guest execution, so we update
574  * te->addr_write with atomic_set. We don't need to worry about this for
575  * oversized guests as MTTCG is disabled for them.
576  *
577  * Called with tlb_c.lock held.
578  */
579 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
580                                          uintptr_t start, uintptr_t length)
581 {
582     uintptr_t addr = tlb_entry->addr_write;
583 
584     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
585                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
586         addr &= TARGET_PAGE_MASK;
587         addr += tlb_entry->addend;
588         if ((addr - start) < length) {
589 #if TCG_OVERSIZED_GUEST
590             tlb_entry->addr_write |= TLB_NOTDIRTY;
591 #else
592             atomic_set(&tlb_entry->addr_write,
593                        tlb_entry->addr_write | TLB_NOTDIRTY);
594 #endif
595         }
596     }
597 }
598 
/*
 * Copy the whole TLB entry @s into @d with a plain struct assignment.
 *
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}
607 
608 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
609  * the target vCPU).
610  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
611  * thing actually updated is the target TLB entry ->addr_write flags.
612  */
613 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
614 {
615     CPUArchState *env;
616 
617     int mmu_idx;
618 
619     env = cpu->env_ptr;
620     qemu_spin_lock(&env_tlb(env)->c.lock);
621     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
622         unsigned int i;
623         unsigned int n = tlb_n_entries(env, mmu_idx);
624 
625         for (i = 0; i < n; i++) {
626             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
627                                          start1, length);
628         }
629 
630         for (i = 0; i < CPU_VTLB_SIZE; i++) {
631             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
632                                          start1, length);
633         }
634     }
635     qemu_spin_unlock(&env_tlb(env)->c.lock);
636 }
637 
638 /* Called with tlb_c.lock held */
639 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
640                                          target_ulong vaddr)
641 {
642     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
643         tlb_entry->addr_write = vaddr;
644     }
645 }
646 
647 /* update the TLB corresponding to virtual page vaddr
648    so that it is no longer dirty */
649 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
650 {
651     CPUArchState *env = cpu->env_ptr;
652     int mmu_idx;
653 
654     assert_cpu_is_self(cpu);
655 
656     vaddr &= TARGET_PAGE_MASK;
657     qemu_spin_lock(&env_tlb(env)->c.lock);
658     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
659         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
660     }
661 
662     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
663         int k;
664         for (k = 0; k < CPU_VTLB_SIZE; k++) {
665             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
666         }
667     }
668     qemu_spin_unlock(&env_tlb(env)->c.lock);
669 }
670 
671 /* Our TLB does not support large pages, so remember the area covered by
672    large pages and trigger a full TLB flush if these are invalidated.  */
673 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
674                                target_ulong vaddr, target_ulong size)
675 {
676     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
677     target_ulong lp_mask = ~(size - 1);
678 
679     if (lp_addr == (target_ulong)-1) {
680         /* No previous large page.  */
681         lp_addr = vaddr;
682     } else {
683         /* Extend the existing region to include the new page.
684            This is a compromise between unnecessary flushes and
685            the cost of maintaining a full variable size TLB.  */
686         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
687         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
688             lp_mask <<= 1;
689         }
690     }
691     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
692     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
693 }
694 
695 /* Add a new TLB entry. At most one entry for a given virtual address
696  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
697  * supplied size is only used by tlb_flush_page.
698  *
699  * Called from TCG-generated code, which is under an RCU read-side
700  * critical section.
701  */
702 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
703                              hwaddr paddr, MemTxAttrs attrs, int prot,
704                              int mmu_idx, target_ulong size)
705 {
706     CPUArchState *env = cpu->env_ptr;
707     CPUTLB *tlb = env_tlb(env);
708     CPUTLBDesc *desc = &tlb->d[mmu_idx];
709     MemoryRegionSection *section;
710     unsigned int index;
711     target_ulong address;
712     target_ulong write_address;
713     uintptr_t addend;
714     CPUTLBEntry *te, tn;
715     hwaddr iotlb, xlat, sz, paddr_page;
716     target_ulong vaddr_page;
717     int asidx = cpu_asidx_from_attrs(cpu, attrs);
718     int wp_flags;
719     bool is_ram, is_romd;
720 
721     assert_cpu_is_self(cpu);
722 
723     if (size <= TARGET_PAGE_SIZE) {
724         sz = TARGET_PAGE_SIZE;
725     } else {
726         tlb_add_large_page(env, mmu_idx, vaddr, size);
727         sz = size;
728     }
729     vaddr_page = vaddr & TARGET_PAGE_MASK;
730     paddr_page = paddr & TARGET_PAGE_MASK;
731 
732     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
733                                                 &xlat, &sz, attrs, &prot);
734     assert(sz >= TARGET_PAGE_SIZE);
735 
736     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
737               " prot=%x idx=%d\n",
738               vaddr, paddr, prot, mmu_idx);
739 
740     address = vaddr_page;
741     if (size < TARGET_PAGE_SIZE) {
742         /* Repeat the MMU check and TLB fill on every access.  */
743         address |= TLB_INVALID_MASK;
744     }
745     if (attrs.byte_swap) {
746         address |= TLB_BSWAP;
747     }
748 
749     is_ram = memory_region_is_ram(section->mr);
750     is_romd = memory_region_is_romd(section->mr);
751 
752     if (is_ram || is_romd) {
753         /* RAM and ROMD both have associated host memory. */
754         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
755     } else {
756         /* I/O does not; force the host address to NULL. */
757         addend = 0;
758     }
759 
760     write_address = address;
761     if (is_ram) {
762         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
763         /*
764          * Computing is_clean is expensive; avoid all that unless
765          * the page is actually writable.
766          */
767         if (prot & PAGE_WRITE) {
768             if (section->readonly) {
769                 write_address |= TLB_DISCARD_WRITE;
770             } else if (cpu_physical_memory_is_clean(iotlb)) {
771                 write_address |= TLB_NOTDIRTY;
772             }
773         }
774     } else {
775         /* I/O or ROMD */
776         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
777         /*
778          * Writes to romd devices must go through MMIO to enable write.
779          * Reads to romd devices go through the ram_ptr found above,
780          * but of course reads to I/O must go through MMIO.
781          */
782         write_address |= TLB_MMIO;
783         if (!is_romd) {
784             address = write_address;
785         }
786     }
787 
788     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
789                                               TARGET_PAGE_SIZE);
790 
791     index = tlb_index(env, mmu_idx, vaddr_page);
792     te = tlb_entry(env, mmu_idx, vaddr_page);
793 
794     /*
795      * Hold the TLB lock for the rest of the function. We could acquire/release
796      * the lock several times in the function, but it is faster to amortize the
797      * acquisition cost by acquiring it just once. Note that this leads to
798      * a longer critical section, but this is not a concern since the TLB lock
799      * is unlikely to be contended.
800      */
801     qemu_spin_lock(&tlb->c.lock);
802 
803     /* Note that the tlb is no longer clean.  */
804     tlb->c.dirty |= 1 << mmu_idx;
805 
806     /* Make sure there's no cached translation for the new page.  */
807     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
808 
809     /*
810      * Only evict the old entry to the victim tlb if it's for a
811      * different page; otherwise just overwrite the stale data.
812      */
813     if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
814         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
815         CPUTLBEntry *tv = &desc->vtable[vidx];
816 
817         /* Evict the old entry into the victim tlb.  */
818         copy_tlb_helper_locked(tv, te);
819         desc->viotlb[vidx] = desc->iotlb[index];
820         tlb_n_used_entries_dec(env, mmu_idx);
821     }
822 
823     /* refill the tlb */
824     /*
825      * At this point iotlb contains a physical section number in the lower
826      * TARGET_PAGE_BITS, and either
827      *  + the ram_addr_t of the page base of the target RAM (RAM)
828      *  + the offset within section->mr of the page base (I/O, ROMD)
829      * We subtract the vaddr_page (which is page aligned and thus won't
830      * disturb the low bits) to give an offset which can be added to the
831      * (non-page-aligned) vaddr of the eventual memory access to get
832      * the MemoryRegion offset for the access. Note that the vaddr we
833      * subtract here is that of the page base, and not the same as the
834      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
835      */
836     desc->iotlb[index].addr = iotlb - vaddr_page;
837     desc->iotlb[index].attrs = attrs;
838 
839     /* Now calculate the new entry */
840     tn.addend = addend - vaddr_page;
841     if (prot & PAGE_READ) {
842         tn.addr_read = address;
843         if (wp_flags & BP_MEM_READ) {
844             tn.addr_read |= TLB_WATCHPOINT;
845         }
846     } else {
847         tn.addr_read = -1;
848     }
849 
850     if (prot & PAGE_EXEC) {
851         tn.addr_code = address;
852     } else {
853         tn.addr_code = -1;
854     }
855 
856     tn.addr_write = -1;
857     if (prot & PAGE_WRITE) {
858         tn.addr_write = write_address;
859         if (prot & PAGE_WRITE_INV) {
860             tn.addr_write |= TLB_INVALID_MASK;
861         }
862         if (wp_flags & BP_MEM_WRITE) {
863             tn.addr_write |= TLB_WATCHPOINT;
864         }
865     }
866 
867     copy_tlb_helper_locked(te, &tn);
868     tlb_n_used_entries_inc(env, mmu_idx);
869     qemu_spin_unlock(&tlb->c.lock);
870 }
871 
872 /* Add a new TLB entry, but without specifying the memory
873  * transaction attributes to be used.
874  */
875 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
876                   hwaddr paddr, int prot,
877                   int mmu_idx, target_ulong size)
878 {
879     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
880                             prot, mmu_idx, size);
881 }
882 
883 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
884 {
885     ram_addr_t ram_addr;
886 
887     ram_addr = qemu_ram_addr_from_host(ptr);
888     if (ram_addr == RAM_ADDR_INVALID) {
889         error_report("Bad ram pointer %p", ptr);
890         abort();
891     }
892     return ram_addr;
893 }
894 
895 /*
896  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
897  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
898  * be discarded and looked up again (e.g. via tlb_entry()).
899  */
900 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
901                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
902 {
903     CPUClass *cc = CPU_GET_CLASS(cpu);
904     bool ok;
905 
906     /*
907      * This is not a probe, so only valid return is success; failure
908      * should result in exception + longjmp to the cpu loop.
909      */
910     ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
911     assert(ok);
912 }
913 
914 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
915                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
916                          MMUAccessType access_type, MemOp op)
917 {
918     CPUState *cpu = env_cpu(env);
919     hwaddr mr_offset;
920     MemoryRegionSection *section;
921     MemoryRegion *mr;
922     uint64_t val;
923     bool locked = false;
924     MemTxResult r;
925 
926     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
927     mr = section->mr;
928     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
929     cpu->mem_io_pc = retaddr;
930     if (!cpu->can_do_io) {
931         cpu_io_recompile(cpu, retaddr);
932     }
933 
934     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
935         qemu_mutex_lock_iothread();
936         locked = true;
937     }
938     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
939     if (r != MEMTX_OK) {
940         hwaddr physaddr = mr_offset +
941             section->offset_within_address_space -
942             section->offset_within_region;
943 
944         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
945                                mmu_idx, iotlbentry->attrs, r, retaddr);
946     }
947     if (locked) {
948         qemu_mutex_unlock_iothread();
949     }
950 
951     return val;
952 }
953 
954 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
955                       int mmu_idx, uint64_t val, target_ulong addr,
956                       uintptr_t retaddr, MemOp op)
957 {
958     CPUState *cpu = env_cpu(env);
959     hwaddr mr_offset;
960     MemoryRegionSection *section;
961     MemoryRegion *mr;
962     bool locked = false;
963     MemTxResult r;
964 
965     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
966     mr = section->mr;
967     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
968     if (!cpu->can_do_io) {
969         cpu_io_recompile(cpu, retaddr);
970     }
971     cpu->mem_io_pc = retaddr;
972 
973     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
974         qemu_mutex_lock_iothread();
975         locked = true;
976     }
977     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
978     if (r != MEMTX_OK) {
979         hwaddr physaddr = mr_offset +
980             section->offset_within_address_space -
981             section->offset_within_region;
982 
983         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
984                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
985                                retaddr);
986     }
987     if (locked) {
988         qemu_mutex_unlock_iothread();
989     }
990 }
991 
992 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
993 {
994 #if TCG_OVERSIZED_GUEST
995     return *(target_ulong *)((uintptr_t)entry + ofs);
996 #else
997     /* ofs might correspond to .addr_write, so use atomic_read */
998     return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
999 #endif
1000 }
1001 
1002 /* Return true if ADDR is present in the victim tlb, and has been copied
1003    back to the main tlb.  */
1004 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1005                            size_t elt_ofs, target_ulong page)
1006 {
1007     size_t vidx;
1008 
1009     assert_cpu_is_self(env_cpu(env));
1010     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1011         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1012         target_ulong cmp;
1013 
1014         /* elt_ofs might correspond to .addr_write, so use atomic_read */
1015 #if TCG_OVERSIZED_GUEST
1016         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1017 #else
1018         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1019 #endif
1020 
1021         if (cmp == page) {
1022             /* Found entry in victim tlb, swap tlb and iotlb.  */
1023             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1024 
1025             qemu_spin_lock(&env_tlb(env)->c.lock);
1026             copy_tlb_helper_locked(&tmptlb, tlb);
1027             copy_tlb_helper_locked(tlb, vtlb);
1028             copy_tlb_helper_locked(vtlb, &tmptlb);
1029             qemu_spin_unlock(&env_tlb(env)->c.lock);
1030 
1031             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1032             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1033             tmpio = *io; *io = *vio; *vio = tmpio;
1034             return true;
1035         }
1036     }
1037     return false;
1038 }
1039 
/*
 * Shorthand for victim_tlb_hit() that picks up "env", "mmu_idx" and
 * "index" from the scope in which it is expanded.  TY names the
 * CPUTLBEntry comparator field; ADDR is masked down to its page.
 */
#define VICTIM_TLB_HIT(TY, ADDR)                    \
    victim_tlb_hit(env, mmu_idx, index,             \
                   offsetof(CPUTLBEntry, TY),       \
                   (ADDR) & TARGET_PAGE_MASK)
1044 
1045 /*
1046  * Return a ram_addr_t for the virtual address for execution.
1047  *
1048  * Return -1 if we can't translate and execute from an entire page
1049  * of RAM.  This will force us to execute by loading and translating
1050  * one insn at a time, without caching.
1051  *
1052  * NOTE: This function will trigger an exception if the page is
1053  * not executable.
1054  */
1055 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1056                                         void **hostp)
1057 {
1058     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1059     uintptr_t index = tlb_index(env, mmu_idx, addr);
1060     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1061     void *p;
1062 
1063     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1064         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1065             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1066             index = tlb_index(env, mmu_idx, addr);
1067             entry = tlb_entry(env, mmu_idx, addr);
1068 
1069             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1070                 /*
1071                  * The MMU protection covers a smaller range than a target
1072                  * page, so we must redo the MMU check for every insn.
1073                  */
1074                 return -1;
1075             }
1076         }
1077         assert(tlb_hit(entry->addr_code, addr));
1078     }
1079 
1080     if (unlikely(entry->addr_code & TLB_MMIO)) {
1081         /* The region is not backed by RAM.  */
1082         if (hostp) {
1083             *hostp = NULL;
1084         }
1085         return -1;
1086     }
1087 
1088     p = (void *)((uintptr_t)addr + entry->addend);
1089     if (hostp) {
1090         *hostp = p;
1091     }
1092     return qemu_ram_addr_from_host_nofail(p);
1093 }
1094 
1095 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1096 {
1097     return get_page_addr_code_hostp(env, addr, NULL);
1098 }
1099 
1100 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1101                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1102 {
1103     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1104 
1105     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1106 
1107     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1108         struct page_collection *pages
1109             = page_collection_lock(ram_addr, ram_addr + size);
1110         tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1111         page_collection_unlock(pages);
1112     }
1113 
1114     /*
1115      * Set both VGA and migration bits for simplicity and to remove
1116      * the notdirty callback faster.
1117      */
1118     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1119 
1120     /* We remove the notdirty callback only if the code has been flushed. */
1121     if (!cpu_physical_memory_is_clean(ram_addr)) {
1122         trace_memory_notdirty_set_dirty(mem_vaddr);
1123         tlb_set_dirty(cpu, mem_vaddr);
1124     }
1125 }
1126 
1127 /*
1128  * Probe for whether the specified guest access is permitted. If it is not
1129  * permitted then an exception will be taken in the same way as if this
1130  * were a real access (and we will not return).
1131  * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1132  * returns the address of the host page similar to tlb_vaddr_to_host().
1133  */
1134 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1135                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1136 {
1137     uintptr_t index = tlb_index(env, mmu_idx, addr);
1138     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1139     target_ulong tlb_addr;
1140     size_t elt_ofs;
1141     int wp_access;
1142 
1143     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1144 
1145     switch (access_type) {
1146     case MMU_DATA_LOAD:
1147         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1148         wp_access = BP_MEM_READ;
1149         break;
1150     case MMU_DATA_STORE:
1151         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1152         wp_access = BP_MEM_WRITE;
1153         break;
1154     case MMU_INST_FETCH:
1155         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1156         wp_access = BP_MEM_READ;
1157         break;
1158     default:
1159         g_assert_not_reached();
1160     }
1161     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1162 
1163     if (unlikely(!tlb_hit(tlb_addr, addr))) {
1164         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1165                             addr & TARGET_PAGE_MASK)) {
1166             tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1167             /* TLB resize via tlb_fill may have moved the entry. */
1168             index = tlb_index(env, mmu_idx, addr);
1169             entry = tlb_entry(env, mmu_idx, addr);
1170         }
1171         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1172     }
1173 
1174     if (!size) {
1175         return NULL;
1176     }
1177 
1178     if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1179         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1180 
1181         /* Reject I/O access, or other required slow-path.  */
1182         if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1183             return NULL;
1184         }
1185 
1186         /* Handle watchpoints.  */
1187         if (tlb_addr & TLB_WATCHPOINT) {
1188             cpu_check_watchpoint(env_cpu(env), addr, size,
1189                                  iotlbentry->attrs, wp_access, retaddr);
1190         }
1191 
1192         /* Handle clean RAM pages.  */
1193         if (tlb_addr & TLB_NOTDIRTY) {
1194             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1195         }
1196     }
1197 
1198     return (void *)((uintptr_t)addr + entry->addend);
1199 }
1200 
1201 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1202                         MMUAccessType access_type, int mmu_idx)
1203 {
1204     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1205     target_ulong tlb_addr, page;
1206     size_t elt_ofs;
1207 
1208     switch (access_type) {
1209     case MMU_DATA_LOAD:
1210         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1211         break;
1212     case MMU_DATA_STORE:
1213         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1214         break;
1215     case MMU_INST_FETCH:
1216         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1217         break;
1218     default:
1219         g_assert_not_reached();
1220     }
1221 
1222     page = addr & TARGET_PAGE_MASK;
1223     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1224 
1225     if (!tlb_hit_page(tlb_addr, page)) {
1226         uintptr_t index = tlb_index(env, mmu_idx, addr);
1227 
1228         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1229             CPUState *cs = env_cpu(env);
1230             CPUClass *cc = CPU_GET_CLASS(cs);
1231 
1232             if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1233                 /* Non-faulting page table read failed.  */
1234                 return NULL;
1235             }
1236 
1237             /* TLB resize via tlb_fill may have moved the entry.  */
1238             entry = tlb_entry(env, mmu_idx, addr);
1239         }
1240         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1241     }
1242 
1243     if (tlb_addr & ~TARGET_PAGE_MASK) {
1244         /* IO access */
1245         return NULL;
1246     }
1247 
1248     return (void *)((uintptr_t)addr + entry->addend);
1249 }
1250 
1251 
1252 #ifdef CONFIG_PLUGIN
1253 /*
1254  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1255  * This should be a hot path as we will have just looked this path up
1256  * in the softmmu lookup code (or helper). We don't handle re-fills or
1257  * checking the victim table. This is purely informational.
1258  *
1259  * This should never fail as the memory access being instrumented
1260  * should have just filled the TLB.
1261  */
1262 
1263 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1264                        bool is_store, struct qemu_plugin_hwaddr *data)
1265 {
1266     CPUArchState *env = cpu->env_ptr;
1267     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1268     uintptr_t index = tlb_index(env, mmu_idx, addr);
1269     target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1270 
1271     if (likely(tlb_hit(tlb_addr, addr))) {
1272         /* We must have an iotlb entry for MMIO */
1273         if (tlb_addr & TLB_MMIO) {
1274             CPUIOTLBEntry *iotlbentry;
1275             iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1276             data->is_io = true;
1277             data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1278             data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1279         } else {
1280             data->is_io = false;
1281             data->v.ram.hostaddr = addr + tlbe->addend;
1282         }
1283         return true;
1284     }
1285     return false;
1286 }
1287 
1288 #endif
1289 
1290 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1291  * operations, or io operations to proceed.  Return the host address.  */
1292 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1293                                TCGMemOpIdx oi, uintptr_t retaddr)
1294 {
1295     size_t mmu_idx = get_mmuidx(oi);
1296     uintptr_t index = tlb_index(env, mmu_idx, addr);
1297     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1298     target_ulong tlb_addr = tlb_addr_write(tlbe);
1299     MemOp mop = get_memop(oi);
1300     int a_bits = get_alignment_bits(mop);
1301     int s_bits = mop & MO_SIZE;
1302     void *hostaddr;
1303 
1304     /* Adjust the given return address.  */
1305     retaddr -= GETPC_ADJ;
1306 
1307     /* Enforce guest required alignment.  */
1308     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1309         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1310         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1311                              mmu_idx, retaddr);
1312     }
1313 
1314     /* Enforce qemu required alignment.  */
1315     if (unlikely(addr & ((1 << s_bits) - 1))) {
1316         /* We get here if guest alignment was not requested,
1317            or was not enforced by cpu_unaligned_access above.
1318            We might widen the access and emulate, but for now
1319            mark an exception and exit the cpu loop.  */
1320         goto stop_the_world;
1321     }
1322 
1323     /* Check TLB entry and enforce page permissions.  */
1324     if (!tlb_hit(tlb_addr, addr)) {
1325         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1326             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1327                      mmu_idx, retaddr);
1328             index = tlb_index(env, mmu_idx, addr);
1329             tlbe = tlb_entry(env, mmu_idx, addr);
1330         }
1331         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1332     }
1333 
1334     /* Notice an IO access or a needs-MMU-lookup access */
1335     if (unlikely(tlb_addr & TLB_MMIO)) {
1336         /* There's really nothing that can be done to
1337            support this apart from stop-the-world.  */
1338         goto stop_the_world;
1339     }
1340 
1341     /* Let the guest notice RMW on a write-only page.  */
1342     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1343         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1344                  mmu_idx, retaddr);
1345         /* Since we don't support reads and writes to different addresses,
1346            and we do have the proper page loaded for write, this shouldn't
1347            ever return.  But just in case, handle via stop-the-world.  */
1348         goto stop_the_world;
1349     }
1350 
1351     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1352 
1353     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1354         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1355                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1356     }
1357 
1358     return hostaddr;
1359 
1360  stop_the_world:
1361     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1362 }
1363 
1364 /*
1365  * Load Helpers
1366  *
1367  * We support two different access types. SOFTMMU_CODE_ACCESS is
1368  * specifically for reading instructions from system memory. It is
1369  * called by the translation loop and in some helpers where the code
1370  * is disassembled. It shouldn't be called directly by guest code.
1371  */
1372 
1373 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1374                                 TCGMemOpIdx oi, uintptr_t retaddr);
1375 
1376 static inline uint64_t QEMU_ALWAYS_INLINE
1377 load_memop(const void *haddr, MemOp op)
1378 {
1379     switch (op) {
1380     case MO_UB:
1381         return ldub_p(haddr);
1382     case MO_BEUW:
1383         return lduw_be_p(haddr);
1384     case MO_LEUW:
1385         return lduw_le_p(haddr);
1386     case MO_BEUL:
1387         return (uint32_t)ldl_be_p(haddr);
1388     case MO_LEUL:
1389         return (uint32_t)ldl_le_p(haddr);
1390     case MO_BEQ:
1391         return ldq_be_p(haddr);
1392     case MO_LEQ:
1393         return ldq_le_p(haddr);
1394     default:
1395         qemu_build_not_reached();
1396     }
1397 }
1398 
1399 static inline uint64_t QEMU_ALWAYS_INLINE
1400 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1401             uintptr_t retaddr, MemOp op, bool code_read,
1402             FullLoadHelper *full_load)
1403 {
1404     uintptr_t mmu_idx = get_mmuidx(oi);
1405     uintptr_t index = tlb_index(env, mmu_idx, addr);
1406     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1407     target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1408     const size_t tlb_off = code_read ?
1409         offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1410     const MMUAccessType access_type =
1411         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1412     unsigned a_bits = get_alignment_bits(get_memop(oi));
1413     void *haddr;
1414     uint64_t res;
1415     size_t size = memop_size(op);
1416 
1417     /* Handle CPU specific unaligned behaviour */
1418     if (addr & ((1 << a_bits) - 1)) {
1419         cpu_unaligned_access(env_cpu(env), addr, access_type,
1420                              mmu_idx, retaddr);
1421     }
1422 
1423     /* If the TLB entry is for a different page, reload and try again.  */
1424     if (!tlb_hit(tlb_addr, addr)) {
1425         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1426                             addr & TARGET_PAGE_MASK)) {
1427             tlb_fill(env_cpu(env), addr, size,
1428                      access_type, mmu_idx, retaddr);
1429             index = tlb_index(env, mmu_idx, addr);
1430             entry = tlb_entry(env, mmu_idx, addr);
1431         }
1432         tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1433         tlb_addr &= ~TLB_INVALID_MASK;
1434     }
1435 
1436     /* Handle anything that isn't just a straight memory access.  */
1437     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1438         CPUIOTLBEntry *iotlbentry;
1439         bool need_swap;
1440 
1441         /* For anything that is unaligned, recurse through full_load.  */
1442         if ((addr & (size - 1)) != 0) {
1443             goto do_unaligned_access;
1444         }
1445 
1446         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1447 
1448         /* Handle watchpoints.  */
1449         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1450             /* On watchpoint hit, this will longjmp out.  */
1451             cpu_check_watchpoint(env_cpu(env), addr, size,
1452                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
1453         }
1454 
1455         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1456 
1457         /* Handle I/O access.  */
1458         if (likely(tlb_addr & TLB_MMIO)) {
1459             return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1460                             access_type, op ^ (need_swap * MO_BSWAP));
1461         }
1462 
1463         haddr = (void *)((uintptr_t)addr + entry->addend);
1464 
1465         /*
1466          * Keep these two load_memop separate to ensure that the compiler
1467          * is able to fold the entire function to a single instruction.
1468          * There is a build-time assert inside to remind you of this.  ;-)
1469          */
1470         if (unlikely(need_swap)) {
1471             return load_memop(haddr, op ^ MO_BSWAP);
1472         }
1473         return load_memop(haddr, op);
1474     }
1475 
1476     /* Handle slow unaligned access (it spans two pages or IO).  */
1477     if (size > 1
1478         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1479                     >= TARGET_PAGE_SIZE)) {
1480         target_ulong addr1, addr2;
1481         uint64_t r1, r2;
1482         unsigned shift;
1483     do_unaligned_access:
1484         addr1 = addr & ~((target_ulong)size - 1);
1485         addr2 = addr1 + size;
1486         r1 = full_load(env, addr1, oi, retaddr);
1487         r2 = full_load(env, addr2, oi, retaddr);
1488         shift = (addr & (size - 1)) * 8;
1489 
1490         if (memop_big_endian(op)) {
1491             /* Big-endian combine.  */
1492             res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1493         } else {
1494             /* Little-endian combine.  */
1495             res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1496         }
1497         return res & MAKE_64BIT_MASK(0, size * 8);
1498     }
1499 
1500     haddr = (void *)((uintptr_t)addr + entry->addend);
1501     return load_memop(haddr, op);
1502 }
1503 
1504 /*
1505  * For the benefit of TCG generated code, we want to avoid the
1506  * complication of ABI-specific return type promotion and always
1507  * return a value extended to the register size of the host. This is
1508  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1509  * data, and for that we always have uint64_t.
1510  *
1511  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1512  */
1513 
1514 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1515                               TCGMemOpIdx oi, uintptr_t retaddr)
1516 {
1517     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1518 }
1519 
1520 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1521                                      TCGMemOpIdx oi, uintptr_t retaddr)
1522 {
1523     return full_ldub_mmu(env, addr, oi, retaddr);
1524 }
1525 
1526 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1527                                  TCGMemOpIdx oi, uintptr_t retaddr)
1528 {
1529     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1530                        full_le_lduw_mmu);
1531 }
1532 
1533 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1534                                     TCGMemOpIdx oi, uintptr_t retaddr)
1535 {
1536     return full_le_lduw_mmu(env, addr, oi, retaddr);
1537 }
1538 
1539 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1540                                  TCGMemOpIdx oi, uintptr_t retaddr)
1541 {
1542     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1543                        full_be_lduw_mmu);
1544 }
1545 
1546 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1547                                     TCGMemOpIdx oi, uintptr_t retaddr)
1548 {
1549     return full_be_lduw_mmu(env, addr, oi, retaddr);
1550 }
1551 
1552 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1553                                  TCGMemOpIdx oi, uintptr_t retaddr)
1554 {
1555     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1556                        full_le_ldul_mmu);
1557 }
1558 
1559 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1560                                     TCGMemOpIdx oi, uintptr_t retaddr)
1561 {
1562     return full_le_ldul_mmu(env, addr, oi, retaddr);
1563 }
1564 
1565 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1566                                  TCGMemOpIdx oi, uintptr_t retaddr)
1567 {
1568     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1569                        full_be_ldul_mmu);
1570 }
1571 
1572 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1573                                     TCGMemOpIdx oi, uintptr_t retaddr)
1574 {
1575     return full_be_ldul_mmu(env, addr, oi, retaddr);
1576 }
1577 
1578 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1579                            TCGMemOpIdx oi, uintptr_t retaddr)
1580 {
1581     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1582                        helper_le_ldq_mmu);
1583 }
1584 
1585 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1586                            TCGMemOpIdx oi, uintptr_t retaddr)
1587 {
1588     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1589                        helper_be_ldq_mmu);
1590 }
1591 
1592 /*
1593  * Provide signed versions of the load routines as well.  We can of course
1594  * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1595  */
1596 
1597 
1598 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1599                                      TCGMemOpIdx oi, uintptr_t retaddr)
1600 {
1601     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1602 }
1603 
1604 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1605                                     TCGMemOpIdx oi, uintptr_t retaddr)
1606 {
1607     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1608 }
1609 
1610 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1611                                     TCGMemOpIdx oi, uintptr_t retaddr)
1612 {
1613     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1614 }
1615 
1616 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1617                                     TCGMemOpIdx oi, uintptr_t retaddr)
1618 {
1619     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1620 }
1621 
1622 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1623                                     TCGMemOpIdx oi, uintptr_t retaddr)
1624 {
1625     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1626 }
1627 
1628 /*
1629  * Store Helpers
1630  */
1631 
1632 static inline void QEMU_ALWAYS_INLINE
1633 store_memop(void *haddr, uint64_t val, MemOp op)
1634 {
1635     switch (op) {
1636     case MO_UB:
1637         stb_p(haddr, val);
1638         break;
1639     case MO_BEUW:
1640         stw_be_p(haddr, val);
1641         break;
1642     case MO_LEUW:
1643         stw_le_p(haddr, val);
1644         break;
1645     case MO_BEUL:
1646         stl_be_p(haddr, val);
1647         break;
1648     case MO_LEUL:
1649         stl_le_p(haddr, val);
1650         break;
1651     case MO_BEQ:
1652         stq_be_p(haddr, val);
1653         break;
1654     case MO_LEQ:
1655         stq_le_p(haddr, val);
1656         break;
1657     default:
1658         qemu_build_not_reached();
1659     }
1660 }
1661 
/*
 * store_helper: common body shared by the helper_*_st*_mmu() entry points.
 *
 * @env:     CPU state whose TLB is consulted.
 * @addr:    guest virtual address of the store.
 * @val:     value to store; the width written is memop_size(@op).
 * @oi:      combined operand; get_mmuidx(@oi) selects the MMU index and
 *           get_memop(@oi) supplies the alignment requirement.
 * @retaddr: passed through unchanged to cpu_unaligned_access(), tlb_fill(),
 *           cpu_check_watchpoint(), io_writex(), notdirty_write() and the
 *           recursive byte stores.
 * @op:      size and endianness of the store.  Every caller visible in this
 *           part of the file passes a constant.
 *
 * Outline:
 *  1. If @addr violates the alignment demanded by @oi, call
 *     cpu_unaligned_access().
 *  2. Look up the write TLB entry; on a miss try victim_tlb_hit() and then
 *     tlb_fill(), re-reading index and entry after the fill.
 *  3. If the TLB address carries any bits below TARGET_PAGE_MASK, take the
 *     special path: watchpoints, byte swapping, MMIO, discarded writes and
 *     not-dirty pages.  Accesses not aligned to their own size jump to the
 *     byte-wise path in step 4.
 *  4. If the access crosses a page boundary, make sure the second page is
 *     in the TLB, check watchpoints for both parts, and store byte by byte
 *     through helper_ret_stb_mmu().
 *  5. Otherwise store directly to host memory at addr + entry->addend.
 */
1662 static inline void QEMU_ALWAYS_INLINE
1663 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1664              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1665 {
1666     uintptr_t mmu_idx = get_mmuidx(oi);
1667     uintptr_t index = tlb_index(env, mmu_idx, addr);
1668     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1669     target_ulong tlb_addr = tlb_addr_write(entry);
1670     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1671     unsigned a_bits = get_alignment_bits(get_memop(oi));
1672     void *haddr;
1673     size_t size = memop_size(op);
1674 
1675     /* Handle CPU specific unaligned behaviour */
1676     if (addr & ((1 << a_bits) - 1)) {
1677         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1678                              mmu_idx, retaddr);
1679     }
1680 
1681     /* If the TLB entry is for a different page, reload and try again.  */
1682     if (!tlb_hit(tlb_addr, addr)) {
1683         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1684             addr & TARGET_PAGE_MASK)) {
1685             tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1686                      mmu_idx, retaddr);
             /* index and entry are recomputed after the fill. */
1687             index = tlb_index(env, mmu_idx, addr);
1688             entry = tlb_entry(env, mmu_idx, addr);
1689         }
         /* Refresh the local copy, with TLB_INVALID_MASK cleared. */
1690         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1691     }
1692 
1693     /* Handle anything that isn't just a straight memory access.  */
1694     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1695         CPUIOTLBEntry *iotlbentry;
1696         bool need_swap;
1697 
1698         /* For anything that is unaligned, recurse through byte stores.  */
1699         if ((addr & (size - 1)) != 0) {
1700             goto do_unaligned_access;
1701         }
1702 
1703         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1704 
1705         /* Handle watchpoints.  */
1706         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1707             /* On watchpoint hit, this will longjmp out.  */
1708             cpu_check_watchpoint(env_cpu(env), addr, size,
1709                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1710         }
1711 
         /* A byte swap is only meaningful for multi-byte accesses. */
1712         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1713 
1714         /* Handle I/O access.  */
1715         if (tlb_addr & TLB_MMIO) {
             /* need_swap is 0 or 1, so this toggles MO_BSWAP in op or not. */
1716             io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1717                       op ^ (need_swap * MO_BSWAP));
1718             return;
1719         }
1720 
1721         /* Ignore writes to ROM.  */
1722         if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1723             return;
1724         }
1725 
1726         /* Handle clean RAM pages.  */
1727         if (tlb_addr & TLB_NOTDIRTY) {
1728             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1729         }
1730 
1731         haddr = (void *)((uintptr_t)addr + entry->addend);
1732 
1733         /*
1734          * Keep these two store_memop separate to ensure that the compiler
1735          * is able to fold the entire function to a single instruction.
1736          * There is a build-time assert inside to remind you of this.  ;-)
1737          */
1738         if (unlikely(need_swap)) {
1739             store_memop(haddr, val, op ^ MO_BSWAP);
1740         } else {
1741             store_memop(haddr, val, op);
1742         }
1743         return;
1744     }
1745 
1746     /* Handle slow unaligned access (it spans two pages or IO).  */
1747     if (size > 1
1748         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1749                      >= TARGET_PAGE_SIZE)) {
1750         int i;
1751         uintptr_t index2;
1752         CPUTLBEntry *entry2;
1753         target_ulong page2, tlb_addr2;
1754         size_t size2;
1755 
         /*
          * Also entered by the goto above, i.e. for accesses that carry
          * TLB flag bits and are not aligned to their own size.
          */
1756     do_unaligned_access:
1757         /*
1758          * Ensure the second page is in the TLB.  Note that the first page
1759          * is already guaranteed to be filled, and that the second page
1760          * cannot evict the first.
1761          */
         /*
          * page2 is the page containing the byte just past the access and
          * size2 is the offset of that byte within page2.
          * NOTE(review): when this label is reached through the goto for an
          * access that does not cross a page, page2 looks to be the same
          * page as addr and size2 the in-page offset of the end of the
          * access, so "size - size2" below may wrap - confirm intended.
          */
1762         page2 = (addr + size) & TARGET_PAGE_MASK;
1763         size2 = (addr + size) & ~TARGET_PAGE_MASK;
1764         index2 = tlb_index(env, mmu_idx, page2);
1765         entry2 = tlb_entry(env, mmu_idx, page2);
1766         tlb_addr2 = tlb_addr_write(entry2);
1767         if (!tlb_hit_page(tlb_addr2, page2)) {
1768             if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
1769                 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
1770                          mmu_idx, retaddr);
1771                 index2 = tlb_index(env, mmu_idx, page2);
1772                 entry2 = tlb_entry(env, mmu_idx, page2);
1773             }
1774             tlb_addr2 = tlb_addr_write(entry2);
1775         }
1776 
1777         /*
1778          * Handle watchpoints.  Since this may trap, all checks
1779          * must happen before any store.
1780          */
1781         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1782             cpu_check_watchpoint(env_cpu(env), addr, size - size2,
1783                                  env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
1784                                  BP_MEM_WRITE, retaddr);
1785         }
1786         if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
1787             cpu_check_watchpoint(env_cpu(env), page2, size2,
1788                                  env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
1789                                  BP_MEM_WRITE, retaddr);
1790         }
1791 
1792         /*
1793          * XXX: not efficient, but simple.
1794          * This loop must go in the forward direction to avoid issues
1795          * with self-modifying code in Windows 64-bit.
1796          */
1797         for (i = 0; i < size; ++i) {
1798             uint8_t val8;
1799             if (memop_big_endian(op)) {
1800                 /* Big-endian extract.  */
1801                 val8 = val >> (((size - 1) * 8) - (i * 8));
1802             } else {
1803                 /* Little-endian extract.  */
1804                 val8 = val >> (i * 8);
1805             }
             /* Each byte goes through the full single-byte store path. */
1806             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
1807         }
1808         return;
1809     }
1810 
     /* Plain case: no TLB flag bits set and no page crossing. */
1811     haddr = (void *)((uintptr_t)addr + entry->addend);
1812     store_memop(haddr, val, op);
1813 }
1814 
1815 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
1816                         TCGMemOpIdx oi, uintptr_t retaddr)
1817 {
1818     store_helper(env, addr, val, oi, retaddr, MO_UB);
1819 }
1820 
1821 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1822                        TCGMemOpIdx oi, uintptr_t retaddr)
1823 {
1824     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
1825 }
1826 
1827 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1828                        TCGMemOpIdx oi, uintptr_t retaddr)
1829 {
1830     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
1831 }
1832 
1833 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1834                        TCGMemOpIdx oi, uintptr_t retaddr)
1835 {
1836     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
1837 }
1838 
1839 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1840                        TCGMemOpIdx oi, uintptr_t retaddr)
1841 {
1842     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
1843 }
1844 
1845 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1846                        TCGMemOpIdx oi, uintptr_t retaddr)
1847 {
1848     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
1849 }
1850 
1851 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1852                        TCGMemOpIdx oi, uintptr_t retaddr)
1853 {
1854     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
1855 }
1856 
1857 /* First set of helpers allows passing in of OI and RETADDR.  This makes
1858    them callable from other helpers.  */
1859 
/*
 * Macro configuration for the includes below (presumably consumed by
 * atomic_common.inc.c and atomic_template.h - confirm there):
 *  - EXTRA_ARGS appends the "oi" and "retaddr" parameters.
 *  - ATOMIC_NAME(X) builds HELPER(atomic_<X><SUFFIX><END>_mmu); SUFFIX and
 *    END are not defined in this part of the file.
 *  - ATOMIC_MMU_LOOKUP calls atomic_mmu_lookup() with the caller's retaddr.
 *  - ATOMIC_MMU_DECLS and ATOMIC_MMU_CLEANUP expand to nothing.
 *  - ATOMIC_MMU_IDX extracts the MMU index from oi.
 */
1860 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1861 #define ATOMIC_NAME(X) \
1862     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1863 #define ATOMIC_MMU_DECLS
1864 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
1865 #define ATOMIC_MMU_CLEANUP
1866 #define ATOMIC_MMU_IDX   get_mmuidx(oi)
1867 
1868 #include "atomic_common.inc.c"
1869 
/*
 * The template is included once per operand size.  DATA_SIZE is redefined
 * before each include with no #undef here, so the template presumably
 * undefines it itself - confirm in atomic_template.h.
 */
1870 #define DATA_SIZE 1
1871 #include "atomic_template.h"
1872 
1873 #define DATA_SIZE 2
1874 #include "atomic_template.h"
1875 
1876 #define DATA_SIZE 4
1877 #include "atomic_template.h"
1878 
/* 8-byte helpers are only generated when CONFIG_ATOMIC64 is defined. */
1879 #ifdef CONFIG_ATOMIC64
1880 #define DATA_SIZE 8
1881 #include "atomic_template.h"
1882 #endif
1883 
/* 16-byte helpers need either HAVE_CMPXCHG128 or HAVE_ATOMIC128. */
1884 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1885 #define DATA_SIZE 16
1886 #include "atomic_template.h"
1887 #endif
1888 
1889 /* Second set of helpers are directly callable from TCG as helpers.  */
1890 
/*
 * Redefine the template macros for this set:
 *  - EXTRA_ARGS now appends only "oi" (no retaddr parameter).
 *  - ATOMIC_NAME(X) builds HELPER(atomic_<X><SUFFIX><END>), without the
 *    "_mmu" suffix.
 *  - ATOMIC_MMU_LOOKUP passes GETPC() where the first set passed retaddr.
 * The other ATOMIC_MMU_* macros keep their earlier definitions.
 */
1891 #undef EXTRA_ARGS
1892 #undef ATOMIC_NAME
1893 #undef ATOMIC_MMU_LOOKUP
1894 #define EXTRA_ARGS         , TCGMemOpIdx oi
1895 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1896 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
1897 
/* Sizes 1, 2 and 4 are always instantiated; there is no 16-byte case here. */
1898 #define DATA_SIZE 1
1899 #include "atomic_template.h"
1900 
1901 #define DATA_SIZE 2
1902 #include "atomic_template.h"
1903 
1904 #define DATA_SIZE 4
1905 #include "atomic_template.h"
1906 
/* 8-byte helpers are only generated when CONFIG_ATOMIC64 is defined. */
1907 #ifdef CONFIG_ATOMIC64
1908 #define DATA_SIZE 8
1909 #include "atomic_template.h"
1910 #endif
/* ATOMIC_MMU_IDX is not needed past this point. */
1911 #undef ATOMIC_MMU_IDX
1912 
1913 /* Code access functions.  */
1914 
1915 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
1916                                TCGMemOpIdx oi, uintptr_t retaddr)
1917 {
1918     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
1919 }
1920 
1921 uint8_t helper_ret_ldub_cmmu(CPUArchState *env, target_ulong addr,
1922                             TCGMemOpIdx oi, uintptr_t retaddr)
1923 {
1924     return full_ldub_cmmu(env, addr, oi, retaddr);
1925 }
1926 
1927 int8_t helper_ret_ldsb_cmmu(CPUArchState *env, target_ulong addr,
1928                             TCGMemOpIdx oi, uintptr_t retaddr)
1929 {
1930     return (int8_t) full_ldub_cmmu(env, addr, oi, retaddr);
1931 }
1932 
1933 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
1934                                   TCGMemOpIdx oi, uintptr_t retaddr)
1935 {
1936     return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
1937                        full_le_lduw_cmmu);
1938 }
1939 
1940 uint16_t helper_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
1941                             TCGMemOpIdx oi, uintptr_t retaddr)
1942 {
1943     return full_le_lduw_cmmu(env, addr, oi, retaddr);
1944 }
1945 
1946 int16_t helper_le_ldsw_cmmu(CPUArchState *env, target_ulong addr,
1947                             TCGMemOpIdx oi, uintptr_t retaddr)
1948 {
1949     return (int16_t) full_le_lduw_cmmu(env, addr, oi, retaddr);
1950 }
1951 
1952 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
1953                                   TCGMemOpIdx oi, uintptr_t retaddr)
1954 {
1955     return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
1956                        full_be_lduw_cmmu);
1957 }
1958 
1959 uint16_t helper_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
1960                             TCGMemOpIdx oi, uintptr_t retaddr)
1961 {
1962     return full_be_lduw_cmmu(env, addr, oi, retaddr);
1963 }
1964 
1965 int16_t helper_be_ldsw_cmmu(CPUArchState *env, target_ulong addr,
1966                             TCGMemOpIdx oi, uintptr_t retaddr)
1967 {
1968     return (int16_t) full_be_lduw_cmmu(env, addr, oi, retaddr);
1969 }
1970 
1971 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
1972                                   TCGMemOpIdx oi, uintptr_t retaddr)
1973 {
1974     return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
1975                        full_le_ldul_cmmu);
1976 }
1977 
1978 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
1979                             TCGMemOpIdx oi, uintptr_t retaddr)
1980 {
1981     return full_le_ldul_cmmu(env, addr, oi, retaddr);
1982 }
1983 
1984 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
1985                                   TCGMemOpIdx oi, uintptr_t retaddr)
1986 {
1987     return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
1988                        full_be_ldul_cmmu);
1989 }
1990 
1991 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
1992                             TCGMemOpIdx oi, uintptr_t retaddr)
1993 {
1994     return full_be_ldul_cmmu(env, addr, oi, retaddr);
1995 }
1996 
1997 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
1998                             TCGMemOpIdx oi, uintptr_t retaddr)
1999 {
2000     return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
2001                        helper_le_ldq_cmmu);
2002 }
2003 
2004 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
2005                             TCGMemOpIdx oi, uintptr_t retaddr)
2006 {
2007     return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
2008                        helper_be_ldq_cmmu);
2009 }
2010