xref: /openbmc/qemu/accel/tcg/cputlb.c (revision 671872b6731ed746f025566e3ef2bc8d5ec1a779)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 
/*
 * Debug switches.
 *
 * Uncomment DEBUG_TLB to turn on DEBUG_TLB_GATE.  Additionally uncomment
 * DEBUG_TLB_LOG to turn on DEBUG_TLB_LOG_GATE, which sends the debug
 * output to the CPU_LOG_MMU log target.  DEBUG_TLB_LOG has no effect
 * unless DEBUG_TLB is defined as well.
 */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#if !defined(DEBUG_TLB)
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#elif defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_GATE 1
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_GATE 1
# define DEBUG_TLB_LOG_GATE 0
#endif
52 
/*
 * tlb_debug: printf-style trace helper, prefixed with the calling
 * function's name.
 *
 * Output goes to the CPU_LOG_MMU log when DEBUG_TLB_LOG_GATE is set,
 * otherwise to stderr when DEBUG_TLB_GATE is set, otherwise nowhere.
 * The gates are compile-time constants, so the arguments are always
 * type-checked even when nothing is printed.
 */
#define tlb_debug(fmt, ...)                                             \
    do {                                                                \
        if (!DEBUG_TLB_LOG_GATE) {                                      \
            if (DEBUG_TLB_GATE) {                                       \
                fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__);  \
            }                                                           \
        } else {                                                        \
            qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__,            \
                          ## __VA_ARGS__);                              \
        }                                                               \
    } while (0)
61 
/*
 * assert_cpu_is_self: when DEBUG_TLB_GATE is enabled, assert that @cpu
 * is either not yet created or is the CPU of the calling thread.
 * qemu_cpu_is_self() is only evaluated once cpu->created is true.
 */
#define assert_cpu_is_self(cpu)                                         \
    do {                                                                \
        if (DEBUG_TLB_GATE) {                                           \
            g_assert(!((cpu)->created && !qemu_cpu_is_self(cpu)));      \
        }                                                               \
    } while (0)
67 
68 /* run_on_cpu_data.target_ptr should always be big enough for a
69  * target_ulong even on 32 bit builds */
70 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
71 
/*
 * We currently can't handle more than 16 bits in the MMUIDX bitmask
 * (the idxmap arguments in this file are uint16_t).
 *
 * ALL_MMUIDX_BITS is the bitmask with one bit set for every MMU mode,
 * i.e. bits 0 .. NB_MMU_MODES-1.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
76 
77 void tlb_init(CPUState *cpu)
78 {
79     CPUArchState *env = cpu->env_ptr;
80 
81     qemu_spin_init(&env->tlb_c.lock);
82 
83     /* Ensure that cpu_reset performs a full flush.  */
84     env->tlb_c.dirty = ALL_MMUIDX_BITS;
85 }
86 
87 /* flush_all_helper: run fn across all cpus
88  *
89  * If the wait flag is set then the src cpu's helper will be queued as
90  * "safe" work and the loop exited creating a synchronisation point
91  * where all queued work will be finished before execution starts
92  * again.
93  */
94 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
95                              run_on_cpu_data d)
96 {
97     CPUState *cpu;
98 
99     CPU_FOREACH(cpu) {
100         if (cpu != src) {
101             async_run_on_cpu(cpu, fn, d);
102         }
103     }
104 }
105 
106 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
107 {
108     CPUState *cpu;
109     size_t full = 0, part = 0, elide = 0;
110 
111     CPU_FOREACH(cpu) {
112         CPUArchState *env = cpu->env_ptr;
113 
114         full += atomic_read(&env->tlb_c.full_flush_count);
115         part += atomic_read(&env->tlb_c.part_flush_count);
116         elide += atomic_read(&env->tlb_c.elide_flush_count);
117     }
118     *pfull = full;
119     *ppart = part;
120     *pelide = elide;
121 }
122 
123 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
124 {
125     memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
126     memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
127     env->tlb_d[mmu_idx].large_page_addr = -1;
128     env->tlb_d[mmu_idx].large_page_mask = -1;
129     env->tlb_d[mmu_idx].vindex = 0;
130 }
131 
132 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
133 {
134     CPUArchState *env = cpu->env_ptr;
135     uint16_t asked = data.host_int;
136     uint16_t all_dirty, work, to_clean;
137 
138     assert_cpu_is_self(cpu);
139 
140     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
141 
142     qemu_spin_lock(&env->tlb_c.lock);
143 
144     all_dirty = env->tlb_c.dirty;
145     to_clean = asked & all_dirty;
146     all_dirty &= ~to_clean;
147     env->tlb_c.dirty = all_dirty;
148 
149     for (work = to_clean; work != 0; work &= work - 1) {
150         int mmu_idx = ctz32(work);
151         tlb_flush_one_mmuidx_locked(env, mmu_idx);
152     }
153 
154     qemu_spin_unlock(&env->tlb_c.lock);
155 
156     cpu_tb_jmp_cache_clear(cpu);
157 
158     if (to_clean == ALL_MMUIDX_BITS) {
159         atomic_set(&env->tlb_c.full_flush_count,
160                    env->tlb_c.full_flush_count + 1);
161     } else {
162         atomic_set(&env->tlb_c.part_flush_count,
163                    env->tlb_c.part_flush_count + ctpop16(to_clean));
164         if (to_clean != asked) {
165             atomic_set(&env->tlb_c.elide_flush_count,
166                        env->tlb_c.elide_flush_count +
167                        ctpop16(asked & ~to_clean));
168         }
169     }
170 }
171 
172 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
173 {
174     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
175 
176     if (cpu->created && !qemu_cpu_is_self(cpu)) {
177         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
178                          RUN_ON_CPU_HOST_INT(idxmap));
179     } else {
180         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
181     }
182 }
183 
184 void tlb_flush(CPUState *cpu)
185 {
186     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
187 }
188 
189 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
190 {
191     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
192 
193     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
194 
195     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
196     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
197 }
198 
199 void tlb_flush_all_cpus(CPUState *src_cpu)
200 {
201     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
202 }
203 
204 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
205 {
206     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
207 
208     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
209 
210     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
211     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
212 }
213 
214 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
215 {
216     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
217 }
218 
219 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
220                                         target_ulong page)
221 {
222     return tlb_hit_page(tlb_entry->addr_read, page) ||
223            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
224            tlb_hit_page(tlb_entry->addr_code, page);
225 }
226 
227 /* Called with tlb_c.lock held */
228 static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
229                                           target_ulong page)
230 {
231     if (tlb_hit_page_anyprot(tlb_entry, page)) {
232         memset(tlb_entry, -1, sizeof(*tlb_entry));
233     }
234 }
235 
236 /* Called with tlb_c.lock held */
237 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
238                                               target_ulong page)
239 {
240     int k;
241 
242     assert_cpu_is_self(ENV_GET_CPU(env));
243     for (k = 0; k < CPU_VTLB_SIZE; k++) {
244         tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
245     }
246 }
247 
248 static void tlb_flush_page_locked(CPUArchState *env, int midx,
249                                   target_ulong page)
250 {
251     target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
252     target_ulong lp_mask = env->tlb_d[midx].large_page_mask;
253 
254     /* Check if we need to flush due to large pages.  */
255     if ((page & lp_mask) == lp_addr) {
256         tlb_debug("forcing full flush midx %d ("
257                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
258                   midx, lp_addr, lp_mask);
259         tlb_flush_one_mmuidx_locked(env, midx);
260     } else {
261         tlb_flush_entry_locked(tlb_entry(env, midx, page), page);
262         tlb_flush_vtlb_page_locked(env, midx, page);
263     }
264 }
265 
266 /* As we are going to hijack the bottom bits of the page address for a
267  * mmuidx bit mask we need to fail to build if we can't do that
268  */
269 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
270 
271 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
272                                                 run_on_cpu_data data)
273 {
274     CPUArchState *env = cpu->env_ptr;
275     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
276     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
277     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
278     int mmu_idx;
279 
280     assert_cpu_is_self(cpu);
281 
282     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
283               addr, mmu_idx_bitmap);
284 
285     qemu_spin_lock(&env->tlb_c.lock);
286     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
287         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
288             tlb_flush_page_locked(env, mmu_idx, addr);
289         }
290     }
291     qemu_spin_unlock(&env->tlb_c.lock);
292 
293     tb_flush_jmp_cache(cpu, addr);
294 }
295 
296 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
297 {
298     target_ulong addr_and_mmu_idx;
299 
300     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
301 
302     /* This should already be page aligned */
303     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
304     addr_and_mmu_idx |= idxmap;
305 
306     if (!qemu_cpu_is_self(cpu)) {
307         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
308                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
309     } else {
310         tlb_flush_page_by_mmuidx_async_work(
311             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
312     }
313 }
314 
315 void tlb_flush_page(CPUState *cpu, target_ulong addr)
316 {
317     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
318 }
319 
320 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
321                                        uint16_t idxmap)
322 {
323     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
324     target_ulong addr_and_mmu_idx;
325 
326     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
327 
328     /* This should already be page aligned */
329     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
330     addr_and_mmu_idx |= idxmap;
331 
332     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
333     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
334 }
335 
336 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
337 {
338     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
339 }
340 
341 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
342                                               target_ulong addr,
343                                               uint16_t idxmap)
344 {
345     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
346     target_ulong addr_and_mmu_idx;
347 
348     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
349 
350     /* This should already be page aligned */
351     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
352     addr_and_mmu_idx |= idxmap;
353 
354     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
355     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
356 }
357 
358 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
359 {
360     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
361 }
362 
363 /* update the TLBs so that writes to code in the virtual page 'addr'
364    can be detected */
365 void tlb_protect_code(ram_addr_t ram_addr)
366 {
367     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
368                                              DIRTY_MEMORY_CODE);
369 }
370 
371 /* update the TLB so that writes in physical page 'phys_addr' are no longer
372    tested for self modifying code */
373 void tlb_unprotect_code(ram_addr_t ram_addr)
374 {
375     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
376 }
377 
378 
379 /*
380  * Dirty write flag handling
381  *
382  * When the TCG code writes to a location it looks up the address in
383  * the TLB and uses that data to compute the final address. If any of
384  * the lower bits of the address are set then the slow path is forced.
385  * There are a number of reasons to do this but for normal RAM the
386  * most usual is detecting writes to code regions which may invalidate
387  * generated code.
388  *
389  * Other vCPUs might be reading their TLBs during guest execution, so we update
390  * te->addr_write with atomic_set. We don't need to worry about this for
391  * oversized guests as MTTCG is disabled for them.
392  *
393  * Called with tlb_c.lock held.
394  */
395 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
396                                          uintptr_t start, uintptr_t length)
397 {
398     uintptr_t addr = tlb_entry->addr_write;
399 
400     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
401         addr &= TARGET_PAGE_MASK;
402         addr += tlb_entry->addend;
403         if ((addr - start) < length) {
404 #if TCG_OVERSIZED_GUEST
405             tlb_entry->addr_write |= TLB_NOTDIRTY;
406 #else
407             atomic_set(&tlb_entry->addr_write,
408                        tlb_entry->addr_write | TLB_NOTDIRTY);
409 #endif
410         }
411     }
412 }
413 
/*
 * copy_tlb_helper_locked: copy the whole TLB entry @s into @d with a
 * plain structure assignment.
 *
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}
422 
423 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
424  * the target vCPU).
425  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
426  * thing actually updated is the target TLB entry ->addr_write flags.
427  */
428 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
429 {
430     CPUArchState *env;
431 
432     int mmu_idx;
433 
434     env = cpu->env_ptr;
435     qemu_spin_lock(&env->tlb_c.lock);
436     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
437         unsigned int i;
438 
439         for (i = 0; i < CPU_TLB_SIZE; i++) {
440             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
441                                          length);
442         }
443 
444         for (i = 0; i < CPU_VTLB_SIZE; i++) {
445             tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
446                                          length);
447         }
448     }
449     qemu_spin_unlock(&env->tlb_c.lock);
450 }
451 
452 /* Called with tlb_c.lock held */
453 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
454                                          target_ulong vaddr)
455 {
456     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
457         tlb_entry->addr_write = vaddr;
458     }
459 }
460 
461 /* update the TLB corresponding to virtual page vaddr
462    so that it is no longer dirty */
463 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
464 {
465     CPUArchState *env = cpu->env_ptr;
466     int mmu_idx;
467 
468     assert_cpu_is_self(cpu);
469 
470     vaddr &= TARGET_PAGE_MASK;
471     qemu_spin_lock(&env->tlb_c.lock);
472     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
473         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
474     }
475 
476     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
477         int k;
478         for (k = 0; k < CPU_VTLB_SIZE; k++) {
479             tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
480         }
481     }
482     qemu_spin_unlock(&env->tlb_c.lock);
483 }
484 
485 /* Our TLB does not support large pages, so remember the area covered by
486    large pages and trigger a full TLB flush if these are invalidated.  */
487 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
488                                target_ulong vaddr, target_ulong size)
489 {
490     target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
491     target_ulong lp_mask = ~(size - 1);
492 
493     if (lp_addr == (target_ulong)-1) {
494         /* No previous large page.  */
495         lp_addr = vaddr;
496     } else {
497         /* Extend the existing region to include the new page.
498            This is a compromise between unnecessary flushes and
499            the cost of maintaining a full variable size TLB.  */
500         lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
501         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
502             lp_mask <<= 1;
503         }
504     }
505     env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
506     env->tlb_d[mmu_idx].large_page_mask = lp_mask;
507 }
508 
/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * @cpu:     vCPU whose TLB is updated
 * @vaddr:   guest virtual address; masked down to its page below
 * @paddr:   guest physical address; masked down to its page below
 * @attrs:   memory transaction attributes, stored with the iotlb entry
 * @prot:    PAGE_READ / PAGE_WRITE / PAGE_EXEC permission bits (plus
 *           PAGE_WRITE_INV); may be modified by
 *           address_space_translate_for_iotlb(), which is passed &prot
 * @mmu_idx: MMU index whose tables receive the entry
 * @size:    size of the mapping.  Sizes above TARGET_PAGE_SIZE are
 *           recorded via tlb_add_large_page(); sizes below it cause the
 *           entry to be flagged TLB_RECHECK.
 *
 * The entry previously occupying the slot is moved to the victim TLB
 * unless it maps the same page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);

    assert_cpu_is_self(cpu);

    /* sz is the length offered to the translation; at least one page. */
    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    /* 'address' is the comparator value; flag bits go in its low bits. */
    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /*
         * Slow-path the TLB entries; we will repeat the MMU check and TLB
         * fill on every access.
         */
        address |= TLB_RECHECK;
    }
    if (!memory_region_is_ram(section->mr) &&
        !memory_region_is_romd(section->mr)) {
        /* IO memory case */
        address |= TLB_MMIO;
        addend = 0;
    } else {
        /* TLB_MMIO for rom/romd handled below */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    }

    /*
     * Snapshot the comparator for code fetches before
     * memory_region_section_get_iotlb() gets the chance to modify
     * 'address' through the pointer passed to it.
     */
    code_address = address;
    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
                                            paddr_page, xlat, prot, &address);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&env->tlb_c.lock);

    /* Note that the tlb is no longer clean.  */
    env->tlb_c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page)) {
        /* Victim slots are reused round-robin via the vindex counter. */
        unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
     *  + the offset within section->mr of the page base (otherwise)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
    env->iotlb[mmu_idx][index].attrs = attrs;

    /* Now calculate the new entry */
    /* addend turns a guest virtual address into a host address by addition. */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
    } else {
        /* -1 never matches a lookup, so the access kind is not permitted. */
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = code_address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        if ((memory_region_is_ram(section->mr) && section->readonly)
            || memory_region_is_romd(section->mr)) {
            /* Write access calls the I/O callback.  */
            tn.addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
                   && cpu_physical_memory_is_clean(
                       memory_region_get_ram_addr(section->mr) + xlat)) {
            /* Clean RAM: force the slow path on the first write. */
            tn.addr_write = address | TLB_NOTDIRTY;
        } else {
            tn.addr_write = address;
        }
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
    }

    /* Publish the fully built entry in one go, still under the lock. */
    copy_tlb_helper_locked(te, &tn);
    qemu_spin_unlock(&env->tlb_c.lock);
}
654 
655 /* Add a new TLB entry, but without specifying the memory
656  * transaction attributes to be used.
657  */
658 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
659                   hwaddr paddr, int prot,
660                   int mmu_idx, target_ulong size)
661 {
662     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
663                             prot, mmu_idx, size);
664 }
665 
666 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
667 {
668     ram_addr_t ram_addr;
669 
670     ram_addr = qemu_ram_addr_from_host(ptr);
671     if (ram_addr == RAM_ADDR_INVALID) {
672         error_report("Bad ram pointer %p", ptr);
673         abort();
674     }
675     return ram_addr;
676 }
677 
678 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
679                          int mmu_idx,
680                          target_ulong addr, uintptr_t retaddr,
681                          bool recheck, MMUAccessType access_type, int size)
682 {
683     CPUState *cpu = ENV_GET_CPU(env);
684     hwaddr mr_offset;
685     MemoryRegionSection *section;
686     MemoryRegion *mr;
687     uint64_t val;
688     bool locked = false;
689     MemTxResult r;
690 
691     if (recheck) {
692         /*
693          * This is a TLB_RECHECK access, where the MMU protection
694          * covers a smaller range than a target page, and we must
695          * repeat the MMU check here. This tlb_fill() call might
696          * longjump out if this access should cause a guest exception.
697          */
698         CPUTLBEntry *entry;
699         target_ulong tlb_addr;
700 
701         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
702 
703         entry = tlb_entry(env, mmu_idx, addr);
704         tlb_addr = entry->addr_read;
705         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
706             /* RAM access */
707             uintptr_t haddr = addr + entry->addend;
708 
709             return ldn_p((void *)haddr, size);
710         }
711         /* Fall through for handling IO accesses */
712     }
713 
714     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
715     mr = section->mr;
716     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
717     cpu->mem_io_pc = retaddr;
718     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
719         cpu_io_recompile(cpu, retaddr);
720     }
721 
722     cpu->mem_io_vaddr = addr;
723     cpu->mem_io_access_type = access_type;
724 
725     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
726         qemu_mutex_lock_iothread();
727         locked = true;
728     }
729     r = memory_region_dispatch_read(mr, mr_offset,
730                                     &val, size, iotlbentry->attrs);
731     if (r != MEMTX_OK) {
732         hwaddr physaddr = mr_offset +
733             section->offset_within_address_space -
734             section->offset_within_region;
735 
736         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
737                                mmu_idx, iotlbentry->attrs, r, retaddr);
738     }
739     if (locked) {
740         qemu_mutex_unlock_iothread();
741     }
742 
743     return val;
744 }
745 
746 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
747                       int mmu_idx,
748                       uint64_t val, target_ulong addr,
749                       uintptr_t retaddr, bool recheck, int size)
750 {
751     CPUState *cpu = ENV_GET_CPU(env);
752     hwaddr mr_offset;
753     MemoryRegionSection *section;
754     MemoryRegion *mr;
755     bool locked = false;
756     MemTxResult r;
757 
758     if (recheck) {
759         /*
760          * This is a TLB_RECHECK access, where the MMU protection
761          * covers a smaller range than a target page, and we must
762          * repeat the MMU check here. This tlb_fill() call might
763          * longjump out if this access should cause a guest exception.
764          */
765         CPUTLBEntry *entry;
766         target_ulong tlb_addr;
767 
768         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
769 
770         entry = tlb_entry(env, mmu_idx, addr);
771         tlb_addr = tlb_addr_write(entry);
772         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
773             /* RAM access */
774             uintptr_t haddr = addr + entry->addend;
775 
776             stn_p((void *)haddr, size, val);
777             return;
778         }
779         /* Fall through for handling IO accesses */
780     }
781 
782     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
783     mr = section->mr;
784     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
785     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
786         cpu_io_recompile(cpu, retaddr);
787     }
788     cpu->mem_io_vaddr = addr;
789     cpu->mem_io_pc = retaddr;
790 
791     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
792         qemu_mutex_lock_iothread();
793         locked = true;
794     }
795     r = memory_region_dispatch_write(mr, mr_offset,
796                                      val, size, iotlbentry->attrs);
797     if (r != MEMTX_OK) {
798         hwaddr physaddr = mr_offset +
799             section->offset_within_address_space -
800             section->offset_within_region;
801 
802         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
803                                mmu_idx, iotlbentry->attrs, r, retaddr);
804     }
805     if (locked) {
806         qemu_mutex_unlock_iothread();
807     }
808 }
809 
810 /* Return true if ADDR is present in the victim tlb, and has been copied
811    back to the main tlb.  */
812 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
813                            size_t elt_ofs, target_ulong page)
814 {
815     size_t vidx;
816 
817     assert_cpu_is_self(ENV_GET_CPU(env));
818     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
819         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
820         target_ulong cmp;
821 
822         /* elt_ofs might correspond to .addr_write, so use atomic_read */
823 #if TCG_OVERSIZED_GUEST
824         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
825 #else
826         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
827 #endif
828 
829         if (cmp == page) {
830             /* Found entry in victim tlb, swap tlb and iotlb.  */
831             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
832 
833             qemu_spin_lock(&env->tlb_c.lock);
834             copy_tlb_helper_locked(&tmptlb, tlb);
835             copy_tlb_helper_locked(tlb, vtlb);
836             copy_tlb_helper_locked(vtlb, &tmptlb);
837             qemu_spin_unlock(&env->tlb_c.lock);
838 
839             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
840             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
841             tmpio = *io; *io = *vio; *vio = tmpio;
842             return true;
843         }
844     }
845     return false;
846 }
847 
/*
 * Macro to call the above, with local variables from the use context.
 *
 * The expansion refers to variables named env, mmu_idx and index, which
 * must exist at the point of use.  TY names the CPUTLBEntry comparator
 * field (e.g. addr_code, addr_write); ADDR is masked down to its page
 * before being compared.
 */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)
852 
853 /* NOTE: this function can trigger an exception */
854 /* NOTE2: the returned address is not exactly the physical address: it
855  * is actually a ram_addr_t (in system mode; the user mode emulation
856  * version of this function returns a guest virtual address).
857  */
858 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
859 {
860     uintptr_t mmu_idx = cpu_mmu_index(env, true);
861     uintptr_t index = tlb_index(env, mmu_idx, addr);
862     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
863     void *p;
864 
865     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
866         if (!VICTIM_TLB_HIT(addr_code, addr)) {
867             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
868         }
869         assert(tlb_hit(entry->addr_code, addr));
870     }
871 
872     if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
873         /*
874          * Return -1 if we can't translate and execute from an entire
875          * page of RAM here, which will cause us to execute by loading
876          * and translating one insn at a time, without caching:
877          *  - TLB_RECHECK: means the MMU protection covers a smaller range
878          *    than a target page, so we must redo the MMU check every insn
879          *  - TLB_MMIO: region is not backed by RAM
880          */
881         return -1;
882     }
883 
884     p = (void *)((uintptr_t)addr + entry->addend);
885     return qemu_ram_addr_from_host_nofail(p);
886 }
887 
888 /* Probe for whether the specified guest write access is permitted.
889  * If it is not permitted then an exception will be taken in the same
890  * way as if this were a real write access (and we will not return).
891  * Otherwise the function will return, and there will be a valid
892  * entry in the TLB for this access.
893  */
894 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
895                  uintptr_t retaddr)
896 {
897     uintptr_t index = tlb_index(env, mmu_idx, addr);
898     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
899 
900     if (!tlb_hit(tlb_addr_write(entry), addr)) {
901         /* TLB entry is for a different page */
902         if (!VICTIM_TLB_HIT(addr_write, addr)) {
903             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
904                      mmu_idx, retaddr);
905         }
906     }
907 }
908 
909 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
910  * operations, or io operations to proceed.  Return the host address.  */
911 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
912                                TCGMemOpIdx oi, uintptr_t retaddr,
913                                NotDirtyInfo *ndi)
914 {
915     size_t mmu_idx = get_mmuidx(oi);
916     uintptr_t index = tlb_index(env, mmu_idx, addr);
917     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
918     target_ulong tlb_addr = tlb_addr_write(tlbe);
919     TCGMemOp mop = get_memop(oi);
920     int a_bits = get_alignment_bits(mop);
921     int s_bits = mop & MO_SIZE;
922     void *hostaddr;
923 
924     /* Adjust the given return address.  */
925     retaddr -= GETPC_ADJ;
926 
927     /* Enforce guest required alignment.  */
928     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
929         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
930         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
931                              mmu_idx, retaddr);
932     }
933 
934     /* Enforce qemu required alignment.  */
935     if (unlikely(addr & ((1 << s_bits) - 1))) {
936         /* We get here if guest alignment was not requested,
937            or was not enforced by cpu_unaligned_access above.
938            We might widen the access and emulate, but for now
939            mark an exception and exit the cpu loop.  */
940         goto stop_the_world;
941     }
942 
943     /* Check TLB entry and enforce page permissions.  */
944     if (!tlb_hit(tlb_addr, addr)) {
945         if (!VICTIM_TLB_HIT(addr_write, addr)) {
946             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
947                      mmu_idx, retaddr);
948         }
949         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
950     }
951 
952     /* Notice an IO access or a needs-MMU-lookup access */
953     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
954         /* There's really nothing that can be done to
955            support this apart from stop-the-world.  */
956         goto stop_the_world;
957     }
958 
959     /* Let the guest notice RMW on a write-only page.  */
960     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
961         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
962                  mmu_idx, retaddr);
963         /* Since we don't support reads and writes to different addresses,
964            and we do have the proper page loaded for write, this shouldn't
965            ever return.  But just in case, handle via stop-the-world.  */
966         goto stop_the_world;
967     }
968 
969     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
970 
971     ndi->active = false;
972     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
973         ndi->active = true;
974         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
975                                       qemu_ram_addr_from_host_nofail(hostaddr),
976                                       1 << s_bits);
977     }
978 
979     return hostaddr;
980 
981  stop_the_world:
982     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
983 }
984 
/*
 * Target-endianness adaptors.  The macro matching the target's byte order
 * passes its argument through unchanged; the other one applies BSWAP().
 * TARGET_WORDS_BIGENDIAN being defined selects TGT_BE as the pass-through.
 */
#ifndef TARGET_WORDS_BIGENDIAN
# define TGT_LE(V)  (V)
# define TGT_BE(V)  BSWAP(V)
#else
# define TGT_LE(V)  BSWAP(V)
# define TGT_BE(V)  (V)
#endif
992 
/*
 * Include softmmu_template.h once for each DATA_SIZE of 1, 2, 4 and 8,
 * with MMUSUFFIX set to _mmu for all four inclusions.
 * NOTE(review): DATA_SIZE is redefined before each include without an
 * intervening #undef here, so the template presumably #undefs it itself
 * at its end -- confirm against softmmu_template.h.
 */
993 #define MMUSUFFIX _mmu
994 
995 #define DATA_SIZE 1
996 #include "softmmu_template.h"
997 
998 #define DATA_SIZE 2
999 #include "softmmu_template.h"
1000 
1001 #define DATA_SIZE 4
1002 #include "softmmu_template.h"
1003 
1004 #define DATA_SIZE 8
1005 #include "softmmu_template.h"
1006 
1007 /* First set of helpers allows passing in of OI and RETADDR.  This makes
1008    them callable from other helpers.  */
1009 
/*
 * Macros set up ahead of the atomic_template.h inclusions below
 * (presumably expanded inside that header -- it is not visible here):
 *  - EXTRA_ARGS: trailing parameters `oi' and `retaddr'.
 *  - ATOMIC_NAME(X): pastes atomic_<X>, SUFFIX, END and _mmu together and
 *    wraps the result in HELPER().  SUFFIX and END are not defined in this
 *    section.
 *  - ATOMIC_MMU_DECLS: declares the NotDirtyInfo local `ndi'.
 *  - ATOMIC_MMU_LOOKUP: calls atomic_mmu_lookup() with the `oi' and
 *    `retaddr' parameters and the address of `ndi'.
 *  - ATOMIC_MMU_CLEANUP: calls memory_notdirty_write_complete(&ndi), but
 *    only when ndi.active is set.
 */
1010 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1011 #define ATOMIC_NAME(X) \
1012     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1013 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
1014 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
1015 #define ATOMIC_MMU_CLEANUP                              \
1016     do {                                                \
1017         if (unlikely(ndi.active)) {                     \
1018             memory_notdirty_write_complete(&ndi);       \
1019         }                                               \
1020     } while (0)
1021 
/* DATA_SIZE 1, 2 and 4 are always instantiated. */
1022 #define DATA_SIZE 1
1023 #include "atomic_template.h"
1024 
1025 #define DATA_SIZE 2
1026 #include "atomic_template.h"
1027 
1028 #define DATA_SIZE 4
1029 #include "atomic_template.h"
1030 
/* DATA_SIZE 8 is instantiated only when CONFIG_ATOMIC64 is defined. */
1031 #ifdef CONFIG_ATOMIC64
1032 #define DATA_SIZE 8
1033 #include "atomic_template.h"
1034 #endif
1035 
/* DATA_SIZE 16 needs HAVE_CMPXCHG128 or HAVE_ATOMIC128 to be non-zero. */
1036 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1037 #define DATA_SIZE 16
1038 #include "atomic_template.h"
1039 #endif
1040 
1041 /* Second set of helpers are directly callable from TCG as helpers.  */
1042 
/*
 * Redefine three of the template parameters for this set:
 *  - EXTRA_ARGS now carries only `oi' (no `retaddr' parameter).
 *  - ATOMIC_NAME(X) no longer appends the _mmu suffix.
 *  - ATOMIC_MMU_LOOKUP obtains the return address from GETPC().
 * ATOMIC_MMU_DECLS and ATOMIC_MMU_CLEANUP are neither #undef'd nor
 * redefined in this section.
 */
1043 #undef EXTRA_ARGS
1044 #undef ATOMIC_NAME
1045 #undef ATOMIC_MMU_LOOKUP
1046 #define EXTRA_ARGS         , TCGMemOpIdx oi
1047 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1048 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1049 
/* DATA_SIZE 1, 2 and 4 are always instantiated. */
1050 #define DATA_SIZE 1
1051 #include "atomic_template.h"
1052 
1053 #define DATA_SIZE 2
1054 #include "atomic_template.h"
1055 
1056 #define DATA_SIZE 4
1057 #include "atomic_template.h"
1058 
/*
 * DATA_SIZE 8 is instantiated only when CONFIG_ATOMIC64 is defined.
 * This section contains no DATA_SIZE 16 instantiation.
 */
1059 #ifdef CONFIG_ATOMIC64
1060 #define DATA_SIZE 8
1061 #include "atomic_template.h"
1062 #endif
1063 
1064 /* Code access functions.  */
1065 
/*
 * Re-include softmmu_template.h for DATA_SIZE 1, 2, 4 and 8 with a
 * different configuration:
 *  - MMUSUFFIX becomes _cmmu.
 *  - GETPC() is replaced by a constant 0 of type uintptr_t for everything
 *    that follows in this file.
 *  - SOFTMMU_CODE_ACCESS is defined; NOTE(review): presumably the template
 *    tests it to select the instruction-fetch variants -- confirm against
 *    softmmu_template.h.
 */
1066 #undef MMUSUFFIX
1067 #define MMUSUFFIX _cmmu
1068 #undef GETPC
1069 #define GETPC() ((uintptr_t)0)
1070 #define SOFTMMU_CODE_ACCESS
1071 
1072 #define DATA_SIZE 1
1073 #include "softmmu_template.h"
1074 
1075 #define DATA_SIZE 2
1076 #include "softmmu_template.h"
1077 
1078 #define DATA_SIZE 4
1079 #include "softmmu_template.h"
1080 
1081 #define DATA_SIZE 8
1082 #include "softmmu_template.h"
1083