xref: /openbmc/qemu/accel/tcg/cputlb.c (revision 53d28455)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 
37 /* DEBUG defines; enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
38 /* #define DEBUG_TLB */
39 /* #define DEBUG_TLB_LOG */
40 
41 #ifdef DEBUG_TLB
42 # define DEBUG_TLB_GATE 1
43 # ifdef DEBUG_TLB_LOG
44 #  define DEBUG_TLB_LOG_GATE 1
45 # else
46 #  define DEBUG_TLB_LOG_GATE 0
47 # endif
48 #else
49 # define DEBUG_TLB_GATE 0
50 # define DEBUG_TLB_LOG_GATE 0
51 #endif
52 
53 #define tlb_debug(fmt, ...) do { \
54     if (DEBUG_TLB_LOG_GATE) { \
55         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
56                       ## __VA_ARGS__); \
57     } else if (DEBUG_TLB_GATE) { \
58         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
59     } \
60 } while (0)
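
/*
 * Illustrative note (not part of the original source): with DEBUG_TLB
 * defined but DEBUG_TLB_LOG left undefined, a call such as
 *     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
 * expands to an fprintf(stderr, ...) prefixed with the calling function's
 * name; with DEBUG_TLB_LOG also defined it goes through qemu_log_mask()
 * under CPU_LOG_MMU instead.  With neither defined, both gates are 0 and
 * the compiler can eliminate the call entirely.
 */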
61 
62 #define assert_cpu_is_self(cpu) do {                              \
63         if (DEBUG_TLB_GATE) {                                     \
64             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
65         }                                                         \
66     } while (0)
67 
68 /* run_on_cpu_data.target_ptr should always be big enough for a
69  * target_ulong even on 32-bit builds */
70 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
71 
72 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
73  */
74 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
75 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
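
/*
 * Illustrative example (the mmu index assignments are hypothetical; they
 * are defined per target): a flush of mmu indexes 0 and 2 would be
 * requested with an idxmap of (1 << 0) | (1 << 2) == 0x5, while
 * ALL_MMUIDX_BITS selects every mode the target defines.
 */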
76 
77 void tlb_init(CPUState *cpu)
78 {
79     CPUArchState *env = cpu->env_ptr;
80 
81     qemu_spin_init(&env->tlb_c.lock);
82 }
83 
84 /* flush_all_helper: queue fn to run on every cpu other than src
85  *
86  * The source cpu itself is skipped; callers either invoke fn on src
87  * directly, or (for the "synced" variants) queue it on src as "safe"
88  * work, creating a synchronisation point where all queued work is
89  * finished before execution starts again.
90  */
91 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
92                              run_on_cpu_data d)
93 {
94     CPUState *cpu;
95 
96     CPU_FOREACH(cpu) {
97         if (cpu != src) {
98             async_run_on_cpu(cpu, fn, d);
99         }
100     }
101 }
102 
103 size_t tlb_flush_count(void)
104 {
105     CPUState *cpu;
106     size_t count = 0;
107 
108     CPU_FOREACH(cpu) {
109         CPUArchState *env = cpu->env_ptr;
110 
111         count += atomic_read(&env->tlb_flush_count);
112     }
113     return count;
114 }
115 
116 /* Flushing more entries than strictly required is OK: CPU architectures
117  * generally permit an implementation to drop entries from the TLB at any
118  * time, so over-flushing is only an efficiency issue, not a correctness
119  * issue.
120  */
121 static void tlb_flush_nocheck(CPUState *cpu)
122 {
123     CPUArchState *env = cpu->env_ptr;
124 
125     /* The QOM tests will trigger tlb_flush calls without setting up TCG,
126      * so we bail out early in that case.
127      */
128     if (!tcg_enabled()) {
129         return;
130     }
131 
132     assert_cpu_is_self(cpu);
133     atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
134     tlb_debug("(count: %zu)\n", tlb_flush_count());
135 
136     /*
137      * tlb_table/tlb_v_table updates from any thread must hold tlb_c.lock.
138      * However, updates from the owner thread (as is the case here; see the
139      * above assert_cpu_is_self) do not need atomic_set because all reads
140      * that do not hold the lock are performed by the same owner thread.
141      */
142     qemu_spin_lock(&env->tlb_c.lock);
143     memset(env->tlb_table, -1, sizeof(env->tlb_table));
144     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
145     qemu_spin_unlock(&env->tlb_c.lock);
146 
147     cpu_tb_jmp_cache_clear(cpu);
148 
149     env->vtlb_index = 0;
150     env->tlb_flush_addr = -1;
151     env->tlb_flush_mask = 0;
152 
153     atomic_mb_set(&cpu->pending_tlb_flush, 0);
154 }
155 
156 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
157 {
158     tlb_flush_nocheck(cpu);
159 }
160 
161 void tlb_flush(CPUState *cpu)
162 {
163     if (cpu->created && !qemu_cpu_is_self(cpu)) {
164         if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
165             atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
166             async_run_on_cpu(cpu, tlb_flush_global_async_work,
167                              RUN_ON_CPU_NULL);
168         }
169     } else {
170         tlb_flush_nocheck(cpu);
171     }
172 }
173 
174 void tlb_flush_all_cpus(CPUState *src_cpu)
175 {
176     const run_on_cpu_func fn = tlb_flush_global_async_work;
177     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
178     fn(src_cpu, RUN_ON_CPU_NULL);
179 }
180 
181 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
182 {
183     const run_on_cpu_func fn = tlb_flush_global_async_work;
184     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
185     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
186 }
187 
188 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
189 {
190     CPUArchState *env = cpu->env_ptr;
191     unsigned long mmu_idx_bitmask = data.host_int;
192     int mmu_idx;
193 
194     assert_cpu_is_self(cpu);
195 
196     tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
197 
198     qemu_spin_lock(&env->tlb_c.lock);
199     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
200 
201         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
202             tlb_debug("%d\n", mmu_idx);
203 
204             memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
205             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
206         }
207     }
208     qemu_spin_unlock(&env->tlb_c.lock);
209 
210     cpu_tb_jmp_cache_clear(cpu);
211 
212     tlb_debug("done\n");
213 }
214 
215 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
216 {
217     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
218 
219     if (!qemu_cpu_is_self(cpu)) {
220         uint16_t pending_flushes = idxmap;
221         pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
222 
223         if (pending_flushes) {
224             tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
225 
226             atomic_or(&cpu->pending_tlb_flush, pending_flushes);
227             async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
228                              RUN_ON_CPU_HOST_INT(pending_flushes));
229         }
230     } else {
231         tlb_flush_by_mmuidx_async_work(cpu,
232                                        RUN_ON_CPU_HOST_INT(idxmap));
233     }
234 }
235 
236 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
237 {
238     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
239 
240     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
241 
242     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
243     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
244 }
245 
246 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
247                                                        uint16_t idxmap)
248 {
249     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
250 
251     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
252 
253     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
254     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
255 }
256 
257 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
258                                         target_ulong page)
259 {
260     return tlb_hit_page(tlb_entry->addr_read, page) ||
261            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
262            tlb_hit_page(tlb_entry->addr_code, page);
263 }
264 
265 /* Called with tlb_c.lock held */
266 static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
267                                           target_ulong page)
268 {
269     if (tlb_hit_page_anyprot(tlb_entry, page)) {
270         memset(tlb_entry, -1, sizeof(*tlb_entry));
271     }
272 }
273 
274 /* Called with tlb_c.lock held */
275 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
276                                               target_ulong page)
277 {
278     int k;
279 
280     assert_cpu_is_self(ENV_GET_CPU(env));
281     for (k = 0; k < CPU_VTLB_SIZE; k++) {
282         tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
283     }
284 }
285 
286 static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
287 {
288     CPUArchState *env = cpu->env_ptr;
289     target_ulong addr = (target_ulong) data.target_ptr;
290     int mmu_idx;
291 
292     assert_cpu_is_self(cpu);
293 
294     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
295 
296     /* Check if we need to flush due to large pages.  */
297     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
298         tlb_debug("forcing full flush ("
299                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
300                   env->tlb_flush_addr, env->tlb_flush_mask);
301 
302         tlb_flush(cpu);
303         return;
304     }
305 
306     addr &= TARGET_PAGE_MASK;
307     qemu_spin_lock(&env->tlb_c.lock);
308     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
309         tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
310         tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
311     }
312     qemu_spin_unlock(&env->tlb_c.lock);
313 
314     tb_flush_jmp_cache(cpu, addr);
315 }
316 
317 void tlb_flush_page(CPUState *cpu, target_ulong addr)
318 {
319     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
320 
321     if (!qemu_cpu_is_self(cpu)) {
322         async_run_on_cpu(cpu, tlb_flush_page_async_work,
323                          RUN_ON_CPU_TARGET_PTR(addr));
324     } else {
325         tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
326     }
327 }
328 
329 /* As we are going to hijack the bottom bits of the page address for an
330  * mmuidx bit mask, we need to fail the build if we can't do that.
331  */
332 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
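
/*
 * Worked example (illustrative): for a target whose smallest page size is
 * 4K, TARGET_PAGE_BITS_MIN is 12, so the low 12 bits of a page-aligned
 * address are known to be zero and can carry up to 12 mmu index bits.
 * The packing and unpacking used below is then simply:
 *     addr_and_mmu_idx = (addr & TARGET_PAGE_MASK) | idxmap;
 *     addr             = addr_and_mmu_idx & TARGET_PAGE_MASK;
 *     idxmap           = addr_and_mmu_idx & ALL_MMUIDX_BITS;
 */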
333 
334 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
335                                                 run_on_cpu_data data)
336 {
337     CPUArchState *env = cpu->env_ptr;
338     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
339     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
340     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
341     int mmu_idx;
342 
343     assert_cpu_is_self(cpu);
344 
345     tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
346               addr, mmu_idx_bitmap);
347 
348     qemu_spin_lock(&env->tlb_c.lock);
349     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
350         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
351             tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
352             tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
353         }
354     }
355     qemu_spin_unlock(&env->tlb_c.lock);
356 
357     tb_flush_jmp_cache(cpu, addr);
358 }
359 
360 static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
361                                                           run_on_cpu_data data)
362 {
363     CPUArchState *env = cpu->env_ptr;
364     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
365     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
366     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
367 
368     tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
369 
370     /* Check if we need to flush due to large pages.  */
371     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
372         tlb_debug("forced full flush ("
373                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
374                   env->tlb_flush_addr, env->tlb_flush_mask);
375 
376         tlb_flush_by_mmuidx_async_work(cpu,
377                                        RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
378     } else {
379         tlb_flush_page_by_mmuidx_async_work(cpu, data);
380     }
381 }
382 
383 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
384 {
385     target_ulong addr_and_mmu_idx;
386 
387     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
388 
389     /* This should already be page aligned */
390     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
391     addr_and_mmu_idx |= idxmap;
392 
393     if (!qemu_cpu_is_self(cpu)) {
394         async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
395                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
396     } else {
397         tlb_check_page_and_flush_by_mmuidx_async_work(
398             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
399     }
400 }
401 
402 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
403                                        uint16_t idxmap)
404 {
405     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
406     target_ulong addr_and_mmu_idx;
407 
408     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
409 
410     /* This should already be page aligned */
411     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
412     addr_and_mmu_idx |= idxmap;
413 
414     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
415     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
416 }
417 
418 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
419                                                             target_ulong addr,
420                                                             uint16_t idxmap)
421 {
422     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
423     target_ulong addr_and_mmu_idx;
424 
425     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
426 
427     /* This should already be page aligned */
428     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
429     addr_and_mmu_idx |= idxmap;
430 
431     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
432     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
433 }
434 
435 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
436 {
437     const run_on_cpu_func fn = tlb_flush_page_async_work;
438 
439     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
440     fn(src, RUN_ON_CPU_TARGET_PTR(addr));
441 }
442 
443 void tlb_flush_page_all_cpus_synced(CPUState *src,
444                                                   target_ulong addr)
445 {
446     const run_on_cpu_func fn = tlb_flush_page_async_work;
447 
448     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
449     async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
450 }
451 
452 /* update the TLBs so that writes to code in the RAM page 'ram_addr'
453    can be detected */
454 void tlb_protect_code(ram_addr_t ram_addr)
455 {
456     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
457                                              DIRTY_MEMORY_CODE);
458 }
459 
460 /* update the TLB so that writes in RAM page 'ram_addr' are no longer
461    tested for self-modifying code */
462 void tlb_unprotect_code(ram_addr_t ram_addr)
463 {
464     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
465 }
466 
467 
468 /*
469  * Dirty write flag handling
470  *
471  * When the TCG code writes to a location it looks up the address in
472  * the TLB and uses that data to compute the final address. If any of
473  * the lower bits of the address are set then the slow path is forced.
474  * There are a number of reasons to do this but for normal RAM the
475  * most usual is detecting writes to code regions which may invalidate
476  * generated code.
477  *
478  * Other vCPUs might be reading their TLBs during guest execution, so we update
479  * tlb_entry->addr_write with atomic_set. We don't need to worry about this for
480  * oversized guests as MTTCG is disabled for them.
481  *
482  * Called with tlb_c.lock held.
483  */
484 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
485                                          uintptr_t start, uintptr_t length)
486 {
487     uintptr_t addr = tlb_entry->addr_write;
488 
489     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
490         addr &= TARGET_PAGE_MASK;
491         addr += tlb_entry->addend;
492         if ((addr - start) < length) {
493 #if TCG_OVERSIZED_GUEST
494             tlb_entry->addr_write |= TLB_NOTDIRTY;
495 #else
496             atomic_set(&tlb_entry->addr_write,
497                        tlb_entry->addr_write | TLB_NOTDIRTY);
498 #endif
499         }
500     }
501 }
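
/*
 * Illustrative summary (not part of the original source): for an entry whose
 * addr_write has none of TLB_INVALID_MASK, TLB_MMIO or TLB_NOTDIRTY set,
 * (addr_write & TARGET_PAGE_MASK) + addend is the host address of the page;
 * if it falls inside [start, start + length) the entry is downgraded with
 * TLB_NOTDIRTY, so the next guest write to that page takes the slow path
 * where dirty tracking is updated.
 */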
502 
503 /*
504  * Called with tlb_c.lock held.
505  * Called only from the vCPU context, i.e. the TLB's owner thread.
506  */
507 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
508 {
509     *d = *s;
510 }
511 
512 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
513  * the target vCPU).
514  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
515  * thing actually updated is the target TLB entry ->addr_write flags.
516  */
517 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
518 {
519     CPUArchState *env;
520 
521     int mmu_idx;
522 
523     env = cpu->env_ptr;
524     qemu_spin_lock(&env->tlb_c.lock);
525     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
526         unsigned int i;
527 
528         for (i = 0; i < CPU_TLB_SIZE; i++) {
529             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
530                                          length);
531         }
532 
533         for (i = 0; i < CPU_VTLB_SIZE; i++) {
534             tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
535                                          length);
536         }
537     }
538     qemu_spin_unlock(&env->tlb_c.lock);
539 }
540 
541 /* Called with tlb_c.lock held */
542 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
543                                          target_ulong vaddr)
544 {
545     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
546         tlb_entry->addr_write = vaddr;
547     }
548 }
549 
550 /* update the TLB corresponding to virtual page vaddr
551    so that it is no longer dirty */
552 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
553 {
554     CPUArchState *env = cpu->env_ptr;
555     int mmu_idx;
556 
557     assert_cpu_is_self(cpu);
558 
559     vaddr &= TARGET_PAGE_MASK;
560     qemu_spin_lock(&env->tlb_c.lock);
561     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
562         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
563     }
564 
565     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
566         int k;
567         for (k = 0; k < CPU_VTLB_SIZE; k++) {
568             tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
569         }
570     }
571     qemu_spin_unlock(&env->tlb_c.lock);
572 }
573 
574 /* Our TLB does not support large pages, so remember the area covered by
575    large pages and trigger a full TLB flush if these are invalidated.  */
576 static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
577                                target_ulong size)
578 {
579     target_ulong mask = ~(size - 1);
580 
581     if (env->tlb_flush_addr == (target_ulong)-1) {
582         env->tlb_flush_addr = vaddr & mask;
583         env->tlb_flush_mask = mask;
584         return;
585     }
586     /* Extend the existing region to include the new page.
587        This is a compromise between unnecessary flushes and the cost
588        of maintaining a full variable size TLB.  */
589     mask &= env->tlb_flush_mask;
590     while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
591         mask <<= 1;
592     }
593     env->tlb_flush_addr &= mask;
594     env->tlb_flush_mask = mask;
595 }
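
/*
 * Worked example (hypothetical addresses): suppose the recorded region is
 * 0x40000000 with mask 0xffff0000 and a new 64K large page at 0x40230000 is
 * added.  The loop widens the mask until
 * (0x40000000 ^ 0x40230000) & mask == 0, i.e. mask becomes 0xffc00000, and
 * the recorded region grows to 0x40000000/0xffc00000 so that it covers both
 * mappings.
 */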
596 
597 /* Add a new TLB entry. At most one entry for a given virtual address
598  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
599  * supplied size is only used by tlb_flush_page.
600  *
601  * Called from TCG-generated code, which is under an RCU read-side
602  * critical section.
603  */
604 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
605                              hwaddr paddr, MemTxAttrs attrs, int prot,
606                              int mmu_idx, target_ulong size)
607 {
608     CPUArchState *env = cpu->env_ptr;
609     MemoryRegionSection *section;
610     unsigned int index;
611     target_ulong address;
612     target_ulong code_address;
613     uintptr_t addend;
614     CPUTLBEntry *te, tn;
615     hwaddr iotlb, xlat, sz, paddr_page;
616     target_ulong vaddr_page;
617     int asidx = cpu_asidx_from_attrs(cpu, attrs);
618 
619     assert_cpu_is_self(cpu);
620 
621     if (size < TARGET_PAGE_SIZE) {
622         sz = TARGET_PAGE_SIZE;
623     } else {
624         if (size > TARGET_PAGE_SIZE) {
625             tlb_add_large_page(env, vaddr, size);
626         }
627         sz = size;
628     }
629     vaddr_page = vaddr & TARGET_PAGE_MASK;
630     paddr_page = paddr & TARGET_PAGE_MASK;
631 
632     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
633                                                 &xlat, &sz, attrs, &prot);
634     assert(sz >= TARGET_PAGE_SIZE);
635 
636     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
637               " prot=%x idx=%d\n",
638               vaddr, paddr, prot, mmu_idx);
639 
640     address = vaddr_page;
641     if (size < TARGET_PAGE_SIZE) {
642         /*
643          * Slow-path the TLB entries; we will repeat the MMU check and TLB
644          * fill on every access.
645          */
646         address |= TLB_RECHECK;
647     }
648     if (!memory_region_is_ram(section->mr) &&
649         !memory_region_is_romd(section->mr)) {
650         /* IO memory case */
651         address |= TLB_MMIO;
652         addend = 0;
653     } else {
654         /* TLB_MMIO for rom/romd handled below */
655         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
656     }
657 
658     code_address = address;
659     iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
660                                             paddr_page, xlat, prot, &address);
661 
662     index = tlb_index(env, mmu_idx, vaddr_page);
663     te = tlb_entry(env, mmu_idx, vaddr_page);
664 
665     /*
666      * Hold the TLB lock for the rest of the function. We could acquire/release
667      * the lock several times in the function, but it is faster to amortize the
668      * acquisition cost by acquiring it just once. Note that this leads to
669      * a longer critical section, but this is not a concern since the TLB lock
670      * is unlikely to be contended.
671      */
672     qemu_spin_lock(&env->tlb_c.lock);
673 
674     /* Make sure there's no cached translation for the new page.  */
675     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
676 
677     /*
678      * Only evict the old entry to the victim tlb if it's for a
679      * different page; otherwise just overwrite the stale data.
680      */
681     if (!tlb_hit_page_anyprot(te, vaddr_page)) {
682         unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
683         CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
684 
685         /* Evict the old entry into the victim tlb.  */
686         copy_tlb_helper_locked(tv, te);
687         env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
688     }
689 
690     /* refill the tlb */
691     /*
692      * At this point iotlb contains a physical section number in the lower
693      * TARGET_PAGE_BITS, and either
694      *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
695      *  + the offset within section->mr of the page base (otherwise)
696      * We subtract the vaddr_page (which is page aligned and thus won't
697      * disturb the low bits) to give an offset which can be added to the
698      * (non-page-aligned) vaddr of the eventual memory access to get
699      * the MemoryRegion offset for the access. Note that the vaddr we
700      * subtract here is that of the page base, and not the same as the
701      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
702      */
703     env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
704     env->iotlb[mmu_idx][index].attrs = attrs;
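
    /*
     * Worked example (hypothetical values, 4K pages, I/O case): if the page
     * starts at offset 0x102000 within section->mr and vaddr_page is
     * 0x40001000, the stored value is (0x102000 | section_idx) - 0x40001000.
     * For a later access at vaddr 0x40001234, io_readx()/io_writex() compute
     * (iotlbentry->addr & TARGET_PAGE_MASK) + 0x40001234 == 0x102234, the
     * offset of the access within the MemoryRegion.
     */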
705 
706     /* Now calculate the new entry */
707     tn.addend = addend - vaddr_page;
708     if (prot & PAGE_READ) {
709         tn.addr_read = address;
710     } else {
711         tn.addr_read = -1;
712     }
713 
714     if (prot & PAGE_EXEC) {
715         tn.addr_code = code_address;
716     } else {
717         tn.addr_code = -1;
718     }
719 
720     tn.addr_write = -1;
721     if (prot & PAGE_WRITE) {
722         if ((memory_region_is_ram(section->mr) && section->readonly)
723             || memory_region_is_romd(section->mr)) {
724             /* Write access calls the I/O callback.  */
725             tn.addr_write = address | TLB_MMIO;
726         } else if (memory_region_is_ram(section->mr)
727                    && cpu_physical_memory_is_clean(
728                        memory_region_get_ram_addr(section->mr) + xlat)) {
729             tn.addr_write = address | TLB_NOTDIRTY;
730         } else {
731             tn.addr_write = address;
732         }
733         if (prot & PAGE_WRITE_INV) {
734             tn.addr_write |= TLB_INVALID_MASK;
735         }
736     }
737 
738     copy_tlb_helper_locked(te, &tn);
739     qemu_spin_unlock(&env->tlb_c.lock);
740 }
741 
742 /* Add a new TLB entry, but without specifying the memory
743  * transaction attributes to be used.
744  */
745 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
746                   hwaddr paddr, int prot,
747                   int mmu_idx, target_ulong size)
748 {
749     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
750                             prot, mmu_idx, size);
751 }
752 
753 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
754 {
755     ram_addr_t ram_addr;
756 
757     ram_addr = qemu_ram_addr_from_host(ptr);
758     if (ram_addr == RAM_ADDR_INVALID) {
759         error_report("Bad ram pointer %p", ptr);
760         abort();
761     }
762     return ram_addr;
763 }
764 
765 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
766                          int mmu_idx,
767                          target_ulong addr, uintptr_t retaddr,
768                          bool recheck, MMUAccessType access_type, int size)
769 {
770     CPUState *cpu = ENV_GET_CPU(env);
771     hwaddr mr_offset;
772     MemoryRegionSection *section;
773     MemoryRegion *mr;
774     uint64_t val;
775     bool locked = false;
776     MemTxResult r;
777 
778     if (recheck) {
779         /*
780          * This is a TLB_RECHECK access, where the MMU protection
781          * covers a smaller range than a target page, and we must
782          * repeat the MMU check here. This tlb_fill() call might
783          * longjump out if this access should cause a guest exception.
784          */
785         CPUTLBEntry *entry;
786         target_ulong tlb_addr;
787 
788         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
789 
790         entry = tlb_entry(env, mmu_idx, addr);
791         tlb_addr = entry->addr_read;
792         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
793             /* RAM access */
794             uintptr_t haddr = addr + entry->addend;
795 
796             return ldn_p((void *)haddr, size);
797         }
798         /* Fall through for handling IO accesses */
799     }
800 
801     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
802     mr = section->mr;
803     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
804     cpu->mem_io_pc = retaddr;
805     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
806         cpu_io_recompile(cpu, retaddr);
807     }
808 
809     cpu->mem_io_vaddr = addr;
810     cpu->mem_io_access_type = access_type;
811 
812     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
813         qemu_mutex_lock_iothread();
814         locked = true;
815     }
816     r = memory_region_dispatch_read(mr, mr_offset,
817                                     &val, size, iotlbentry->attrs);
818     if (r != MEMTX_OK) {
819         hwaddr physaddr = mr_offset +
820             section->offset_within_address_space -
821             section->offset_within_region;
822 
823         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
824                                mmu_idx, iotlbentry->attrs, r, retaddr);
825     }
826     if (locked) {
827         qemu_mutex_unlock_iothread();
828     }
829 
830     return val;
831 }
832 
833 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
834                       int mmu_idx,
835                       uint64_t val, target_ulong addr,
836                       uintptr_t retaddr, bool recheck, int size)
837 {
838     CPUState *cpu = ENV_GET_CPU(env);
839     hwaddr mr_offset;
840     MemoryRegionSection *section;
841     MemoryRegion *mr;
842     bool locked = false;
843     MemTxResult r;
844 
845     if (recheck) {
846         /*
847          * This is a TLB_RECHECK access, where the MMU protection
848          * covers a smaller range than a target page, and we must
849          * repeat the MMU check here. This tlb_fill() call might
850          * longjump out if this access should cause a guest exception.
851          */
852         CPUTLBEntry *entry;
853         target_ulong tlb_addr;
854 
855         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
856 
857         entry = tlb_entry(env, mmu_idx, addr);
858         tlb_addr = tlb_addr_write(entry);
859         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
860             /* RAM access */
861             uintptr_t haddr = addr + entry->addend;
862 
863             stn_p((void *)haddr, size, val);
864             return;
865         }
866         /* Fall through for handling IO accesses */
867     }
868 
869     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
870     mr = section->mr;
871     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
872     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
873         cpu_io_recompile(cpu, retaddr);
874     }
875     cpu->mem_io_vaddr = addr;
876     cpu->mem_io_pc = retaddr;
877 
878     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
879         qemu_mutex_lock_iothread();
880         locked = true;
881     }
882     r = memory_region_dispatch_write(mr, mr_offset,
883                                      val, size, iotlbentry->attrs);
884     if (r != MEMTX_OK) {
885         hwaddr physaddr = mr_offset +
886             section->offset_within_address_space -
887             section->offset_within_region;
888 
889         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
890                                mmu_idx, iotlbentry->attrs, r, retaddr);
891     }
892     if (locked) {
893         qemu_mutex_unlock_iothread();
894     }
895 }
896 
897 /* Return true if ADDR is present in the victim tlb, and has been copied
898    back to the main tlb.  */
899 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
900                            size_t elt_ofs, target_ulong page)
901 {
902     size_t vidx;
903 
904     assert_cpu_is_self(ENV_GET_CPU(env));
905     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
906         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
907         target_ulong cmp;
908 
909         /* elt_ofs might correspond to .addr_write, so use atomic_read */
910 #if TCG_OVERSIZED_GUEST
911         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
912 #else
913         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
914 #endif
915 
916         if (cmp == page) {
917             /* Found entry in victim tlb, swap tlb and iotlb.  */
918             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
919 
920             qemu_spin_lock(&env->tlb_c.lock);
921             copy_tlb_helper_locked(&tmptlb, tlb);
922             copy_tlb_helper_locked(tlb, vtlb);
923             copy_tlb_helper_locked(vtlb, &tmptlb);
924             qemu_spin_unlock(&env->tlb_c.lock);
925 
926             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
927             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
928             tmpio = *io; *io = *vio; *vio = tmpio;
929             return true;
930         }
931     }
932     return false;
933 }
934 
935 /* Macro to call the above, with local variables from the use context.  */
936 #define VICTIM_TLB_HIT(TY, ADDR) \
937   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
938                  (ADDR) & TARGET_PAGE_MASK)
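
/*
 * Illustrative usage (mirrors the callers below): on a main-TLB miss for a
 * store, a caller does
 *     if (!VICTIM_TLB_HIT(addr_write, addr)) {
 *         tlb_fill(...);
 *     }
 * i.e. the victim TLB is consulted first, and only if the page is absent
 * there as well is a full tlb_fill() performed.
 */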
939 
940 /* NOTE: this function can trigger an exception */
941 /* NOTE2: the returned address is not exactly the physical address: it
942  * is actually a ram_addr_t (in system mode; the user mode emulation
943  * version of this function returns a guest virtual address).
944  */
945 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
946 {
947     uintptr_t mmu_idx = cpu_mmu_index(env, true);
948     uintptr_t index = tlb_index(env, mmu_idx, addr);
949     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
950     void *p;
951 
952     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
953         if (!VICTIM_TLB_HIT(addr_code, addr)) {
954             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
955         }
956         assert(tlb_hit(entry->addr_code, addr));
957     }
958 
959     if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
960         /*
961          * Return -1 if we can't translate and execute from an entire
962          * page of RAM here, which will cause us to execute by loading
963          * and translating one insn at a time, without caching:
964          *  - TLB_RECHECK: means the MMU protection covers a smaller range
965          *    than a target page, so we must redo the MMU check every insn
966          *  - TLB_MMIO: region is not backed by RAM
967          */
968         return -1;
969     }
970 
971     p = (void *)((uintptr_t)addr + entry->addend);
972     return qemu_ram_addr_from_host_nofail(p);
973 }
974 
975 /* Probe for whether the specified guest write access is permitted.
976  * If it is not permitted then an exception will be taken in the same
977  * way as if this were a real write access (and we will not return).
978  * Otherwise the function will return, and there will be a valid
979  * entry in the TLB for this access.
980  */
981 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
982                  uintptr_t retaddr)
983 {
984     uintptr_t index = tlb_index(env, mmu_idx, addr);
985     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
986 
987     if (!tlb_hit(tlb_addr_write(entry), addr)) {
988         /* TLB entry is for a different page */
989         if (!VICTIM_TLB_HIT(addr_write, addr)) {
990             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
991                      mmu_idx, retaddr);
992         }
993     }
994 }
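
/*
 * Illustrative note (not part of the original source): targets typically
 * call probe_write() from helpers before a multi-part store, so that any
 * permission fault is raised up front rather than after part of the data
 * has already been written.
 */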
995 
996 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
997  * or I/O operations to proceed.  Return the host address.  */
998 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
999                                TCGMemOpIdx oi, uintptr_t retaddr,
1000                                NotDirtyInfo *ndi)
1001 {
1002     size_t mmu_idx = get_mmuidx(oi);
1003     uintptr_t index = tlb_index(env, mmu_idx, addr);
1004     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1005     target_ulong tlb_addr = tlb_addr_write(tlbe);
1006     TCGMemOp mop = get_memop(oi);
1007     int a_bits = get_alignment_bits(mop);
1008     int s_bits = mop & MO_SIZE;
1009     void *hostaddr;
1010 
1011     /* Adjust the given return address.  */
1012     retaddr -= GETPC_ADJ;
1013 
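    /*
     * Example (illustrative): for a 4-byte operation encoded with MO_ALIGN,
     * s_bits == 2 and a_bits == 2, so an address with either of its low two
     * bits set triggers the guest alignment check below; absent a guest
     * alignment requirement, the host-side check on the same bits forces
     * the stop-the-world path instead.
     */
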
1014     /* Enforce guest required alignment.  */
1015     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1016         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1017         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
1018                              mmu_idx, retaddr);
1019     }
1020 
1021     /* Enforce qemu required alignment.  */
1022     if (unlikely(addr & ((1 << s_bits) - 1))) {
1023         /* We get here if guest alignment was not requested,
1024            or was not enforced by cpu_unaligned_access above.
1025            We might widen the access and emulate, but for now
1026            mark an exception and exit the cpu loop.  */
1027         goto stop_the_world;
1028     }
1029 
1030     /* Check TLB entry and enforce page permissions.  */
1031     if (!tlb_hit(tlb_addr, addr)) {
1032         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1033             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
1034                      mmu_idx, retaddr);
1035         }
1036         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1037     }
1038 
1039     /* Notice an IO access or a needs-MMU-lookup access */
1040     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
1041         /* There's really nothing that can be done to
1042            support this apart from stop-the-world.  */
1043         goto stop_the_world;
1044     }
1045 
1046     /* Let the guest notice RMW on a write-only page.  */
1047     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1048         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1049                  mmu_idx, retaddr);
1050         /* Since we don't support reads and writes to different addresses,
1051            and we do have the proper page loaded for write, this shouldn't
1052            ever return.  But just in case, handle via stop-the-world.  */
1053         goto stop_the_world;
1054     }
1055 
1056     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1057 
1058     ndi->active = false;
1059     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1060         ndi->active = true;
1061         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
1062                                       qemu_ram_addr_from_host_nofail(hostaddr),
1063                                       1 << s_bits);
1064     }
1065 
1066     return hostaddr;
1067 
1068  stop_the_world:
1069     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
1070 }
1071 
1072 #ifdef TARGET_WORDS_BIGENDIAN
1073 # define TGT_BE(X)  (X)
1074 # define TGT_LE(X)  BSWAP(X)
1075 #else
1076 # define TGT_BE(X)  BSWAP(X)
1077 # define TGT_LE(X)  (X)
1078 #endif
1079 
1080 #define MMUSUFFIX _mmu
1081 
1082 #define DATA_SIZE 1
1083 #include "softmmu_template.h"
1084 
1085 #define DATA_SIZE 2
1086 #include "softmmu_template.h"
1087 
1088 #define DATA_SIZE 4
1089 #include "softmmu_template.h"
1090 
1091 #define DATA_SIZE 8
1092 #include "softmmu_template.h"
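
/*
 * Note (illustrative, not part of the original source): each inclusion of
 * softmmu_template.h above, parameterised by DATA_SIZE and MMUSUFFIX,
 * expands into the out-of-line load/store helpers of that width which
 * TCG-generated code calls on a TLB miss (with little- and big-endian
 * variants for sizes larger than one byte).
 */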
1093 
1094 /* The first set of helpers allows OI and RETADDR to be passed in explicitly,
1095    which makes them callable from other helpers.  */
1096 
1097 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1098 #define ATOMIC_NAME(X) \
1099     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1100 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
1101 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
1102 #define ATOMIC_MMU_CLEANUP                              \
1103     do {                                                \
1104         if (unlikely(ndi.active)) {                     \
1105             memory_notdirty_write_complete(&ndi);       \
1106         }                                               \
1107     } while (0)
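
/*
 * Note (illustrative): atomic_template.h uses the macros above to generate
 * the cmpxchg/xchg/fetch-op helpers for each DATA_SIZE included below; each
 * helper obtains the host address via ATOMIC_MMU_LOOKUP, performs the
 * operation on it, and then runs ATOMIC_MMU_CLEANUP to complete any pending
 * not-dirty write tracking.
 */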
1108 
1109 #define DATA_SIZE 1
1110 #include "atomic_template.h"
1111 
1112 #define DATA_SIZE 2
1113 #include "atomic_template.h"
1114 
1115 #define DATA_SIZE 4
1116 #include "atomic_template.h"
1117 
1118 #ifdef CONFIG_ATOMIC64
1119 #define DATA_SIZE 8
1120 #include "atomic_template.h"
1121 #endif
1122 
1123 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1124 #define DATA_SIZE 16
1125 #include "atomic_template.h"
1126 #endif
1127 
1128 /* Second set of helpers are directly callable from TCG as helpers.  */
1129 
1130 #undef EXTRA_ARGS
1131 #undef ATOMIC_NAME
1132 #undef ATOMIC_MMU_LOOKUP
1133 #define EXTRA_ARGS         , TCGMemOpIdx oi
1134 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1135 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1136 
1137 #define DATA_SIZE 1
1138 #include "atomic_template.h"
1139 
1140 #define DATA_SIZE 2
1141 #include "atomic_template.h"
1142 
1143 #define DATA_SIZE 4
1144 #include "atomic_template.h"
1145 
1146 #ifdef CONFIG_ATOMIC64
1147 #define DATA_SIZE 8
1148 #include "atomic_template.h"
1149 #endif
1150 
1151 /* Code access functions.  */
1152 
1153 #undef MMUSUFFIX
1154 #define MMUSUFFIX _cmmu
1155 #undef GETPC
1156 #define GETPC() ((uintptr_t)0)
1157 #define SOFTMMU_CODE_ACCESS
1158 
1159 #define DATA_SIZE 1
1160 #include "softmmu_template.h"
1161 
1162 #define DATA_SIZE 2
1163 #include "softmmu_template.h"
1164 
1165 #define DATA_SIZE 4
1166 #include "softmmu_template.h"
1167 
1168 #define DATA_SIZE 8
1169 #include "softmmu_template.h"
1170