xref: /openbmc/qemu/accel/tcg/cputlb.c (revision 016d4b01)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 
36 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
37 /* #define DEBUG_TLB */
38 /* #define DEBUG_TLB_LOG */
39 
40 #ifdef DEBUG_TLB
41 # define DEBUG_TLB_GATE 1
42 # ifdef DEBUG_TLB_LOG
43 #  define DEBUG_TLB_LOG_GATE 1
44 # else
45 #  define DEBUG_TLB_LOG_GATE 0
46 # endif
47 #else
48 # define DEBUG_TLB_GATE 0
49 # define DEBUG_TLB_LOG_GATE 0
50 #endif
51 
52 #define tlb_debug(fmt, ...) do { \
53     if (DEBUG_TLB_LOG_GATE) { \
54         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
55                       ## __VA_ARGS__); \
56     } else if (DEBUG_TLB_GATE) { \
57         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
58     } \
59 } while (0)
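/* Note that the gates above are tested with an ordinary "if" rather than
 * an #ifdef: the debug calls are then always compiled and type-checked,
 * and the compiler simply eliminates the dead branches when the gates
 * are 0.
 */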
60 
61 #define assert_cpu_is_self(cpu) do {                              \
62         if (DEBUG_TLB_GATE) {                                     \
63             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
64         }                                                         \
65     } while (0)
66 
67 /* run_on_cpu_data.target_ptr should always be big enough for a
68  * target_ulong even on 32 bit builds */
69 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
70 
71 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
72  */
73 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
74 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
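/* For example, with NB_MMU_MODES == 3 this gives ALL_MMUIDX_BITS == 0x7,
 * i.e. one bit per MMU mode, which is how the flush-by-mmuidx calls below
 * encode "flush these translation regimes" in a uint16_t.
 */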
75 
76 /* flush_all_helper: run fn asynchronously across all cpus other than src
77  *
78  * The caller is expected to handle the src cpu itself, either by calling
79  * fn directly or, in the *_synced variants, by queueing it as "safe"
80  * work, creating a synchronisation point where all queued work will be
81  * finished before execution starts again.
82  */
83 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
84                              run_on_cpu_data d)
85 {
86     CPUState *cpu;
87 
88     CPU_FOREACH(cpu) {
89         if (cpu != src) {
90             async_run_on_cpu(cpu, fn, d);
91         }
92     }
93 }
94 
95 size_t tlb_flush_count(void)
96 {
97     CPUState *cpu;
98     size_t count = 0;
99 
100     CPU_FOREACH(cpu) {
101         CPUArchState *env = cpu->env_ptr;
102 
103         count += atomic_read(&env->tlb_flush_count);
104     }
105     return count;
106 }
107 
108 /* Flushing more entries than strictly required is OK: CPU architectures
109  * generally permit an implementation to drop entries from the TLB at any
110  * time, so over-flushing is only an efficiency issue, not a correctness
111  * issue.
112  */
113 static void tlb_flush_nocheck(CPUState *cpu)
114 {
115     CPUArchState *env = cpu->env_ptr;
116 
117     /* The QOM tests will trigger tlb_flushes without setting up TCG
118      * so we bug out here in that case.
119      */
120     if (!tcg_enabled()) {
121         return;
122     }
123 
124     assert_cpu_is_self(cpu);
125     atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
126     tlb_debug("(count: %zu)\n", tlb_flush_count());
127 
128     memset(env->tlb_table, -1, sizeof(env->tlb_table));
129     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
130     cpu_tb_jmp_cache_clear(cpu);
131 
132     env->vtlb_index = 0;
133     env->tlb_flush_addr = -1;
134     env->tlb_flush_mask = 0;
135 
136     atomic_mb_set(&cpu->pending_tlb_flush, 0);
137 }
138 
139 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
140 {
141     tlb_flush_nocheck(cpu);
142 }
143 
144 void tlb_flush(CPUState *cpu)
145 {
146     if (cpu->created && !qemu_cpu_is_self(cpu)) {
147         if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
148             atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
149             async_run_on_cpu(cpu, tlb_flush_global_async_work,
150                              RUN_ON_CPU_NULL);
151         }
152     } else {
153         tlb_flush_nocheck(cpu);
154     }
155 }
156 
157 void tlb_flush_all_cpus(CPUState *src_cpu)
158 {
159     const run_on_cpu_func fn = tlb_flush_global_async_work;
160     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
161     fn(src_cpu, RUN_ON_CPU_NULL);
162 }
163 
164 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
165 {
166     const run_on_cpu_func fn = tlb_flush_global_async_work;
167     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
168     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
169 }
170 
171 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
172 {
173     CPUArchState *env = cpu->env_ptr;
174     unsigned long mmu_idx_bitmask = data.host_int;
175     int mmu_idx;
176 
177     assert_cpu_is_self(cpu);
178 
179     tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
180 
181     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
182 
183         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
184             tlb_debug("%d\n", mmu_idx);
185 
186             memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
187             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
188         }
189     }
190 
191     cpu_tb_jmp_cache_clear(cpu);
192 
193     tlb_debug("done\n");
194 }
195 
196 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
197 {
198     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
199 
200     if (!qemu_cpu_is_self(cpu)) {
201         uint16_t pending_flushes = idxmap;
202         pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
203 
204         if (pending_flushes) {
205             tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
206 
207             atomic_or(&cpu->pending_tlb_flush, pending_flushes);
208             async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
209                              RUN_ON_CPU_HOST_INT(pending_flushes));
210         }
211     } else {
212         tlb_flush_by_mmuidx_async_work(cpu,
213                                        RUN_ON_CPU_HOST_INT(idxmap));
214     }
215 }
216 
217 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
218 {
219     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
220 
221     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
222 
223     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
224     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
225 }
226 
227 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
228                                                        uint16_t idxmap)
229 {
230     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
231 
232     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
233 
234     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
235     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
236 }
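/* Hypothetical usage sketch (not from any particular target): a front end
 * emulating a broadcast "invalidate TLB for translation regime N"
 * instruction might issue
 *
 *     tlb_flush_by_mmuidx_all_cpus_synced(cs, 1 << N);
 *
 * where cs is the issuing vCPU.  The _synced variant also queues the flush
 * as "safe" work on cs itself, so all of the queued flush work has finished
 * before execution starts again on that vCPU.
 */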
237 
238 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
239                                         target_ulong page)
240 {
241     return tlb_hit_page(tlb_entry->addr_read, page) ||
242            tlb_hit_page(tlb_entry->addr_write, page) ||
243            tlb_hit_page(tlb_entry->addr_code, page);
244 }
245 
246 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page)
247 {
248     if (tlb_hit_page_anyprot(tlb_entry, page)) {
249         memset(tlb_entry, -1, sizeof(*tlb_entry));
250     }
251 }
252 
253 static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx,
254                                        target_ulong page)
255 {
256     int k;
257     for (k = 0; k < CPU_VTLB_SIZE; k++) {
258         tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page);
259     }
260 }
261 
262 static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
263 {
264     CPUArchState *env = cpu->env_ptr;
265     target_ulong addr = (target_ulong) data.target_ptr;
266     int i;
267     int mmu_idx;
268 
269     assert_cpu_is_self(cpu);
270 
271     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
272 
273     /* Check if we need to flush due to large pages.  */
274     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
275         tlb_debug("forcing full flush ("
276                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
277                   env->tlb_flush_addr, env->tlb_flush_mask);
278 
279         tlb_flush(cpu);
280         return;
281     }
282 
283     addr &= TARGET_PAGE_MASK;
284     i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
285     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
286         tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
287         tlb_flush_vtlb_page(env, mmu_idx, addr);
288     }
289 
290     tb_flush_jmp_cache(cpu, addr);
291 }
292 
293 void tlb_flush_page(CPUState *cpu, target_ulong addr)
294 {
295     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
296 
297     if (!qemu_cpu_is_self(cpu)) {
298         async_run_on_cpu(cpu, tlb_flush_page_async_work,
299                          RUN_ON_CPU_TARGET_PTR(addr));
300     } else {
301         tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
302     }
303 }
304 
305 /* As we are going to hijack the bottom bits of the page address for a
306  * mmuidx bit mask, fail the build if a page offset cannot hold one bit
307  * per MMU mode. */
308 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
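/* Worked example of the encoding used below, assuming TARGET_PAGE_BITS == 12
 * and idxmap == 0x3: a flush of page 0x40002000 for mmu indexes 0 and 1 is
 * packed as
 *
 *     addr_and_mmu_idx = (0x40002000 & TARGET_PAGE_MASK) | 0x3
 *                      = 0x40002003;
 *
 * the async worker then recovers the address with TARGET_PAGE_MASK and the
 * idxmap with ALL_MMUIDX_BITS.
 */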
309 
310 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
311                                                 run_on_cpu_data data)
312 {
313     CPUArchState *env = cpu->env_ptr;
314     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
315     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
316     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
317     int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
318     int mmu_idx;
319 
320     assert_cpu_is_self(cpu);
321 
322     tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
323               page, addr, mmu_idx_bitmap);
324 
325     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
326         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
327             tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
328             tlb_flush_vtlb_page(env, mmu_idx, addr);
329         }
330     }
331 
332     tb_flush_jmp_cache(cpu, addr);
333 }
334 
335 static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
336                                                           run_on_cpu_data data)
337 {
338     CPUArchState *env = cpu->env_ptr;
339     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
340     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
341     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
342 
343     tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
344 
345     /* Check if we need to flush due to large pages.  */
346     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
347         tlb_debug("forced full flush ("
348                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
349                   env->tlb_flush_addr, env->tlb_flush_mask);
350 
351         tlb_flush_by_mmuidx_async_work(cpu,
352                                        RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
353     } else {
354         tlb_flush_page_by_mmuidx_async_work(cpu, data);
355     }
356 }
357 
358 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
359 {
360     target_ulong addr_and_mmu_idx;
361 
362     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
363 
364     /* This should already be page aligned */
365     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
366     addr_and_mmu_idx |= idxmap;
367 
368     if (!qemu_cpu_is_self(cpu)) {
369         async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
370                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
371     } else {
372         tlb_check_page_and_flush_by_mmuidx_async_work(
373             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
374     }
375 }
376 
377 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
378                                        uint16_t idxmap)
379 {
380     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
381     target_ulong addr_and_mmu_idx;
382 
383     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
384 
385     /* This should already be page aligned */
386     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
387     addr_and_mmu_idx |= idxmap;
388 
389     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
390     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
391 }
392 
393 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
394                                                             target_ulong addr,
395                                                             uint16_t idxmap)
396 {
397     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
398     target_ulong addr_and_mmu_idx;
399 
400     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
401 
402     /* This should already be page aligned */
403     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
404     addr_and_mmu_idx |= idxmap;
405 
406     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
407     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
408 }
409 
410 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
411 {
412     const run_on_cpu_func fn = tlb_flush_page_async_work;
413 
414     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
415     fn(src, RUN_ON_CPU_TARGET_PTR(addr));
416 }
417 
418 void tlb_flush_page_all_cpus_synced(CPUState *src,
419                                                   target_ulong addr)
420 {
421     const run_on_cpu_func fn = tlb_flush_page_async_work;
422 
423     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
424     async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
425 }
426 
427 /* update the TLBs so that writes to code in the RAM page 'ram_addr'
428    can be detected */
429 void tlb_protect_code(ram_addr_t ram_addr)
430 {
431     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
432                                              DIRTY_MEMORY_CODE);
433 }
434 
435 /* update the TLB so that writes in RAM page 'ram_addr' are no longer
436    tested for self-modifying code */
437 void tlb_unprotect_code(ram_addr_t ram_addr)
438 {
439     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
440 }
441 
442 
443 /*
444  * Dirty write flag handling
445  *
446  * When the TCG code writes to a location it looks up the address in
447  * the TLB and uses that data to compute the final address. If any of
448  * the lower bits of the address are set then the slow path is forced.
449  * There are a number of reasons to do this but for normal RAM the
450  * most usual is detecting writes to code regions which may invalidate
451  * generated code.
452  *
453  * Because we want other vCPUs to respond to changes straight away we
454  * update the te->addr_write field atomically. If the TLB entry has
455  * been changed by the vCPU in the meantime, we skip the update.
456  *
457  * As this function uses atomic accesses we also need to ensure
458  * updates to tlb_entries follow the same access rules. We don't need
459  * to worry about this for oversized guests as MTTCG is disabled for
460  * them.
461  */
462 
463 static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
464                            uintptr_t length)
465 {
466 #if TCG_OVERSIZED_GUEST
467     uintptr_t addr = tlb_entry->addr_write;
468 
469     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
470         addr &= TARGET_PAGE_MASK;
471         addr += tlb_entry->addend;
472         if ((addr - start) < length) {
473             tlb_entry->addr_write |= TLB_NOTDIRTY;
474         }
475     }
476 #else
477     /* paired with atomic_mb_set in tlb_set_page_with_attrs */
478     uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
479     uintptr_t addr = orig_addr;
480 
481     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
482         addr &= TARGET_PAGE_MASK;
483         addr += atomic_read(&tlb_entry->addend);
484         if ((addr - start) < length) {
485             uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
486             atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
487         }
488     }
489 #endif
490 }
491 
492 /* For atomic correctness when running MTTCG we need to use the right
493  * primitives when copying entries */
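/* (The bool parameter below shares its name with the atomic_set() macro
 * from qemu/atomic.h; the two do not clash because a function-like macro
 * only expands when its name is followed by a '(' token.)
 */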
494 static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
495                                    bool atomic_set)
496 {
497 #if TCG_OVERSIZED_GUEST
498     *d = *s;
499 #else
500     if (atomic_set) {
501         d->addr_read = s->addr_read;
502         d->addr_code = s->addr_code;
503         atomic_set(&d->addend, atomic_read(&s->addend));
504         /* Pairs with flag setting in tlb_reset_dirty_range */
505         atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
506     } else {
507         d->addr_read = s->addr_read;
508         d->addr_write = atomic_read(&s->addr_write);
509         d->addr_code = s->addr_code;
510         d->addend = atomic_read(&s->addend);
511     }
512 #endif
513 }
514 
515 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
516  * the target vCPU). As such care needs to be taken that we don't
517  * dangerously race with another vCPU update. The only thing actually
518  * updated is the target TLB entry ->addr_write flags.
519  */
520 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
521 {
522     CPUArchState *env;
523 
524     int mmu_idx;
525 
526     env = cpu->env_ptr;
527     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
528         unsigned int i;
529 
530         for (i = 0; i < CPU_TLB_SIZE; i++) {
531             tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
532                                   start1, length);
533         }
534 
535         for (i = 0; i < CPU_VTLB_SIZE; i++) {
536             tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
537                                   start1, length);
538         }
539     }
540 }
541 
542 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
543 {
544     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
545         tlb_entry->addr_write = vaddr;
546     }
547 }
548 
549 /* update the TLB corresponding to virtual page vaddr
550    so that writes to it no longer trap as not-dirty */
551 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
552 {
553     CPUArchState *env = cpu->env_ptr;
554     int i;
555     int mmu_idx;
556 
557     assert_cpu_is_self(cpu);
558 
559     vaddr &= TARGET_PAGE_MASK;
560     i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
561     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
562         tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
563     }
564 
565     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
566         int k;
567         for (k = 0; k < CPU_VTLB_SIZE; k++) {
568             tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
569         }
570     }
571 }
572 
573 /* Our TLB does not support large pages, so remember the area covered by
574    large pages and trigger a full TLB flush if these are invalidated.  */
575 static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
576                                target_ulong size)
577 {
578     target_ulong mask = ~(size - 1);
579 
580     if (env->tlb_flush_addr == (target_ulong)-1) {
581         env->tlb_flush_addr = vaddr & mask;
582         env->tlb_flush_mask = mask;
583         return;
584     }
585     /* Extend the existing region to include the new page.
586        This is a compromise between unnecessary flushes and the cost
587        of maintaining a full variable size TLB.  */
588     mask &= env->tlb_flush_mask;
589     while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
590         mask <<= 1;
591     }
592     env->tlb_flush_addr &= mask;
593     env->tlb_flush_mask = mask;
594 }
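/* Worked example, assuming a 32-bit target_ulong and 2MB large pages: after
 * inserting a page at 0x40000000 the tracked region is 0x40000000/0xffe00000
 * (2MB); inserting another at 0x40400000 widens the mask until both pages
 * match, giving 0x40000000/0xff800000, i.e. an 8MB region whose invalidation
 * will force a full flush.
 */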
595 
596 /* Add a new TLB entry. At most one entry for a given virtual address
597  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
598  * supplied size is only used by tlb_flush_page.
599  *
600  * Called from TCG-generated code, which is under an RCU read-side
601  * critical section.
602  */
603 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
604                              hwaddr paddr, MemTxAttrs attrs, int prot,
605                              int mmu_idx, target_ulong size)
606 {
607     CPUArchState *env = cpu->env_ptr;
608     MemoryRegionSection *section;
609     unsigned int index;
610     target_ulong address;
611     target_ulong code_address;
612     uintptr_t addend;
613     CPUTLBEntry *te, tn;
614     hwaddr iotlb, xlat, sz, paddr_page;
615     target_ulong vaddr_page;
616     int asidx = cpu_asidx_from_attrs(cpu, attrs);
617 
618     assert_cpu_is_self(cpu);
619 
620     if (size < TARGET_PAGE_SIZE) {
621         sz = TARGET_PAGE_SIZE;
622     } else {
623         if (size > TARGET_PAGE_SIZE) {
624             tlb_add_large_page(env, vaddr, size);
625         }
626         sz = size;
627     }
628     vaddr_page = vaddr & TARGET_PAGE_MASK;
629     paddr_page = paddr & TARGET_PAGE_MASK;
630 
631     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
632                                                 &xlat, &sz, attrs, &prot);
633     assert(sz >= TARGET_PAGE_SIZE);
634 
635     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
636               " prot=%x idx=%d\n",
637               vaddr, paddr, prot, mmu_idx);
638 
639     address = vaddr_page;
640     if (size < TARGET_PAGE_SIZE) {
641         /*
642          * Slow-path the TLB entries; we will repeat the MMU check and TLB
643          * fill on every access.
644          */
645         address |= TLB_RECHECK;
646     }
647     if (!memory_region_is_ram(section->mr) &&
648         !memory_region_is_romd(section->mr)) {
649         /* IO memory case */
650         address |= TLB_MMIO;
651         addend = 0;
652     } else {
653         /* TLB_MMIO for rom/romd handled below */
654         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
655     }
656 
657     /* Make sure there's no cached translation for the new page.  */
658     tlb_flush_vtlb_page(env, mmu_idx, vaddr_page);
659 
660     code_address = address;
661     iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
662                                             paddr_page, xlat, prot, &address);
663 
664     index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
665     te = &env->tlb_table[mmu_idx][index];
666 
667     /*
668      * Only evict the old entry to the victim tlb if it's for a
669      * different page; otherwise just overwrite the stale data.
670      */
671     if (!tlb_hit_page_anyprot(te, vaddr_page)) {
672         unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
673         CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
674 
675         /* Evict the old entry into the victim tlb.  */
676         copy_tlb_helper(tv, te, true);
677         env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
678     }
679 
680     /* refill the tlb */
681     /*
682      * At this point iotlb contains a physical section number in the lower
683      * TARGET_PAGE_BITS, and either
684      *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
685      *  + the offset within section->mr of the page base (otherwise)
686      * We subtract the vaddr_page (which is page aligned and thus won't
687      * disturb the low bits) to give an offset which can be added to the
688      * (non-page-aligned) vaddr of the eventual memory access to get
689      * the MemoryRegion offset for the access. Note that the vaddr we
690      * subtract here is that of the page base, and not the same as the
691      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
692      */
693     env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
694     env->iotlb[mmu_idx][index].attrs = attrs;
695 
696     /* Now calculate the new entry */
697     tn.addend = addend - vaddr_page;
698     if (prot & PAGE_READ) {
699         tn.addr_read = address;
700     } else {
701         tn.addr_read = -1;
702     }
703 
704     if (prot & PAGE_EXEC) {
705         tn.addr_code = code_address;
706     } else {
707         tn.addr_code = -1;
708     }
709 
710     tn.addr_write = -1;
711     if (prot & PAGE_WRITE) {
712         if ((memory_region_is_ram(section->mr) && section->readonly)
713             || memory_region_is_romd(section->mr)) {
714             /* Write access calls the I/O callback.  */
715             tn.addr_write = address | TLB_MMIO;
716         } else if (memory_region_is_ram(section->mr)
717                    && cpu_physical_memory_is_clean(
718                        memory_region_get_ram_addr(section->mr) + xlat)) {
719             tn.addr_write = address | TLB_NOTDIRTY;
720         } else {
721             tn.addr_write = address;
722         }
723         if (prot & PAGE_WRITE_INV) {
724             tn.addr_write |= TLB_INVALID_MASK;
725         }
726     }
727 
728     /* Pairs with flag setting in tlb_reset_dirty_range */
729     copy_tlb_helper(te, &tn, true);
730     /* atomic_mb_set(&te->addr_write, write_address); */
731 }
732 
733 /* Add a new TLB entry, but without specifying the memory
734  * transaction attributes to be used.
735  */
736 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
737                   hwaddr paddr, int prot,
738                   int mmu_idx, target_ulong size)
739 {
740     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
741                             prot, mmu_idx, size);
742 }
743 
744 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
745 {
746     ram_addr_t ram_addr;
747 
748     ram_addr = qemu_ram_addr_from_host(ptr);
749     if (ram_addr == RAM_ADDR_INVALID) {
750         error_report("Bad ram pointer %p", ptr);
751         abort();
752     }
753     return ram_addr;
754 }
755 
756 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
757                          int mmu_idx,
758                          target_ulong addr, uintptr_t retaddr,
759                          bool recheck, MMUAccessType access_type, int size)
760 {
761     CPUState *cpu = ENV_GET_CPU(env);
762     hwaddr mr_offset;
763     MemoryRegionSection *section;
764     MemoryRegion *mr;
765     uint64_t val;
766     bool locked = false;
767     MemTxResult r;
768 
769     if (recheck) {
770         /*
771          * This is a TLB_RECHECK access, where the MMU protection
772          * covers a smaller range than a target page, and we must
773          * repeat the MMU check here. This tlb_fill() call might
774          * longjump out if this access should cause a guest exception.
775          */
776         int index;
777         target_ulong tlb_addr;
778 
779         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
780 
781         index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
782         tlb_addr = env->tlb_table[mmu_idx][index].addr_read;
783         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
784             /* RAM access */
785             uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
786 
787             return ldn_p((void *)haddr, size);
788         }
789         /* Fall through for handling IO accesses */
790     }
791 
792     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
793     mr = section->mr;
794     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
795     cpu->mem_io_pc = retaddr;
796     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
797         cpu_io_recompile(cpu, retaddr);
798     }
799 
800     cpu->mem_io_vaddr = addr;
801     cpu->mem_io_access_type = access_type;
802 
803     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
804         qemu_mutex_lock_iothread();
805         locked = true;
806     }
807     r = memory_region_dispatch_read(mr, mr_offset,
808                                     &val, size, iotlbentry->attrs);
809     if (r != MEMTX_OK) {
810         hwaddr physaddr = mr_offset +
811             section->offset_within_address_space -
812             section->offset_within_region;
813 
814         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
815                                mmu_idx, iotlbentry->attrs, r, retaddr);
816     }
817     if (locked) {
818         qemu_mutex_unlock_iothread();
819     }
820 
821     return val;
822 }
823 
824 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
825                       int mmu_idx,
826                       uint64_t val, target_ulong addr,
827                       uintptr_t retaddr, bool recheck, int size)
828 {
829     CPUState *cpu = ENV_GET_CPU(env);
830     hwaddr mr_offset;
831     MemoryRegionSection *section;
832     MemoryRegion *mr;
833     bool locked = false;
834     MemTxResult r;
835 
836     if (recheck) {
837         /*
838          * This is a TLB_RECHECK access, where the MMU protection
839          * covers a smaller range than a target page, and we must
840          * repeat the MMU check here. This tlb_fill() call might
841          * longjump out if this access should cause a guest exception.
842          */
843         int index;
844         target_ulong tlb_addr;
845 
846         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
847 
848         index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
849         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
850         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
851             /* RAM access */
852             uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
853 
854             stn_p((void *)haddr, size, val);
855             return;
856         }
857         /* Fall through for handling IO accesses */
858     }
859 
860     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
861     mr = section->mr;
862     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
863     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
864         cpu_io_recompile(cpu, retaddr);
865     }
866     cpu->mem_io_vaddr = addr;
867     cpu->mem_io_pc = retaddr;
868 
869     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
870         qemu_mutex_lock_iothread();
871         locked = true;
872     }
873     r = memory_region_dispatch_write(mr, mr_offset,
874                                      val, size, iotlbentry->attrs);
875     if (r != MEMTX_OK) {
876         hwaddr physaddr = mr_offset +
877             section->offset_within_address_space -
878             section->offset_within_region;
879 
880         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
881                                mmu_idx, iotlbentry->attrs, r, retaddr);
882     }
883     if (locked) {
884         qemu_mutex_unlock_iothread();
885     }
886 }
887 
888 /* Return true if ADDR is present in the victim tlb, and has been copied
889    back to the main tlb.  */
890 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
891                            size_t elt_ofs, target_ulong page)
892 {
893     size_t vidx;
894     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
895         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
896         target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
897 
898         if (cmp == page) {
899             /* Found entry in victim tlb, swap tlb and iotlb.  */
900             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
901 
902             copy_tlb_helper(&tmptlb, tlb, false);
903             copy_tlb_helper(tlb, vtlb, true);
904             copy_tlb_helper(vtlb, &tmptlb, true);
905 
906             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
907             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
908             tmpio = *io; *io = *vio; *vio = tmpio;
909             return true;
910         }
911     }
912     return false;
913 }
914 
915 /* Macro to call the above, with local variables from the use context.  */
916 #define VICTIM_TLB_HIT(TY, ADDR) \
917   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
918                  (ADDR) & TARGET_PAGE_MASK)
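/* For example, VICTIM_TLB_HIT(addr_write, addr) expands to
 *
 *     victim_tlb_hit(env, mmu_idx, index,
 *                    offsetof(CPUTLBEntry, addr_write),
 *                    addr & TARGET_PAGE_MASK)
 *
 * picking up env, mmu_idx and index from the caller's scope.
 */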
919 
920 /* NOTE: this function can trigger an exception */
921 /* NOTE2: the returned address is not exactly the physical address: it
922  * is actually a ram_addr_t (in system mode; the user mode emulation
923  * version of this function returns a guest virtual address).
924  */
925 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
926 {
927     int mmu_idx, index;
928     void *p;
929 
930     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
931     mmu_idx = cpu_mmu_index(env, true);
932     if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) {
933         if (!VICTIM_TLB_HIT(addr_code, addr)) {
934             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
935         }
936         assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr));
937     }
938 
939     if (unlikely(env->tlb_table[mmu_idx][index].addr_code &
940                  (TLB_RECHECK | TLB_MMIO))) {
941         /*
942          * Return -1 if we can't translate and execute from an entire
943          * page of RAM here, which will cause us to execute by loading
944          * and translating one insn at a time, without caching:
945          *  - TLB_RECHECK: means the MMU protection covers a smaller range
946          *    than a target page, so we must redo the MMU check every insn
947          *  - TLB_MMIO: region is not backed by RAM
948          */
949         return -1;
950     }
951 
952     p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
953     return qemu_ram_addr_from_host_nofail(p);
954 }
955 
956 /* Probe for whether the specified guest write access is permitted.
957  * If it is not permitted then an exception will be taken in the same
958  * way as if this were a real write access (and we will not return).
959  * Otherwise the function will return, and there will be a valid
960  * entry in the TLB for this access.
961  */
962 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
963                  uintptr_t retaddr)
964 {
965     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
966     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
967 
968     if (!tlb_hit(tlb_addr, addr)) {
969         /* TLB entry is for a different page */
970         if (!VICTIM_TLB_HIT(addr_write, addr)) {
971             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
972                      mmu_idx, retaddr);
973         }
974     }
975 }
976 
977 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
978  * operations or IO operations to proceed.  Return the host address.  */
979 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
980                                TCGMemOpIdx oi, uintptr_t retaddr,
981                                NotDirtyInfo *ndi)
982 {
983     size_t mmu_idx = get_mmuidx(oi);
984     size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
985     CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
986     target_ulong tlb_addr = tlbe->addr_write;
987     TCGMemOp mop = get_memop(oi);
988     int a_bits = get_alignment_bits(mop);
989     int s_bits = mop & MO_SIZE;
990     void *hostaddr;
991 
992     /* Adjust the given return address.  */
993     retaddr -= GETPC_ADJ;
994 
995     /* Enforce guest required alignment.  */
996     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
997         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
998         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
999                              mmu_idx, retaddr);
1000     }
1001 
1002     /* Enforce qemu required alignment.  */
1003     if (unlikely(addr & ((1 << s_bits) - 1))) {
1004         /* We get here if guest alignment was not requested,
1005            or was not enforced by cpu_unaligned_access above.
1006            We might widen the access and emulate, but for now
1007            mark an exception and exit the cpu loop.  */
1008         goto stop_the_world;
1009     }
1010 
1011     /* Check TLB entry and enforce page permissions.  */
1012     if (!tlb_hit(tlb_addr, addr)) {
1013         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1014             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
1015                      mmu_idx, retaddr);
1016         }
1017         tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
1018     }
1019 
1020     /* Notice an IO access or a needs-MMU-lookup access */
1021     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
1022         /* There's really nothing that can be done to
1023            support this apart from stop-the-world.  */
1024         goto stop_the_world;
1025     }
1026 
1027     /* Let the guest notice RMW on a write-only page.  */
1028     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1029         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1030                  mmu_idx, retaddr);
1031         /* Since we don't support reads and writes to different addresses,
1032            and we do have the proper page loaded for write, this shouldn't
1033            ever return.  But just in case, handle via stop-the-world.  */
1034         goto stop_the_world;
1035     }
1036 
1037     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1038 
1039     ndi->active = false;
1040     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1041         ndi->active = true;
1042         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
1043                                       qemu_ram_addr_from_host_nofail(hostaddr),
1044                                       1 << s_bits);
1045     }
1046 
1047     return hostaddr;
1048 
1049  stop_the_world:
1050     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
1051 }
1052 
1053 #ifdef TARGET_WORDS_BIGENDIAN
1054 # define TGT_BE(X)  (X)
1055 # define TGT_LE(X)  BSWAP(X)
1056 #else
1057 # define TGT_BE(X)  BSWAP(X)
1058 # define TGT_LE(X)  (X)
1059 #endif
1060 
1061 #define MMUSUFFIX _mmu
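/* Each inclusion of softmmu_template.h below stamps out the slow-path load
 * and store helpers for one access width (selected by DATA_SIZE), with
 * MMUSUFFIX glued into the generated helper names; the second round of
 * inclusions near the end of the file uses _cmmu for code accesses.
 */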
1062 
1063 #define DATA_SIZE 1
1064 #include "softmmu_template.h"
1065 
1066 #define DATA_SIZE 2
1067 #include "softmmu_template.h"
1068 
1069 #define DATA_SIZE 4
1070 #include "softmmu_template.h"
1071 
1072 #define DATA_SIZE 8
1073 #include "softmmu_template.h"
1074 
1075 /* The first set of helpers takes OI and RETADDR as explicit arguments,
1076    which makes them callable from other helpers.  */
1077 
1078 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1079 #define ATOMIC_NAME(X) \
1080     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1081 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
1082 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
1083 #define ATOMIC_MMU_CLEANUP                              \
1084     do {                                                \
1085         if (unlikely(ndi.active)) {                     \
1086             memory_notdirty_write_complete(&ndi);       \
1087         }                                               \
1088     } while (0)
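/* Rough shape of what atomic_template.h generates from these macros (an
 * illustrative sketch, not the literal expansion): each helper does
 *
 *     ATOMIC_MMU_DECLS;
 *     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
 *     ... perform the atomic operation on *haddr ...
 *     ATOMIC_MMU_CLEANUP;
 *
 * so atomic_mmu_lookup() provides the host address and NotDirtyInfo, and
 * the cleanup completes any deferred dirty-page bookkeeping.
 */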
1089 
1090 #define DATA_SIZE 1
1091 #include "atomic_template.h"
1092 
1093 #define DATA_SIZE 2
1094 #include "atomic_template.h"
1095 
1096 #define DATA_SIZE 4
1097 #include "atomic_template.h"
1098 
1099 #ifdef CONFIG_ATOMIC64
1100 #define DATA_SIZE 8
1101 #include "atomic_template.h"
1102 #endif
1103 
1104 #ifdef CONFIG_ATOMIC128
1105 #define DATA_SIZE 16
1106 #include "atomic_template.h"
1107 #endif
1108 
1109 /* The second set of helpers is directly callable from TCG-generated code.  */
1110 
1111 #undef EXTRA_ARGS
1112 #undef ATOMIC_NAME
1113 #undef ATOMIC_MMU_LOOKUP
1114 #define EXTRA_ARGS         , TCGMemOpIdx oi
1115 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1116 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1117 
1118 #define DATA_SIZE 1
1119 #include "atomic_template.h"
1120 
1121 #define DATA_SIZE 2
1122 #include "atomic_template.h"
1123 
1124 #define DATA_SIZE 4
1125 #include "atomic_template.h"
1126 
1127 #ifdef CONFIG_ATOMIC64
1128 #define DATA_SIZE 8
1129 #include "atomic_template.h"
1130 #endif
1131 
1132 /* Code access functions.  */
1133 
1134 #undef MMUSUFFIX
1135 #define MMUSUFFIX _cmmu
1136 #undef GETPC
1137 #define GETPC() ((uintptr_t)0)
1138 #define SOFTMMU_CODE_ACCESS
1139 
1140 #define DATA_SIZE 1
1141 #include "softmmu_template.h"
1142 
1143 #define DATA_SIZE 2
1144 #include "softmmu_template.h"
1145 
1146 #define DATA_SIZE 4
1147 #include "softmmu_template.h"
1148 
1149 #define DATA_SIZE 8
1150 #include "softmmu_template.h"
1151