xref: /openbmc/qemu/accel/tcg/cputlb.c (revision d5363e58)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 
37 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
38 /* #define DEBUG_TLB */
39 /* #define DEBUG_TLB_LOG */
40 
41 #ifdef DEBUG_TLB
42 # define DEBUG_TLB_GATE 1
43 # ifdef DEBUG_TLB_LOG
44 #  define DEBUG_TLB_LOG_GATE 1
45 # else
46 #  define DEBUG_TLB_LOG_GATE 0
47 # endif
48 #else
49 # define DEBUG_TLB_GATE 0
50 # define DEBUG_TLB_LOG_GATE 0
51 #endif
52 
53 #define tlb_debug(fmt, ...) do { \
54     if (DEBUG_TLB_LOG_GATE) { \
55         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
56                       ## __VA_ARGS__); \
57     } else if (DEBUG_TLB_GATE) { \
58         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
59     } \
60 } while (0)
61 
62 #define assert_cpu_is_self(cpu) do {                              \
63         if (DEBUG_TLB_GATE) {                                     \
64             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
65         }                                                         \
66     } while (0)
67 
68 /* run_on_cpu_data.target_ptr should always be big enough for a
69  * target_ulong even on 32 bit builds */
70 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
71 
72 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
73  */
74 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
75 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
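
/*
 * A minimal sketch of how an idxmap for the *_by_mmuidx functions below is
 * built (the index names here are hypothetical, target-specific constants):
 *
 *     uint16_t idxmap = (1 << MMU_KERNEL_IDX) | (1 << MMU_USER_IDX);
 *     tlb_flush_by_mmuidx(cpu, idxmap);
 */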
76 
77 void tlb_init(CPUState *cpu)
78 {
79     CPUArchState *env = cpu->env_ptr;
80 
81     qemu_spin_init(&env->tlb_c.lock);
82 }
83 
84 /* flush_all_helper: run fn asynchronously on all cpus other than src
85  *
86  * The work is only queued here; callers that need a synchronisation
87  * point (the *_synced variants below) additionally queue the src cpu's
88  * helper as "safe" work, so that all queued work is finished before
89  * execution starts again.
90  */
91 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
92                              run_on_cpu_data d)
93 {
94     CPUState *cpu;
95 
96     CPU_FOREACH(cpu) {
97         if (cpu != src) {
98             async_run_on_cpu(cpu, fn, d);
99         }
100     }
101 }
102 
103 size_t tlb_flush_count(void)
104 {
105     CPUState *cpu;
106     size_t count = 0;
107 
108     CPU_FOREACH(cpu) {
109         CPUArchState *env = cpu->env_ptr;
110 
111         count += atomic_read(&env->tlb_flush_count);
112     }
113     return count;
114 }
115 
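/*
 * Setting every byte of an entry to 0xff makes its comparators equal to -1,
 * which both sets TLB_INVALID_MASK and can never match a page-aligned guest
 * address, so every lookup misses until the entry is refilled.
 */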
116 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
117 {
118     memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
119     memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
120     env->tlb_d[mmu_idx].large_page_addr = -1;
121     env->tlb_d[mmu_idx].large_page_mask = -1;
122     env->tlb_d[mmu_idx].vindex = 0;
123 }
124 
125 /* This is OK because CPU architectures generally permit an
126  * implementation to drop entries from the TLB at any time, so
127  * flushing more entries than required is only an efficiency issue,
128  * not a correctness issue.
129  */
130 static void tlb_flush_nocheck(CPUState *cpu)
131 {
132     CPUArchState *env = cpu->env_ptr;
133     int mmu_idx;
134 
135     assert_cpu_is_self(cpu);
136     atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
137     tlb_debug("(count: %zu)\n", tlb_flush_count());
138 
139     /*
140      * tlb_table/tlb_v_table updates from any thread must hold tlb_c.lock.
141      * However, updates from the owner thread (as is the case here; see the
142      * above assert_cpu_is_self) do not need atomic_set because all reads
143      * that do not hold the lock are performed by the same owner thread.
144      */
145     qemu_spin_lock(&env->tlb_c.lock);
146     env->tlb_c.pending_flush = 0;
147     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
148         tlb_flush_one_mmuidx_locked(env, mmu_idx);
149     }
150     qemu_spin_unlock(&env->tlb_c.lock);
151 
152     cpu_tb_jmp_cache_clear(cpu);
153 }
154 
155 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
156 {
157     tlb_flush_nocheck(cpu);
158 }
159 
160 void tlb_flush(CPUState *cpu)
161 {
162     if (cpu->created && !qemu_cpu_is_self(cpu)) {
163         CPUArchState *env = cpu->env_ptr;
164         uint16_t pending;
165 
166         qemu_spin_lock(&env->tlb_c.lock);
167         pending = env->tlb_c.pending_flush;
168         env->tlb_c.pending_flush = ALL_MMUIDX_BITS;
169         qemu_spin_unlock(&env->tlb_c.lock);
170 
171         if (pending != ALL_MMUIDX_BITS) {
172             async_run_on_cpu(cpu, tlb_flush_global_async_work,
173                              RUN_ON_CPU_NULL);
174         }
175     } else {
176         tlb_flush_nocheck(cpu);
177     }
178 }
179 
180 void tlb_flush_all_cpus(CPUState *src_cpu)
181 {
182     const run_on_cpu_func fn = tlb_flush_global_async_work;
183     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
184     fn(src_cpu, RUN_ON_CPU_NULL);
185 }
186 
187 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
188 {
189     const run_on_cpu_func fn = tlb_flush_global_async_work;
190     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
191     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
192 }
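
/*
 * In short: tlb_flush() flushes only @cpu (deferring to async work when
 * called from another thread), tlb_flush_all_cpus() queues the flush on
 * every other vCPU and runs it immediately on the source, and the _synced
 * variant queues the source's flush as "safe" work so that execution only
 * resumes once all queued work has completed.
 */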
193 
194 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
195 {
196     CPUArchState *env = cpu->env_ptr;
197     unsigned long mmu_idx_bitmask = data.host_int;
198     int mmu_idx;
199 
200     assert_cpu_is_self(cpu);
201 
202     tlb_debug("mmu_idx:0x%04lx\n", mmu_idx_bitmask);
203 
204     qemu_spin_lock(&env->tlb_c.lock);
205     env->tlb_c.pending_flush &= ~mmu_idx_bitmask;
206 
207     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
208         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
209             tlb_flush_one_mmuidx_locked(env, mmu_idx);
210         }
211     }
212     qemu_spin_unlock(&env->tlb_c.lock);
213 
214     cpu_tb_jmp_cache_clear(cpu);
215 }
216 
217 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
218 {
219     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
220 
221     if (!qemu_cpu_is_self(cpu)) {
222         CPUArchState *env = cpu->env_ptr;
223         uint16_t pending, to_clean;
224 
225         qemu_spin_lock(&env->tlb_c.lock);
226         pending = env->tlb_c.pending_flush;
227         to_clean = idxmap & ~pending;
228         env->tlb_c.pending_flush = pending | idxmap;
229         qemu_spin_unlock(&env->tlb_c.lock);
230 
231         if (to_clean) {
232             tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", to_clean);
233             async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
234                              RUN_ON_CPU_HOST_INT(to_clean));
235         }
236     } else {
237         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
238     }
239 }
240 
241 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
242 {
243     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
244 
245     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
246 
247     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
248     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
249 }
250 
251 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
252                                          uint16_t idxmap)
253 {
254     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
255 
256     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
257 
258     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
259     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
260 }
261 
262 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
263                                         target_ulong page)
264 {
265     return tlb_hit_page(tlb_entry->addr_read, page) ||
266            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
267            tlb_hit_page(tlb_entry->addr_code, page);
268 }
269 
270 /* Called with tlb_c.lock held */
271 static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
272                                           target_ulong page)
273 {
274     if (tlb_hit_page_anyprot(tlb_entry, page)) {
275         memset(tlb_entry, -1, sizeof(*tlb_entry));
276     }
277 }
278 
279 /* Called with tlb_c.lock held */
280 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
281                                               target_ulong page)
282 {
283     int k;
284 
285     assert_cpu_is_self(ENV_GET_CPU(env));
286     for (k = 0; k < CPU_VTLB_SIZE; k++) {
287         tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
288     }
289 }
290 
291 static void tlb_flush_page_locked(CPUArchState *env, int midx,
292                                   target_ulong page)
293 {
294     target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
295     target_ulong lp_mask = env->tlb_d[midx].large_page_mask;
296 
297     /* Check if we need to flush due to large pages.  */
298     if ((page & lp_mask) == lp_addr) {
299         tlb_debug("forcing full flush midx %d ("
300                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
301                   midx, lp_addr, lp_mask);
302         tlb_flush_one_mmuidx_locked(env, midx);
303     } else {
304         tlb_flush_entry_locked(tlb_entry(env, midx, page), page);
305         tlb_flush_vtlb_page_locked(env, midx, page);
306     }
307 }
308 
309 static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
310 {
311     CPUArchState *env = cpu->env_ptr;
312     target_ulong addr = (target_ulong) data.target_ptr;
313     int mmu_idx;
314 
315     assert_cpu_is_self(cpu);
316 
317     tlb_debug("page addr:" TARGET_FMT_lx "\n", addr);
318 
319     addr &= TARGET_PAGE_MASK;
320     qemu_spin_lock(&env->tlb_c.lock);
321     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
322         tlb_flush_page_locked(env, mmu_idx, addr);
323     }
324     qemu_spin_unlock(&env->tlb_c.lock);
325 
326     tb_flush_jmp_cache(cpu, addr);
327 }
328 
329 void tlb_flush_page(CPUState *cpu, target_ulong addr)
330 {
331     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
332 
333     if (!qemu_cpu_is_self(cpu)) {
334         async_run_on_cpu(cpu, tlb_flush_page_async_work,
335                          RUN_ON_CPU_TARGET_PTR(addr));
336     } else {
337         tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
338     }
339 }
340 
341 /* As we are going to hijack the bottom bits of the page address for an
342  * mmuidx bit mask, we need to fail the build if we can't do that.
343  */
344 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
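
/*
 * A minimal sketch of the encoding used below; the flush address and the
 * mmuidx bitmap share a single target_ulong:
 *
 *     word   = (addr & TARGET_PAGE_MASK) | idxmap;
 *     addr   =  word & TARGET_PAGE_MASK;
 *     idxmap =  word & ALL_MMUIDX_BITS;
 */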
345 
346 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
347                                                 run_on_cpu_data data)
348 {
349     CPUArchState *env = cpu->env_ptr;
350     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
351     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
352     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
353     int mmu_idx;
354 
355     assert_cpu_is_self(cpu);
356 
357     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
358               addr, mmu_idx_bitmap);
359 
360     qemu_spin_lock(&env->tlb_c.lock);
361     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
362         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
363             tlb_flush_page_locked(env, mmu_idx, addr);
364         }
365     }
366     qemu_spin_unlock(&env->tlb_c.lock);
367 
368     tb_flush_jmp_cache(cpu, addr);
369 }
370 
371 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
372 {
373     target_ulong addr_and_mmu_idx;
374 
375     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
376 
377     /* This should already be page aligned */
378     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
379     addr_and_mmu_idx |= idxmap;
380 
381     if (!qemu_cpu_is_self(cpu)) {
382         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
383                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
384     } else {
385         tlb_flush_page_by_mmuidx_async_work(
386             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
387     }
388 }
389 
390 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
391                                        uint16_t idxmap)
392 {
393     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
394     target_ulong addr_and_mmu_idx;
395 
396     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
397 
398     /* This should already be page aligned */
399     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
400     addr_and_mmu_idx |= idxmap;
401 
402     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
403     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
404 }
405 
406 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
407                                               target_ulong addr,
408                                               uint16_t idxmap)
409 {
410     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
411     target_ulong addr_and_mmu_idx;
412 
413     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
414 
415     /* This should already be page aligned */
416     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
417     addr_and_mmu_idx |= idxmap;
418 
419     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
420     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
421 }
422 
423 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
424 {
425     const run_on_cpu_func fn = tlb_flush_page_async_work;
426 
427     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
428     fn(src, RUN_ON_CPU_TARGET_PTR(addr));
429 }
430 
431 void tlb_flush_page_all_cpus_synced(CPUState *src,
432                                     target_ulong addr)
433 {
434     const run_on_cpu_func fn = tlb_flush_page_async_work;
435 
436     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
437     async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
438 }
439 
440 /* update the TLBs so that writes to code in the virtual page 'addr'
441    can be detected */
442 void tlb_protect_code(ram_addr_t ram_addr)
443 {
444     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
445                                              DIRTY_MEMORY_CODE);
446 }
447 
448 /* update the TLB so that writes in physical page 'ram_addr' are no longer
449    tested for self-modifying code */
450 void tlb_unprotect_code(ram_addr_t ram_addr)
451 {
452     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
453 }
454 
455 
456 /*
457  * Dirty write flag handling
458  *
459  * When the TCG code writes to a location it looks up the address in
460  * the TLB and uses that data to compute the final address. If any of
461  * the lower bits of the address are set then the slow path is forced.
462  * There are a number of reasons to do this but for normal RAM the
463  * most usual is detecting writes to code regions which may invalidate
464  * generated code.
465  *
466  * Other vCPUs might be reading their TLBs during guest execution, so we update
467  * te->addr_write with atomic_set. We don't need to worry about this for
468  * oversized guests as MTTCG is disabled for them.
469  *
470  * Called with tlb_c.lock held.
471  */
472 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
473                                          uintptr_t start, uintptr_t length)
474 {
475     uintptr_t addr = tlb_entry->addr_write;
476 
477     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
478         addr &= TARGET_PAGE_MASK;
479         addr += tlb_entry->addend;
480         if ((addr - start) < length) {
481 #if TCG_OVERSIZED_GUEST
482             tlb_entry->addr_write |= TLB_NOTDIRTY;
483 #else
484             atomic_set(&tlb_entry->addr_write,
485                        tlb_entry->addr_write | TLB_NOTDIRTY);
486 #endif
487         }
488     }
489 }
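
/*
 * For reference, the TCG fast path performs a comparison roughly of the form
 *
 *     if ((addr & (TARGET_PAGE_MASK | (size - 1))) == tlb_addr_write(entry)) {
 *         ... direct host store via entry->addend ...
 *     } else {
 *         ... call the store helper ...
 *     }
 *
 * so any flag bit left in addr_write (TLB_NOTDIRTY here) forces the slow path.
 */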
490 
491 /*
492  * Called with tlb_c.lock held.
493  * Called only from the vCPU context, i.e. the TLB's owner thread.
494  */
495 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
496 {
497     *d = *s;
498 }
499 
500 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
501  * the target vCPU).
502  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
503  * thing actually updated is the target TLB entry ->addr_write flags.
504  */
505 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
506 {
507     CPUArchState *env;
508 
509     int mmu_idx;
510 
511     env = cpu->env_ptr;
512     qemu_spin_lock(&env->tlb_c.lock);
513     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
514         unsigned int i;
515 
516         for (i = 0; i < CPU_TLB_SIZE; i++) {
517             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
518                                          length);
519         }
520 
521         for (i = 0; i < CPU_VTLB_SIZE; i++) {
522             tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
523                                          length);
524         }
525     }
526     qemu_spin_unlock(&env->tlb_c.lock);
527 }
528 
529 /* Called with tlb_c.lock held */
530 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
531                                          target_ulong vaddr)
532 {
533     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
534         tlb_entry->addr_write = vaddr;
535     }
536 }
537 
538 /* update the TLB corresponding to virtual page vaddr so that writes to it
539    are no longer trapped for dirty tracking (TLB_NOTDIRTY gets cleared) */
540 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
541 {
542     CPUArchState *env = cpu->env_ptr;
543     int mmu_idx;
544 
545     assert_cpu_is_self(cpu);
546 
547     vaddr &= TARGET_PAGE_MASK;
548     qemu_spin_lock(&env->tlb_c.lock);
549     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
550         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
551     }
552 
553     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
554         int k;
555         for (k = 0; k < CPU_VTLB_SIZE; k++) {
556             tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
557         }
558     }
559     qemu_spin_unlock(&env->tlb_c.lock);
560 }
561 
562 /* Our TLB does not support large pages, so remember the area covered by
563    large pages and trigger a full TLB flush if these are invalidated.  */
564 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
565                                target_ulong vaddr, target_ulong size)
566 {
567     target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
568     target_ulong lp_mask = ~(size - 1);
569 
570     if (lp_addr == (target_ulong)-1) {
571         /* No previous large page.  */
572         lp_addr = vaddr;
573     } else {
574         /* Extend the existing region to include the new page.
575            This is a compromise between unnecessary flushes and
576            the cost of maintaining a full variable size TLB.  */
577         lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
578         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
579             lp_mask <<= 1;
580         }
581     }
582     env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
583     env->tlb_d[mmu_idx].large_page_mask = lp_mask;
584 }
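
/*
 * Worked example with made-up addresses: an existing 2MB region at
 * 0x40200000 (mask 0xffe00000) and a new 2MB page at 0x40700000 still
 * differ under the mask (bit 22), so the loop widens the mask twice
 * (0xffc00000, then 0xff800000) until the masked XOR is zero; the recorded
 * region becomes 0x40000000/0xff800000, i.e. 8MB covering both pages.
 */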
585 
586 /* Add a new TLB entry. At most one entry for a given virtual address
587  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
588  * supplied size is only used by tlb_flush_page.
589  *
590  * Called from TCG-generated code, which is under an RCU read-side
591  * critical section.
592  */
593 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
594                              hwaddr paddr, MemTxAttrs attrs, int prot,
595                              int mmu_idx, target_ulong size)
596 {
597     CPUArchState *env = cpu->env_ptr;
598     MemoryRegionSection *section;
599     unsigned int index;
600     target_ulong address;
601     target_ulong code_address;
602     uintptr_t addend;
603     CPUTLBEntry *te, tn;
604     hwaddr iotlb, xlat, sz, paddr_page;
605     target_ulong vaddr_page;
606     int asidx = cpu_asidx_from_attrs(cpu, attrs);
607 
608     assert_cpu_is_self(cpu);
609 
610     if (size <= TARGET_PAGE_SIZE) {
611         sz = TARGET_PAGE_SIZE;
612     } else {
613         tlb_add_large_page(env, mmu_idx, vaddr, size);
614         sz = size;
615     }
616     vaddr_page = vaddr & TARGET_PAGE_MASK;
617     paddr_page = paddr & TARGET_PAGE_MASK;
618 
619     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
620                                                 &xlat, &sz, attrs, &prot);
621     assert(sz >= TARGET_PAGE_SIZE);
622 
623     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
624               " prot=%x idx=%d\n",
625               vaddr, paddr, prot, mmu_idx);
626 
627     address = vaddr_page;
628     if (size < TARGET_PAGE_SIZE) {
629         /*
630          * Slow-path the TLB entries; we will repeat the MMU check and TLB
631          * fill on every access.
632          */
633         address |= TLB_RECHECK;
634     }
635     if (!memory_region_is_ram(section->mr) &&
636         !memory_region_is_romd(section->mr)) {
637         /* IO memory case */
638         address |= TLB_MMIO;
639         addend = 0;
640     } else {
641         /* TLB_MMIO for rom/romd handled below */
642         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
643     }
644 
645     code_address = address;
646     iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
647                                             paddr_page, xlat, prot, &address);
648 
649     index = tlb_index(env, mmu_idx, vaddr_page);
650     te = tlb_entry(env, mmu_idx, vaddr_page);
651 
652     /*
653      * Hold the TLB lock for the rest of the function. We could acquire/release
654      * the lock several times in the function, but it is faster to amortize the
655      * acquisition cost by acquiring it just once. Note that this leads to
656      * a longer critical section, but this is not a concern since the TLB lock
657      * is unlikely to be contended.
658      */
659     qemu_spin_lock(&env->tlb_c.lock);
660 
661     /* Make sure there's no cached translation for the new page.  */
662     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
663 
664     /*
665      * Only evict the old entry to the victim tlb if it's for a
666      * different page; otherwise just overwrite the stale data.
667      */
668     if (!tlb_hit_page_anyprot(te, vaddr_page)) {
669         unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
670         CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
671 
672         /* Evict the old entry into the victim tlb.  */
673         copy_tlb_helper_locked(tv, te);
674         env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
675     }
676 
677     /* refill the tlb */
678     /*
679      * At this point iotlb contains a physical section number in the lower
680      * TARGET_PAGE_BITS, and either
681      *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
682      *  + the offset within section->mr of the page base (otherwise)
683      * We subtract the vaddr_page (which is page aligned and thus won't
684      * disturb the low bits) to give an offset which can be added to the
685      * (non-page-aligned) vaddr of the eventual memory access to get
686      * the MemoryRegion offset for the access. Note that the vaddr we
687      * subtract here is that of the page base, and not the same as the
688      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
689      */
690     env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
691     env->iotlb[mmu_idx][index].attrs = attrs;
692 
693     /* Now calculate the new entry */
694     tn.addend = addend - vaddr_page;
695     if (prot & PAGE_READ) {
696         tn.addr_read = address;
697     } else {
698         tn.addr_read = -1;
699     }
700 
701     if (prot & PAGE_EXEC) {
702         tn.addr_code = code_address;
703     } else {
704         tn.addr_code = -1;
705     }
706 
707     tn.addr_write = -1;
708     if (prot & PAGE_WRITE) {
709         if ((memory_region_is_ram(section->mr) && section->readonly)
710             || memory_region_is_romd(section->mr)) {
711             /* Write access calls the I/O callback.  */
712             tn.addr_write = address | TLB_MMIO;
713         } else if (memory_region_is_ram(section->mr)
714                    && cpu_physical_memory_is_clean(
715                        memory_region_get_ram_addr(section->mr) + xlat)) {
716             tn.addr_write = address | TLB_NOTDIRTY;
717         } else {
718             tn.addr_write = address;
719         }
720         if (prot & PAGE_WRITE_INV) {
721             tn.addr_write |= TLB_INVALID_MASK;
722         }
723     }
724 
725     copy_tlb_helper_locked(te, &tn);
726     qemu_spin_unlock(&env->tlb_c.lock);
727 }
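
/*
 * A typical caller is a target's tlb_fill() implementation, which installs
 * the translation once its page table walk has produced a physical address
 * and permissions; a sketch, where phys, prot, attrs and page_size are
 * whatever the walk returned:
 *
 *     tlb_set_page_with_attrs(cs, vaddr & TARGET_PAGE_MASK,
 *                             phys & TARGET_PAGE_MASK, attrs,
 *                             prot, mmu_idx, page_size);
 */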
728 
729 /* Add a new TLB entry, but without specifying the memory
730  * transaction attributes to be used.
731  */
732 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
733                   hwaddr paddr, int prot,
734                   int mmu_idx, target_ulong size)
735 {
736     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
737                             prot, mmu_idx, size);
738 }
739 
740 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
741 {
742     ram_addr_t ram_addr;
743 
744     ram_addr = qemu_ram_addr_from_host(ptr);
745     if (ram_addr == RAM_ADDR_INVALID) {
746         error_report("Bad ram pointer %p", ptr);
747         abort();
748     }
749     return ram_addr;
750 }
751 
752 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
753                          int mmu_idx,
754                          target_ulong addr, uintptr_t retaddr,
755                          bool recheck, MMUAccessType access_type, int size)
756 {
757     CPUState *cpu = ENV_GET_CPU(env);
758     hwaddr mr_offset;
759     MemoryRegionSection *section;
760     MemoryRegion *mr;
761     uint64_t val;
762     bool locked = false;
763     MemTxResult r;
764 
765     if (recheck) {
766         /*
767          * This is a TLB_RECHECK access, where the MMU protection
768          * covers a smaller range than a target page, and we must
769          * repeat the MMU check here. This tlb_fill() call might
770          * longjump out if this access should cause a guest exception.
771          */
772         CPUTLBEntry *entry;
773         target_ulong tlb_addr;
774 
775         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
776 
777         entry = tlb_entry(env, mmu_idx, addr);
778         tlb_addr = entry->addr_read;
779         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
780             /* RAM access */
781             uintptr_t haddr = addr + entry->addend;
782 
783             return ldn_p((void *)haddr, size);
784         }
785         /* Fall through for handling IO accesses */
786     }
787 
788     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
789     mr = section->mr;
790     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
791     cpu->mem_io_pc = retaddr;
792     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
793         cpu_io_recompile(cpu, retaddr);
794     }
795 
796     cpu->mem_io_vaddr = addr;
797     cpu->mem_io_access_type = access_type;
798 
799     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
800         qemu_mutex_lock_iothread();
801         locked = true;
802     }
803     r = memory_region_dispatch_read(mr, mr_offset,
804                                     &val, size, iotlbentry->attrs);
805     if (r != MEMTX_OK) {
806         hwaddr physaddr = mr_offset +
807             section->offset_within_address_space -
808             section->offset_within_region;
809 
810         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
811                                mmu_idx, iotlbentry->attrs, r, retaddr);
812     }
813     if (locked) {
814         qemu_mutex_unlock_iothread();
815     }
816 
817     return val;
818 }
819 
820 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
821                       int mmu_idx,
822                       uint64_t val, target_ulong addr,
823                       uintptr_t retaddr, bool recheck, int size)
824 {
825     CPUState *cpu = ENV_GET_CPU(env);
826     hwaddr mr_offset;
827     MemoryRegionSection *section;
828     MemoryRegion *mr;
829     bool locked = false;
830     MemTxResult r;
831 
832     if (recheck) {
833         /*
834          * This is a TLB_RECHECK access, where the MMU protection
835          * covers a smaller range than a target page, and we must
836          * repeat the MMU check here. This tlb_fill() call might
837          * longjump out if this access should cause a guest exception.
838          */
839         CPUTLBEntry *entry;
840         target_ulong tlb_addr;
841 
842         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
843 
844         entry = tlb_entry(env, mmu_idx, addr);
845         tlb_addr = tlb_addr_write(entry);
846         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
847             /* RAM access */
848             uintptr_t haddr = addr + entry->addend;
849 
850             stn_p((void *)haddr, size, val);
851             return;
852         }
853         /* Fall through for handling IO accesses */
854     }
855 
856     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
857     mr = section->mr;
858     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
859     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
860         cpu_io_recompile(cpu, retaddr);
861     }
862     cpu->mem_io_vaddr = addr;
863     cpu->mem_io_pc = retaddr;
864 
865     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
866         qemu_mutex_lock_iothread();
867         locked = true;
868     }
869     r = memory_region_dispatch_write(mr, mr_offset,
870                                      val, size, iotlbentry->attrs);
871     if (r != MEMTX_OK) {
872         hwaddr physaddr = mr_offset +
873             section->offset_within_address_space -
874             section->offset_within_region;
875 
876         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
877                                mmu_idx, iotlbentry->attrs, r, retaddr);
878     }
879     if (locked) {
880         qemu_mutex_unlock_iothread();
881     }
882 }
883 
884 /* Return true if ADDR is present in the victim tlb, and has been copied
885    back to the main tlb.  */
886 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
887                            size_t elt_ofs, target_ulong page)
888 {
889     size_t vidx;
890 
891     assert_cpu_is_self(ENV_GET_CPU(env));
892     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
893         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
894         target_ulong cmp;
895 
896         /* elt_ofs might correspond to .addr_write, so use atomic_read */
897 #if TCG_OVERSIZED_GUEST
898         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
899 #else
900         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
901 #endif
902 
903         if (cmp == page) {
904             /* Found entry in victim tlb, swap tlb and iotlb.  */
905             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
906 
907             qemu_spin_lock(&env->tlb_c.lock);
908             copy_tlb_helper_locked(&tmptlb, tlb);
909             copy_tlb_helper_locked(tlb, vtlb);
910             copy_tlb_helper_locked(vtlb, &tmptlb);
911             qemu_spin_unlock(&env->tlb_c.lock);
912 
913             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
914             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
915             tmpio = *io; *io = *vio; *vio = tmpio;
916             return true;
917         }
918     }
919     return false;
920 }
921 
922 /* Macro to call the above, with local variables from the use context.  */
923 #define VICTIM_TLB_HIT(TY, ADDR) \
924   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
925                  (ADDR) & TARGET_PAGE_MASK)
926 
927 /* NOTE: this function can trigger an exception */
928 /* NOTE2: the returned address is not exactly the physical address: it
929  * is actually a ram_addr_t (in system mode; the user mode emulation
930  * version of this function returns a guest virtual address).
931  */
932 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
933 {
934     uintptr_t mmu_idx = cpu_mmu_index(env, true);
935     uintptr_t index = tlb_index(env, mmu_idx, addr);
936     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
937     void *p;
938 
939     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
940         if (!VICTIM_TLB_HIT(addr_code, addr)) {
941             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
942         }
943         assert(tlb_hit(entry->addr_code, addr));
944     }
945 
946     if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
947         /*
948          * Return -1 if we can't translate and execute from an entire
949          * page of RAM here, which will cause us to execute by loading
950          * and translating one insn at a time, without caching:
951          *  - TLB_RECHECK: means the MMU protection covers a smaller range
952          *    than a target page, so we must redo the MMU check every insn
953          *  - TLB_MMIO: region is not backed by RAM
954          */
955         return -1;
956     }
957 
958     p = (void *)((uintptr_t)addr + entry->addend);
959     return qemu_ram_addr_from_host_nofail(p);
960 }
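
/*
 * tb_gen_code() uses the returned ram_addr_t as part of the physical key for
 * the translation block cache; a -1 return makes it emit a one-instruction,
 * uncached TB instead.
 */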
961 
962 /* Probe for whether the specified guest write access is permitted.
963  * If it is not permitted then an exception will be taken in the same
964  * way as if this were a real write access (and we will not return).
965  * Otherwise the function will return, and there will be a valid
966  * entry in the TLB for this access.
967  */
968 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
969                  uintptr_t retaddr)
970 {
971     uintptr_t index = tlb_index(env, mmu_idx, addr);
972     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
973 
974     if (!tlb_hit(tlb_addr_write(entry), addr)) {
975         /* TLB entry is for a different page */
976         if (!VICTIM_TLB_HIT(addr_write, addr)) {
977             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
978                      mmu_idx, retaddr);
979         }
980     }
981 }
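
/*
 * A minimal usage sketch (hypothetical helper): code that must not fault
 * partway through a store confined to a single page can probe the page
 * first and only then perform the individual writes:
 *
 *     uintptr_t ra = GETPC();
 *
 *     probe_write(env, dest, len, cpu_mmu_index(env, false), ra);
 *     for (i = 0; i < len; i++) {
 *         cpu_stb_data_ra(env, dest + i, 0, ra);
 *     }
 */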
982 
983 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
984  * or I/O operations to proceed.  Return the host address.  */
985 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
986                                TCGMemOpIdx oi, uintptr_t retaddr,
987                                NotDirtyInfo *ndi)
988 {
989     size_t mmu_idx = get_mmuidx(oi);
990     uintptr_t index = tlb_index(env, mmu_idx, addr);
991     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
992     target_ulong tlb_addr = tlb_addr_write(tlbe);
993     TCGMemOp mop = get_memop(oi);
994     int a_bits = get_alignment_bits(mop);
995     int s_bits = mop & MO_SIZE;
996     void *hostaddr;
997 
998     /* Adjust the given return address.  */
999     retaddr -= GETPC_ADJ;
1000 
1001     /* Enforce guest required alignment.  */
1002     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1003         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1004         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
1005                              mmu_idx, retaddr);
1006     }
1007 
1008     /* Enforce qemu required alignment.  */
1009     if (unlikely(addr & ((1 << s_bits) - 1))) {
1010         /* We get here if guest alignment was not requested,
1011            or was not enforced by cpu_unaligned_access above.
1012            We might widen the access and emulate, but for now
1013            mark an exception and exit the cpu loop.  */
1014         goto stop_the_world;
1015     }
1016 
1017     /* Check TLB entry and enforce page permissions.  */
1018     if (!tlb_hit(tlb_addr, addr)) {
1019         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1020             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
1021                      mmu_idx, retaddr);
1022         }
1023         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1024     }
1025 
1026     /* Notice an IO access or a needs-MMU-lookup access */
1027     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
1028         /* There's really nothing that can be done to
1029            support this apart from stop-the-world.  */
1030         goto stop_the_world;
1031     }
1032 
1033     /* Let the guest notice RMW on a write-only page.  */
1034     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1035         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1036                  mmu_idx, retaddr);
1037         /* Since we don't support reads and writes to different addresses,
1038            and we do have the proper page loaded for write, this shouldn't
1039            ever return.  But just in case, handle via stop-the-world.  */
1040         goto stop_the_world;
1041     }
1042 
1043     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1044 
1045     ndi->active = false;
1046     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1047         ndi->active = true;
1048         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
1049                                       qemu_ram_addr_from_host_nofail(hostaddr),
1050                                       1 << s_bits);
1051     }
1052 
1053     return hostaddr;
1054 
1055  stop_the_world:
1056     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
1057 }
1058 
1059 #ifdef TARGET_WORDS_BIGENDIAN
1060 # define TGT_BE(X)  (X)
1061 # define TGT_LE(X)  BSWAP(X)
1062 #else
1063 # define TGT_BE(X)  BSWAP(X)
1064 # define TGT_LE(X)  (X)
1065 #endif
1066 
1067 #define MMUSUFFIX _mmu
1068 
1069 #define DATA_SIZE 1
1070 #include "softmmu_template.h"
1071 
1072 #define DATA_SIZE 2
1073 #include "softmmu_template.h"
1074 
1075 #define DATA_SIZE 4
1076 #include "softmmu_template.h"
1077 
1078 #define DATA_SIZE 8
1079 #include "softmmu_template.h"
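
/*
 * Each inclusion above instantiates the out-of-line load/store helpers for
 * one access size; the 4-byte expansion, for example, provides functions
 * roughly of the form
 *
 *     tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
 *                                         TCGMemOpIdx oi, uintptr_t retaddr);
 *     void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
 *                            TCGMemOpIdx oi, uintptr_t retaddr);
 *
 * which are the slow-path targets of the TCG qemu_ld/qemu_st opcodes.
 */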
1080 
1081 /* First set of helpers allows passing in of OI and RETADDR.  This makes
1082    them callable from other helpers.  */
1083 
1084 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1085 #define ATOMIC_NAME(X) \
1086     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1087 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
1088 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
1089 #define ATOMIC_MMU_CLEANUP                              \
1090     do {                                                \
1091         if (unlikely(ndi.active)) {                     \
1092             memory_notdirty_write_complete(&ndi);       \
1093         }                                               \
1094     } while (0)
1095 
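/*
 * With DATA_SIZE defined, atomic_template.h expands ATOMIC_NAME() into
 * helpers such as, for the 4-byte case, roughly
 *
 *     uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env,
 *                                            target_ulong addr,
 *                                            uint32_t cmpv, uint32_t newv,
 *                                            TCGMemOpIdx oi, uintptr_t retaddr);
 *
 * all of which go through atomic_mmu_lookup() above for the address check.
 */
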
1096 #define DATA_SIZE 1
1097 #include "atomic_template.h"
1098 
1099 #define DATA_SIZE 2
1100 #include "atomic_template.h"
1101 
1102 #define DATA_SIZE 4
1103 #include "atomic_template.h"
1104 
1105 #ifdef CONFIG_ATOMIC64
1106 #define DATA_SIZE 8
1107 #include "atomic_template.h"
1108 #endif
1109 
1110 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1111 #define DATA_SIZE 16
1112 #include "atomic_template.h"
1113 #endif
1114 
1115 /* Second set of helpers are directly callable from TCG as helpers.  */
1116 
1117 #undef EXTRA_ARGS
1118 #undef ATOMIC_NAME
1119 #undef ATOMIC_MMU_LOOKUP
1120 #define EXTRA_ARGS         , TCGMemOpIdx oi
1121 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1122 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1123 
1124 #define DATA_SIZE 1
1125 #include "atomic_template.h"
1126 
1127 #define DATA_SIZE 2
1128 #include "atomic_template.h"
1129 
1130 #define DATA_SIZE 4
1131 #include "atomic_template.h"
1132 
1133 #ifdef CONFIG_ATOMIC64
1134 #define DATA_SIZE 8
1135 #include "atomic_template.h"
1136 #endif
1137 
1138 /* Code access functions.  */
1139 
1140 #undef MMUSUFFIX
1141 #define MMUSUFFIX _cmmu
1142 #undef GETPC
1143 #define GETPC() ((uintptr_t)0)
1144 #define SOFTMMU_CODE_ACCESS
1145 
1146 #define DATA_SIZE 1
1147 #include "softmmu_template.h"
1148 
1149 #define DATA_SIZE 2
1150 #include "softmmu_template.h"
1151 
1152 #define DATA_SIZE 4
1153 #include "softmmu_template.h"
1154 
1155 #define DATA_SIZE 8
1156 #include "softmmu_template.h"
1157