/*
 *  Common CPU TLB handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env->tlb_c.lock);
}

/* flush_all_helper: queue fn as asynchronous work on every cpu except src
 *
 * Callers that need a synchronisation point additionally queue the src
 * cpu's helper as "safe" work (see the *_all_cpus_synced variants below),
 * so that all of the queued work is finished before execution starts
 * again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

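/* Sum the per-vCPU flush counters. Each counter is read with atomic_read
 * but without any lock, so the total is only approximate and is intended
 * for statistics output rather than for synchronisation.
 */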
size_t tlb_flush_count(void)
{
    CPUState *cpu;
    size_t count = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        count += atomic_read(&env->tlb_flush_count);
    }
    return count;
}

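/* Reset every entry of one MMU index, in both the main and the victim TLB,
 * to the all-ones "invalid" pattern, and forget any large-page range that
 * was being tracked for it. Called with tlb_c.lock held.
 */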
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
    memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
    env->tlb_d[mmu_idx].large_page_addr = -1;
    env->tlb_d[mmu_idx].large_page_mask = -1;
}

/* This is OK because CPU architectures generally permit an
 * implementation to drop entries from the TLB at any time, so
 * flushing more entries than required is only an efficiency issue,
 * not a correctness issue.
 */
static void tlb_flush_nocheck(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);
    atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
    tlb_debug("(count: %zu)\n", tlb_flush_count());

    /*
     * tlb_table/tlb_v_table updates from any thread must hold tlb_c.lock.
     * However, updates from the owner thread (as is the case here; see the
     * above assert_cpu_is_self) do not need atomic_set because all reads
     * that do not hold the lock are performed by the same owner thread.
     */
    qemu_spin_lock(&env->tlb_c.lock);
    env->tlb_c.pending_flush = 0;
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }
    qemu_spin_unlock(&env->tlb_c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    env->vtlb_index = 0;
}

static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
{
    tlb_flush_nocheck(cpu);
}

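/* Flush the entire TLB of a single vCPU. If called from a thread other
 * than the vCPU's own thread, the flush is queued as asynchronous work;
 * a full flush that is already pending is not queued a second time.
 */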
void tlb_flush(CPUState *cpu)
{
    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        CPUArchState *env = cpu->env_ptr;
        uint16_t pending;

        qemu_spin_lock(&env->tlb_c.lock);
        pending = env->tlb_c.pending_flush;
        env->tlb_c.pending_flush = ALL_MMUIDX_BITS;
        qemu_spin_unlock(&env->tlb_c.lock);

        if (pending != ALL_MMUIDX_BITS) {
            async_run_on_cpu(cpu, tlb_flush_global_async_work,
                             RUN_ON_CPU_NULL);
        }
    } else {
        tlb_flush_nocheck(cpu);
    }
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    const run_on_cpu_func fn = tlb_flush_global_async_work;
    flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
    fn(src_cpu, RUN_ON_CPU_NULL);
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    const run_on_cpu_func fn = tlb_flush_global_async_work;
    flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    unsigned long mmu_idx_bitmask = data.host_int;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04lx\n", mmu_idx_bitmask);

    qemu_spin_lock(&env->tlb_c.lock);
    env->tlb_c.pending_flush &= ~mmu_idx_bitmask;

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
            tlb_flush_one_mmuidx_locked(env, mmu_idx);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);

    cpu_tb_jmp_cache_clear(cpu);
}

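/* Flush only the MMU indexes named in idxmap. Cross-thread callers get the
 * work queued asynchronously, and indexes that already have a flush pending
 * are dropped from the request so that they are not flushed twice.
 */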
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (!qemu_cpu_is_self(cpu)) {
        CPUArchState *env = cpu->env_ptr;
        uint16_t pending, to_clean;

        qemu_spin_lock(&env->tlb_c.lock);
        pending = env->tlb_c.pending_flush;
        to_clean = idxmap & ~pending;
        env->tlb_c.pending_flush = pending | idxmap;
        qemu_spin_unlock(&env->tlb_c.lock);

        if (to_clean) {
            tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", to_clean);
            async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                             RUN_ON_CPU_HOST_INT(to_clean));
        }
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                         uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

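/* Return true if the TLB entry matches PAGE for any access type
 * (read, write or instruction fetch).
 */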
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
    }
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    int k;

    assert_cpu_is_self(ENV_GET_CPU(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
    target_ulong lp_mask = env->tlb_d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        tlb_flush_entry_locked(tlb_entry(env, midx, page), page);
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong addr = (target_ulong) data.target_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx "\n", addr);

    addr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_flush_page_locked(env, mmu_idx, addr);
    }
    qemu_spin_unlock(&env->tlb_c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_debug("page :" TARGET_FMT_lx "\n", addr);

    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr));
    } else {
        tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
    }
}

/* As we are going to hijack the bottom bits of the page address for an
 * mmuidx bit mask, the build must fail if those bits cannot hold it.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);

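/* The target address and the MMU index bitmap arrive packed into a single
 * target_ulong: the page-aligned address in the upper bits and the idxmap
 * in the bits below TARGET_PAGE_BITS (see the build-time assert above).
 */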
static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
                                                run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
              addr, mmu_idx_bitmap);

    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    } else {
        tlb_flush_page_by_mmuidx_async_work(
            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    }
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    const run_on_cpu_func fn = tlb_flush_page_async_work;

    flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
    fn(src, RUN_ON_CPU_TARGET_PTR(addr));
}

void tlb_flush_page_all_cpus_synced(CPUState *src,
                                    target_ulong addr)
{
    const run_on_cpu_func fn = tlb_flush_page_async_work;

    flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
    async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in the physical page 'ram_addr' are no
   longer tested for self-modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;

        for (i = 0; i < CPU_TLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
                                         length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
                                         length);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr so that writes to it
   no longer take the TLB_NOTDIRTY slow path */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env->tlb_d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /*
         * Slow-path the TLB entries; we will repeat the MMU check and TLB
         * fill on every access.
         */
        address |= TLB_RECHECK;
    }
    if (!memory_region_is_ram(section->mr) &&
        !memory_region_is_romd(section->mr)) {
        /* IO memory case */
        address |= TLB_MMIO;
        addend = 0;
    } else {
        /* TLB_MMIO for rom/romd handled below */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    }

    code_address = address;
    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
                                            paddr_page, xlat, prot, &address);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&env->tlb_c.lock);

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page)) {
        unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
     *  + the offset within section->mr of the page base (otherwise)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
    env->iotlb[mmu_idx][index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = code_address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        if ((memory_region_is_ram(section->mr) && section->readonly)
            || memory_region_is_romd(section->mr)) {
            /* Write access calls the I/O callback.  */
            tn.addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
                   && cpu_physical_memory_is_clean(
                       memory_region_get_ram_addr(section->mr) + xlat)) {
            tn.addr_write = address | TLB_NOTDIRTY;
        } else {
            tn.addr_write = address;
        }
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    qemu_spin_unlock(&env->tlb_c.lock);
}

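/* Illustrative use only, not code from this file: a target's tlb_fill
 * implementation would typically finish a successful page-table walk with
 * something like
 *
 *     tlb_set_page_with_attrs(cs, vaddr & TARGET_PAGE_MASK,
 *                             paddr & TARGET_PAGE_MASK, attrs,
 *                             prot, mmu_idx, TARGET_PAGE_SIZE);
 */
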
/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

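/* Perform a load that must take the slow path, either because the page is
 * backed by I/O (the iotlb entry resolves to a MemoryRegion) or because the
 * TLB entry was marked TLB_RECHECK. The iothread lock is taken around the
 * dispatch when the region requires it.
 */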
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx,
                         target_ulong addr, uintptr_t retaddr,
                         bool recheck, MMUAccessType access_type, int size)
{
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    if (recheck) {
        /*
         * This is a TLB_RECHECK access, where the MMU protection
         * covers a smaller range than a target page, and we must
         * repeat the MMU check here. This tlb_fill() call might
         * longjmp out if this access should cause a guest exception.
         */
        CPUTLBEntry *entry;
        target_ulong tlb_addr;

        tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);

        entry = tlb_entry(env, mmu_idx, addr);
        tlb_addr = entry->addr_read;
        if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
            /* RAM access */
            uintptr_t haddr = addr + entry->addend;

            return ldn_p((void *)haddr, size);
        }
        /* Fall through for handling IO accesses */
    }

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    cpu->mem_io_vaddr = addr;
    cpu->mem_io_access_type = access_type;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset,
                                    &val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

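/* Store counterpart of io_readx: dispatch the write to the MemoryRegion
 * behind the iotlb entry, re-running the MMU check first for TLB_RECHECK
 * pages, and report any MemTxResult failure via cpu_transaction_failed().
 */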
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx,
                      uint64_t val, target_ulong addr,
                      uintptr_t retaddr, bool recheck, int size)
{
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    if (recheck) {
        /*
         * This is a TLB_RECHECK access, where the MMU protection
         * covers a smaller range than a target page, and we must
         * repeat the MMU check here. This tlb_fill() call might
         * longjmp out if this access should cause a guest exception.
         */
        CPUTLBEntry *entry;
        target_ulong tlb_addr;

        tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);

        entry = tlb_entry(env, mmu_idx, addr);
        tlb_addr = tlb_addr_write(entry);
        if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
            /* RAM access */
            uintptr_t haddr = addr + entry->addend;

            stn_p((void *)haddr, size, val);
            return;
        }
        /* Fall through for handling IO accesses */
    }

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_vaddr = addr;
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset,
                                     val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(ENV_GET_CPU(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];

            qemu_spin_lock(&env->tlb_c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env->tlb_c.lock);

            CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
            CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
 * is actually a ram_addr_t (in system mode; the user mode emulation
 * version of this function returns a guest virtual address).
 */
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
        /*
         * Return -1 if we can't translate and execute from an entire
         * page of RAM here, which will cause us to execute by loading
         * and translating one insn at a time, without caching:
         *  - TLB_RECHECK: means the MMU protection covers a smaller range
         *    than a target page, so we must redo the MMU check every insn
         *  - TLB_MMIO: region is not backed by RAM
         */
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    return qemu_ram_addr_from_host_nofail(p);
}

/* Probe for whether the specified guest write access is permitted.
 * If it is not permitted then an exception will be taken in the same
 * way as if this were a real write access (and we will not return).
 * Otherwise the function will return, and there will be a valid
 * entry in the TLB for this access.
 */
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
                 uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);

    if (!tlb_hit(tlb_addr_write(entry), addr)) {
        /* TLB entry is for a different page */
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
        }
    }
}

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr,
                               NotDirtyInfo *ndi)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    TCGMemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    ndi->active = false;
    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        ndi->active = true;
        memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
                                      qemu_ram_addr_from_host_nofail(hostaddr),
                                      1 << s_bits);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
}

#ifdef TARGET_WORDS_BIGENDIAN
# define TGT_BE(X)  (X)
# define TGT_LE(X)  BSWAP(X)
#else
# define TGT_BE(X)  BSWAP(X)
# define TGT_LE(X)  (X)
#endif

#define MMUSUFFIX _mmu

#define DATA_SIZE 1
#include "softmmu_template.h"

#define DATA_SIZE 2
#include "softmmu_template.h"

#define DATA_SIZE 4
#include "softmmu_template.h"

#define DATA_SIZE 8
#include "softmmu_template.h"

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS NotDirtyInfo ndi
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
#define ATOMIC_MMU_CLEANUP                              \
    do {                                                \
        if (unlikely(ndi.active)) {                     \
            memory_notdirty_write_complete(&ndi);       \
        }                                               \
    } while (0)

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* Code access functions.  */

#undef MMUSUFFIX
#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() ((uintptr_t)0)
#define SOFTMMU_CODE_ACCESS

#define DATA_SIZE 1
#include "softmmu_template.h"

#define DATA_SIZE 2
#include "softmmu_template.h"

#define DATA_SIZE 4
#include "softmmu_template.h"

#define DATA_SIZE 8
#include "softmmu_template.h"