xref: /openbmc/qemu/accel/tcg/cputlb.c (revision 60a2ad7d)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 
37 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
38 /* #define DEBUG_TLB */
39 /* #define DEBUG_TLB_LOG */
40 
41 #ifdef DEBUG_TLB
42 # define DEBUG_TLB_GATE 1
43 # ifdef DEBUG_TLB_LOG
44 #  define DEBUG_TLB_LOG_GATE 1
45 # else
46 #  define DEBUG_TLB_LOG_GATE 0
47 # endif
48 #else
49 # define DEBUG_TLB_GATE 0
50 # define DEBUG_TLB_LOG_GATE 0
51 #endif
52 
53 #define tlb_debug(fmt, ...) do { \
54     if (DEBUG_TLB_LOG_GATE) { \
55         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
56                       ## __VA_ARGS__); \
57     } else if (DEBUG_TLB_GATE) { \
58         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
59     } \
60 } while (0)
61 
62 #define assert_cpu_is_self(cpu) do {                              \
63         if (DEBUG_TLB_GATE) {                                     \
64             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
65         }                                                         \
66     } while (0)
67 
68 /* run_on_cpu_data.target_ptr should always be big enough for a
69  * target_ulong even on 32 bit builds */
70 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
71 
72 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
73  */
74 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
75 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
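/* e.g. with NB_MMU_MODES == 3 this evaluates to 0x7, one bit per MMU mode. */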
76 
77 void tlb_init(CPUState *cpu)
78 {
79     CPUArchState *env = cpu->env_ptr;
80 
81     qemu_spin_init(&env->tlb_c.lock);
82 }
83 
84 /* flush_all_helper: run fn across all cpus
85  *
86  * This only queues fn on the other cpus; the caller handles its own
87  * flush. The *_synced callers additionally queue the source cpu's work
88  * as "safe" work, creating a synchronisation point where all queued
89  * work is finished before execution starts again.
90  */
91 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
92                              run_on_cpu_data d)
93 {
94     CPUState *cpu;
95 
96     CPU_FOREACH(cpu) {
97         if (cpu != src) {
98             async_run_on_cpu(cpu, fn, d);
99         }
100     }
101 }
102 
103 size_t tlb_flush_count(void)
104 {
105     CPUState *cpu;
106     size_t count = 0;
107 
108     CPU_FOREACH(cpu) {
109         CPUArchState *env = cpu->env_ptr;
110 
111         count += atomic_read(&env->tlb_flush_count);
112     }
113     return count;
114 }
115 
116 /* This is OK because CPU architectures generally permit an
117  * implementation to drop entries from the TLB at any time, so
118  * flushing more entries than required is only an efficiency issue,
119  * not a correctness issue.
120  */
121 static void tlb_flush_nocheck(CPUState *cpu)
122 {
123     CPUArchState *env = cpu->env_ptr;
124 
125     assert_cpu_is_self(cpu);
126     atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
127     tlb_debug("(count: %zu)\n", tlb_flush_count());
128 
129     /*
130      * tlb_table/tlb_v_table updates from any thread must hold tlb_c.lock.
131      * However, updates from the owner thread (as is the case here; see the
132      * above assert_cpu_is_self) do not need atomic_set because all reads
133      * that do not hold the lock are performed by the same owner thread.
134      */
135     qemu_spin_lock(&env->tlb_c.lock);
136     env->tlb_c.pending_flush = 0;
137     memset(env->tlb_table, -1, sizeof(env->tlb_table));
138     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
139     qemu_spin_unlock(&env->tlb_c.lock);
140 
141     cpu_tb_jmp_cache_clear(cpu);
142 
143     env->vtlb_index = 0;
144     env->tlb_flush_addr = -1;
145     env->tlb_flush_mask = 0;
146 }
147 
148 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
149 {
150     tlb_flush_nocheck(cpu);
151 }
152 
153 void tlb_flush(CPUState *cpu)
154 {
155     if (cpu->created && !qemu_cpu_is_self(cpu)) {
156         CPUArchState *env = cpu->env_ptr;
157         uint16_t pending;
158 
159         qemu_spin_lock(&env->tlb_c.lock);
160         pending = env->tlb_c.pending_flush;
161         env->tlb_c.pending_flush = ALL_MMUIDX_BITS;
162         qemu_spin_unlock(&env->tlb_c.lock);
163 
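        /*
         * Queue a new full-flush job only if one is not already pending;
         * a previous request that set all bits will flush everything.
         */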
164         if (pending != ALL_MMUIDX_BITS) {
165             async_run_on_cpu(cpu, tlb_flush_global_async_work,
166                              RUN_ON_CPU_NULL);
167         }
168     } else {
169         tlb_flush_nocheck(cpu);
170     }
171 }
172 
173 void tlb_flush_all_cpus(CPUState *src_cpu)
174 {
175     const run_on_cpu_func fn = tlb_flush_global_async_work;
176     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
177     fn(src_cpu, RUN_ON_CPU_NULL);
178 }
179 
180 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
181 {
182     const run_on_cpu_func fn = tlb_flush_global_async_work;
183     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
184     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
185 }
186 
187 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
188 {
189     CPUArchState *env = cpu->env_ptr;
190     unsigned long mmu_idx_bitmask = data.host_int;
191     int mmu_idx;
192 
193     assert_cpu_is_self(cpu);
194 
195     tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
196 
197     qemu_spin_lock(&env->tlb_c.lock);
198     env->tlb_c.pending_flush &= ~mmu_idx_bitmask;
199 
200     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
201 
202         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
203             tlb_debug("%d\n", mmu_idx);
204 
205             memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
206             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
207         }
208     }
209     qemu_spin_unlock(&env->tlb_c.lock);
210 
211     cpu_tb_jmp_cache_clear(cpu);
212 
213     tlb_debug("done\n");
214 }
215 
216 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
217 {
218     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
219 
220     if (!qemu_cpu_is_self(cpu)) {
221         CPUArchState *env = cpu->env_ptr;
222         uint16_t pending, to_clean;
223 
224         qemu_spin_lock(&env->tlb_c.lock);
225         pending = env->tlb_c.pending_flush;
226         to_clean = idxmap & ~pending;
227         env->tlb_c.pending_flush = pending | idxmap;
228         qemu_spin_unlock(&env->tlb_c.lock);
229 
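        /*
         * Only bits that were not already pending need a new async job;
         * the job queued by an earlier request covers the rest.
         */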
230         if (to_clean) {
231             tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", to_clean);
232             async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
233                              RUN_ON_CPU_HOST_INT(to_clean));
234         }
235     } else {
236         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
237     }
238 }
239 
240 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
241 {
242     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
243 
244     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
245 
246     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
247     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
248 }
249 
250 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
251                                                        uint16_t idxmap)
252 {
253     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
254 
255     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
256 
257     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
258     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
259 }
260 
261 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
262                                         target_ulong page)
263 {
264     return tlb_hit_page(tlb_entry->addr_read, page) ||
265            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
266            tlb_hit_page(tlb_entry->addr_code, page);
267 }
268 
269 /* Called with tlb_c.lock held */
270 static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
271                                           target_ulong page)
272 {
273     if (tlb_hit_page_anyprot(tlb_entry, page)) {
274         memset(tlb_entry, -1, sizeof(*tlb_entry));
275     }
276 }
277 
278 /* Called with tlb_c.lock held */
279 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
280                                               target_ulong page)
281 {
282     int k;
283 
284     assert_cpu_is_self(ENV_GET_CPU(env));
285     for (k = 0; k < CPU_VTLB_SIZE; k++) {
286         tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
287     }
288 }
289 
290 static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
291 {
292     CPUArchState *env = cpu->env_ptr;
293     target_ulong addr = (target_ulong) data.target_ptr;
294     int mmu_idx;
295 
296     assert_cpu_is_self(cpu);
297 
298     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
299 
300     /* Check if we need to flush due to large pages.  */
301     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
302         tlb_debug("forcing full flush ("
303                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
304                   env->tlb_flush_addr, env->tlb_flush_mask);
305 
306         tlb_flush(cpu);
307         return;
308     }
309 
310     addr &= TARGET_PAGE_MASK;
311     qemu_spin_lock(&env->tlb_c.lock);
312     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
313         tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
314         tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
315     }
316     qemu_spin_unlock(&env->tlb_c.lock);
317 
318     tb_flush_jmp_cache(cpu, addr);
319 }
320 
321 void tlb_flush_page(CPUState *cpu, target_ulong addr)
322 {
323     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
324 
325     if (!qemu_cpu_is_self(cpu)) {
326         async_run_on_cpu(cpu, tlb_flush_page_async_work,
327                          RUN_ON_CPU_TARGET_PTR(addr));
328     } else {
329         tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
330     }
331 }
332 
333 /* Since we hijack the bottom bits of the page address for an mmuidx
334  * bit mask, fail the build if those bits cannot fit in the page offset.
335  */
336 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
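/*
 * Illustrative encoding (with 4K pages): tlb_flush_page_by_mmuidx packs
 *   addr_and_mmu_idx = (addr & TARGET_PAGE_MASK) | idxmap
 * and the async workers below recover the page with & TARGET_PAGE_MASK
 * and the mmuidx bitmap with & ALL_MMUIDX_BITS.
 */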
337 
338 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
339                                                 run_on_cpu_data data)
340 {
341     CPUArchState *env = cpu->env_ptr;
342     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
343     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
344     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
345     int mmu_idx;
346 
347     assert_cpu_is_self(cpu);
348 
349     tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
350               addr, mmu_idx_bitmap);
351 
352     qemu_spin_lock(&env->tlb_c.lock);
353     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
354         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
355             tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
356             tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
357         }
358     }
359     qemu_spin_unlock(&env->tlb_c.lock);
360 
361     tb_flush_jmp_cache(cpu, addr);
362 }
363 
364 static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
365                                                           run_on_cpu_data data)
366 {
367     CPUArchState *env = cpu->env_ptr;
368     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
369     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
370     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
371 
372     tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
373 
374     /* Check if we need to flush due to large pages.  */
375     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
376         tlb_debug("forced full flush ("
377                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
378                   env->tlb_flush_addr, env->tlb_flush_mask);
379 
380         tlb_flush_by_mmuidx_async_work(cpu,
381                                        RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
382     } else {
383         tlb_flush_page_by_mmuidx_async_work(cpu, data);
384     }
385 }
386 
387 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
388 {
389     target_ulong addr_and_mmu_idx;
390 
391     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
392 
393     /* This should already be page aligned */
394     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
395     addr_and_mmu_idx |= idxmap;
396 
397     if (!qemu_cpu_is_self(cpu)) {
398         async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
399                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
400     } else {
401         tlb_check_page_and_flush_by_mmuidx_async_work(
402             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
403     }
404 }
405 
406 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
407                                        uint16_t idxmap)
408 {
409     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
410     target_ulong addr_and_mmu_idx;
411 
412     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
413 
414     /* This should already be page aligned */
415     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
416     addr_and_mmu_idx |= idxmap;
417 
418     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
419     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
420 }
421 
422 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
423                                                             target_ulong addr,
424                                                             uint16_t idxmap)
425 {
426     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
427     target_ulong addr_and_mmu_idx;
428 
429     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
430 
431     /* This should already be page aligned */
432     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
433     addr_and_mmu_idx |= idxmap;
434 
435     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
436     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
437 }
438 
439 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
440 {
441     const run_on_cpu_func fn = tlb_flush_page_async_work;
442 
443     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
444     fn(src, RUN_ON_CPU_TARGET_PTR(addr));
445 }
446 
447 void tlb_flush_page_all_cpus_synced(CPUState *src,
448                                                   target_ulong addr)
449 {
450     const run_on_cpu_func fn = tlb_flush_page_async_work;
451 
452     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
453     async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
454 }
455 
456 /* update the TLBs so that writes to code in the virtual page 'addr'
457    can be detected */
458 void tlb_protect_code(ram_addr_t ram_addr)
459 {
460     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
461                                              DIRTY_MEMORY_CODE);
462 }
463 
464 /* update the TLB so that writes in physical page 'ram_addr' are no longer
465    tested for self modifying code */
466 void tlb_unprotect_code(ram_addr_t ram_addr)
467 {
468     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
469 }
470 
471 
472 /*
473  * Dirty write flag handling
474  *
475  * When the TCG code writes to a location it looks up the address in
476  * the TLB and uses that data to compute the final address. If any of
477  * the lower bits of the address are set then the slow path is forced.
478  * There are a number of reasons to do this but for normal RAM the
479  * most usual is detecting writes to code regions which may invalidate
480  * generated code.
481  *
482  * Other vCPUs might be reading their TLBs during guest execution, so we update
483  * tlb_entry->addr_write with atomic_set. We don't need to worry about this for
484  * oversized guests as MTTCG is disabled for them.
485  *
486  * Called with tlb_c.lock held.
487  */
488 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
489                                          uintptr_t start, uintptr_t length)
490 {
491     uintptr_t addr = tlb_entry->addr_write;
492 
493     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
494         addr &= TARGET_PAGE_MASK;
495         addr += tlb_entry->addend;
496         if ((addr - start) < length) {
497 #if TCG_OVERSIZED_GUEST
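            /*
             * Oversized guest: target_ulong is wider than the host's
             * registers (e.g. a 64-bit guest on a 32-bit host), so the
             * entry cannot be updated atomically; MTTCG is disabled for
             * such configurations, so a plain store is safe here.
             */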
498             tlb_entry->addr_write |= TLB_NOTDIRTY;
499 #else
500             atomic_set(&tlb_entry->addr_write,
501                        tlb_entry->addr_write | TLB_NOTDIRTY);
502 #endif
503         }
504     }
505 }
506 
507 /*
508  * Called with tlb_c.lock held.
509  * Called only from the vCPU context, i.e. the TLB's owner thread.
510  */
511 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
512 {
513     *d = *s;
514 }
515 
516 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
517  * the target vCPU).
518  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
519  * thing actually updated is the target TLB entry ->addr_write flags.
520  */
521 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
522 {
523     CPUArchState *env;
524 
525     int mmu_idx;
526 
527     env = cpu->env_ptr;
528     qemu_spin_lock(&env->tlb_c.lock);
529     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
530         unsigned int i;
531 
532         for (i = 0; i < CPU_TLB_SIZE; i++) {
533             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
534                                          length);
535         }
536 
537         for (i = 0; i < CPU_VTLB_SIZE; i++) {
538             tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
539                                          length);
540         }
541     }
542     qemu_spin_unlock(&env->tlb_c.lock);
543 }
544 
545 /* Called with tlb_c.lock held */
546 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
547                                          target_ulong vaddr)
548 {
549     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
550         tlb_entry->addr_write = vaddr;
551     }
552 }
553 
554 /* update the TLB corresponding to virtual page vaddr
555    so that writes to it no longer take the not-dirty slow path */
556 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
557 {
558     CPUArchState *env = cpu->env_ptr;
559     int mmu_idx;
560 
561     assert_cpu_is_self(cpu);
562 
563     vaddr &= TARGET_PAGE_MASK;
564     qemu_spin_lock(&env->tlb_c.lock);
565     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
566         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
567     }
568 
569     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
570         int k;
571         for (k = 0; k < CPU_VTLB_SIZE; k++) {
572             tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
573         }
574     }
575     qemu_spin_unlock(&env->tlb_c.lock);
576 }
577 
578 /* Our TLB does not support large pages, so remember the area covered by
579    large pages and trigger a full TLB flush if these are invalidated.  */
580 static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
581                                target_ulong size)
582 {
583     target_ulong mask = ~(size - 1);
584 
585     if (env->tlb_flush_addr == (target_ulong)-1) {
586         env->tlb_flush_addr = vaddr & mask;
587         env->tlb_flush_mask = mask;
588         return;
589     }
590     /* Extend the existing region to include the new page.
591        This is a compromise between unnecessary flushes and the cost
592        of maintaining a full variable size TLB.  */
593     mask &= env->tlb_flush_mask;
594     while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
595         mask <<= 1;
596     }
597     env->tlb_flush_addr &= mask;
598     env->tlb_flush_mask = mask;
599 }
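/*
 * Illustrative example (assuming 32-bit target addresses): a first 2MB page
 * at 0x40001000 records tlb_flush_addr = 0x40000000 with
 * tlb_flush_mask = 0xffe00000.  Adding a second 2MB page at 0x48000000
 * widens the mask until both addresses match: the loop stops at 0xf0000000,
 * so the tracked region grows to the 256MB-aligned block at 0x40000000.
 */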
600 
601 /* Add a new TLB entry. At most one entry for a given virtual address
602  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
603  * supplied size is only used by tlb_flush_page.
604  *
605  * Called from TCG-generated code, which is under an RCU read-side
606  * critical section.
607  */
608 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
609                              hwaddr paddr, MemTxAttrs attrs, int prot,
610                              int mmu_idx, target_ulong size)
611 {
612     CPUArchState *env = cpu->env_ptr;
613     MemoryRegionSection *section;
614     unsigned int index;
615     target_ulong address;
616     target_ulong code_address;
617     uintptr_t addend;
618     CPUTLBEntry *te, tn;
619     hwaddr iotlb, xlat, sz, paddr_page;
620     target_ulong vaddr_page;
621     int asidx = cpu_asidx_from_attrs(cpu, attrs);
622 
623     assert_cpu_is_self(cpu);
624 
625     if (size < TARGET_PAGE_SIZE) {
626         sz = TARGET_PAGE_SIZE;
627     } else {
628         if (size > TARGET_PAGE_SIZE) {
629             tlb_add_large_page(env, vaddr, size);
630         }
631         sz = size;
632     }
633     vaddr_page = vaddr & TARGET_PAGE_MASK;
634     paddr_page = paddr & TARGET_PAGE_MASK;
635 
636     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
637                                                 &xlat, &sz, attrs, &prot);
638     assert(sz >= TARGET_PAGE_SIZE);
639 
640     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
641               " prot=%x idx=%d\n",
642               vaddr, paddr, prot, mmu_idx);
643 
644     address = vaddr_page;
645     if (size < TARGET_PAGE_SIZE) {
646         /*
647          * Slow-path the TLB entries; we will repeat the MMU check and TLB
648          * fill on every access.
649          */
650         address |= TLB_RECHECK;
651     }
652     if (!memory_region_is_ram(section->mr) &&
653         !memory_region_is_romd(section->mr)) {
654         /* IO memory case */
655         address |= TLB_MMIO;
656         addend = 0;
657     } else {
658         /* TLB_MMIO for rom/romd handled below */
659         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
660     }
661 
662     code_address = address;
663     iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
664                                             paddr_page, xlat, prot, &address);
665 
666     index = tlb_index(env, mmu_idx, vaddr_page);
667     te = tlb_entry(env, mmu_idx, vaddr_page);
668 
669     /*
670      * Hold the TLB lock for the rest of the function. We could acquire/release
671      * the lock several times in the function, but it is faster to amortize the
672      * acquisition cost by acquiring it just once. Note that this leads to
673      * a longer critical section, but this is not a concern since the TLB lock
674      * is unlikely to be contended.
675      */
676     qemu_spin_lock(&env->tlb_c.lock);
677 
678     /* Make sure there's no cached translation for the new page.  */
679     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
680 
681     /*
682      * Only evict the old entry to the victim tlb if it's for a
683      * different page; otherwise just overwrite the stale data.
684      */
685     if (!tlb_hit_page_anyprot(te, vaddr_page)) {
686         unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
687         CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
688 
689         /* Evict the old entry into the victim tlb.  */
690         copy_tlb_helper_locked(tv, te);
691         env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
692     }
693 
694     /* refill the tlb */
695     /*
696      * At this point iotlb contains a physical section number in the lower
697      * TARGET_PAGE_BITS, and either
698      *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
699      *  + the offset within section->mr of the page base (otherwise)
700      * We subtract the vaddr_page (which is page aligned and thus won't
701      * disturb the low bits) to give an offset which can be added to the
702      * (non-page-aligned) vaddr of the eventual memory access to get
703      * the MemoryRegion offset for the access. Note that the vaddr we
704      * subtract here is that of the page base, and not the same as the
705      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
706      */
707     env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
708     env->iotlb[mmu_idx][index].attrs = attrs;
709 
710     /* Now calculate the new entry */
711     tn.addend = addend - vaddr_page;
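    /* For RAM pages this makes host address == guest vaddr + tn.addend;
     * for MMIO (addend == 0 above) the addend is not used. */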
712     if (prot & PAGE_READ) {
713         tn.addr_read = address;
714     } else {
715         tn.addr_read = -1;
716     }
717 
718     if (prot & PAGE_EXEC) {
719         tn.addr_code = code_address;
720     } else {
721         tn.addr_code = -1;
722     }
723 
724     tn.addr_write = -1;
725     if (prot & PAGE_WRITE) {
726         if ((memory_region_is_ram(section->mr) && section->readonly)
727             || memory_region_is_romd(section->mr)) {
728             /* Write access calls the I/O callback.  */
729             tn.addr_write = address | TLB_MMIO;
730         } else if (memory_region_is_ram(section->mr)
731                    && cpu_physical_memory_is_clean(
732                        memory_region_get_ram_addr(section->mr) + xlat)) {
733             tn.addr_write = address | TLB_NOTDIRTY;
734         } else {
735             tn.addr_write = address;
736         }
737         if (prot & PAGE_WRITE_INV) {
738             tn.addr_write |= TLB_INVALID_MASK;
739         }
740     }
741 
742     copy_tlb_helper_locked(te, &tn);
743     qemu_spin_unlock(&env->tlb_c.lock);
744 }
745 
746 /* Add a new TLB entry, but without specifying the memory
747  * transaction attributes to be used.
748  */
749 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
750                   hwaddr paddr, int prot,
751                   int mmu_idx, target_ulong size)
752 {
753     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
754                             prot, mmu_idx, size);
755 }
756 
757 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
758 {
759     ram_addr_t ram_addr;
760 
761     ram_addr = qemu_ram_addr_from_host(ptr);
762     if (ram_addr == RAM_ADDR_INVALID) {
763         error_report("Bad ram pointer %p", ptr);
764         abort();
765     }
766     return ram_addr;
767 }
768 
769 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
770                          int mmu_idx,
771                          target_ulong addr, uintptr_t retaddr,
772                          bool recheck, MMUAccessType access_type, int size)
773 {
774     CPUState *cpu = ENV_GET_CPU(env);
775     hwaddr mr_offset;
776     MemoryRegionSection *section;
777     MemoryRegion *mr;
778     uint64_t val;
779     bool locked = false;
780     MemTxResult r;
781 
782     if (recheck) {
783         /*
784          * This is a TLB_RECHECK access, where the MMU protection
785          * covers a smaller range than a target page, and we must
786          * repeat the MMU check here. This tlb_fill() call might
787          * longjump out if this access should cause a guest exception.
788          * longjmp out if this access should cause a guest exception.
789         CPUTLBEntry *entry;
790         target_ulong tlb_addr;
791 
792         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
793 
794         entry = tlb_entry(env, mmu_idx, addr);
795         tlb_addr = entry->addr_read;
796         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
797             /* RAM access */
798             uintptr_t haddr = addr + entry->addend;
799 
800             return ldn_p((void *)haddr, size);
801         }
802         /* Fall through for handling IO accesses */
803     }
804 
805     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
806     mr = section->mr;
807     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
808     cpu->mem_io_pc = retaddr;
809     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
810         cpu_io_recompile(cpu, retaddr);
811     }
812 
813     cpu->mem_io_vaddr = addr;
814     cpu->mem_io_access_type = access_type;
815 
816     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
817         qemu_mutex_lock_iothread();
818         locked = true;
819     }
820     r = memory_region_dispatch_read(mr, mr_offset,
821                                     &val, size, iotlbentry->attrs);
822     if (r != MEMTX_OK) {
823         hwaddr physaddr = mr_offset +
824             section->offset_within_address_space -
825             section->offset_within_region;
826 
827         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
828                                mmu_idx, iotlbentry->attrs, r, retaddr);
829     }
830     if (locked) {
831         qemu_mutex_unlock_iothread();
832     }
833 
834     return val;
835 }
836 
837 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
838                       int mmu_idx,
839                       uint64_t val, target_ulong addr,
840                       uintptr_t retaddr, bool recheck, int size)
841 {
842     CPUState *cpu = ENV_GET_CPU(env);
843     hwaddr mr_offset;
844     MemoryRegionSection *section;
845     MemoryRegion *mr;
846     bool locked = false;
847     MemTxResult r;
848 
849     if (recheck) {
850         /*
851          * This is a TLB_RECHECK access, where the MMU protection
852          * covers a smaller range than a target page, and we must
853          * repeat the MMU check here. This tlb_fill() call might
854          * longjump out if this access should cause a guest exception.
855          * longjmp out if this access should cause a guest exception.
856         CPUTLBEntry *entry;
857         target_ulong tlb_addr;
858 
859         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
860 
861         entry = tlb_entry(env, mmu_idx, addr);
862         tlb_addr = tlb_addr_write(entry);
863         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
864             /* RAM access */
865             uintptr_t haddr = addr + entry->addend;
866 
867             stn_p((void *)haddr, size, val);
868             return;
869         }
870         /* Fall through for handling IO accesses */
871     }
872 
873     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
874     mr = section->mr;
875     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
876     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
877         cpu_io_recompile(cpu, retaddr);
878     }
879     cpu->mem_io_vaddr = addr;
880     cpu->mem_io_pc = retaddr;
881 
882     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
883         qemu_mutex_lock_iothread();
884         locked = true;
885     }
886     r = memory_region_dispatch_write(mr, mr_offset,
887                                      val, size, iotlbentry->attrs);
888     if (r != MEMTX_OK) {
889         hwaddr physaddr = mr_offset +
890             section->offset_within_address_space -
891             section->offset_within_region;
892 
893         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
894                                mmu_idx, iotlbentry->attrs, r, retaddr);
895     }
896     if (locked) {
897         qemu_mutex_unlock_iothread();
898     }
899 }
900 
901 /* Return true if ADDR is present in the victim tlb, and has been copied
902    back to the main tlb.  */
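/* The victim TLB (tlb_v_table) holds the last CPU_VTLB_SIZE entries evicted
 * from the main table by tlb_set_page_with_attrs; checking it avoids a full
 * tlb_fill for recently evicted pages. A hit is swapped back into the main
 * table along with its iotlb entry.
 */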
903 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
904                            size_t elt_ofs, target_ulong page)
905 {
906     size_t vidx;
907 
908     assert_cpu_is_self(ENV_GET_CPU(env));
909     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
910         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
911         target_ulong cmp;
912 
913         /* elt_ofs might correspond to .addr_write, so use atomic_read */
914 #if TCG_OVERSIZED_GUEST
915         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
916 #else
917         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
918 #endif
919 
920         if (cmp == page) {
921             /* Found entry in victim tlb, swap tlb and iotlb.  */
922             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
923 
924             qemu_spin_lock(&env->tlb_c.lock);
925             copy_tlb_helper_locked(&tmptlb, tlb);
926             copy_tlb_helper_locked(tlb, vtlb);
927             copy_tlb_helper_locked(vtlb, &tmptlb);
928             qemu_spin_unlock(&env->tlb_c.lock);
929 
930             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
931             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
932             tmpio = *io; *io = *vio; *vio = tmpio;
933             return true;
934         }
935     }
936     return false;
937 }
938 
939 /* Macro to call the above, with local variables from the use context.  */
940 #define VICTIM_TLB_HIT(TY, ADDR) \
941   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
942                  (ADDR) & TARGET_PAGE_MASK)
943 
944 /* NOTE: this function can trigger an exception */
945 /* NOTE2: the returned address is not exactly the physical address: it
946  * is actually a ram_addr_t (in system mode; the user mode emulation
947  * version of this function returns a guest virtual address).
948  */
949 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
950 {
951     uintptr_t mmu_idx = cpu_mmu_index(env, true);
952     uintptr_t index = tlb_index(env, mmu_idx, addr);
953     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
954     void *p;
955 
956     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
957         if (!VICTIM_TLB_HIT(addr_code, addr)) {
958             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
959         }
960         assert(tlb_hit(entry->addr_code, addr));
961     }
962 
963     if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
964         /*
965          * Return -1 if we can't translate and execute from an entire
966          * page of RAM here, which will cause us to execute by loading
967          * and translating one insn at a time, without caching:
968          *  - TLB_RECHECK: means the MMU protection covers a smaller range
969          *    than a target page, so we must redo the MMU check every insn
970          *  - TLB_MMIO: region is not backed by RAM
971          */
972         return -1;
973     }
974 
975     p = (void *)((uintptr_t)addr + entry->addend);
976     return qemu_ram_addr_from_host_nofail(p);
977 }
978 
979 /* Probe for whether the specified guest write access is permitted.
980  * If it is not permitted then an exception will be taken in the same
981  * way as if this were a real write access (and we will not return).
982  * Otherwise the function will return, and there will be a valid
983  * entry in the TLB for this access.
984  */
985 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
986                  uintptr_t retaddr)
987 {
988     uintptr_t index = tlb_index(env, mmu_idx, addr);
989     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
990 
991     if (!tlb_hit(tlb_addr_write(entry), addr)) {
992         /* TLB entry is for a different page */
993         if (!VICTIM_TLB_HIT(addr_write, addr)) {
994             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
995                      mmu_idx, retaddr);
996         }
997     }
998 }
999 
1000 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1001  * operations, or io operations to proceed.  Return the host address.  */
1002 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1003                                TCGMemOpIdx oi, uintptr_t retaddr,
1004                                NotDirtyInfo *ndi)
1005 {
1006     size_t mmu_idx = get_mmuidx(oi);
1007     uintptr_t index = tlb_index(env, mmu_idx, addr);
1008     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1009     target_ulong tlb_addr = tlb_addr_write(tlbe);
1010     TCGMemOp mop = get_memop(oi);
1011     int a_bits = get_alignment_bits(mop);
1012     int s_bits = mop & MO_SIZE;
1013     void *hostaddr;
1014 
1015     /* Adjust the given return address.  */
1016     retaddr -= GETPC_ADJ;
1017 
1018     /* Enforce guest required alignment.  */
1019     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1020         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1021         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
1022                              mmu_idx, retaddr);
1023     }
1024 
1025     /* Enforce qemu required alignment.  */
1026     if (unlikely(addr & ((1 << s_bits) - 1))) {
1027         /* We get here if guest alignment was not requested,
1028            or was not enforced by cpu_unaligned_access above.
1029            We might widen the access and emulate, but for now
1030            mark an exception and exit the cpu loop.  */
1031         goto stop_the_world;
1032     }
1033 
1034     /* Check TLB entry and enforce page permissions.  */
1035     if (!tlb_hit(tlb_addr, addr)) {
1036         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1037             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
1038                      mmu_idx, retaddr);
1039         }
1040         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1041     }
1042 
1043     /* Notice an IO access or a needs-MMU-lookup access */
1044     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
1045         /* There's really nothing that can be done to
1046            support this apart from stop-the-world.  */
1047         goto stop_the_world;
1048     }
1049 
1050     /* Let the guest notice RMW on a write-only page.  */
1051     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1052         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1053                  mmu_idx, retaddr);
1054         /* Since we don't support reads and writes to different addresses,
1055            and we do have the proper page loaded for write, this shouldn't
1056            ever return.  But just in case, handle via stop-the-world.  */
1057         goto stop_the_world;
1058     }
1059 
1060     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1061 
1062     ndi->active = false;
1063     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1064         ndi->active = true;
1065         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
1066                                       qemu_ram_addr_from_host_nofail(hostaddr),
1067                                       1 << s_bits);
1068     }
1069 
1070     return hostaddr;
1071 
1072  stop_the_world:
1073     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
1074 }
1075 
1076 #ifdef TARGET_WORDS_BIGENDIAN
1077 # define TGT_BE(X)  (X)
1078 # define TGT_LE(X)  BSWAP(X)
1079 #else
1080 # define TGT_BE(X)  BSWAP(X)
1081 # define TGT_LE(X)  (X)
1082 #endif
1083 
1084 #define MMUSUFFIX _mmu
1085 
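/*
 * softmmu_template.h undefines DATA_SIZE (and related parameters) at the end,
 * so it is included once per access size below to generate the out-of-line
 * load/store helpers called from TCG-generated code on slow-path accesses.
 */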
1086 #define DATA_SIZE 1
1087 #include "softmmu_template.h"
1088 
1089 #define DATA_SIZE 2
1090 #include "softmmu_template.h"
1091 
1092 #define DATA_SIZE 4
1093 #include "softmmu_template.h"
1094 
1095 #define DATA_SIZE 8
1096 #include "softmmu_template.h"
1097 
1098 /* First set of helpers allows passing in of OI and RETADDR.  This makes
1099    them callable from other helpers.  */
1100 
1101 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
1102 #define ATOMIC_NAME(X) \
1103     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1104 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
1105 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
1106 #define ATOMIC_MMU_CLEANUP                              \
1107     do {                                                \
1108         if (unlikely(ndi.active)) {                     \
1109             memory_notdirty_write_complete(&ndi);       \
1110         }                                               \
1111     } while (0)
1112 
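/*
 * Like softmmu_template.h, atomic_template.h is included once per size; it
 * expands ATOMIC_MMU_LOOKUP to obtain the host address and ATOMIC_MMU_CLEANUP
 * to complete any deferred not-dirty bookkeeping after the operation.
 */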
1113 #define DATA_SIZE 1
1114 #include "atomic_template.h"
1115 
1116 #define DATA_SIZE 2
1117 #include "atomic_template.h"
1118 
1119 #define DATA_SIZE 4
1120 #include "atomic_template.h"
1121 
1122 #ifdef CONFIG_ATOMIC64
1123 #define DATA_SIZE 8
1124 #include "atomic_template.h"
1125 #endif
1126 
1127 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1128 #define DATA_SIZE 16
1129 #include "atomic_template.h"
1130 #endif
1131 
1132 /* Second set of helpers are directly callable from TCG as helpers.  */
1133 
1134 #undef EXTRA_ARGS
1135 #undef ATOMIC_NAME
1136 #undef ATOMIC_MMU_LOOKUP
1137 #define EXTRA_ARGS         , TCGMemOpIdx oi
1138 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1139 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1140 
1141 #define DATA_SIZE 1
1142 #include "atomic_template.h"
1143 
1144 #define DATA_SIZE 2
1145 #include "atomic_template.h"
1146 
1147 #define DATA_SIZE 4
1148 #include "atomic_template.h"
1149 
1150 #ifdef CONFIG_ATOMIC64
1151 #define DATA_SIZE 8
1152 #include "atomic_template.h"
1153 #endif
1154 
1155 /* Code access functions.  */
1156 
1157 #undef MMUSUFFIX
1158 #define MMUSUFFIX _cmmu
1159 #undef GETPC
1160 #define GETPC() ((uintptr_t)0)
1161 #define SOFTMMU_CODE_ACCESS
1162 
1163 #define DATA_SIZE 1
1164 #include "softmmu_template.h"
1165 
1166 #define DATA_SIZE 2
1167 #include "softmmu_template.h"
1168 
1169 #define DATA_SIZE 4
1170 #include "softmmu_template.h"
1171 
1172 #define DATA_SIZE 8
1173 #include "softmmu_template.h"
1174