xref: /openbmc/qemu/hw/xen/xen-mapcache.c (revision 7d87775f)
1 /*
2  * Copyright (C) 2011       Citrix Ltd.
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2.  See
5  * the COPYING file in the top-level directory.
6  *
7  * Contributions after 2012-01-13 are licensed under the terms of the
8  * GNU GPL, version 2 or (at your option) any later version.
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qemu/units.h"
13 #include "qemu/error-report.h"
14 
15 #include <sys/resource.h>
16 
17 #include "hw/xen/xen-hvm-common.h"
18 #include "hw/xen/xen_native.h"
19 #include "qemu/bitmap.h"
20 
21 #include "sysemu/runstate.h"
22 #include "sysemu/xen-mapcache.h"
23 #include "trace.h"
24 
25 #include <xenevtchn.h>
26 #include <xengnttab.h>
27 
/* Upper bound on the amount of address space the map cache may cover. */
#if HOST_LONG_BITS != 32
#  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#else
#  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#endif

/*
 * Amount of virtual address space reserved for QEMU itself, i.e. that the
 * map cache must not use.
 * Empirical tests showed QEMU using about 75MB on top of max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)
40 
41 typedef struct MapCacheEntry {
42     hwaddr paddr_index;
43     uint8_t *vaddr_base;
44     unsigned long *valid_mapping;
45     uint32_t lock;
46 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
47 #define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
48     uint8_t flags;
49     hwaddr size;
50     struct MapCacheEntry *next;
51 } MapCacheEntry;
52 
53 typedef struct MapCacheRev {
54     uint8_t *vaddr_req;
55     hwaddr paddr_index;
56     hwaddr size;
57     QTAILQ_ENTRY(MapCacheRev) next;
58     bool dma;
59 } MapCacheRev;
60 
61 typedef struct MapCache {
62     MapCacheEntry *entry;
63     unsigned long nr_buckets;
64     QTAILQ_HEAD(, MapCacheRev) locked_entries;
65 
66     /* For most cases (>99.9%), the page address is the same. */
67     MapCacheEntry *last_entry;
68     unsigned long max_mcache_size;
69     unsigned int bucket_shift;
70     unsigned long bucket_size;
71 
72     phys_offset_to_gaddr_t phys_offset_to_gaddr;
73     QemuMutex lock;
74     void *opaque;
75 } MapCache;
76 
77 static MapCache *mapcache;
78 static MapCache *mapcache_grants;
79 static xengnttab_handle *xen_region_gnttabdev;
80 
81 static inline void mapcache_lock(MapCache *mc)
82 {
83     qemu_mutex_lock(&mc->lock);
84 }
85 
86 static inline void mapcache_unlock(MapCache *mc)
87 {
88     qemu_mutex_unlock(&mc->lock);
89 }
90 
/*
 * Report whether the @size bits starting at bit @nr of bitmap @addr are all
 * set.
 *
 * Returns 1 when the first clear bit found from @nr onwards lies at or
 * beyond @nr + @size, 0 otherwise.
 */
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long first_clear = find_next_zero_bit(addr, size + nr, nr);

    return first_clear >= nr + size ? 1 : 0;
}
99 
100 static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
101                                            void *opaque,
102                                            unsigned int bucket_shift,
103                                            unsigned long max_size)
104 {
105     unsigned long size;
106     MapCache *mc;
107 
108     assert(bucket_shift >= XC_PAGE_SHIFT);
109 
110     mc = g_new0(MapCache, 1);
111 
112     mc->phys_offset_to_gaddr = f;
113     mc->opaque = opaque;
114     qemu_mutex_init(&mc->lock);
115 
116     QTAILQ_INIT(&mc->locked_entries);
117 
118     mc->bucket_shift = bucket_shift;
119     mc->bucket_size = 1UL << bucket_shift;
120     mc->max_mcache_size = max_size;
121 
122     mc->nr_buckets =
123         (((mc->max_mcache_size >> XC_PAGE_SHIFT) +
124           (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >>
125          (bucket_shift - XC_PAGE_SHIFT));
126 
127     size = mc->nr_buckets * sizeof(MapCacheEntry);
128     size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
129     trace_xen_map_cache_init(mc->nr_buckets, size);
130     mc->entry = g_malloc0(size);
131     return mc;
132 }
133 
134 void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
135 {
136     struct rlimit rlimit_as;
137     unsigned long max_mcache_size;
138     unsigned int bucket_shift;
139 
140     xen_region_gnttabdev = xengnttab_open(NULL, 0);
141     if (xen_region_gnttabdev == NULL) {
142         error_report("mapcache: Failed to open gnttab device");
143         exit(EXIT_FAILURE);
144     }
145 
146     if (HOST_LONG_BITS == 32) {
147         bucket_shift = 16;
148     } else {
149         bucket_shift = 20;
150     }
151 
152     if (geteuid() == 0) {
153         rlimit_as.rlim_cur = RLIM_INFINITY;
154         rlimit_as.rlim_max = RLIM_INFINITY;
155         max_mcache_size = MCACHE_MAX_SIZE;
156     } else {
157         getrlimit(RLIMIT_AS, &rlimit_as);
158         rlimit_as.rlim_cur = rlimit_as.rlim_max;
159 
160         if (rlimit_as.rlim_max != RLIM_INFINITY) {
161             warn_report("QEMU's maximum size of virtual"
162                         " memory is not infinity");
163         }
164         if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
165             max_mcache_size = rlimit_as.rlim_max - NON_MCACHE_MEMORY_SIZE;
166         } else {
167             max_mcache_size = MCACHE_MAX_SIZE;
168         }
169     }
170 
171     mapcache = xen_map_cache_init_single(f, opaque,
172                                          bucket_shift,
173                                          max_mcache_size);
174 
175     /*
176      * Grant mappings must use XC_PAGE_SIZE granularity since we can't
177      * map anything beyond the number of pages granted to us.
178      */
179     mapcache_grants = xen_map_cache_init_single(f, opaque,
180                                                 XC_PAGE_SHIFT,
181                                                 max_mcache_size);
182 
183     setrlimit(RLIMIT_AS, &rlimit_as);
184 }
185 
186 static void xen_remap_bucket(MapCache *mc,
187                              MapCacheEntry *entry,
188                              void *vaddr,
189                              hwaddr size,
190                              hwaddr address_index,
191                              bool dummy,
192                              bool grant,
193                              bool is_write,
194                              ram_addr_t ram_offset)
195 {
196     uint8_t *vaddr_base;
197     g_autofree uint32_t *refs = NULL;
198     g_autofree xen_pfn_t *pfns = NULL;
199     g_autofree int *err;
200     unsigned int i;
201     hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
202 
203     trace_xen_remap_bucket(address_index);
204 
205     if (grant) {
206         refs = g_new0(uint32_t, nb_pfn);
207     } else {
208         pfns = g_new0(xen_pfn_t, nb_pfn);
209     }
210     err = g_new0(int, nb_pfn);
211 
212     if (entry->vaddr_base != NULL) {
213         if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
214             ram_block_notify_remove(entry->vaddr_base, entry->size,
215                                     entry->size);
216         }
217 
218         /*
219          * If an entry is being replaced by another mapping and we're using
220          * MAP_FIXED flag for it - there is possibility of a race for vaddr
221          * address with another thread doing an mmap call itself
222          * (see man 2 mmap). To avoid that we skip explicit unmapping here
223          * and allow the kernel to destroy the previous mappings by replacing
224          * them in mmap call later.
225          *
226          * Non-identical replacements are not allowed therefore.
227          */
228         assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size));
229 
230         if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) {
231             perror("unmap fails");
232             exit(-1);
233         }
234     }
235     g_free(entry->valid_mapping);
236     entry->valid_mapping = NULL;
237 
238     if (grant) {
239         hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
240 
241         for (i = 0; i < nb_pfn; i++) {
242             refs[i] = grant_base + i;
243         }
244     } else {
245         for (i = 0; i < nb_pfn; i++) {
246             pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
247         }
248     }
249 
250     entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
251 
252     if (!dummy) {
253         if (grant) {
254             int prot = PROT_READ;
255 
256             if (is_write) {
257                 prot |= PROT_WRITE;
258             }
259 
260             entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
261             assert(vaddr == NULL);
262             vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
263                                                          nb_pfn,
264                                                          xen_domid, refs,
265                                                          prot);
266         } else {
267             /*
268              * If the caller has requested the mapping at a specific address use
269              * MAP_FIXED to make sure it's honored.
270              *
271              * We don't yet support upgrading mappings from RO to RW, to handle
272              * models using ordinary address_space_rw(), foreign mappings ignore
273              * is_write and are always mapped RW.
274              */
275             vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
276                                                PROT_READ | PROT_WRITE,
277                                                vaddr ? MAP_FIXED : 0,
278                                                nb_pfn, pfns, err);
279         }
280         if (vaddr_base == NULL) {
281             perror(grant ? "xengnttab_map_domain_grant_refs"
282                            : "xenforeignmemory_map2");
283             exit(-1);
284         }
285     } else {
286         /*
287          * We create dummy mappings where we are unable to create a foreign
288          * mapping immediately due to certain circumstances (i.e. on resume now)
289          */
290         vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
291                           MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
292                           -1, 0);
293         if (vaddr_base == MAP_FAILED) {
294             perror("mmap");
295             exit(-1);
296         }
297     }
298 
299     if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
300         ram_block_notify_add(vaddr_base, size, size);
301     }
302 
303     entry->vaddr_base = vaddr_base;
304     entry->paddr_index = address_index;
305     entry->size = size;
306     entry->valid_mapping = g_new0(unsigned long,
307                                   BITS_TO_LONGS(size >> XC_PAGE_SHIFT));
308 
309     if (dummy) {
310         entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
311     } else {
312         entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
313     }
314 
315     bitmap_zero(entry->valid_mapping, nb_pfn);
316     for (i = 0; i < nb_pfn; i++) {
317         if (!err[i]) {
318             bitmap_set(entry->valid_mapping, i, 1);
319         }
320     }
321 }
322 
323 static uint8_t *xen_map_cache_unlocked(MapCache *mc,
324                                        hwaddr phys_addr, hwaddr size,
325                                        ram_addr_t ram_offset,
326                                        uint8_t lock, bool dma,
327                                        bool grant, bool is_write)
328 {
329     MapCacheEntry *entry, *pentry = NULL,
330                   *free_entry = NULL, *free_pentry = NULL;
331     hwaddr address_index;
332     hwaddr address_offset;
333     hwaddr cache_size = size;
334     hwaddr test_bit_size;
335     bool translated G_GNUC_UNUSED = false;
336     bool dummy = false;
337 
338 tryagain:
339     address_index  = phys_addr >> mc->bucket_shift;
340     address_offset = phys_addr & (mc->bucket_size - 1);
341 
342     trace_xen_map_cache(phys_addr);
343 
344     /* test_bit_size is always a multiple of XC_PAGE_SIZE */
345     if (size) {
346         test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));
347 
348         if (test_bit_size % XC_PAGE_SIZE) {
349             test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
350         }
351     } else {
352         test_bit_size = XC_PAGE_SIZE;
353     }
354 
355     if (mc->last_entry != NULL &&
356         mc->last_entry->paddr_index == address_index &&
357         !lock && !size &&
358         test_bits(address_offset >> XC_PAGE_SHIFT,
359                   test_bit_size >> XC_PAGE_SHIFT,
360                   mc->last_entry->valid_mapping)) {
361         trace_xen_map_cache_return(
362             mc->last_entry->vaddr_base + address_offset
363         );
364         return mc->last_entry->vaddr_base + address_offset;
365     }
366 
367     /* size is always a multiple of mc->bucket_size */
368     if (size) {
369         cache_size = size + address_offset;
370         if (cache_size % mc->bucket_size) {
371             cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
372         }
373     } else {
374         cache_size = mc->bucket_size;
375     }
376 
377     entry = &mc->entry[address_index % mc->nr_buckets];
378 
379     while (entry && (lock || entry->lock) && entry->vaddr_base &&
380             (entry->paddr_index != address_index || entry->size != cache_size ||
381              !test_bits(address_offset >> XC_PAGE_SHIFT,
382                  test_bit_size >> XC_PAGE_SHIFT,
383                  entry->valid_mapping))) {
384         if (!free_entry && !entry->lock) {
385             free_entry = entry;
386             free_pentry = pentry;
387         }
388         pentry = entry;
389         entry = entry->next;
390     }
391     if (!entry && free_entry) {
392         entry = free_entry;
393         pentry = free_pentry;
394     }
395     if (!entry) {
396         entry = g_new0(MapCacheEntry, 1);
397         pentry->next = entry;
398         xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
399                          grant, is_write, ram_offset);
400     } else if (!entry->lock) {
401         if (!entry->vaddr_base || entry->paddr_index != address_index ||
402                 entry->size != cache_size ||
403                 !test_bits(address_offset >> XC_PAGE_SHIFT,
404                     test_bit_size >> XC_PAGE_SHIFT,
405                     entry->valid_mapping)) {
406             xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
407                              grant, is_write, ram_offset);
408         }
409     }
410 
411     if(!test_bits(address_offset >> XC_PAGE_SHIFT,
412                 test_bit_size >> XC_PAGE_SHIFT,
413                 entry->valid_mapping)) {
414         mc->last_entry = NULL;
415 #ifdef XEN_COMPAT_PHYSMAP
416         if (!translated && mc->phys_offset_to_gaddr) {
417             phys_addr = mc->phys_offset_to_gaddr(phys_addr, size);
418             translated = true;
419             goto tryagain;
420         }
421 #endif
422         if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
423             dummy = true;
424             goto tryagain;
425         }
426         trace_xen_map_cache_return(NULL);
427         return NULL;
428     }
429 
430     mc->last_entry = entry;
431     if (lock) {
432         MapCacheRev *reventry = g_new0(MapCacheRev, 1);
433         entry->lock++;
434         if (entry->lock == 0) {
435             error_report("mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p",
436                          entry->paddr_index, entry->vaddr_base);
437             abort();
438         }
439         reventry->dma = dma;
440         reventry->vaddr_req = mc->last_entry->vaddr_base + address_offset;
441         reventry->paddr_index = mc->last_entry->paddr_index;
442         reventry->size = entry->size;
443         QTAILQ_INSERT_HEAD(&mc->locked_entries, reventry, next);
444     }
445 
446     trace_xen_map_cache_return(
447         mc->last_entry->vaddr_base + address_offset
448     );
449     return mc->last_entry->vaddr_base + address_offset;
450 }
451 
452 uint8_t *xen_map_cache(MemoryRegion *mr,
453                        hwaddr phys_addr, hwaddr size,
454                        ram_addr_t ram_addr_offset,
455                        uint8_t lock, bool dma,
456                        bool is_write)
457 {
458     bool grant = xen_mr_is_grants(mr);
459     MapCache *mc = grant ? mapcache_grants : mapcache;
460     uint8_t *p;
461 
462     if (grant && !lock) {
463         /*
464          * Grants are only supported via address_space_map(). Anything
465          * else is considered a user/guest error.
466          *
467          * QEMU generally doesn't expect these mappings to ever fail, so
468          * if this happens we report an error message and abort().
469          */
470         error_report("Tried to access a grant reference without mapping it.");
471         abort();
472     }
473 
474     mapcache_lock(mc);
475     p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
476                                lock, dma, grant, is_write);
477     mapcache_unlock(mc);
478     return p;
479 }
480 
481 static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
482 {
483     MapCacheEntry *entry = NULL;
484     MapCacheRev *reventry;
485     hwaddr paddr_index;
486     hwaddr size;
487     ram_addr_t raddr;
488     int found = 0;
489 
490     mapcache_lock(mc);
491     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
492         if (reventry->vaddr_req == ptr) {
493             paddr_index = reventry->paddr_index;
494             size = reventry->size;
495             found = 1;
496             break;
497         }
498     }
499     if (!found) {
500         trace_xen_ram_addr_from_mapcache_not_found(ptr);
501         mapcache_unlock(mc);
502         return RAM_ADDR_INVALID;
503     }
504 
505     entry = &mc->entry[paddr_index % mc->nr_buckets];
506     while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
507         entry = entry->next;
508     }
509     if (!entry) {
510         trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
511         raddr = RAM_ADDR_INVALID;
512     } else {
513         raddr = (reventry->paddr_index << mc->bucket_shift) +
514              ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
515     }
516     mapcache_unlock(mc);
517     return raddr;
518 }
519 
520 ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
521 {
522     ram_addr_t addr;
523 
524     addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
525     if (addr == RAM_ADDR_INVALID) {
526         addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
527     }
528 
529     return addr;
530 }
531 
532 static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
533                                                     uint8_t *buffer)
534 {
535     MapCacheEntry *entry = NULL, *pentry = NULL;
536     MapCacheRev *reventry;
537     hwaddr paddr_index;
538     hwaddr size;
539     int found = 0;
540     int rc;
541 
542     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
543         if (reventry->vaddr_req == buffer) {
544             paddr_index = reventry->paddr_index;
545             size = reventry->size;
546             found = 1;
547             break;
548         }
549     }
550     if (!found) {
551         trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
552         QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
553             trace_xen_invalidate_map_cache_entry_unlocked_found(
554                 reventry->paddr_index,
555                 reventry->vaddr_req
556             );
557         }
558         return;
559     }
560     QTAILQ_REMOVE(&mc->locked_entries, reventry, next);
561     g_free(reventry);
562 
563     if (mc->last_entry != NULL &&
564         mc->last_entry->paddr_index == paddr_index) {
565         mc->last_entry = NULL;
566     }
567 
568     entry = &mc->entry[paddr_index % mc->nr_buckets];
569     while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
570         pentry = entry;
571         entry = entry->next;
572     }
573     if (!entry) {
574         trace_xen_invalidate_map_cache_entry_unlocked_miss(buffer);
575         return;
576     }
577     entry->lock--;
578     if (entry->lock > 0) {
579         return;
580     }
581 
582     ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
583     if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
584         rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
585                              entry->size >> mc->bucket_shift);
586     } else {
587         rc = munmap(entry->vaddr_base, entry->size);
588     }
589 
590     if (rc) {
591         perror("unmap fails");
592         exit(-1);
593     }
594 
595     g_free(entry->valid_mapping);
596     if (pentry) {
597         pentry->next = entry->next;
598         g_free(entry);
599     } else {
600         /*
601          * Invalidate mapping but keep entry->next pointing to the rest
602          * of the list.
603          *
604          * Note that lock is already zero here, otherwise we don't unmap.
605          */
606         entry->paddr_index = 0;
607         entry->vaddr_base = NULL;
608         entry->valid_mapping = NULL;
609         entry->flags = 0;
610         entry->size = 0;
611     }
612 }
613 
614 typedef struct XenMapCacheData {
615     Coroutine *co;
616     uint8_t *buffer;
617 } XenMapCacheData;
618 
619 static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer)
620 {
621     mapcache_lock(mc);
622     xen_invalidate_map_cache_entry_unlocked(mc, buffer);
623     mapcache_unlock(mc);
624 }
625 
626 static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
627 {
628     xen_invalidate_map_cache_entry_single(mapcache, buffer);
629     xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
630 }
631 
632 static void xen_invalidate_map_cache_entry_bh(void *opaque)
633 {
634     XenMapCacheData *data = opaque;
635 
636     xen_invalidate_map_cache_entry_all(data->buffer);
637     aio_co_wake(data->co);
638 }
639 
640 void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
641 {
642     if (qemu_in_coroutine()) {
643         XenMapCacheData data = {
644             .co = qemu_coroutine_self(),
645             .buffer = buffer,
646         };
647         aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
648                                 xen_invalidate_map_cache_entry_bh, &data);
649         qemu_coroutine_yield();
650     } else {
651         xen_invalidate_map_cache_entry_all(buffer);
652     }
653 }
654 
655 static void xen_invalidate_map_cache_single(MapCache *mc)
656 {
657     unsigned long i;
658     MapCacheRev *reventry;
659 
660     mapcache_lock(mc);
661 
662     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
663         if (!reventry->dma) {
664             continue;
665         }
666         trace_xen_invalidate_map_cache(reventry->paddr_index,
667                                        reventry->vaddr_req);
668     }
669 
670     for (i = 0; i < mc->nr_buckets; i++) {
671         MapCacheEntry *entry = &mc->entry[i];
672 
673         if (entry->vaddr_base == NULL) {
674             continue;
675         }
676         if (entry->lock > 0) {
677             continue;
678         }
679 
680         if (munmap(entry->vaddr_base, entry->size) != 0) {
681             perror("unmap fails");
682             exit(-1);
683         }
684 
685         entry->paddr_index = 0;
686         entry->vaddr_base = NULL;
687         entry->size = 0;
688         g_free(entry->valid_mapping);
689         entry->valid_mapping = NULL;
690     }
691 
692     mc->last_entry = NULL;
693 
694     mapcache_unlock(mc);
695 }
696 
697 void xen_invalidate_map_cache(void)
698 {
699     /* Flush pending AIO before destroying the mapcache */
700     bdrv_drain_all();
701 
702     xen_invalidate_map_cache_single(mapcache);
703     xen_invalidate_map_cache_single(mapcache_grants);
704 }
705 
706 static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
707                                                  hwaddr old_phys_addr,
708                                                  hwaddr new_phys_addr,
709                                                  hwaddr size)
710 {
711     MapCacheEntry *entry;
712     hwaddr address_index, address_offset;
713     hwaddr test_bit_size, cache_size = size;
714 
715     address_index  = old_phys_addr >> mc->bucket_shift;
716     address_offset = old_phys_addr & (mc->bucket_size - 1);
717 
718     assert(size);
719     /* test_bit_size is always a multiple of XC_PAGE_SIZE */
720     test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
721     if (test_bit_size % XC_PAGE_SIZE) {
722         test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
723     }
724     cache_size = size + address_offset;
725     if (cache_size % mc->bucket_size) {
726         cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
727     }
728 
729     entry = &mc->entry[address_index % mc->nr_buckets];
730     while (entry && !(entry->paddr_index == address_index &&
731                       entry->size == cache_size)) {
732         entry = entry->next;
733     }
734     if (!entry) {
735         trace_xen_replace_cache_entry_unlocked(old_phys_addr);
736         return NULL;
737     }
738 
739     assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);
740 
741     address_index  = new_phys_addr >> mc->bucket_shift;
742     address_offset = new_phys_addr & (mc->bucket_size - 1);
743 
744     trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
745 
746     xen_remap_bucket(mc, entry, entry->vaddr_base,
747                      cache_size, address_index, false,
748                      false, false, old_phys_addr);
749     if (!test_bits(address_offset >> XC_PAGE_SHIFT,
750                 test_bit_size >> XC_PAGE_SHIFT,
751                 entry->valid_mapping)) {
752         trace_xen_replace_cache_entry_unlocked_could_not_update_entry(
753             old_phys_addr
754         );
755         return NULL;
756     }
757 
758     return entry->vaddr_base + address_offset;
759 }
760 
761 uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
762                                  hwaddr new_phys_addr,
763                                  hwaddr size)
764 {
765     uint8_t *p;
766 
767     mapcache_lock(mapcache);
768     p = xen_replace_cache_entry_unlocked(mapcache, old_phys_addr,
769                                          new_phys_addr, size);
770     mapcache_unlock(mapcache);
771     return p;
772 }
773