xref: /openbmc/qemu/hw/xen/xen-mapcache.c (revision dfad8421af474a38e272cdb19ae3c8e778acf820)
1  /*
2   * Copyright (C) 2011       Citrix Ltd.
3   *
4   * This work is licensed under the terms of the GNU GPL, version 2.  See
5   * the COPYING file in the top-level directory.
6   *
7   * Contributions after 2012-01-13 are licensed under the terms of the
8   * GNU GPL, version 2 or (at your option) any later version.
9   */
10  
11  #include "qemu/osdep.h"
12  #include "qemu/units.h"
13  #include "qemu/error-report.h"
14  
15  #include <sys/resource.h>
16  
17  #include "hw/xen/xen-hvm-common.h"
18  #include "hw/xen/xen_native.h"
19  #include "qemu/bitmap.h"
20  
21  #include "sysemu/runstate.h"
22  #include "sysemu/xen-mapcache.h"
23  #include "trace.h"
24  
25  #include <xenevtchn.h>
26  #include <xengnttab.h>
27  
28  #if HOST_LONG_BITS == 32
29  #  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
30  #else
31  #  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
32  #endif
33  
34  /* This is the size of the virtual address space reserved for QEMU that will
35   * not be used by the MapCache.
36   * Empirical tests showed that QEMU uses roughly 75MB more than
37   * max_mcache_size.
38   */
39  #define NON_MCACHE_MEMORY_SIZE (80 * MiB)
40  
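/*
 * Each MapCache is a fixed-size hash table of MapCacheEntry buckets,
 * indexed by (paddr_index % nr_buckets) and chained through 'next' on
 * collisions.  'valid_mapping' holds one bit per XC_PAGE_SIZE page that
 * was successfully mapped, 'lock' is a reference count taken by locked
 * lookups, and 'flags' marks dummy (anonymous) and grant-based mappings.
 */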
41  typedef struct MapCacheEntry {
42      hwaddr paddr_index;
43      uint8_t *vaddr_base;
44      unsigned long *valid_mapping;
45      uint32_t lock;
46  #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
47  #define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
48      uint8_t flags;
49      hwaddr size;
50      struct MapCacheEntry *next;
51  } MapCacheEntry;
52  
53  typedef struct MapCacheRev {
54      uint8_t *vaddr_req;
55      hwaddr paddr_index;
56      hwaddr size;
57      QTAILQ_ENTRY(MapCacheRev) next;
58      bool dma;
59  } MapCacheRev;
60  
61  typedef struct MapCache {
62      MapCacheEntry *entry;
63      unsigned long nr_buckets;
64      QTAILQ_HEAD(, MapCacheRev) locked_entries;
65  
66      /* For most cases (>99.9%), the page address is the same as last time. */
67      MapCacheEntry *last_entry;
68      unsigned long max_mcache_size;
69      unsigned int bucket_shift;
70      unsigned long bucket_size;
71  
72      phys_offset_to_gaddr_t phys_offset_to_gaddr;
73      QemuMutex lock;
74      void *opaque;
75  } MapCache;
76  
77  static MapCache *mapcache;
78  static MapCache *mapcache_grants;
79  static xengnttab_handle *xen_region_gnttabdev;
80  
81  static inline void mapcache_lock(MapCache *mc)
82  {
83      qemu_mutex_lock(&mc->lock);
84  }
85  
86  static inline void mapcache_unlock(MapCache *mc)
87  {
88      qemu_mutex_unlock(&mc->lock);
89  }
90  
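/*
 * Returns 1 if every bit in [nr, nr + size) of 'addr' is set, i.e. if all
 * pages of the requested range have a valid mapping, and 0 otherwise.
 */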
91  static inline int test_bits(int nr, int size, const unsigned long *addr)
92  {
93      unsigned long res = find_next_zero_bit(addr, size + nr, nr);
94      return res >= nr + size ? 1 : 0;
98  }
99  
100  static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
101                                             void *opaque,
102                                             unsigned int bucket_shift,
103                                             unsigned long max_size)
104  {
105      unsigned long size;
106      MapCache *mc;
107  
108      assert(bucket_shift >= XC_PAGE_SHIFT);
109  
110      mc = g_new0(MapCache, 1);
111  
112      mc->phys_offset_to_gaddr = f;
113      mc->opaque = opaque;
114      qemu_mutex_init(&mc->lock);
115  
116      QTAILQ_INIT(&mc->locked_entries);
117  
118      mc->bucket_shift = bucket_shift;
119      mc->bucket_size = 1UL << bucket_shift;
120      mc->max_mcache_size = max_size;
121  
122      mc->nr_buckets =
123          (((mc->max_mcache_size >> XC_PAGE_SHIFT) +
124            (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >>
125           (bucket_shift - XC_PAGE_SHIFT));
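    /*
     * For example, with the 64-bit defaults used for the foreign mapcache
     * (bucket_shift == 20, i.e. 1 MiB buckets) and a max_mcache_size of
     * 32 GiB, this yields nr_buckets == 32768.
     */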
126  
127      size = mc->nr_buckets * sizeof(MapCacheEntry);
128      size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
129      trace_xen_map_cache_init(mc->nr_buckets, size);
130      mc->entry = g_malloc0(size);
131      return mc;
132  }
133  
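/*
 * Create the two caches: 'mapcache' for foreign mappings of guest RAM
 * (large buckets) and 'mapcache_grants' for grant mappings (one page per
 * bucket).  For non-root users, the cache size is clamped to the RLIMIT_AS
 * address-space limit minus NON_MCACHE_MEMORY_SIZE reserved for QEMU
 * itself.
 */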
134  void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
135  {
136      struct rlimit rlimit_as;
137      unsigned long max_mcache_size;
138      unsigned int bucket_shift;
139  
140      xen_region_gnttabdev = xengnttab_open(NULL, 0);
141      if (xen_region_gnttabdev == NULL) {
142          error_report("mapcache: Failed to open gnttab device");
143          exit(EXIT_FAILURE);
144      }
145  
146      if (HOST_LONG_BITS == 32) {
147          bucket_shift = 16;
148      } else {
149          bucket_shift = 20;
150      }
151  
152      if (geteuid() == 0) {
153          rlimit_as.rlim_cur = RLIM_INFINITY;
154          rlimit_as.rlim_max = RLIM_INFINITY;
155          max_mcache_size = MCACHE_MAX_SIZE;
156      } else {
157          getrlimit(RLIMIT_AS, &rlimit_as);
158          rlimit_as.rlim_cur = rlimit_as.rlim_max;
159  
160          if (rlimit_as.rlim_max != RLIM_INFINITY) {
161              warn_report("QEMU's maximum virtual memory size"
162                          " is not unlimited");
163          }
164          if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
165              max_mcache_size = rlimit_as.rlim_max - NON_MCACHE_MEMORY_SIZE;
166          } else {
167              max_mcache_size = MCACHE_MAX_SIZE;
168          }
169      }
170  
171      mapcache = xen_map_cache_init_single(f, opaque,
172                                           bucket_shift,
173                                           max_mcache_size);
174  
175      /*
176       * Grant mappings must use XC_PAGE_SIZE granularity since we can't
177       * map anything beyond the number of pages granted to us.
178       */
179      mapcache_grants = xen_map_cache_init_single(f, opaque,
180                                                  XC_PAGE_SHIFT,
181                                                  max_mcache_size);
182  
183      setrlimit(RLIMIT_AS, &rlimit_as);
184  }
185  
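/*
 * (Re)populate one cache entry: map 'size' bytes starting at
 * 'address_index', either through grant references (xengnttab), as a
 * foreign mapping (xenforeignmemory), or as an anonymous dummy mapping
 * when 'dummy' is set.  Pages that failed to map are left as clear bits
 * in entry->valid_mapping.
 */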
186  static void xen_remap_bucket(MapCache *mc,
187                               MapCacheEntry *entry,
188                               void *vaddr,
189                               hwaddr size,
190                               hwaddr address_index,
191                               bool dummy,
192                               bool grant,
193                               bool is_write,
194                               ram_addr_t ram_offset)
195  {
196      uint8_t *vaddr_base;
197      g_autofree uint32_t *refs = NULL;
198      g_autofree xen_pfn_t *pfns = NULL;
199      g_autofree int *err = NULL;
200      unsigned int i;
201      hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
202  
203      trace_xen_remap_bucket(address_index);
204  
205      if (grant) {
206          refs = g_new0(uint32_t, nb_pfn);
207      } else {
208          pfns = g_new0(xen_pfn_t, nb_pfn);
209      }
210      err = g_new0(int, nb_pfn);
211  
212      if (entry->vaddr_base != NULL) {
213          if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
214              ram_block_notify_remove(entry->vaddr_base, entry->size,
215                                      entry->size);
216          }
217  
218          /*
219           * If an entry is being replaced by another mapping and we're using
220           * the MAP_FIXED flag for it, there is a possibility of a race for
221           * the vaddr address with another thread doing an mmap call itself
222           * (see man 2 mmap). To avoid that, we skip explicit unmapping here
223           * and let the kernel destroy the previous mappings by replacing
224           * them in the later mmap call.
225           *
226           * Therefore, non-identical replacements are not allowed.
227           */
228          assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size));
229  
230          if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) {
231              perror("unmap fails");
232              exit(-1);
233          }
234      }
235      g_free(entry->valid_mapping);
236      entry->valid_mapping = NULL;
237  
238      if (grant) {
239          hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
240  
241          for (i = 0; i < nb_pfn; i++) {
242              refs[i] = grant_base + i;
243          }
244      } else {
245          for (i = 0; i < nb_pfn; i++) {
246              pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
247          }
248      }
249  
250      entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
251  
252      if (!dummy) {
253          if (grant) {
254              int prot = PROT_READ;
255  
256              if (is_write) {
257                  prot |= PROT_WRITE;
258              }
259  
260              entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
261              assert(vaddr == NULL);
262              vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
263                                                           nb_pfn,
264                                                           xen_domid, refs,
265                                                           prot);
266          } else {
267              /*
268               * If the caller has requested the mapping at a specific address,
269               * use MAP_FIXED to make sure it's honored.
270               *
271               * We don't yet support upgrading mappings from RO to RW; to handle
272               * models using ordinary address_space_rw(), foreign mappings ignore
273               * is_write and are always mapped RW.
274               */
275              vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
276                                                 PROT_READ | PROT_WRITE,
277                                                 vaddr ? MAP_FIXED : 0,
278                                                 nb_pfn, pfns, err);
279          }
280          if (vaddr_base == NULL) {
281              perror(grant ? "xengnttab_map_domain_grant_refs"
282                             : "xenforeignmemory_map2");
283              exit(-1);
284          }
285      } else {
286          /*
287           * We create dummy mappings where we are unable to create a foreign
288           * mapping immediately due to certain circumstances (e.g. on resume).
289           */
290          vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
291                            MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
292                            -1, 0);
293          if (vaddr_base == MAP_FAILED) {
294              perror("mmap");
295              exit(-1);
296          }
297      }
298  
299      if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
300          ram_block_notify_add(vaddr_base, size, size);
301      }
302  
303      entry->vaddr_base = vaddr_base;
304      entry->paddr_index = address_index;
305      entry->size = size;
306      entry->valid_mapping = g_new0(unsigned long,
307                                    BITS_TO_LONGS(size >> XC_PAGE_SHIFT));
308  
309      if (dummy) {
310          entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
311      } else {
312          entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
313      }
314  
315      bitmap_zero(entry->valid_mapping, nb_pfn);
316      for (i = 0; i < nb_pfn; i++) {
317          if (!err[i]) {
318              bitmap_set(entry->valid_mapping, i, 1);
319          }
320      }
321  }
322  
323  static uint8_t *xen_map_cache_unlocked(MapCache *mc,
324                                         hwaddr phys_addr, hwaddr size,
325                                         ram_addr_t ram_offset,
326                                         uint8_t lock, bool dma,
327                                         bool grant, bool is_write)
328  {
329      MapCacheEntry *entry, *pentry = NULL,
330                    *free_entry = NULL, *free_pentry = NULL;
331      hwaddr address_index;
332      hwaddr address_offset;
333      hwaddr cache_size = size;
334      hwaddr test_bit_size;
335      bool translated G_GNUC_UNUSED = false;
336      bool dummy = false;
337  
338  tryagain:
339      address_index  = phys_addr >> mc->bucket_shift;
340      address_offset = phys_addr & (mc->bucket_size - 1);
341  
342      trace_xen_map_cache(phys_addr);
343  
344      /* test_bit_size is always a multiple of XC_PAGE_SIZE */
345      if (size) {
346          test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));
347  
348          if (test_bit_size % XC_PAGE_SIZE) {
349              test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
350          }
351      } else {
352          test_bit_size = XC_PAGE_SIZE;
353      }
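    /*
     * E.g. phys_addr == 0x10234 with size == 0x100 gives
     * test_bit_size == 0x334, rounded up to one XC_PAGE_SIZE page.
     */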
354  
355      if (mc->last_entry != NULL &&
356          mc->last_entry->paddr_index == address_index &&
357          !lock && !size &&
358          test_bits(address_offset >> XC_PAGE_SHIFT,
359                    test_bit_size >> XC_PAGE_SHIFT,
360                    mc->last_entry->valid_mapping)) {
361          trace_xen_map_cache_return(
362              mc->last_entry->vaddr_base + address_offset
363          );
364          return mc->last_entry->vaddr_base + address_offset;
365      }
366  
367      /* cache_size is always a multiple of mc->bucket_size */
368      if (size) {
369          cache_size = size + address_offset;
370          if (cache_size % mc->bucket_size) {
371              cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
372          }
373      } else {
374          cache_size = mc->bucket_size;
375      }
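    /*
     * E.g. with 1 MiB buckets, a 0x100-byte request at offset 0x234 into a
     * bucket is rounded up to cache_size == 1 MiB (one whole bucket).
     */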
376  
377      entry = &mc->entry[address_index % mc->nr_buckets];
378  
379      while (entry && (lock || entry->lock) && entry->vaddr_base &&
380              (entry->paddr_index != address_index || entry->size != cache_size ||
381               !test_bits(address_offset >> XC_PAGE_SHIFT,
382                   test_bit_size >> XC_PAGE_SHIFT,
383                   entry->valid_mapping))) {
384          if (!free_entry && !entry->lock) {
385              free_entry = entry;
386              free_pentry = pentry;
387          }
388          pentry = entry;
389          entry = entry->next;
390      }
391      if (!entry && free_entry) {
392          entry = free_entry;
393          pentry = free_pentry;
394      }
395      if (!entry) {
396          entry = g_new0(MapCacheEntry, 1);
397          pentry->next = entry;
398          xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
399                           grant, is_write, ram_offset);
400      } else if (!entry->lock) {
401          if (!entry->vaddr_base || entry->paddr_index != address_index ||
402                  entry->size != cache_size ||
403                  !test_bits(address_offset >> XC_PAGE_SHIFT,
404                      test_bit_size >> XC_PAGE_SHIFT,
405                      entry->valid_mapping)) {
406              xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
407                               grant, is_write, ram_offset);
408          }
409      }
410  
411      if (!test_bits(address_offset >> XC_PAGE_SHIFT,
412                  test_bit_size >> XC_PAGE_SHIFT,
413                  entry->valid_mapping)) {
414          mc->last_entry = NULL;
415  #ifdef XEN_COMPAT_PHYSMAP
416          if (!translated && mc->phys_offset_to_gaddr) {
417              phys_addr = mc->phys_offset_to_gaddr(phys_addr, size);
418              translated = true;
419              goto tryagain;
420          }
421  #endif
422          if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
423              dummy = true;
424              goto tryagain;
425          }
426          trace_xen_map_cache_return(NULL);
427          return NULL;
428      }
429  
430      mc->last_entry = entry;
431      if (lock) {
432          MapCacheRev *reventry = g_new0(MapCacheRev, 1);
433          entry->lock++;
434          if (entry->lock == 0) {
435              error_report("mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p",
436                           entry->paddr_index, entry->vaddr_base);
437              abort();
438          }
439          reventry->dma = dma;
440          reventry->vaddr_req = mc->last_entry->vaddr_base + address_offset;
441          reventry->paddr_index = mc->last_entry->paddr_index;
442          reventry->size = entry->size;
443          QTAILQ_INSERT_HEAD(&mc->locked_entries, reventry, next);
444      }
445  
446      trace_xen_map_cache_return(
447          mc->last_entry->vaddr_base + address_offset
448      );
449      return mc->last_entry->vaddr_base + address_offset;
450  }
451  
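/*
 * Illustrative caller sketch (hypothetical values, not taken from a real
 * call site):
 *
 *     uint8_t *p = xen_map_cache(mr, 0x100000, XC_PAGE_SIZE, 0,
 *                                1, false, true);
 *     if (p) {
 *         // ... access guest memory through p ...
 *         xen_invalidate_map_cache_entry(p);  // release the lock taken above
 *     }
 *
 * With lock == 0 the returned pointer may be invalidated by any later
 * mapcache operation; locked mappings stay valid until released with
 * xen_invalidate_map_cache_entry().
 */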
452  uint8_t *xen_map_cache(MemoryRegion *mr,
453                         hwaddr phys_addr, hwaddr size,
454                         ram_addr_t ram_addr_offset,
455                         uint8_t lock, bool dma,
456                         bool is_write)
457  {
458      bool grant = xen_mr_is_grants(mr);
459      MapCache *mc = grant ? mapcache_grants : mapcache;
460      uint8_t *p;
461  
462      if (grant && !lock) {
463          /*
464           * Grants are only supported via address_space_map(). Anything
465           * else is considered a user/guest error.
466           *
467           * QEMU generally doesn't expect these mappings to ever fail, so
468           * if this happens we report an error message and abort().
469           */
470          error_report("Tried to access a grant reference without mapping it.");
471          abort();
472      }
473  
474      mapcache_lock(mc);
475      p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
476                                 lock, dma, grant, is_write);
477      mapcache_unlock(mc);
478      return p;
479  }
480  
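/*
 * Reverse lookup: translate a pointer previously returned by a locked
 * xen_map_cache() call back into a guest RAM address, using the
 * locked_entries list to recover the bucket it belongs to.
 */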
481  static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
482  {
483      MapCacheEntry *entry = NULL;
484      MapCacheRev *reventry;
485      hwaddr paddr_index;
486      hwaddr size;
487      ram_addr_t raddr;
488      int found = 0;
489  
490      mapcache_lock(mc);
491      QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
492          if (reventry->vaddr_req == ptr) {
493              paddr_index = reventry->paddr_index;
494              size = reventry->size;
495              found = 1;
496              break;
497          }
498      }
499      if (!found) {
500          trace_xen_ram_addr_from_mapcache_not_found(ptr);
501          mapcache_unlock(mc);
502          return RAM_ADDR_INVALID;
503      }
504  
505      entry = &mc->entry[paddr_index % mc->nr_buckets];
506      while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
507          entry = entry->next;
508      }
509      if (!entry) {
510          trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
511          raddr = RAM_ADDR_INVALID;
512      } else {
513          raddr = (reventry->paddr_index << mc->bucket_shift) +
514               ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
515      }
516      mapcache_unlock(mc);
517      return raddr;
518  }
519  
520  ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
521  {
522      ram_addr_t addr;
523  
524      addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
525      if (addr == RAM_ADDR_INVALID) {
526          addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
527      }
528  
529      return addr;
530  }
531  
532  static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
533                                                      uint8_t *buffer)
534  {
535      MapCacheEntry *entry = NULL, *pentry = NULL;
536      MapCacheRev *reventry;
537      hwaddr paddr_index;
538      hwaddr size;
539      int found = 0;
540      int rc;
541  
542      QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
543          if (reventry->vaddr_req == buffer) {
544              paddr_index = reventry->paddr_index;
545              size = reventry->size;
546              found = 1;
547              break;
548          }
549      }
550      if (!found) {
551          trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
552          QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
553              trace_xen_invalidate_map_cache_entry_unlocked_found(
554                  reventry->paddr_index,
555                  reventry->vaddr_req
556              );
557          }
558          return;
559      }
560      QTAILQ_REMOVE(&mc->locked_entries, reventry, next);
561      g_free(reventry);
562  
563      if (mc->last_entry != NULL &&
564          mc->last_entry->paddr_index == paddr_index) {
565          mc->last_entry = NULL;
566      }
567  
568      entry = &mc->entry[paddr_index % mc->nr_buckets];
569      while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
570          pentry = entry;
571          entry = entry->next;
572      }
573      if (!entry) {
574          trace_xen_invalidate_map_cache_entry_unlocked_miss(buffer);
575          return;
576      }
577      entry->lock--;
578      if (entry->lock > 0) {
579          return;
580      }
581  
582      ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
583      if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
584          rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
585                               entry->size >> mc->bucket_shift);
586      } else {
587          rc = munmap(entry->vaddr_base, entry->size);
588      }
589  
590      if (rc) {
591          perror("unmap fails");
592          exit(-1);
593      }
594  
595      g_free(entry->valid_mapping);
596      if (pentry) {
597          pentry->next = entry->next;
598          g_free(entry);
599      } else {
600          /*
601           * Invalidate the mapping but keep entry->next pointing to the rest
602           * of the list.
603           *
604           * Note that entry->lock is already zero here, or we would not unmap.
605           */
606          entry->paddr_index = 0;
607          entry->vaddr_base = NULL;
608          entry->valid_mapping = NULL;
609          entry->flags = 0;
610          entry->size = 0;
611      }
612  }
613  
614  typedef struct XenMapCacheData {
615      Coroutine *co;
616      uint8_t *buffer;
617  } XenMapCacheData;
618  
619  static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer)
620  {
621      mapcache_lock(mc);
622      xen_invalidate_map_cache_entry_unlocked(mc, buffer);
623      mapcache_unlock(mc);
624  }
625  
626  static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
627  {
628      xen_invalidate_map_cache_entry_single(mapcache, buffer);
629      xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
630  }
631  
632  static void xen_invalidate_map_cache_entry_bh(void *opaque)
633  {
634      XenMapCacheData *data = opaque;
635  
636      xen_invalidate_map_cache_entry_all(data->buffer);
637      aio_co_wake(data->co);
638  }
639  
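/*
 * If called from a coroutine, defer the invalidation to a bottom half on
 * the current AioContext and yield until it has run; otherwise invalidate
 * both caches directly.
 */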
640  void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
641  {
642      if (qemu_in_coroutine()) {
643          XenMapCacheData data = {
644              .co = qemu_coroutine_self(),
645              .buffer = buffer,
646          };
647          aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
648                                  xen_invalidate_map_cache_entry_bh, &data);
649          qemu_coroutine_yield();
650      } else {
651          xen_invalidate_map_cache_entry_all(buffer);
652      }
653  }
654  
655  static void xen_invalidate_map_cache_single(MapCache *mc)
656  {
657      unsigned long i;
658      MapCacheRev *reventry;
659  
660      mapcache_lock(mc);
661  
662      QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
663          if (!reventry->dma) {
664              continue;
665          }
666          trace_xen_invalidate_map_cache(reventry->paddr_index,
667                                         reventry->vaddr_req);
668      }
669  
670      for (i = 0; i < mc->nr_buckets; i++) {
671          MapCacheEntry *entry = &mc->entry[i];
672  
673          if (entry->vaddr_base == NULL) {
674              continue;
675          }
676          if (entry->lock > 0) {
677              continue;
678          }
679  
680          if (munmap(entry->vaddr_base, entry->size) != 0) {
681              perror("unmap fails");
682              exit(-1);
683          }
684  
685          entry->paddr_index = 0;
686          entry->vaddr_base = NULL;
687          entry->size = 0;
688          g_free(entry->valid_mapping);
689          entry->valid_mapping = NULL;
690      }
691  
692      mc->last_entry = NULL;
693  
694      mapcache_unlock(mc);
695  }
696  
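/*
 * Drop every unlocked entry from both caches, e.g. in response to a
 * mapcache invalidation request from Xen.  Pending block I/O is drained
 * first so that no DMA mapping is torn down while still in use.
 */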
697  void xen_invalidate_map_cache(void)
698  {
699      /* Flush pending AIO before destroying the mapcache */
700      bdrv_drain_all();
701  
702      xen_invalidate_map_cache_single(mapcache);
703      xen_invalidate_map_cache_single(mapcache_grants);
704  }
705  
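/*
 * Remap an existing non-grant entry in place so that the same virtual
 * address range now maps 'new_phys_addr' instead of 'old_phys_addr',
 * typically to replace a dummy mapping with a real one.  Returns NULL if
 * the old mapping is not found or the new mapping is incomplete.
 */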
706  static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
707                                                   hwaddr old_phys_addr,
708                                                   hwaddr new_phys_addr,
709                                                   hwaddr size)
710  {
711      MapCacheEntry *entry;
712      hwaddr address_index, address_offset;
713      hwaddr test_bit_size, cache_size = size;
714  
715      address_index  = old_phys_addr >> mc->bucket_shift;
716      address_offset = old_phys_addr & (mc->bucket_size - 1);
717  
718      assert(size);
719      /* test_bit_size is always a multiple of XC_PAGE_SIZE */
720      test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
721      if (test_bit_size % XC_PAGE_SIZE) {
722          test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
723      }
724      cache_size = size + address_offset;
725      if (cache_size % mc->bucket_size) {
726          cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
727      }
728  
729      entry = &mc->entry[address_index % mc->nr_buckets];
730      while (entry && !(entry->paddr_index == address_index &&
731                        entry->size == cache_size)) {
732          entry = entry->next;
733      }
734      if (!entry) {
735          trace_xen_replace_cache_entry_unlocked(old_phys_addr);
736          return NULL;
737      }
738  
739      assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);
740  
741      address_index  = new_phys_addr >> mc->bucket_shift;
742      address_offset = new_phys_addr & (mc->bucket_size - 1);
743  
744      trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
745  
746      xen_remap_bucket(mc, entry, entry->vaddr_base,
747                       cache_size, address_index, false,
748                       false, false, old_phys_addr);
749      if (!test_bits(address_offset >> XC_PAGE_SHIFT,
750                  test_bit_size >> XC_PAGE_SHIFT,
751                  entry->valid_mapping)) {
752          trace_xen_replace_cache_entry_unlocked_could_not_update_entry(
753              old_phys_addr
754          );
755          return NULL;
756      }
757  
758      return entry->vaddr_base + address_offset;
759  }
760  
761  uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
762                                   hwaddr new_phys_addr,
763                                   hwaddr size)
764  {
765      uint8_t *p;
766  
767      mapcache_lock(mapcache);
768      p = xen_replace_cache_entry_unlocked(mapcache, old_phys_addr,
769                                           new_phys_addr, size);
770      mapcache_unlock(mapcache);
771      return p;
772  }
773