/* xref: /openbmc/qemu/hw/virtio/vhost.c (revision 24602b77f5658ae8377958c15fdef2f44affc743) */
/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/memfd.h"
#include "qemu/log.h"
#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/mem/memory-device.h"
#include "migration/blocker.h"
#include "migration/qemu-file-types.h"
#include "sysemu/dma.h"
#include "trace.h"

/* enabled until disconnected backend stabilizes */
#define _VHOST_DEBUG 1

#ifdef _VHOST_DEBUG
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
    do { \
        error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
                     strerror(-retval), -retval); \
    } while (0)
#else
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
    do { } while (0)
#endif
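
/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * callers pass the negative errno returned by a vhost op together with
 * a context string, e.g.
 *
 *     r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
 *     if (r < 0) {
 *         VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
 *     }
 *
 * With _VHOST_DEBUG defined this prints, for r == -ENOMEM:
 *     vhost_set_vring_num failed: Cannot allocate memory (12)
 */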

static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];

/* Memslots used by backends that support private memslots (without an fd). */
static unsigned int used_memslots;

/* Memslots used by backends that only support shared memslots (with an fd). */
static unsigned int used_shared_memslots;

static QLIST_HEAD(, vhost_dev) vhost_devices =
    QLIST_HEAD_INITIALIZER(vhost_devices);

unsigned int vhost_get_max_memslots(void)
{
    unsigned int max = UINT_MAX;
    struct vhost_dev *hdev;

    QLIST_FOREACH(hdev, &vhost_devices, entry) {
        max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
    }
    return max;
}

unsigned int vhost_get_free_memslots(void)
{
    unsigned int free = UINT_MAX;
    struct vhost_dev *hdev;

    QLIST_FOREACH(hdev, &vhost_devices, entry) {
        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
        unsigned int cur_free;

        if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
            hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
            cur_free = r - used_shared_memslots;
        } else {
            cur_free = r - used_memslots;
        }
        free = MIN(free, cur_free);
    }
    return free;
}
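
/*
 * Worked example (editor's illustration, hypothetical limits): with two
 * devices registered, one backend reporting a limit of 64 private
 * memslots and another reporting 509 shared memslots, counts of
 * used_memslots == 60 and used_shared_memslots == 500 give per-device
 * free counts of 4 and 9, so vhost_get_free_memslots() returns
 * MIN(4, 9) == 4.
 */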

static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    vhost_log_chunk_t *dev_log = dev->log->log;

    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev_log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev_log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really need barrier semantics
         * but it's easier to use atomic_* than roll our own. */
        log = qatomic_xchg(from, 0);
        while (log) {
            int bit = ctzl(log);
            hwaddr page_addr;
            hwaddr section_offset;
            hwaddr mr_offset;
            page_addr = addr + bit * VHOST_LOG_PAGE;
            section_offset = page_addr - section->offset_within_address_space;
            mr_offset = section_offset + section->offset_within_region;
            memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}
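
/*
 * Worked example (editor's illustration): on a 64-bit host a
 * vhost_log_chunk_t is 64 bits, so with VHOST_LOG_PAGE == 0x1000 each
 * chunk covers VHOST_LOG_CHUNK == 64 * 0x1000 == 0x40000 bytes of guest
 * memory. If the chunk covering [0x40000, 0x80000) reads as 0x9 (bits 0
 * and 3 set), the loop above marks the pages at 0x40000 and 0x43000
 * dirty via memory_region_set_dirty().
 */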

bool vhost_dev_has_iommu(struct vhost_dev *dev)
{
    VirtIODevice *vdev = dev->vdev;

    /*
     * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend supports the
     * incremental memory-mapping API via the IOTLB API. On platforms
     * without an IOMMU there is no need to enable this feature, which
     * would only cause unnecessary IOTLB miss/update transactions.
     */
    if (vdev) {
        return virtio_bus_device_iommu_enabled(vdev) &&
            virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
    } else {
        return false;
    }
}

static inline bool vhost_dev_should_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops);
    assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);

    return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
}

static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
{
    VhostBackendType backend_type;

    assert(hdev->vhost_ops);

    backend_type = hdev->vhost_ops->backend_type;
    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(backend_type < VHOST_BACKEND_TYPE_MAX);

    if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
        if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
            QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
                              hdev, logdev_entry);
        } else {
            /*
             * The first vhost_device in the list is selected as the shared
             * logger to scan memory sections. Put new entries next to the
             * head instead, to avoid inadvertently changing the underlying
             * logger device. This gives better cache locality and avoids
             * performance churn on the hot path for log scanning: even when
             * new devices come and go quickly, the active leading logger
             * device never changes.
             */
            QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
                               hdev, logdev_entry);
        }
    } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
        QLIST_REMOVE(hdev, logdev_entry);
    }
}
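
/*
 * Example (editor's illustration): if devices A, B and C register in
 * that order, the list is A -> C -> B (new entries go right after the
 * head), so A remains the elected logger until it unregisters, at which
 * point C takes over.
 */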

static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
                                   MemoryRegionSection *section,
                                   hwaddr first,
                                   hwaddr last)
{
    int i;
    hwaddr start_addr;
    hwaddr end_addr;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    start_addr = section->offset_within_address_space;
    end_addr = range_get_last(start_addr, int128_get64(section->size));
    start_addr = MAX(first, start_addr);
    end_addr = MIN(last, end_addr);

    if (vhost_dev_should_log(dev)) {
        for (i = 0; i < dev->mem->nregions; ++i) {
            struct vhost_memory_region *reg = dev->mem->regions + i;
            vhost_dev_sync_region(dev, section, start_addr, end_addr,
                                  reg->guest_phys_addr,
                                  range_get_last(reg->guest_phys_addr,
                                                 reg->memory_size));
        }
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;

        if (!vq->used_phys && !vq->used_size) {
            continue;
        }

        if (vhost_dev_has_iommu(dev)) {
            IOMMUTLBEntry iotlb;
            hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
            hwaddr phys, s, offset;

            while (used_size) {
                rcu_read_lock();
                iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
                                                      used_phys,
                                                      true,
                                                      MEMTXATTRS_UNSPECIFIED);
                rcu_read_unlock();

                if (!iotlb.target_as) {
                    qemu_log_mask(LOG_GUEST_ERROR, "translation "
                                  "failure for used_iova %"PRIx64"\n",
                                  used_phys);
                    return -EINVAL;
                }

                offset = used_phys & iotlb.addr_mask;
                phys = iotlb.translated_addr + offset;

                /*
                 * Distance from start of used ring until last byte of
                 * IOMMU page.
                 */
                s = iotlb.addr_mask - offset;
                /*
                 * Size of used ring, or of the part of it until end
                 * of IOMMU page. To avoid zero result, do the adding
                 * outside of MIN().
                 */
                s = MIN(s, used_size - 1) + 1;

                vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
                                      range_get_last(phys, s));
                used_size -= s;
                used_phys += s;
            }
        } else {
            vhost_dev_sync_region(dev, section, start_addr,
                                  end_addr, vq->used_phys,
                                  range_get_last(vq->used_phys, vq->used_size));
        }
    }
    return 0;
}
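
/*
 * Worked example (editor's illustration) for the IOMMU page-split loop
 * above: with a 4KiB IOMMU page (addr_mask == 0xfff), a used ring of
 * used_size == 0x200 bytes at used_iova 0xe80 translating to phys
 * 0x5e80, offset == 0xe80 and s == 0xfff - 0xe80 == 0x17f; the first
 * chunk synced is MIN(0x17f, 0x1ff) + 1 == 0x180 bytes, and the
 * remaining 0x80 bytes are handled by a second translation starting at
 * iova 0x1000.
 */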

static void vhost_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
}

static void vhost_log_sync_range(struct vhost_dev *dev,
                                 hwaddr first, hwaddr last)
{
    int i;
    /* FIXME: this is N^2 in number of sections */
    for (i = 0; i < dev->n_mem_sections; ++i) {
        MemoryRegionSection *section = &dev->mem_sections[i];
        vhost_sync_dirty_bitmap(dev, section, first, last);
    }
}

static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}
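
/*
 * Worked example (editor's illustration, 64-bit host): a single RAM
 * region covering [0, 1GiB) has last == 0x3fffffff, so the log needs
 * 0x3fffffff / 0x40000 + 1 == 0x1000 chunks, i.e. a 32KiB bitmap
 * (0x1000 chunks of 8 bytes each) tracking dirty state at 4KiB page
 * granularity.
 */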

static int vhost_set_backend_type(struct vhost_dev *dev,
                                  VhostBackendType backend_type)
{
    int r = 0;

    switch (backend_type) {
#ifdef CONFIG_VHOST_KERNEL
    case VHOST_BACKEND_TYPE_KERNEL:
        dev->vhost_ops = &kernel_ops;
        break;
#endif
#ifdef CONFIG_VHOST_USER
    case VHOST_BACKEND_TYPE_USER:
        dev->vhost_ops = &user_ops;
        break;
#endif
#ifdef CONFIG_VHOST_VDPA
    case VHOST_BACKEND_TYPE_VDPA:
        dev->vhost_ops = &vdpa_ops;
        break;
#endif
    default:
        error_report("Unknown vhost backend type");
        r = -1;
    }

    if (r == 0) {
        assert(dev->vhost_ops->backend_type == backend_type);
    }

    return r;
}

static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
{
    Error *err = NULL;
    struct vhost_log *log;
    uint64_t logsize = size * sizeof(*(log->log));
    int fd = -1;

    log = g_new0(struct vhost_log, 1);
    if (share) {
        log->log = qemu_memfd_alloc("vhost-log", logsize,
                                    F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
                                    &fd, &err);
        if (err) {
            error_report_err(err);
            g_free(log);
            return NULL;
        }
        memset(log->log, 0, logsize);
    } else {
        log->log = g_malloc0(logsize);
    }

    log->size = size;
    log->refcnt = 1;
    log->fd = fd;

    return log;
}
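
/*
 * Note (editor's addition): the "share" path backs the log with a sealed
 * memfd so the mapping can be handed to another process; this is what
 * backends such as vhost-user need in order to write dirty bits into the
 * same log pages that QEMU scans.
 */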

static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
                                       uint64_t size, bool share)
{
    struct vhost_log *log;

    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(backend_type < VHOST_BACKEND_TYPE_MAX);

    log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];

    if (!log || log->size != size) {
        log = vhost_log_alloc(size, share);
        if (share) {
            vhost_log_shm[backend_type] = log;
        } else {
            vhost_log[backend_type] = log;
        }
    } else {
        ++log->refcnt;
    }

    return log;
}

static void vhost_log_put(struct vhost_dev *dev, bool sync)
{
    struct vhost_log *log = dev->log;
    VhostBackendType backend_type;

    if (!log) {
        return;
    }

    assert(dev->vhost_ops);
    backend_type = dev->vhost_ops->backend_type;

    if (backend_type == VHOST_BACKEND_TYPE_NONE ||
        backend_type >= VHOST_BACKEND_TYPE_MAX) {
        return;
    }

    --log->refcnt;
    if (log->refcnt == 0) {
        /* Sync only the range covered by the old log */
        if (dev->log_size && sync) {
            vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
        }

        if (vhost_log[backend_type] == log) {
            g_free(log->log);
            vhost_log[backend_type] = NULL;
        } else if (vhost_log_shm[backend_type] == log) {
            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
                            log->fd);
            vhost_log_shm[backend_type] = NULL;
        }

        g_free(log);
    }

    vhost_dev_elect_mem_logger(dev, false);
    dev->log = NULL;
    dev->log_size = 0;
}

static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
{
    return dev->vhost_ops->vhost_requires_shm_log &&
           dev->vhost_ops->vhost_requires_shm_log(dev);
}

static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
    struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
                                          size, vhost_dev_log_is_shared(dev));
    uint64_t log_base = (uintptr_t)log->log;
    int r;

    /* Inform the backend of log switching; this must be done before
     * releasing the current log, to ensure no logging is lost. */
    r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
    }

    vhost_log_put(dev, true);
    dev->log = log;
    dev->log_size = size;
}

static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
                              hwaddr *plen, bool is_write)
{
    if (!vhost_dev_has_iommu(dev)) {
        return cpu_physical_memory_map(addr, plen, is_write);
    } else {
        return (void *)(uintptr_t)addr;
    }
}

static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
                               hwaddr len, int is_write,
                               hwaddr access_len)
{
    if (!vhost_dev_has_iommu(dev)) {
        cpu_physical_memory_unmap(buffer, len, is_write, access_len);
    }
}

static int vhost_verify_ring_part_mapping(void *ring_hva,
                                          uint64_t ring_gpa,
                                          uint64_t ring_size,
                                          void *reg_hva,
                                          uint64_t reg_gpa,
                                          uint64_t reg_size)
{
    uint64_t hva_ring_offset;
    uint64_t ring_last = range_get_last(ring_gpa, ring_size);
    uint64_t reg_last = range_get_last(reg_gpa, reg_size);

    if (ring_last < reg_gpa || ring_gpa > reg_last) {
        return 0;
    }
    /* check that the whole ring is mapped */
    if (ring_last > reg_last) {
        return -ENOMEM;
    }
    /* check that the ring's MemoryRegion wasn't replaced */
    hva_ring_offset = ring_gpa - reg_gpa;
    if (ring_hva != reg_hva + hva_ring_offset) {
        return -EBUSY;
    }

    return 0;
}
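
/*
 * Worked example (editor's illustration): a used ring at GPA 0x101000,
 * size 0x1000, checked against a region covering GPA [0x100000,
 * 0x180000) mapped at HVA reg_hva. The ring overlaps the region and
 * fits entirely inside it, so the remaining check is that the ring's
 * HVA still equals reg_hva + 0x1000; if the region was remapped
 * elsewhere since the ring was set up, this fails with -EBUSY.
 */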

static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      void *reg_hva,
                                      uint64_t reg_gpa,
                                      uint64_t reg_size)
{
    int i, j;
    int r = 0;
    const char *part_name[] = {
        "descriptor table",
        "available ring",
        "used ring"
    };

    if (vhost_dev_has_iommu(dev)) {
        return 0;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;

        if (vq->desc_phys == 0) {
            continue;
        }

        j = 0;
        r = vhost_verify_ring_part_mapping(
                vq->desc, vq->desc_phys, vq->desc_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }

        j++;
        r = vhost_verify_ring_part_mapping(
                vq->avail, vq->avail_phys, vq->avail_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }

        j++;
        r = vhost_verify_ring_part_mapping(
                vq->used, vq->used_phys, vq->used_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }
    }

    if (r == -ENOMEM) {
        error_report("Unable to map %s for ring %d", part_name[j], i);
    } else if (r == -EBUSY) {
        error_report("%s relocated for ring %d", part_name[j], i);
    }
    return r;
}

/*
 * vhost_section: identify sections needed for vhost access
 *
 * We only care about RAM sections here (where virtqueue and guest
 * internals accessed by virtio might live).
 */
static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
        uint8_t dirty_mask = memory_region_get_dirty_log_mask(mr);
        uint8_t handled_dirty;

        /*
         * Kernel-based vhost doesn't handle any block which is doing
         * dirty-tracking other than migration, for which it has
         * specific logging support. However, for TCG the kernel never
         * gets involved anyway, so we can also ignore its
         * self-modifying code detection flags. Note that a vhost-user
         * client could still confuse a TCG guest if it re-writes
         * executable memory that has already been translated.
         */
        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) |
            (1 << DIRTY_MEMORY_CODE);

        if (dirty_mask & ~handled_dirty) {
            trace_vhost_reject_section(mr->name, 1);
            return false;
        }

        /*
         * Some backends (like vhost-user) can only handle memory regions
         * that have an fd (can be mapped into a different process). Filter
         * the ones without an fd out, if requested.
         *
         * TODO: we might have to limit to MAP_SHARED as well.
         */
        if (memory_region_get_fd(section->mr) < 0 &&
            dev->vhost_ops->vhost_backend_no_private_memslots &&
            dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
            trace_vhost_reject_section(mr->name, 2);
            return false;
        }

        trace_vhost_section(mr->name);
        return true;
    } else {
        trace_vhost_reject_section(mr->name, 3);
        return false;
    }
}

static void vhost_begin(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    dev->tmp_sections = NULL;
    dev->n_tmp_sections = 0;
}

static void vhost_commit(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    MemoryRegionSection *old_sections;
    int n_old_sections;
    uint64_t log_size;
    size_t regions_size;
    int r;
    int i;
    bool changed = false;

    /* Note we can be called before the device is started, but then
     * starting the device calls set_mem_table, so we need to have
     * built the data structures.
     */
    old_sections = dev->mem_sections;
    n_old_sections = dev->n_mem_sections;
    dev->mem_sections = dev->tmp_sections;
    dev->n_mem_sections = dev->n_tmp_sections;

    if (dev->n_mem_sections != n_old_sections) {
        changed = true;
    } else {
        /* Same size, let's check the contents */
        for (i = 0; i < n_old_sections; i++) {
            if (!MemoryRegionSection_eq(&old_sections[i],
                                        &dev->mem_sections[i])) {
                changed = true;
                break;
            }
        }
    }

    trace_vhost_commit(dev->started, changed);
    if (!changed) {
        goto out;
    }

    /* Rebuild the regions list from the new sections list */
    regions_size = offsetof(struct vhost_memory, regions) +
                       dev->n_mem_sections * sizeof dev->mem->regions[0];
    dev->mem = g_realloc(dev->mem, regions_size);
    dev->mem->nregions = dev->n_mem_sections;

    if (dev->vhost_ops->vhost_backend_no_private_memslots &&
        dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
        used_shared_memslots = dev->mem->nregions;
    } else {
        used_memslots = dev->mem->nregions;
    }

    for (i = 0; i < dev->n_mem_sections; i++) {
        struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
        struct MemoryRegionSection *mrs = dev->mem_sections + i;

        cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
        cur_vmr->memory_size     = int128_get64(mrs->size);
        cur_vmr->userspace_addr  =
            (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
            mrs->offset_within_region;
        cur_vmr->flags_padding   = 0;
    }

    if (!dev->started) {
        goto out;
    }

    for (i = 0; i < dev->mem->nregions; i++) {
        if (vhost_verify_ring_mappings(dev,
                       (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
                       dev->mem->regions[i].guest_phys_addr,
                       dev->mem->regions[i].memory_size)) {
            error_report("Verify ring failure on region %d", i);
            abort();
        }
    }

    if (!dev->log_enabled) {
        r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
        if (r < 0) {
            VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
        }
        goto out;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes of log space,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
    }
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }

out:
    /* Deref the old list of sections; this must happen _after_ the
     * vhost_set_mem_table to ensure the client isn't still using the
     * section we're about to unref.
     */
    while (n_old_sections--) {
        memory_region_unref(old_sections[n_old_sections].mr);
    }
    g_free(old_sections);
    return;
}

/* Adds the section data to the tmp_section structure.
 * It relies on the listener calling us in memory address order
 * and for each region (via the _add and _nop methods) to
 * join neighbours.
 */
static void vhost_region_add_section(struct vhost_dev *dev,
                                     MemoryRegionSection *section)
{
    bool need_add = true;
    uint64_t mrs_size = int128_get64(section->size);
    uint64_t mrs_gpa = section->offset_within_address_space;
    uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
                         section->offset_within_region;
    RAMBlock *mrs_rb = section->mr->ram_block;

    trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
                                   mrs_host);

    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
        /* Round the section to its page size */
        /* First align the start down to a page boundary */
        size_t mrs_page = qemu_ram_pagesize(mrs_rb);
        uint64_t alignage = mrs_host & (mrs_page - 1);
        if (alignage) {
            mrs_host -= alignage;
            mrs_size += alignage;
            mrs_gpa  -= alignage;
        }
        /* Now align the size up to a page boundary */
        alignage = mrs_size & (mrs_page - 1);
        if (alignage) {
            mrs_size += mrs_page - alignage;
        }
        trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa,
                                               mrs_size, mrs_host);
    }
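
    /*
     * Worked example of the rounding above (editor's illustration):
     * with a 2MiB backing page size (mrs_page == 0x200000), a section
     * starting at an mrs_host whose low bits are 0x001000, of size
     * 0x1ff000, first gets its start aligned down (alignage == 0x1000,
     * so host and gpa drop by 0x1000 and the size grows to 0x200000);
     * the size is then already a multiple of 0x200000, so no tail
     * padding is added.
     */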

    if (dev->n_tmp_sections && !section->unmergeable) {
        /* Since we already have at least one section, let's see if
         * this extends it; since we're scanning in order, we only
         * have to look at the last one, and the FlatView that calls
         * us shouldn't have overlaps.
         */
        MemoryRegionSection *prev_sec = dev->tmp_sections +
                                               (dev->n_tmp_sections - 1);
        uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
        uint64_t prev_size = int128_get64(prev_sec->size);
        uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
        uint64_t prev_host_start =
                        (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
                        prev_sec->offset_within_region;
        uint64_t prev_host_end   = range_get_last(prev_host_start, prev_size);

        if (mrs_gpa <= (prev_gpa_end + 1)) {
            /* OK, looks like overlapping/intersecting - it's possible that
             * the rounding to page sizes has made them overlap, but they
             * should match up in the same RAMBlock if they do.
             */
            if (mrs_gpa < prev_gpa_start) {
                error_report("%s:Section '%s' rounded to %"PRIx64
                             " prior to previous '%s' %"PRIx64,
                             __func__, section->mr->name, mrs_gpa,
                             prev_sec->mr->name, prev_gpa_start);
                /* A way to cleanly fail here would be better */
                return;
            }
            /* Offset from the start of the previous GPA to this GPA */
            size_t offset = mrs_gpa - prev_gpa_start;

            if (prev_host_start + offset == mrs_host &&
                section->mr == prev_sec->mr && !prev_sec->unmergeable) {
                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
                need_add = false;
                prev_sec->offset_within_address_space =
                    MIN(prev_gpa_start, mrs_gpa);
                prev_sec->offset_within_region =
                    MIN(prev_host_start, mrs_host) -
                    (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
                prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
                                               mrs_host));
                trace_vhost_region_add_section_merge(section->mr->name,
                                        int128_get64(prev_sec->size),
                                        prev_sec->offset_within_address_space,
                                        prev_sec->offset_within_region);
            } else {
                /* adjoining regions are fine, but overlapping ones with
                 * different blocks/offsets shouldn't happen
                 */
                if (mrs_gpa != prev_gpa_end + 1) {
                    error_report("%s: Overlapping but not coherent sections "
                                 "at %"PRIx64,
                                 __func__, mrs_gpa);
                    return;
                }
            }
        }
    }

    if (need_add) {
        ++dev->n_tmp_sections;
        dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
                                    dev->n_tmp_sections);
        dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
        /* The flatview isn't stable and we don't use it; making it NULL
         * means we can memcmp the list.
         */
        dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
        memory_region_ref(section->mr);
    }
}
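
/*
 * Merge example (editor's illustration): two adjacent sections of the
 * same RAMBlock, [0x100000, 0x140000) and [0x140000, 0x180000), whose
 * host mappings are also contiguous, collapse into a single
 * tmp_sections entry covering [0x100000, 0x180000); only sections that
 * differ in RAMBlock, are marked unmergeable, or leave a GPA gap get a
 * new entry.
 */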
8486e790746SPaolo Bonzini 
849938eeb64SDr. David Alan Gilbert /* Used for both add and nop callbacks */
vhost_region_addnop(MemoryListener * listener,MemoryRegionSection * section)850938eeb64SDr. David Alan Gilbert static void vhost_region_addnop(MemoryListener *listener,
8516e790746SPaolo Bonzini                                 MemoryRegionSection *section)
8526e790746SPaolo Bonzini {
8536e790746SPaolo Bonzini     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
8546e790746SPaolo Bonzini                                          memory_listener);
8556e790746SPaolo Bonzini 
856988a2775STiwei Bie     if (!vhost_section(dev, section)) {
8576e790746SPaolo Bonzini         return;
8586e790746SPaolo Bonzini     }
85948d7c975SDr. David Alan Gilbert     vhost_region_add_section(dev, section);
8606e790746SPaolo Bonzini }
8616e790746SPaolo Bonzini 
vhost_iommu_unmap_notify(IOMMUNotifier * n,IOMMUTLBEntry * iotlb)862375f74f4SJason Wang static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
863375f74f4SJason Wang {
864375f74f4SJason Wang     struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
865375f74f4SJason Wang     struct vhost_dev *hdev = iommu->hdev;
866375f74f4SJason Wang     hwaddr iova = iotlb->iova + iommu->iommu_offset;
867375f74f4SJason Wang 
868020e571bSMaxime Coquelin     if (vhost_backend_invalidate_device_iotlb(hdev, iova,
869375f74f4SJason Wang                                               iotlb->addr_mask + 1)) {
870375f74f4SJason Wang         error_report("Fail to invalidate device iotlb");
871375f74f4SJason Wang     }
872375f74f4SJason Wang }
873375f74f4SJason Wang 
vhost_iommu_region_add(MemoryListener * listener,MemoryRegionSection * section)874375f74f4SJason Wang static void vhost_iommu_region_add(MemoryListener *listener,
875375f74f4SJason Wang                                    MemoryRegionSection *section)
876375f74f4SJason Wang {
877375f74f4SJason Wang     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
878375f74f4SJason Wang                                          iommu_listener);
879375f74f4SJason Wang     struct vhost_iommu *iommu;
880698feb5eSPeter Xu     Int128 end;
881805d4496SMarkus Armbruster     int iommu_idx;
882388a86dfSTiwei Bie     IOMMUMemoryRegion *iommu_mr;
883375f74f4SJason Wang 
884375f74f4SJason Wang     if (!memory_region_is_iommu(section->mr)) {
885375f74f4SJason Wang         return;
886375f74f4SJason Wang     }
887375f74f4SJason Wang 
888388a86dfSTiwei Bie     iommu_mr = IOMMU_MEMORY_REGION(section->mr);
889388a86dfSTiwei Bie 
890375f74f4SJason Wang     iommu = g_malloc0(sizeof(*iommu));
891698feb5eSPeter Xu     end = int128_add(int128_make64(section->offset_within_region),
892698feb5eSPeter Xu                      section->size);
893698feb5eSPeter Xu     end = int128_sub(end, int128_one());
894cb1efcf4SPeter Maydell     iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
895cb1efcf4SPeter Maydell                                                    MEMTXATTRS_UNSPECIFIED);
896698feb5eSPeter Xu     iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
897ee071f67SViktor Prutyanov                         dev->vdev->device_iotlb_enabled ?
898ee071f67SViktor Prutyanov                             IOMMU_NOTIFIER_DEVIOTLB_UNMAP :
899ee071f67SViktor Prutyanov                             IOMMU_NOTIFIER_UNMAP,
900698feb5eSPeter Xu                         section->offset_within_region,
901cb1efcf4SPeter Maydell                         int128_get64(end),
902cb1efcf4SPeter Maydell                         iommu_idx);
903375f74f4SJason Wang     iommu->mr = section->mr;
904375f74f4SJason Wang     iommu->iommu_offset = section->offset_within_address_space -
905375f74f4SJason Wang                           section->offset_within_region;
906375f74f4SJason Wang     iommu->hdev = dev;
907805d4496SMarkus Armbruster     memory_region_register_iommu_notifier(section->mr, &iommu->n,
908805d4496SMarkus Armbruster                                           &error_fatal);
909375f74f4SJason Wang     QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
910375f74f4SJason Wang     /* TODO: can replay help performance here? */
911375f74f4SJason Wang }
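
/*
 * Editor's worked example (values are illustrative only): iommu_offset
 * translates notifier IOVAs into vhost IOVAs. For a section with
 * offset_within_address_space = 0x80000000 and offset_within_region =
 * 0x1000, iommu_offset = 0x7ffff000; an invalidation for
 * iotlb->iova = 0x2000 then targets vhost IOVA
 * 0x2000 + 0x7ffff000 = 0x80001000 in vhost_iommu_unmap_notify() above.
 */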
912375f74f4SJason Wang 
vhost_iommu_region_del(MemoryListener * listener,MemoryRegionSection * section)913375f74f4SJason Wang static void vhost_iommu_region_del(MemoryListener *listener,
914375f74f4SJason Wang                                    MemoryRegionSection *section)
915375f74f4SJason Wang {
916375f74f4SJason Wang     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
917375f74f4SJason Wang                                          iommu_listener);
918375f74f4SJason Wang     struct vhost_iommu *iommu;
919375f74f4SJason Wang 
920375f74f4SJason Wang     if (!memory_region_is_iommu(section->mr)) {
921375f74f4SJason Wang         return;
922375f74f4SJason Wang     }
923375f74f4SJason Wang 
924375f74f4SJason Wang     QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
925698feb5eSPeter Xu         if (iommu->mr == section->mr &&
926698feb5eSPeter Xu             iommu->n.start == section->offset_within_region) {
927375f74f4SJason Wang             memory_region_unregister_iommu_notifier(iommu->mr,
928375f74f4SJason Wang                                                     &iommu->n);
929375f74f4SJason Wang             QLIST_REMOVE(iommu, iommu_next);
930375f74f4SJason Wang             g_free(iommu);
931375f74f4SJason Wang             break;
932375f74f4SJason Wang         }
933375f74f4SJason Wang     }
934375f74f4SJason Wang }
935375f74f4SJason Wang 
vhost_toggle_device_iotlb(VirtIODevice * vdev)936ee071f67SViktor Prutyanov void vhost_toggle_device_iotlb(VirtIODevice *vdev)
937ee071f67SViktor Prutyanov {
938ee071f67SViktor Prutyanov     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
939ee071f67SViktor Prutyanov     struct vhost_dev *dev;
940ee071f67SViktor Prutyanov     struct vhost_iommu *iommu;
941ee071f67SViktor Prutyanov 
942ee071f67SViktor Prutyanov     if (vdev->vhost_started) {
943ee071f67SViktor Prutyanov         dev = vdc->get_vhost(vdev);
944ee071f67SViktor Prutyanov     } else {
945ee071f67SViktor Prutyanov         return;
946ee071f67SViktor Prutyanov     }
947ee071f67SViktor Prutyanov 
948ee071f67SViktor Prutyanov     QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
949ee071f67SViktor Prutyanov         memory_region_unregister_iommu_notifier(iommu->mr, &iommu->n);
950ee071f67SViktor Prutyanov         iommu->n.notifier_flags = vdev->device_iotlb_enabled ?
951ee071f67SViktor Prutyanov                 IOMMU_NOTIFIER_DEVIOTLB_UNMAP : IOMMU_NOTIFIER_UNMAP;
952ee071f67SViktor Prutyanov         memory_region_register_iommu_notifier(iommu->mr, &iommu->n,
953ee071f67SViktor Prutyanov                                               &error_fatal);
954ee071f67SViktor Prutyanov     }
955ee071f67SViktor Prutyanov }
956ee071f67SViktor Prutyanov 
vhost_virtqueue_set_addr(struct vhost_dev * dev,struct vhost_virtqueue * vq,unsigned idx,bool enable_log)9576e790746SPaolo Bonzini static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
9586e790746SPaolo Bonzini                                     struct vhost_virtqueue *vq,
9596e790746SPaolo Bonzini                                     unsigned idx, bool enable_log)
9606e790746SPaolo Bonzini {
961b4ab225cSCindy Lu     struct vhost_vring_addr addr;
962b4ab225cSCindy Lu     int r;
963b4ab225cSCindy Lu     memset(&addr, 0, sizeof(struct vhost_vring_addr));
964b4ab225cSCindy Lu 
965b4ab225cSCindy Lu     if (dev->vhost_ops->vhost_vq_get_addr) {
966b4ab225cSCindy Lu         r = dev->vhost_ops->vhost_vq_get_addr(dev, &addr, vq);
967b4ab225cSCindy Lu         if (r < 0) {
9685d33ae4bSRoman Kagan             VHOST_OPS_DEBUG(r, "vhost_vq_get_addr failed");
9695d33ae4bSRoman Kagan             return r;
970b4ab225cSCindy Lu         }
971b4ab225cSCindy Lu     } else {
972b4ab225cSCindy Lu         addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc;
973b4ab225cSCindy Lu         addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail;
974b4ab225cSCindy Lu         addr.used_user_addr = (uint64_t)(unsigned long)vq->used;
975b4ab225cSCindy Lu     }
976b4ab225cSCindy Lu     addr.index = idx;
977b4ab225cSCindy Lu     addr.log_guest_addr = vq->used_phys;
978b4ab225cSCindy Lu     addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
979b4ab225cSCindy Lu     r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
9806e790746SPaolo Bonzini     if (r < 0) {
9815d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_addr failed");
9826e790746SPaolo Bonzini     }
9835d33ae4bSRoman Kagan     return r;
9846e790746SPaolo Bonzini }
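
/*
 * Editor's sketch (assumed values, fallback path without
 * vhost_vq_get_addr): with logging enabled the backend learns both the
 * ring's userspace addresses and where dirty-log writes for the used
 * ring must be accounted in guest physical memory:
 *
 *     struct vhost_vring_addr addr = {
 *         .index           = idx,
 *         .desc_user_addr  = (uintptr_t)vq->desc,
 *         .avail_user_addr = (uintptr_t)vq->avail,
 *         .used_user_addr  = (uintptr_t)vq->used,
 *         .log_guest_addr  = vq->used_phys,
 *         .flags           = 1 << VHOST_VRING_F_LOG,
 *     };
 */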
9856e790746SPaolo Bonzini 
vhost_dev_set_features(struct vhost_dev * dev,bool enable_log)986c471ad0eSJason Wang static int vhost_dev_set_features(struct vhost_dev *dev,
987c471ad0eSJason Wang                                   bool enable_log)
9886e790746SPaolo Bonzini {
9896e790746SPaolo Bonzini     uint64_t features = dev->acked_features;
9906e790746SPaolo Bonzini     int r;
9916e790746SPaolo Bonzini     if (enable_log) {
9929a2ba823SCornelia Huck         features |= 0x1ULL << VHOST_F_LOG_ALL;
9936e790746SPaolo Bonzini     }
994f7ef7e6eSJason Wang     if (!vhost_dev_has_iommu(dev)) {
995f7ef7e6eSJason Wang         features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);
996f7ef7e6eSJason Wang     }
9977a471694SCindy Lu     if (dev->vhost_ops->vhost_force_iommu) {
9987a471694SCindy Lu         if (dev->vhost_ops->vhost_force_iommu(dev) == true) {
9997a471694SCindy Lu             features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
10007a471694SCindy Lu        }
10017a471694SCindy Lu     }
100221e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_set_features(dev, features);
1003c6409692SMarc-André Lureau     if (r < 0) {
10045d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_features failed");
1005b37556edSJason Wang         goto out;
1006c6409692SMarc-André Lureau     }
1007b37556edSJason Wang     if (dev->vhost_ops->vhost_set_backend_cap) {
1008b37556edSJason Wang         r = dev->vhost_ops->vhost_set_backend_cap(dev);
1009b37556edSJason Wang         if (r < 0) {
10105d33ae4bSRoman Kagan             VHOST_OPS_DEBUG(r, "vhost_set_backend_cap failed");
1011b37556edSJason Wang             goto out;
1012b37556edSJason Wang         }
1013b37556edSJason Wang     }
1014b37556edSJason Wang 
1015b37556edSJason Wang out:
10165d33ae4bSRoman Kagan     return r;
10176e790746SPaolo Bonzini }
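
/*
 * Editor's illustration of the mask handling above (hypothetical feature
 * state): with logging requested and no vIOMMU in use, the backend is
 * offered
 *
 *     uint64_t features = dev->acked_features;
 *     features |= 0x1ULL << VHOST_F_LOG_ALL;             // enable_log
 *     features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);  // no vIOMMU
 */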
10186e790746SPaolo Bonzini 
vhost_dev_set_log(struct vhost_dev * dev,bool enable_log)10196e790746SPaolo Bonzini static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
10206e790746SPaolo Bonzini {
1021162bba7fSMarc-André Lureau     int r, i, idx;
10221e5a050fSDima Stepanov     hwaddr addr;
10231e5a050fSDima Stepanov 
10246e790746SPaolo Bonzini     r = vhost_dev_set_features(dev, enable_log);
10256e790746SPaolo Bonzini     if (r < 0) {
10266e790746SPaolo Bonzini         goto err_features;
10276e790746SPaolo Bonzini     }
10286e790746SPaolo Bonzini     for (i = 0; i < dev->nvqs; ++i) {
102925a2a920SThibaut Collet         idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
10301e5a050fSDima Stepanov         addr = virtio_queue_get_desc_addr(dev->vdev, idx);
10311e5a050fSDima Stepanov         if (!addr) {
10321e5a050fSDima Stepanov             /*
10331e5a050fSDima Stepanov              * The queue might not be ready to start. If that is the
10341e5a050fSDima Stepanov              * case, there is no reason to continue the process;
10351e5a050fSDima Stepanov              * similar logic is used by the vhost_virtqueue_start()
10361e5a050fSDima Stepanov              * routine.
10371e5a050fSDima Stepanov              */
10381e5a050fSDima Stepanov             continue;
10391e5a050fSDima Stepanov         }
104025a2a920SThibaut Collet         r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
10416e790746SPaolo Bonzini                                      enable_log);
10426e790746SPaolo Bonzini         if (r < 0) {
10436e790746SPaolo Bonzini             goto err_vq;
10446e790746SPaolo Bonzini         }
10456e790746SPaolo Bonzini     }
1046c5cd7e5fSSi-Wei Liu 
1047c5cd7e5fSSi-Wei Liu     /*
1048c5cd7e5fSSi-Wei Liu      * At log start we elect this device as the logger that will scan
1049c5cd7e5fSSi-Wei Liu      * the memory sections; the other devices skip the scan. This is
1050c5cd7e5fSSi-Wei Liu      * possible because the log is shared amongst all vhost devices of
1051c5cd7e5fSSi-Wei Liu      * a given backend type.
1052c5cd7e5fSSi-Wei Liu      */
1053c5cd7e5fSSi-Wei Liu     vhost_dev_elect_mem_logger(dev, enable_log);
1054c5cd7e5fSSi-Wei Liu 
10556e790746SPaolo Bonzini     return 0;
10566e790746SPaolo Bonzini err_vq:
10576e790746SPaolo Bonzini     for (; i >= 0; --i) {
105825a2a920SThibaut Collet         idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
10599ce305c8SNi Xun         addr = virtio_queue_get_desc_addr(dev->vdev, idx);
10609ce305c8SNi Xun         if (!addr) {
10619ce305c8SNi Xun             continue;
10629ce305c8SNi Xun         }
1063162bba7fSMarc-André Lureau         vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
10646e790746SPaolo Bonzini                                  dev->log_enabled);
10656e790746SPaolo Bonzini     }
1066162bba7fSMarc-André Lureau     vhost_dev_set_features(dev, dev->log_enabled);
10676e790746SPaolo Bonzini err_features:
10686e790746SPaolo Bonzini     return r;
10696e790746SPaolo Bonzini }
10706e790746SPaolo Bonzini 
vhost_migration_log(MemoryListener * listener,bool enable)1071705f7f2fSRaphael Norwitz static int vhost_migration_log(MemoryListener *listener, bool enable)
10726e790746SPaolo Bonzini {
10736e790746SPaolo Bonzini     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
10746e790746SPaolo Bonzini                                          memory_listener);
10756e790746SPaolo Bonzini     int r;
1076705f7f2fSRaphael Norwitz     if (enable == dev->log_enabled) {
10776e790746SPaolo Bonzini         return 0;
10786e790746SPaolo Bonzini     }
10796e790746SPaolo Bonzini     if (!dev->started) {
10806e790746SPaolo Bonzini         dev->log_enabled = enable;
10816e790746SPaolo Bonzini         return 0;
10826e790746SPaolo Bonzini     }
1083f5b22d06SDima Stepanov 
1084f5b22d06SDima Stepanov     r = 0;
10856e790746SPaolo Bonzini     if (!enable) {
10866e790746SPaolo Bonzini         r = vhost_dev_set_log(dev, false);
10876e790746SPaolo Bonzini         if (r < 0) {
1088f5b22d06SDima Stepanov             goto check_dev_state;
10896e790746SPaolo Bonzini         }
1090309750faSJason Wang         vhost_log_put(dev, false);
10916e790746SPaolo Bonzini     } else {
10926e790746SPaolo Bonzini         vhost_dev_log_resize(dev, vhost_get_log_size(dev));
10936e790746SPaolo Bonzini         r = vhost_dev_set_log(dev, true);
10946e790746SPaolo Bonzini         if (r < 0) {
1095f5b22d06SDima Stepanov             goto check_dev_state;
10966e790746SPaolo Bonzini         }
10976e790746SPaolo Bonzini     }
1098f5b22d06SDima Stepanov 
1099f5b22d06SDima Stepanov check_dev_state:
11006e790746SPaolo Bonzini     dev->log_enabled = enable;
1101f5b22d06SDima Stepanov     /*
1102f5b22d06SDima Stepanov      * vhost-user-* devices can change their state during log
1103f5b22d06SDima Stepanov      * initialization due to a disconnect, so check the device state
1104f5b22d06SDima Stepanov      * after the vhost communication.
1105f5b22d06SDima Stepanov      */
1106f5b22d06SDima Stepanov     if (!dev->started) {
1107f5b22d06SDima Stepanov         /*
1108f5b22d06SDima Stepanov          * Since the device is in the stopped state, it is okay for
1109f5b22d06SDima Stepanov          * migration. Return success.
1110f5b22d06SDima Stepanov          */
1111f5b22d06SDima Stepanov         r = 0;
1112f5b22d06SDima Stepanov     }
1113f5b22d06SDima Stepanov     if (r) {
1114cba42d61SMichael Tokarev         /* An error occurred. */
1115f5b22d06SDima Stepanov         dev->log_enabled = false;
1116f5b22d06SDima Stepanov     }
1117f5b22d06SDima Stepanov 
1118f5b22d06SDima Stepanov     return r;
11196e790746SPaolo Bonzini }
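
/*
 * Editor's note: the ordering in vhost_migration_log() matters. On
 * disable, the rings stop logging before the log buffer is dropped; on
 * enable, the log is (re)sized before the rings start writing to it.
 * Condensed sketch of the two paths:
 *
 *     disable: vhost_dev_set_log(dev, false);
 *              vhost_log_put(dev, false);
 *     enable:  vhost_dev_log_resize(dev, vhost_get_log_size(dev));
 *              vhost_dev_set_log(dev, true);
 */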
11206e790746SPaolo Bonzini 
vhost_log_global_start(MemoryListener * listener,Error ** errp)11213688fec8SCédric Le Goater static bool vhost_log_global_start(MemoryListener *listener, Error **errp)
11226e790746SPaolo Bonzini {
11236e790746SPaolo Bonzini     int r;
11246e790746SPaolo Bonzini 
11256e790746SPaolo Bonzini     r = vhost_migration_log(listener, true);
11266e790746SPaolo Bonzini     if (r < 0) {
11276e790746SPaolo Bonzini         abort();
11286e790746SPaolo Bonzini     }
11293688fec8SCédric Le Goater     return true;
11306e790746SPaolo Bonzini }
11316e790746SPaolo Bonzini 
vhost_log_global_stop(MemoryListener * listener)11326e790746SPaolo Bonzini static void vhost_log_global_stop(MemoryListener *listener)
11336e790746SPaolo Bonzini {
11346e790746SPaolo Bonzini     int r;
11356e790746SPaolo Bonzini 
11366e790746SPaolo Bonzini     r = vhost_migration_log(listener, false);
11376e790746SPaolo Bonzini     if (r < 0) {
11386e790746SPaolo Bonzini         abort();
11396e790746SPaolo Bonzini     }
11406e790746SPaolo Bonzini }
11416e790746SPaolo Bonzini 
vhost_log_start(MemoryListener * listener,MemoryRegionSection * section,int old,int new)11426e790746SPaolo Bonzini static void vhost_log_start(MemoryListener *listener,
1143b2dfd71cSPaolo Bonzini                             MemoryRegionSection *section,
1144b2dfd71cSPaolo Bonzini                             int old, int new)
11456e790746SPaolo Bonzini {
11466e790746SPaolo Bonzini     /* FIXME: implement */
11476e790746SPaolo Bonzini }
11486e790746SPaolo Bonzini 
vhost_log_stop(MemoryListener * listener,MemoryRegionSection * section,int old,int new)11496e790746SPaolo Bonzini static void vhost_log_stop(MemoryListener *listener,
1150b2dfd71cSPaolo Bonzini                            MemoryRegionSection *section,
1151b2dfd71cSPaolo Bonzini                            int old, int new)
11526e790746SPaolo Bonzini {
11536e790746SPaolo Bonzini     /* FIXME: implement */
11546e790746SPaolo Bonzini }
11556e790746SPaolo Bonzini 
115646f70ff1SGreg Kurz /* The vhost driver natively knows how to handle the vrings of
115746f70ff1SGreg Kurz  * non-cross-endian legacy devices and modern devices. Only legacy devices
115846f70ff1SGreg Kurz  * exposed to a bi-endian guest may require the vhost driver to use a
115946f70ff1SGreg Kurz  * specific endianness.
116046f70ff1SGreg Kurz  */
vhost_needs_vring_endian(VirtIODevice * vdev)1161a122ab24SGreg Kurz static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
1162a122ab24SGreg Kurz {
1163e5848123SGreg Kurz     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1164e5848123SGreg Kurz         return false;
1165e5848123SGreg Kurz     }
1166e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN
116746f70ff1SGreg Kurz     return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
1168a122ab24SGreg Kurz #else
116946f70ff1SGreg Kurz     return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
1170a122ab24SGreg Kurz #endif
1171a122ab24SGreg Kurz }
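
/*
 * Editor's illustration of the predicate above (little-endian host
 * assumed): only a legacy device facing a big-endian guest needs an
 * explicit vring endianness fixup:
 *
 *     VIRTIO_F_VERSION_1 device, any guest  -> false (always LE)
 *     legacy device, LE guest on LE host    -> false (native order)
 *     legacy device, BE guest on LE host    -> true  (cross-endian)
 */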
1172a122ab24SGreg Kurz 
vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev * dev,bool is_big_endian,int vhost_vq_index)117304b7a152SGreg Kurz static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
117404b7a152SGreg Kurz                                                    bool is_big_endian,
117504b7a152SGreg Kurz                                                    int vhost_vq_index)
117604b7a152SGreg Kurz {
11775d33ae4bSRoman Kagan     int r;
117804b7a152SGreg Kurz     struct vhost_vring_state s = {
117904b7a152SGreg Kurz         .index = vhost_vq_index,
118004b7a152SGreg Kurz         .num = is_big_endian
118104b7a152SGreg Kurz     };
118204b7a152SGreg Kurz 
11835d33ae4bSRoman Kagan     r = dev->vhost_ops->vhost_set_vring_endian(dev, &s);
11845d33ae4bSRoman Kagan     if (r < 0) {
11855d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_endian failed");
118604b7a152SGreg Kurz     }
11875d33ae4bSRoman Kagan     return r;
118804b7a152SGreg Kurz }
118904b7a152SGreg Kurz 
vhost_memory_region_lookup(struct vhost_dev * hdev,uint64_t gpa,uint64_t * uaddr,uint64_t * len)1190c471ad0eSJason Wang static int vhost_memory_region_lookup(struct vhost_dev *hdev,
1191c471ad0eSJason Wang                                       uint64_t gpa, uint64_t *uaddr,
1192c471ad0eSJason Wang                                       uint64_t *len)
1193c471ad0eSJason Wang {
1194c471ad0eSJason Wang     int i;
1195c471ad0eSJason Wang 
1196c471ad0eSJason Wang     for (i = 0; i < hdev->mem->nregions; i++) {
1197c471ad0eSJason Wang         struct vhost_memory_region *reg = hdev->mem->regions + i;
1198c471ad0eSJason Wang 
1199c471ad0eSJason Wang         if (gpa >= reg->guest_phys_addr &&
1200c471ad0eSJason Wang             reg->guest_phys_addr + reg->memory_size > gpa) {
1201c471ad0eSJason Wang             *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
1202c471ad0eSJason Wang             *len = reg->guest_phys_addr + reg->memory_size - gpa;
1203c471ad0eSJason Wang             return 0;
1204c471ad0eSJason Wang         }
1205c471ad0eSJason Wang     }
1206c471ad0eSJason Wang 
1207c471ad0eSJason Wang     return -EFAULT;
1208c471ad0eSJason Wang }
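
/*
 * Editor's worked example (illustrative numbers): for a region
 * { .guest_phys_addr = 0x40000000, .memory_size = 0x10000000,
 *   .userspace_addr = 0x7f0000000000 }, looking up gpa = 0x48000000
 * yields *uaddr = 0x7f0008000000 and *len = 0x08000000, the bytes
 * remaining in the region from gpa onwards.
 */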
1209c471ad0eSJason Wang 
vhost_device_iotlb_miss(struct vhost_dev * dev,uint64_t iova,int write)1210fc58bd0dSMaxime Coquelin int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
1211c471ad0eSJason Wang {
1212c471ad0eSJason Wang     IOMMUTLBEntry iotlb;
1213c471ad0eSJason Wang     uint64_t uaddr, len;
1214fc58bd0dSMaxime Coquelin     int ret = -EFAULT;
1215c471ad0eSJason Wang 
12167a064bccSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
1217c471ad0eSJason Wang 
1218ffcbbe72SPeter Xu     trace_vhost_iotlb_miss(dev, 1);
1219ffcbbe72SPeter Xu 
1220c471ad0eSJason Wang     iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
12217446eb07SPeter Maydell                                           iova, write,
12227446eb07SPeter Maydell                                           MEMTXATTRS_UNSPECIFIED);
1223c471ad0eSJason Wang     if (iotlb.target_as != NULL) {
1224fc58bd0dSMaxime Coquelin         ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
1225fc58bd0dSMaxime Coquelin                                          &uaddr, &len);
1226fc58bd0dSMaxime Coquelin         if (ret) {
1227ffcbbe72SPeter Xu             trace_vhost_iotlb_miss(dev, 3);
1228c471ad0eSJason Wang             error_report("Failed to look up the translated address "
1229c471ad0eSJason Wang                          "%"PRIx64, iotlb.translated_addr);
1230c471ad0eSJason Wang             goto out;
1231c471ad0eSJason Wang         }
1232c471ad0eSJason Wang 
1233c471ad0eSJason Wang         len = MIN(iotlb.addr_mask + 1, len);
1234c471ad0eSJason Wang         iova = iova & ~iotlb.addr_mask;
1235c471ad0eSJason Wang 
1236020e571bSMaxime Coquelin         ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
1237fc58bd0dSMaxime Coquelin                                                 len, iotlb.perm);
1238fc58bd0dSMaxime Coquelin         if (ret) {
1239ffcbbe72SPeter Xu             trace_vhost_iotlb_miss(dev, 4);
1240c471ad0eSJason Wang             error_report("Failed to update device iotlb");
1241c471ad0eSJason Wang             goto out;
1242c471ad0eSJason Wang         }
1243c471ad0eSJason Wang     }
1244ffcbbe72SPeter Xu 
1245ffcbbe72SPeter Xu     trace_vhost_iotlb_miss(dev, 2);
1246ffcbbe72SPeter Xu 
1247c471ad0eSJason Wang out:
1248fc58bd0dSMaxime Coquelin     return ret;
1249c471ad0eSJason Wang }
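
/*
 * Editor's note on the alignment above (illustrative values): for a 4KiB
 * translation, iotlb.addr_mask == 0xfff. A miss at iova = 0x12345 is
 * rounded down to 0x12000 and at most 0x1000 bytes are pushed to the
 * backend, so the device IOTLB entry always covers one whole, aligned
 * translation unit:
 *
 *     len  = MIN(iotlb.addr_mask + 1, len);   // 0x1000
 *     iova = iova & ~iotlb.addr_mask;         // 0x12000
 */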
1250c471ad0eSJason Wang 
vhost_virtqueue_start(struct vhost_dev * dev,struct VirtIODevice * vdev,struct vhost_virtqueue * vq,unsigned idx)1251ff48b628SKangjie Xu int vhost_virtqueue_start(struct vhost_dev *dev,
12526e790746SPaolo Bonzini                           struct VirtIODevice *vdev,
12536e790746SPaolo Bonzini                           struct vhost_virtqueue *vq,
12546e790746SPaolo Bonzini                           unsigned idx)
12556e790746SPaolo Bonzini {
125696a3d98dSJason Wang     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
125796a3d98dSJason Wang     VirtioBusState *vbus = VIRTIO_BUS(qbus);
125896a3d98dSJason Wang     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
12596e790746SPaolo Bonzini     hwaddr s, l, a;
12606e790746SPaolo Bonzini     int r;
126121e70425SMarc-André Lureau     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
12626e790746SPaolo Bonzini     struct vhost_vring_file file = {
12636e790746SPaolo Bonzini         .index = vhost_vq_index
12646e790746SPaolo Bonzini     };
12656e790746SPaolo Bonzini     struct vhost_vring_state state = {
12666e790746SPaolo Bonzini         .index = vhost_vq_index
12676e790746SPaolo Bonzini     };
12686e790746SPaolo Bonzini     struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
12696e790746SPaolo Bonzini 
1270fb20fbb7SJia He     a = virtio_queue_get_desc_addr(vdev, idx);
1271fb20fbb7SJia He     if (a == 0) {
1272fb20fbb7SJia He         /* The queue might not be ready to start */
1273fb20fbb7SJia He         return 0;
1274fb20fbb7SJia He     }
12756e790746SPaolo Bonzini 
12766e790746SPaolo Bonzini     vq->num = state.num = virtio_queue_get_num(vdev, idx);
127721e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
12786e790746SPaolo Bonzini     if (r) {
12795d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
12805d33ae4bSRoman Kagan         return r;
12816e790746SPaolo Bonzini     }
12826e790746SPaolo Bonzini 
12836e790746SPaolo Bonzini     state.num = virtio_queue_get_last_avail_idx(vdev, idx);
128421e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
12856e790746SPaolo Bonzini     if (r) {
12865d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_base failed");
12875d33ae4bSRoman Kagan         return r;
12886e790746SPaolo Bonzini     }
12896e790746SPaolo Bonzini 
1290e5848123SGreg Kurz     if (vhost_needs_vring_endian(vdev)) {
129104b7a152SGreg Kurz         r = vhost_virtqueue_set_vring_endian_legacy(dev,
129204b7a152SGreg Kurz                                                     virtio_is_big_endian(vdev),
129304b7a152SGreg Kurz                                                     vhost_vq_index);
129404b7a152SGreg Kurz         if (r) {
12955d33ae4bSRoman Kagan             return r;
129604b7a152SGreg Kurz         }
129704b7a152SGreg Kurz     }
129804b7a152SGreg Kurz 
1299f1f9e6c5SGreg Kurz     vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
1300fb20fbb7SJia He     vq->desc_phys = a;
1301b897a474SPhilippe Mathieu-Daudé     vq->desc = vhost_memory_map(dev, a, &l, false);
13026e790746SPaolo Bonzini     if (!vq->desc || l != s) {
13036e790746SPaolo Bonzini         r = -ENOMEM;
13046e790746SPaolo Bonzini         goto fail_alloc_desc;
13056e790746SPaolo Bonzini     }
1306f1f9e6c5SGreg Kurz     vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
1307f1f9e6c5SGreg Kurz     vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
1308b897a474SPhilippe Mathieu-Daudé     vq->avail = vhost_memory_map(dev, a, &l, false);
13096e790746SPaolo Bonzini     if (!vq->avail || l != s) {
13106e790746SPaolo Bonzini         r = -ENOMEM;
13116e790746SPaolo Bonzini         goto fail_alloc_avail;
13126e790746SPaolo Bonzini     }
13136e790746SPaolo Bonzini     vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
13146e790746SPaolo Bonzini     vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
1315b897a474SPhilippe Mathieu-Daudé     vq->used = vhost_memory_map(dev, a, &l, true);
13166e790746SPaolo Bonzini     if (!vq->used || l != s) {
13176e790746SPaolo Bonzini         r = -ENOMEM;
13186e790746SPaolo Bonzini         goto fail_alloc_used;
13196e790746SPaolo Bonzini     }
13206e790746SPaolo Bonzini 
13216e790746SPaolo Bonzini     r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
13226e790746SPaolo Bonzini     if (r < 0) {
13236e790746SPaolo Bonzini         goto fail_alloc;
13246e790746SPaolo Bonzini     }
13256e790746SPaolo Bonzini 
13266e790746SPaolo Bonzini     file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
132721e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
13286e790746SPaolo Bonzini     if (r) {
13295d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_kick failed");
13306e790746SPaolo Bonzini         goto fail_kick;
13316e790746SPaolo Bonzini     }
13326e790746SPaolo Bonzini 
13336e790746SPaolo Bonzini     /* Clear and discard previous events if any. */
13346e790746SPaolo Bonzini     event_notifier_test_and_clear(&vq->masked_notifier);
13356e790746SPaolo Bonzini 
13365669655aSVictor Kaplansky     /* Init vring in unmasked state, unless guest_notifier_mask
13375669655aSVictor Kaplansky      * will do it later.
13385669655aSVictor Kaplansky      */
13395669655aSVictor Kaplansky     if (!vdev->use_guest_notifier_mask) {
13405669655aSVictor Kaplansky         /* TODO: check and handle errors. */
13415669655aSVictor Kaplansky         vhost_virtqueue_mask(dev, vdev, idx, false);
13425669655aSVictor Kaplansky     }
13435669655aSVictor Kaplansky 
134496a3d98dSJason Wang     if (k->query_guest_notifiers &&
134596a3d98dSJason Wang         k->query_guest_notifiers(qbus->parent) &&
134696a3d98dSJason Wang         virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
134796a3d98dSJason Wang         file.fd = -1;
134896a3d98dSJason Wang         r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
134996a3d98dSJason Wang         if (r) {
135096a3d98dSJason Wang             goto fail_vector;
135196a3d98dSJason Wang         }
135296a3d98dSJason Wang     }
135396a3d98dSJason Wang 
13546e790746SPaolo Bonzini     return 0;
13556e790746SPaolo Bonzini 
135696a3d98dSJason Wang fail_vector:
13576e790746SPaolo Bonzini fail_kick:
13586e790746SPaolo Bonzini fail_alloc:
1359c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
13606e790746SPaolo Bonzini                        0, 0);
13616e790746SPaolo Bonzini fail_alloc_used:
1362c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
13636e790746SPaolo Bonzini                        0, 0);
13646e790746SPaolo Bonzini fail_alloc_avail:
1365c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
13666e790746SPaolo Bonzini                        0, 0);
13676e790746SPaolo Bonzini fail_alloc_desc:
13686e790746SPaolo Bonzini     return r;
13696e790746SPaolo Bonzini }
13706e790746SPaolo Bonzini 
vhost_virtqueue_stop(struct vhost_dev * dev,struct VirtIODevice * vdev,struct vhost_virtqueue * vq,unsigned idx)1371e1f101d9SKangjie Xu void vhost_virtqueue_stop(struct vhost_dev *dev,
13726e790746SPaolo Bonzini                           struct VirtIODevice *vdev,
13736e790746SPaolo Bonzini                           struct vhost_virtqueue *vq,
13746e790746SPaolo Bonzini                           unsigned idx)
13756e790746SPaolo Bonzini {
137621e70425SMarc-André Lureau     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
13776e790746SPaolo Bonzini     struct vhost_vring_state state = {
137804b7a152SGreg Kurz         .index = vhost_vq_index,
13796e790746SPaolo Bonzini     };
13806e790746SPaolo Bonzini     int r;
1381fb20fbb7SJia He 
1382fa4ae4beSYury Kotov     if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
1383fb20fbb7SJia He         /* Don't stop a virtqueue which might not have been started */
1384fb20fbb7SJia He         return;
1385fb20fbb7SJia He     }
1386fc57fd99SYuanhan Liu 
138721e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
13886e790746SPaolo Bonzini     if (r < 0) {
13895d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
13902ae39a11SMaxime Coquelin         /* The connection to the backend is broken, so let's sync the
13912ae39a11SMaxime Coquelin          * internal last avail idx to the device's used idx.
13922ae39a11SMaxime Coquelin          */
13932ae39a11SMaxime Coquelin         virtio_queue_restore_last_avail_idx(vdev, idx);
1394499c5579SMarc-André Lureau     } else {
13956e790746SPaolo Bonzini         virtio_queue_set_last_avail_idx(vdev, idx, state.num);
1396499c5579SMarc-André Lureau     }
13973561ba14SMichael S. Tsirkin     virtio_queue_invalidate_signalled_used(vdev, idx);
1398aa94d521SYuri Benditovich     virtio_queue_update_used_idx(vdev, idx);
139904b7a152SGreg Kurz 
140004b7a152SGreg Kurz     /* In the cross-endian case, we need to reset the vring endianness to
140104b7a152SGreg Kurz      * native, as legacy devices expect it by default.
140204b7a152SGreg Kurz      */
1403e5848123SGreg Kurz     if (vhost_needs_vring_endian(vdev)) {
1404162bba7fSMarc-André Lureau         vhost_virtqueue_set_vring_endian_legacy(dev,
140504b7a152SGreg Kurz                                                 !virtio_is_big_endian(vdev),
140604b7a152SGreg Kurz                                                 vhost_vq_index);
140704b7a152SGreg Kurz     }
140804b7a152SGreg Kurz 
1409c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
14106e790746SPaolo Bonzini                        1, virtio_queue_get_used_size(vdev, idx));
1411c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
14126e790746SPaolo Bonzini                        0, virtio_queue_get_avail_size(vdev, idx));
1413c471ad0eSJason Wang     vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
14146e790746SPaolo Bonzini                        0, virtio_queue_get_desc_size(vdev, idx));
14156e790746SPaolo Bonzini }
14166e790746SPaolo Bonzini 
vhost_virtqueue_set_busyloop_timeout(struct vhost_dev * dev,int n,uint32_t timeout)141769e87b32SJason Wang static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
141869e87b32SJason Wang                                                 int n, uint32_t timeout)
141969e87b32SJason Wang {
142069e87b32SJason Wang     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
142169e87b32SJason Wang     struct vhost_vring_state state = {
142269e87b32SJason Wang         .index = vhost_vq_index,
142369e87b32SJason Wang         .num = timeout,
142469e87b32SJason Wang     };
142569e87b32SJason Wang     int r;
142669e87b32SJason Wang 
142769e87b32SJason Wang     if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
142869e87b32SJason Wang         return -EINVAL;
142969e87b32SJason Wang     }
143069e87b32SJason Wang 
143169e87b32SJason Wang     r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
143269e87b32SJason Wang     if (r) {
14335d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_busyloop_timeout failed");
143469e87b32SJason Wang         return r;
143569e87b32SJason Wang     }
143669e87b32SJason Wang 
143769e87b32SJason Wang     return 0;
143869e87b32SJason Wang }
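
/*
 * Editor's note: struct vhost_vring_state has no dedicated timeout field,
 * so .num is reused to carry the busy-poll budget (microseconds for the
 * kernel vhost-net backend, matching the tap "poll-us" option).
 * Hypothetical use for a 50us budget on every queue:
 *
 *     for (i = 0; i < hdev->nvqs; i++) {
 *         vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
 *                                              50);
 *     }
 */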
143969e87b32SJason Wang 
vhost_virtqueue_error_notifier(EventNotifier * n)1440ae50ae0bSKonstantin Khlebnikov static void vhost_virtqueue_error_notifier(EventNotifier *n)
1441ae50ae0bSKonstantin Khlebnikov {
1442ae50ae0bSKonstantin Khlebnikov     struct vhost_virtqueue *vq = container_of(n, struct vhost_virtqueue,
1443ae50ae0bSKonstantin Khlebnikov                                               error_notifier);
1444ae50ae0bSKonstantin Khlebnikov     struct vhost_dev *dev = vq->dev;
1445ae50ae0bSKonstantin Khlebnikov     int index = vq - dev->vqs;
1446ae50ae0bSKonstantin Khlebnikov 
1447ae50ae0bSKonstantin Khlebnikov     if (event_notifier_test_and_clear(n) && dev->vdev) {
1448ae50ae0bSKonstantin Khlebnikov         VHOST_OPS_DEBUG(-EINVAL, "vhost vring error in virtqueue %d",
1449ae50ae0bSKonstantin Khlebnikov                         dev->vq_index + index);
1450ae50ae0bSKonstantin Khlebnikov     }
1451ae50ae0bSKonstantin Khlebnikov }
1452ae50ae0bSKonstantin Khlebnikov 
vhost_virtqueue_init(struct vhost_dev * dev,struct vhost_virtqueue * vq,int n)14536e790746SPaolo Bonzini static int vhost_virtqueue_init(struct vhost_dev *dev,
14546e790746SPaolo Bonzini                                 struct vhost_virtqueue *vq, int n)
14556e790746SPaolo Bonzini {
145621e70425SMarc-André Lureau     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
14576e790746SPaolo Bonzini     struct vhost_vring_file file = {
1458b931bfbfSChangchun Ouyang         .index = vhost_vq_index,
14596e790746SPaolo Bonzini     };
14606e790746SPaolo Bonzini     int r = event_notifier_init(&vq->masked_notifier, 0);
14616e790746SPaolo Bonzini     if (r < 0) {
14626e790746SPaolo Bonzini         return r;
14636e790746SPaolo Bonzini     }
14646e790746SPaolo Bonzini 
1465ff5eb77bSSergio Lopez     file.fd = event_notifier_get_wfd(&vq->masked_notifier);
146621e70425SMarc-André Lureau     r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
14676e790746SPaolo Bonzini     if (r) {
14685d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
14696e790746SPaolo Bonzini         goto fail_call;
14706e790746SPaolo Bonzini     }
1471c471ad0eSJason Wang 
1472c471ad0eSJason Wang     vq->dev = dev;
1473c471ad0eSJason Wang 
1474ae50ae0bSKonstantin Khlebnikov     if (dev->vhost_ops->vhost_set_vring_err) {
1475ae50ae0bSKonstantin Khlebnikov         r = event_notifier_init(&vq->error_notifier, 0);
1476ae50ae0bSKonstantin Khlebnikov         if (r < 0) {
1477ae50ae0bSKonstantin Khlebnikov             goto fail_call;
1478ae50ae0bSKonstantin Khlebnikov         }
1479ae50ae0bSKonstantin Khlebnikov 
1480ae50ae0bSKonstantin Khlebnikov         file.fd = event_notifier_get_fd(&vq->error_notifier);
1481ae50ae0bSKonstantin Khlebnikov         r = dev->vhost_ops->vhost_set_vring_err(dev, &file);
1482ae50ae0bSKonstantin Khlebnikov         if (r) {
1483ae50ae0bSKonstantin Khlebnikov             VHOST_OPS_DEBUG(r, "vhost_set_vring_err failed");
1484ae50ae0bSKonstantin Khlebnikov             goto fail_err;
1485ae50ae0bSKonstantin Khlebnikov         }
1486ae50ae0bSKonstantin Khlebnikov 
1487ae50ae0bSKonstantin Khlebnikov         event_notifier_set_handler(&vq->error_notifier,
1488ae50ae0bSKonstantin Khlebnikov                                    vhost_virtqueue_error_notifier);
1489ae50ae0bSKonstantin Khlebnikov     }
1490ae50ae0bSKonstantin Khlebnikov 
14916e790746SPaolo Bonzini     return 0;
1492ae50ae0bSKonstantin Khlebnikov 
1493ae50ae0bSKonstantin Khlebnikov fail_err:
1494ae50ae0bSKonstantin Khlebnikov     event_notifier_cleanup(&vq->error_notifier);
14956e790746SPaolo Bonzini fail_call:
14966e790746SPaolo Bonzini     event_notifier_cleanup(&vq->masked_notifier);
14976e790746SPaolo Bonzini     return r;
14986e790746SPaolo Bonzini }
14996e790746SPaolo Bonzini 
vhost_virtqueue_cleanup(struct vhost_virtqueue * vq)15006e790746SPaolo Bonzini static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
15016e790746SPaolo Bonzini {
15026e790746SPaolo Bonzini     event_notifier_cleanup(&vq->masked_notifier);
1503ae50ae0bSKonstantin Khlebnikov     if (vq->dev->vhost_ops->vhost_set_vring_err) {
1504ae50ae0bSKonstantin Khlebnikov         event_notifier_set_handler(&vq->error_notifier, NULL);
1505ae50ae0bSKonstantin Khlebnikov         event_notifier_cleanup(&vq->error_notifier);
1506ae50ae0bSKonstantin Khlebnikov     }
15076e790746SPaolo Bonzini }
15086e790746SPaolo Bonzini 
vhost_dev_init(struct vhost_dev * hdev,void * opaque,VhostBackendType backend_type,uint32_t busyloop_timeout,Error ** errp)150981647a65SNikolay Nikolaev int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
1510a6945f22SKevin Wolf                    VhostBackendType backend_type, uint32_t busyloop_timeout,
1511a6945f22SKevin Wolf                    Error **errp)
15126e790746SPaolo Bonzini {
1513766aa0a6SDavid Hildenbrand     unsigned int used, reserved, limit;
15146e790746SPaolo Bonzini     uint64_t features;
1515a06db3ecSMarc-André Lureau     int i, r, n_initialized_vqs = 0;
151681647a65SNikolay Nikolaev 
1517c471ad0eSJason Wang     hdev->vdev = NULL;
1518d2fc4402SMarc-André Lureau     hdev->migration_blocker = NULL;
1519d2fc4402SMarc-André Lureau 
15207cb8a9b9SMarc-André Lureau     r = vhost_set_backend_type(hdev, backend_type);
15217cb8a9b9SMarc-André Lureau     assert(r >= 0);
15221a1bfac9SNikolay Nikolaev 
152328770ff9SKevin Wolf     r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
15247cb8a9b9SMarc-André Lureau     if (r < 0) {
15257cb8a9b9SMarc-André Lureau         goto fail;
152624d1eb33SNikolay Nikolaev     }
152724d1eb33SNikolay Nikolaev 
152821e70425SMarc-André Lureau     r = hdev->vhost_ops->vhost_set_owner(hdev);
15296e790746SPaolo Bonzini     if (r < 0) {
1530f2a6e6c4SKevin Wolf         error_setg_errno(errp, -r, "vhost_set_owner failed");
15316e790746SPaolo Bonzini         goto fail;
15326e790746SPaolo Bonzini     }
15336e790746SPaolo Bonzini 
153421e70425SMarc-André Lureau     r = hdev->vhost_ops->vhost_get_features(hdev, &features);
15356e790746SPaolo Bonzini     if (r < 0) {
1536f2a6e6c4SKevin Wolf         error_setg_errno(errp, -r, "vhost_get_features failed");
15376e790746SPaolo Bonzini         goto fail;
15386e790746SPaolo Bonzini     }
15396e790746SPaolo Bonzini 
1540a2335113SDavid Hildenbrand     limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
1541a2335113SDavid Hildenbrand     if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
1542a2335113SDavid Hildenbrand         memory_devices_memslot_auto_decision_active()) {
1543a2335113SDavid Hildenbrand         error_setg(errp, "some memory device (like virtio-mem)"
1544a2335113SDavid Hildenbrand             " decided how many memory slots to use based on the overall"
1545a2335113SDavid Hildenbrand             " number of memory slots; this vhost backend would further"
1546a2335113SDavid Hildenbrand             " restrict the overall number of memory slots");
1547a2335113SDavid Hildenbrand         error_append_hint(errp, "Try plugging this vhost backend before"
1548a2335113SDavid Hildenbrand             " plugging such memory devices.\n");
1549a2335113SDavid Hildenbrand         r = -EINVAL;
1550a2335113SDavid Hildenbrand         goto fail;
1551a2335113SDavid Hildenbrand     }
1552a2335113SDavid Hildenbrand 
1553a06db3ecSMarc-André Lureau     for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
1554b931bfbfSChangchun Ouyang         r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
15556e790746SPaolo Bonzini         if (r < 0) {
1556a6945f22SKevin Wolf             error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
1557a06db3ecSMarc-André Lureau             goto fail;
15586e790746SPaolo Bonzini         }
15596e790746SPaolo Bonzini     }
156069e87b32SJason Wang 
156169e87b32SJason Wang     if (busyloop_timeout) {
156269e87b32SJason Wang         for (i = 0; i < hdev->nvqs; ++i) {
156369e87b32SJason Wang             r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
156469e87b32SJason Wang                                                      busyloop_timeout);
156569e87b32SJason Wang             if (r < 0) {
1566f2a6e6c4SKevin Wolf                 error_setg_errno(errp, -r, "Failed to set busyloop timeout");
156769e87b32SJason Wang                 goto fail_busyloop;
156869e87b32SJason Wang             }
156969e87b32SJason Wang         }
157069e87b32SJason Wang     }
157169e87b32SJason Wang 
15726e790746SPaolo Bonzini     hdev->features = features;
15736e790746SPaolo Bonzini 
15746e790746SPaolo Bonzini     hdev->memory_listener = (MemoryListener) {
1575142518bdSPeter Xu         .name = "vhost",
15766e790746SPaolo Bonzini         .begin = vhost_begin,
15776e790746SPaolo Bonzini         .commit = vhost_commit,
1578938eeb64SDr. David Alan Gilbert         .region_add = vhost_region_addnop,
1579938eeb64SDr. David Alan Gilbert         .region_nop = vhost_region_addnop,
15806e790746SPaolo Bonzini         .log_start = vhost_log_start,
15816e790746SPaolo Bonzini         .log_stop = vhost_log_stop,
15826e790746SPaolo Bonzini         .log_sync = vhost_log_sync,
15836e790746SPaolo Bonzini         .log_global_start = vhost_log_global_start,
15846e790746SPaolo Bonzini         .log_global_stop = vhost_log_global_stop,
15858be0461dSIsaku Yamahata         .priority = MEMORY_LISTENER_PRIORITY_DEV_BACKEND
15866e790746SPaolo Bonzini     };
1587d2fc4402SMarc-André Lureau 
1588375f74f4SJason Wang     hdev->iommu_listener = (MemoryListener) {
1589142518bdSPeter Xu         .name = "vhost-iommu",
1590375f74f4SJason Wang         .region_add = vhost_iommu_region_add,
1591375f74f4SJason Wang         .region_del = vhost_iommu_region_del,
1592375f74f4SJason Wang     };
1593c471ad0eSJason Wang 
1594d2fc4402SMarc-André Lureau     if (hdev->migration_blocker == NULL) {
15959a2ba823SCornelia Huck         if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
15967145872eSMichael S. Tsirkin             error_setg(&hdev->migration_blocker,
15977145872eSMichael S. Tsirkin                        "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
1598648abbfbSMarc-André Lureau         } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) {
159931190ed7SMarc-André Lureau             error_setg(&hdev->migration_blocker,
160031190ed7SMarc-André Lureau                        "Migration disabled: failed to allocate shared memory");
1601d2fc4402SMarc-André Lureau         }
1602d2fc4402SMarc-André Lureau     }
1603d2fc4402SMarc-André Lureau 
1604d2fc4402SMarc-André Lureau     if (hdev->migration_blocker != NULL) {
160589415796SSteve Sistare         r = migrate_add_blocker_normal(&hdev->migration_blocker, errp);
1606436c831aSMarkus Armbruster         if (r < 0) {
1607fe44dc91SAshijeet Acharya             goto fail_busyloop;
1608fe44dc91SAshijeet Acharya         }
16097145872eSMichael S. Tsirkin     }
1610d2fc4402SMarc-André Lureau 
16116e790746SPaolo Bonzini     hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
16126e790746SPaolo Bonzini     hdev->n_mem_sections = 0;
16136e790746SPaolo Bonzini     hdev->mem_sections = NULL;
16146e790746SPaolo Bonzini     hdev->log = NULL;
16156e790746SPaolo Bonzini     hdev->log_size = 0;
16166e790746SPaolo Bonzini     hdev->log_enabled = false;
16176e790746SPaolo Bonzini     hdev->started = false;
16186e790746SPaolo Bonzini     memory_listener_register(&hdev->memory_listener, &address_space_memory);
16195be5f9beSMarc-André Lureau     QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
16209e2a2a3eSJay Zhou 
1621552b2522SDavid Hildenbrand     /*
1622552b2522SDavid Hildenbrand      * The listener we registered properly updated the corresponding counter.
1623552b2522SDavid Hildenbrand      * So we can trust that these values are accurate.
1624552b2522SDavid Hildenbrand      */
1625552b2522SDavid Hildenbrand     if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
1626552b2522SDavid Hildenbrand         hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
1627552b2522SDavid Hildenbrand         used = used_shared_memslots;
1628552b2522SDavid Hildenbrand     } else {
1629552b2522SDavid Hildenbrand         used = used_memslots;
1630552b2522SDavid Hildenbrand     }
1631766aa0a6SDavid Hildenbrand     /*
1632766aa0a6SDavid Hildenbrand      * We assume that all reserved memslots actually require a real memslot
1633766aa0a6SDavid Hildenbrand      * in our vhost backend. This might not be true, for example, if the
1634766aa0a6SDavid Hildenbrand      * memslot were ROM. If ever relevant, we can optimize for that --
1635766aa0a6SDavid Hildenbrand      * but we'll need additional information about the reservations.
1636766aa0a6SDavid Hildenbrand      */
1637766aa0a6SDavid Hildenbrand     reserved = memory_devices_get_reserved_memslots();
1638766aa0a6SDavid Hildenbrand     if (used + reserved > limit) {
1639766aa0a6SDavid Hildenbrand         error_setg(errp, "vhost backend memory slots limit (%d) is less"
1640766aa0a6SDavid Hildenbrand                    " than the current number of used (%d) and reserved (%d)"
1641766aa0a6SDavid Hildenbrand                    " memory slots for memory devices", limit, used, reserved);
1642f2a6e6c4SKevin Wolf         r = -EINVAL;
16439e2a2a3eSJay Zhou         goto fail_busyloop;
16449e2a2a3eSJay Zhou     }
16459e2a2a3eSJay Zhou 
16466e790746SPaolo Bonzini     return 0;
1647a06db3ecSMarc-André Lureau 
164869e87b32SJason Wang fail_busyloop:
16491d8d014eSStefan Hajnoczi     if (busyloop_timeout) {
165069e87b32SJason Wang         while (--i >= 0) {
165169e87b32SJason Wang             vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
165269e87b32SJason Wang         }
16531d8d014eSStefan Hajnoczi     }
16546e790746SPaolo Bonzini fail:
1655a06db3ecSMarc-André Lureau     hdev->nvqs = n_initialized_vqs;
1656a06db3ecSMarc-André Lureau     vhost_dev_cleanup(hdev);
16576e790746SPaolo Bonzini     return r;
16586e790746SPaolo Bonzini }
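
/*
 * Editor's sketch of a typical caller (hypothetical device code, error
 * handling abbreviated; a busyloop timeout of 0 disables busy polling):
 *
 *     Error *err = NULL;
 *     hdev->nvqs = 2;
 *     hdev->vqs = vqs;
 *     hdev->vq_index = 0;
 *     if (vhost_dev_init(hdev, opaque, VHOST_BACKEND_TYPE_KERNEL,
 *                        0, &err) < 0) {
 *         error_report_err(err);
 *         return;
 *     }
 */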
16596e790746SPaolo Bonzini 
vhost_dev_cleanup(struct vhost_dev * hdev)16606e790746SPaolo Bonzini void vhost_dev_cleanup(struct vhost_dev *hdev)
16616e790746SPaolo Bonzini {
16626e790746SPaolo Bonzini     int i;
1663e0547b59SMarc-André Lureau 
1664a2761231SAlex Bennée     trace_vhost_dev_cleanup(hdev);
1665a2761231SAlex Bennée 
16666e790746SPaolo Bonzini     for (i = 0; i < hdev->nvqs; ++i) {
16676e790746SPaolo Bonzini         vhost_virtqueue_cleanup(hdev->vqs + i);
16686e790746SPaolo Bonzini     }
16695be5f9beSMarc-André Lureau     if (hdev->mem) {
16705be5f9beSMarc-André Lureau         /* those are only safe after successful init */
16716e790746SPaolo Bonzini         memory_listener_unregister(&hdev->memory_listener);
16725be5f9beSMarc-André Lureau         QLIST_REMOVE(hdev, entry);
16735be5f9beSMarc-André Lureau     }
1674c8a7fc51SSteve Sistare     migrate_del_blocker(&hdev->migration_blocker);
16756e790746SPaolo Bonzini     g_free(hdev->mem);
16766e790746SPaolo Bonzini     g_free(hdev->mem_sections);
1677e0547b59SMarc-André Lureau     if (hdev->vhost_ops) {
167824d1eb33SNikolay Nikolaev         hdev->vhost_ops->vhost_backend_cleanup(hdev);
1679e0547b59SMarc-André Lureau     }
16807b527247SMarc-André Lureau     assert(!hdev->log);
1681e0547b59SMarc-André Lureau 
1682e0547b59SMarc-André Lureau     memset(hdev, 0, sizeof(struct vhost_dev));
16836e790746SPaolo Bonzini }
16846e790746SPaolo Bonzini 
vhost_dev_disable_notifiers_nvqs(struct vhost_dev * hdev,VirtIODevice * vdev,unsigned int nvqs)16856166799fSzuoboqun void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
168692099aa4SLaurent Vivier                                       VirtIODevice *vdev,
168792099aa4SLaurent Vivier                                       unsigned int nvqs)
168892099aa4SLaurent Vivier {
168992099aa4SLaurent Vivier     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
169092099aa4SLaurent Vivier     int i, r;
169192099aa4SLaurent Vivier 
169292099aa4SLaurent Vivier     /*
169392099aa4SLaurent Vivier      * Batch all the host notifiers in a single transaction to avoid
169492099aa4SLaurent Vivier      * quadratic time complexity in address_space_update_ioeventfds().
169592099aa4SLaurent Vivier      */
169692099aa4SLaurent Vivier     memory_region_transaction_begin();
169792099aa4SLaurent Vivier 
169892099aa4SLaurent Vivier     for (i = 0; i < nvqs; ++i) {
169992099aa4SLaurent Vivier         r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
170092099aa4SLaurent Vivier                                          false);
170192099aa4SLaurent Vivier         if (r < 0) {
170292099aa4SLaurent Vivier             error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
170392099aa4SLaurent Vivier         }
170492099aa4SLaurent Vivier         assert(r >= 0);
170592099aa4SLaurent Vivier     }
170692099aa4SLaurent Vivier 
170792099aa4SLaurent Vivier     /*
170892099aa4SLaurent Vivier      * The transaction expects the ioeventfds to be open when it
170992099aa4SLaurent Vivier      * commits. Do it now, before the cleanup loop.
171092099aa4SLaurent Vivier      */
171192099aa4SLaurent Vivier     memory_region_transaction_commit();
171292099aa4SLaurent Vivier 
171392099aa4SLaurent Vivier     for (i = 0; i < nvqs; ++i) {
171492099aa4SLaurent Vivier         virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
171592099aa4SLaurent Vivier     }
171692099aa4SLaurent Vivier     virtio_device_release_ioeventfd(vdev);
171792099aa4SLaurent Vivier }
171892099aa4SLaurent Vivier 
17196e790746SPaolo Bonzini /* Stop processing guest IO notifications in qemu.
17206e790746SPaolo Bonzini  * Start processing them in the vhost backend.
17216e790746SPaolo Bonzini  */
vhost_dev_enable_notifiers(struct vhost_dev * hdev,VirtIODevice * vdev)17226e790746SPaolo Bonzini int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
17236e790746SPaolo Bonzini {
17241c819449SKONRAD Frederic     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
17258771589bSLongpeng     int i, r;
17264afba631SMarc-André Lureau 
1727310837deSPaolo Bonzini     /* We will pass the notifiers to the kernel; make sure that QEMU
1728310837deSPaolo Bonzini      * doesn't interfere.
1729310837deSPaolo Bonzini      */
1730310837deSPaolo Bonzini     r = virtio_device_grab_ioeventfd(vdev);
1731310837deSPaolo Bonzini     if (r < 0) {
17324afba631SMarc-André Lureau         error_report("binding does not support host notifiers");
17338771589bSLongpeng         return r;
17346e790746SPaolo Bonzini     }
17356e790746SPaolo Bonzini 
17360fdc6b85SLongpeng     /*
17370fdc6b85SLongpeng      * Batch all the host notifiers in a single transaction to avoid
17380fdc6b85SLongpeng      * quadratic time complexity in address_space_update_ioeventfds().
17390fdc6b85SLongpeng      */
17400fdc6b85SLongpeng     memory_region_transaction_begin();
17410fdc6b85SLongpeng 
17426e790746SPaolo Bonzini     for (i = 0; i < hdev->nvqs; ++i) {
1743b1f0a33dSCornelia Huck         r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1744b1f0a33dSCornelia Huck                                          true);
17456e790746SPaolo Bonzini         if (r < 0) {
17464afba631SMarc-André Lureau             error_report("vhost VQ %d notifier binding failed: %d", i, -r);
17470fdc6b85SLongpeng             memory_region_transaction_commit();
174892099aa4SLaurent Vivier             vhost_dev_disable_notifiers_nvqs(hdev, vdev, i);
17498771589bSLongpeng             return r;
17506e790746SPaolo Bonzini         }
17516e790746SPaolo Bonzini     }
17526e790746SPaolo Bonzini 
17530fdc6b85SLongpeng     memory_region_transaction_commit();
17540fdc6b85SLongpeng 
17556e790746SPaolo Bonzini     return 0;
17566e790746SPaolo Bonzini }
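
/*
 * Editor's note: enable/disable are strictly paired around device start
 * and stop. Condensed lifecycle of a hypothetical caller:
 *
 *     vhost_dev_enable_notifiers(hdev, vdev);   // ioeventfds -> vhost
 *     vhost_dev_start(hdev, vdev, true);
 *     ...
 *     vhost_dev_stop(hdev, vdev, true);
 *     vhost_dev_disable_notifiers(hdev, vdev);  // ioeventfds -> QEMU
 */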
17576e790746SPaolo Bonzini 
17586e790746SPaolo Bonzini /* Stop processing guest IO notifications in vhost.
17596e790746SPaolo Bonzini  * Start processing them in qemu.
17606e790746SPaolo Bonzini  * This might actually run the qemu handlers right away,
17616e790746SPaolo Bonzini  * so virtio in qemu must be completely setup when this is called.
17626e790746SPaolo Bonzini  */
vhost_dev_disable_notifiers(struct vhost_dev * hdev,VirtIODevice * vdev)17636e790746SPaolo Bonzini void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
17646e790746SPaolo Bonzini {
176592099aa4SLaurent Vivier     vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs);
17666e790746SPaolo Bonzini }
17676e790746SPaolo Bonzini 
17686e790746SPaolo Bonzini /* Test and clear event pending status.
17696e790746SPaolo Bonzini  * Should be called after unmask to avoid losing events.
17706e790746SPaolo Bonzini  */
vhost_virtqueue_pending(struct vhost_dev * hdev,int n)17716e790746SPaolo Bonzini bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
17726e790746SPaolo Bonzini {
17736e790746SPaolo Bonzini     struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
17746e790746SPaolo Bonzini     assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
17756e790746SPaolo Bonzini     return event_notifier_test_and_clear(&vq->masked_notifier);
17766e790746SPaolo Bonzini }
17776e790746SPaolo Bonzini 
17786e790746SPaolo Bonzini /* Mask/unmask events from this vq. */
vhost_virtqueue_mask(struct vhost_dev * hdev,VirtIODevice * vdev,int n,bool mask)17796e790746SPaolo Bonzini void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
17806e790746SPaolo Bonzini                          bool mask)
17816e790746SPaolo Bonzini {
17826e790746SPaolo Bonzini     struct VirtQueue *vvq = virtio_get_queue(vdev, n);
17836e790746SPaolo Bonzini     int r, index = n - hdev->vq_index;
1784fc57fd99SYuanhan Liu     struct vhost_vring_file file;
17856e790746SPaolo Bonzini 
17868695de0fSMarc-André Lureau     /* should only be called after backend is connected */
17878695de0fSMarc-André Lureau     assert(hdev->vhost_ops);
17888695de0fSMarc-André Lureau 
17896e790746SPaolo Bonzini     if (mask) {
17905669655aSVictor Kaplansky         assert(vdev->use_guest_notifier_mask);
1791ff5eb77bSSergio Lopez         file.fd = event_notifier_get_wfd(&hdev->vqs[index].masked_notifier);
17926e790746SPaolo Bonzini     } else {
1793ff5eb77bSSergio Lopez         file.fd = event_notifier_get_wfd(virtio_queue_get_guest_notifier(vvq));
17946e790746SPaolo Bonzini     }
1795fc57fd99SYuanhan Liu 
179621e70425SMarc-André Lureau     file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
179721e70425SMarc-André Lureau     r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
1798162bba7fSMarc-André Lureau     if (r < 0) {
1799f9a09ca3SCindy Lu         error_report("vhost_set_vring_call failed %d", -r);
1800f9a09ca3SCindy Lu     }
1801f9a09ca3SCindy Lu }
1802f9a09ca3SCindy Lu 
vhost_config_pending(struct vhost_dev * hdev)1803f9a09ca3SCindy Lu bool vhost_config_pending(struct vhost_dev *hdev)
1804f9a09ca3SCindy Lu {
1805f9a09ca3SCindy Lu     assert(hdev->vhost_ops);
1806f9a09ca3SCindy Lu     if ((hdev->started == false) ||
1807f9a09ca3SCindy Lu         (hdev->vhost_ops->vhost_set_config_call == NULL)) {
1808f9a09ca3SCindy Lu         return false;
1809f9a09ca3SCindy Lu     }
1810f9a09ca3SCindy Lu 
1811f9a09ca3SCindy Lu     EventNotifier *notifier =
1812f9a09ca3SCindy Lu         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
1813f9a09ca3SCindy Lu     return event_notifier_test_and_clear(notifier);
1814f9a09ca3SCindy Lu }
1815f9a09ca3SCindy Lu 
vhost_config_mask(struct vhost_dev * hdev,VirtIODevice * vdev,bool mask)1816f9a09ca3SCindy Lu void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
1817f9a09ca3SCindy Lu {
1818f9a09ca3SCindy Lu     int fd;
1819f9a09ca3SCindy Lu     int r;
1820f9a09ca3SCindy Lu     EventNotifier *notifier =
1821f9a09ca3SCindy Lu         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
1822f9a09ca3SCindy Lu     EventNotifier *config_notifier = &vdev->config_notifier;
1823f9a09ca3SCindy Lu     assert(hdev->vhost_ops);
1824f9a09ca3SCindy Lu 
1825f9a09ca3SCindy Lu     if ((hdev->started == false) ||
1826f9a09ca3SCindy Lu         (hdev->vhost_ops->vhost_set_config_call == NULL)) {
1827f9a09ca3SCindy Lu         return;
1828f9a09ca3SCindy Lu     }
1829f9a09ca3SCindy Lu     if (mask) {
1830f9a09ca3SCindy Lu         assert(vdev->use_guest_notifier_mask);
1831f9a09ca3SCindy Lu         fd = event_notifier_get_fd(notifier);
1832f9a09ca3SCindy Lu     } else {
1833f9a09ca3SCindy Lu         fd = event_notifier_get_fd(config_notifier);
1834f9a09ca3SCindy Lu     }
1835f9a09ca3SCindy Lu     r = hdev->vhost_ops->vhost_set_config_call(hdev, fd);
1836f9a09ca3SCindy Lu     if (r < 0) {
1837f9a09ca3SCindy Lu         error_report("vhost_set_config_call failed %d", -r);
1838f9a09ca3SCindy Lu     }
1839f9a09ca3SCindy Lu }
1840f9a09ca3SCindy Lu 
vhost_stop_config_intr(struct vhost_dev * dev)1841f9a09ca3SCindy Lu static void vhost_stop_config_intr(struct vhost_dev *dev)
1842f9a09ca3SCindy Lu {
1843f9a09ca3SCindy Lu     int fd = -1;
1844f9a09ca3SCindy Lu     assert(dev->vhost_ops);
1845f9a09ca3SCindy Lu     if (dev->vhost_ops->vhost_set_config_call) {
1846f9a09ca3SCindy Lu         dev->vhost_ops->vhost_set_config_call(dev, fd);
1847f9a09ca3SCindy Lu     }
1848f9a09ca3SCindy Lu }
1849f9a09ca3SCindy Lu 
1850f9a09ca3SCindy Lu static void vhost_start_config_intr(struct vhost_dev *dev)
1851f9a09ca3SCindy Lu {
1852f9a09ca3SCindy Lu     int r;
1853f9a09ca3SCindy Lu 
1854f9a09ca3SCindy Lu     assert(dev->vhost_ops);
1855f9a09ca3SCindy Lu     int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
1856f9a09ca3SCindy Lu     if (dev->vhost_ops->vhost_set_config_call) {
1857f9a09ca3SCindy Lu         r = dev->vhost_ops->vhost_set_config_call(dev, fd);
1858f9a09ca3SCindy Lu         if (!r) {
1859f9a09ca3SCindy Lu             event_notifier_set(&dev->vdev->config_notifier);
1860f9a09ca3SCindy Lu         }
1861162bba7fSMarc-André Lureau     }
18626e790746SPaolo Bonzini }
18636e790746SPaolo Bonzini 
18649a2ba823SCornelia Huck uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
18659a2ba823SCornelia Huck                             uint64_t features)
18662e6d46d7SNikolay Nikolaev {
18672e6d46d7SNikolay Nikolaev     const int *bit = feature_bits;
18682e6d46d7SNikolay Nikolaev     while (*bit != VHOST_INVALID_FEATURE_BIT) {
18699a2ba823SCornelia Huck         uint64_t bit_mask = (1ULL << *bit);
18702e6d46d7SNikolay Nikolaev         if (!(hdev->features & bit_mask)) {
18712e6d46d7SNikolay Nikolaev             features &= ~bit_mask;
18722e6d46d7SNikolay Nikolaev         }
18732e6d46d7SNikolay Nikolaev         bit++;
18742e6d46d7SNikolay Nikolaev     }
18752e6d46d7SNikolay Nikolaev     return features;
18762e6d46d7SNikolay Nikolaev }
18772e6d46d7SNikolay Nikolaev 
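/*
 * Callers pass a VHOST_INVALID_FEATURE_BIT-terminated list of the feature
 * bits the backend is allowed to influence; every listed bit the backend
 * did not offer is cleared from @features.
 *
 * Illustrative sketch (hypothetical device code, not part of this file):
 *
 *     static const int example_feature_bits[] = {
 *         VIRTIO_RING_F_INDIRECT_DESC,
 *         VIRTIO_RING_F_EVENT_IDX,
 *         VIRTIO_F_VERSION_1,
 *         VHOST_INVALID_FEATURE_BIT
 *     };
 *
 *     features = vhost_get_features(hdev, example_feature_bits, features);
 */
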
18782e6d46d7SNikolay Nikolaev void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
18799a2ba823SCornelia Huck                         uint64_t features)
18802e6d46d7SNikolay Nikolaev {
18812e6d46d7SNikolay Nikolaev     const int *bit = feature_bits;
18822e6d46d7SNikolay Nikolaev     while (*bit != VHOST_INVALID_FEATURE_BIT) {
18839a2ba823SCornelia Huck         uint64_t bit_mask = (1ULL << *bit);
18842e6d46d7SNikolay Nikolaev         if (features & bit_mask) {
18852e6d46d7SNikolay Nikolaev             hdev->acked_features |= bit_mask;
18862e6d46d7SNikolay Nikolaev         }
18872e6d46d7SNikolay Nikolaev         bit++;
18882e6d46d7SNikolay Nikolaev     }
18892e6d46d7SNikolay Nikolaev }
18902e6d46d7SNikolay Nikolaev 
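/*
 * The mirror operation at negotiation time: record which of the listed
 * bits the guest actually accepted, so that vhost_dev_set_features() can
 * push them to the backend when the device starts.
 *
 * Illustrative sketch (reusing the hypothetical list above):
 *
 *     vhost_ack_features(hdev, example_feature_bits, vdev->guest_features);
 */
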
18914c3e257bSChangpeng Liu int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
189250de5138SKevin Wolf                          uint32_t config_len, Error **errp)
18934c3e257bSChangpeng Liu {
18944c3e257bSChangpeng Liu     assert(hdev->vhost_ops);
18954c3e257bSChangpeng Liu 
18964c3e257bSChangpeng Liu     if (hdev->vhost_ops->vhost_get_config) {
189766647ed4SMarkus Armbruster         return hdev->vhost_ops->vhost_get_config(hdev, config, config_len,
189866647ed4SMarkus Armbruster                                                  errp);
18994c3e257bSChangpeng Liu     }
19004c3e257bSChangpeng Liu 
190150de5138SKevin Wolf     error_setg(errp, "vhost_get_config not implemented");
19025d33ae4bSRoman Kagan     return -ENOSYS;
19034c3e257bSChangpeng Liu }
19044c3e257bSChangpeng Liu 
19054c3e257bSChangpeng Liu int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
19064c3e257bSChangpeng Liu                          uint32_t offset, uint32_t size, uint32_t flags)
19074c3e257bSChangpeng Liu {
19084c3e257bSChangpeng Liu     assert(hdev->vhost_ops);
19094c3e257bSChangpeng Liu 
19104c3e257bSChangpeng Liu     if (hdev->vhost_ops->vhost_set_config) {
19114c3e257bSChangpeng Liu         return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
19124c3e257bSChangpeng Liu                                                  size, flags);
19134c3e257bSChangpeng Liu     }
19144c3e257bSChangpeng Liu 
19155d33ae4bSRoman Kagan     return -ENOSYS;
19164c3e257bSChangpeng Liu }
19174c3e257bSChangpeng Liu 
19184c3e257bSChangpeng Liu void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
19194c3e257bSChangpeng Liu                                    const VhostDevConfigOps *ops)
19204c3e257bSChangpeng Liu {
19214c3e257bSChangpeng Liu     hdev->config_ops = ops;
19224c3e257bSChangpeng Liu }
19234c3e257bSChangpeng Liu 
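/*
 * Illustrative sketch of registering a config-space change callback
 * (hypothetical handler names, not part of this file):
 *
 *     static int example_config_notifier(struct vhost_dev *dev)
 *     {
 *         virtio_notify_config(dev->vdev);  // forward to the guest
 *         return 0;
 *     }
 *
 *     static const VhostDevConfigOps example_config_ops = {
 *         .vhost_dev_config_notifier = example_config_notifier,
 *     };
 *
 *     vhost_dev_set_config_notifier(hdev, &example_config_ops);
 */
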
19245ad204bfSXie Yongji void vhost_dev_free_inflight(struct vhost_inflight *inflight)
19255ad204bfSXie Yongji {
19260ac2e635SLi Feng     if (inflight && inflight->addr) {
19275ad204bfSXie Yongji         qemu_memfd_free(inflight->addr, inflight->size, inflight->fd);
19285ad204bfSXie Yongji         inflight->addr = NULL;
19295ad204bfSXie Yongji         inflight->fd = -1;
19305ad204bfSXie Yongji     }
19315ad204bfSXie Yongji }
19325ad204bfSXie Yongji 
19331b0063b3SJin Yu int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
19341b0063b3SJin Yu {
19351b0063b3SJin Yu     int r;
19361b0063b3SJin Yu 
19371b0063b3SJin Yu     if (hdev->vhost_ops->vhost_get_inflight_fd == NULL ||
19381b0063b3SJin Yu         hdev->vhost_ops->vhost_set_inflight_fd == NULL) {
19391b0063b3SJin Yu         return 0;
19401b0063b3SJin Yu     }
19411b0063b3SJin Yu 
19421b0063b3SJin Yu     hdev->vdev = vdev;
19431b0063b3SJin Yu 
19441b0063b3SJin Yu     r = vhost_dev_set_features(hdev, hdev->log_enabled);
19451b0063b3SJin Yu     if (r < 0) {
19465d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_dev_prepare_inflight failed");
19471b0063b3SJin Yu         return r;
19481b0063b3SJin Yu     }
19491b0063b3SJin Yu 
19501b0063b3SJin Yu     return 0;
19511b0063b3SJin Yu }
19521b0063b3SJin Yu 
19535ad204bfSXie Yongji int vhost_dev_set_inflight(struct vhost_dev *dev,
19545ad204bfSXie Yongji                            struct vhost_inflight *inflight)
19555ad204bfSXie Yongji {
19565ad204bfSXie Yongji     int r;
19575ad204bfSXie Yongji 
19585ad204bfSXie Yongji     if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) {
19595ad204bfSXie Yongji         r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
19605ad204bfSXie Yongji         if (r) {
19615d33ae4bSRoman Kagan             VHOST_OPS_DEBUG(r, "vhost_set_inflight_fd failed");
19625d33ae4bSRoman Kagan             return r;
19635ad204bfSXie Yongji         }
19645ad204bfSXie Yongji     }
19655ad204bfSXie Yongji 
19665ad204bfSXie Yongji     return 0;
19675ad204bfSXie Yongji }
19685ad204bfSXie Yongji 
19695ad204bfSXie Yongji int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
19705ad204bfSXie Yongji                            struct vhost_inflight *inflight)
19715ad204bfSXie Yongji {
19725ad204bfSXie Yongji     int r;
19735ad204bfSXie Yongji 
19745ad204bfSXie Yongji     if (dev->vhost_ops->vhost_get_inflight_fd) {
19755ad204bfSXie Yongji         r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
19765ad204bfSXie Yongji         if (r) {
19775d33ae4bSRoman Kagan             VHOST_OPS_DEBUG(r, "vhost_get_inflight_fd failed");
19785d33ae4bSRoman Kagan             return r;
19795ad204bfSXie Yongji         }
19805ad204bfSXie Yongji     }
19815ad204bfSXie Yongji 
19825ad204bfSXie Yongji     return 0;
19835ad204bfSXie Yongji }
19845ad204bfSXie Yongji 
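/*
 * Rough lifecycle of the inflight region across a backend reconnect, as a
 * hedged sketch (vhost-user-blk follows approximately this pattern; names
 * are illustrative):
 *
 *     struct vhost_inflight *inflight = ...;   // zero-initialized once
 *
 *     vhost_dev_prepare_inflight(hdev, vdev);
 *     if (!inflight->addr) {
 *         vhost_dev_get_inflight(hdev, queue_size, inflight);
 *     }
 *     vhost_dev_set_inflight(hdev, inflight);
 *     ...
 *     vhost_dev_free_inflight(inflight);       // on final cleanup
 */
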
19854daa5054SStefano Garzarella static int vhost_dev_set_vring_enable(struct vhost_dev *hdev, int enable)
19864daa5054SStefano Garzarella {
19874daa5054SStefano Garzarella     if (!hdev->vhost_ops->vhost_set_vring_enable) {
19884daa5054SStefano Garzarella         return 0;
19894daa5054SStefano Garzarella     }
19904daa5054SStefano Garzarella 
19914daa5054SStefano Garzarella     /*
19924daa5054SStefano Garzarella      * For vhost-user devices, if VHOST_USER_F_PROTOCOL_FEATURES has not
19934daa5054SStefano Garzarella      * been negotiated, the rings start directly in the enabled state, and
19944daa5054SStefano Garzarella      * the .vhost_set_vring_enable callback will fail since
19954daa5054SStefano Garzarella      * VHOST_USER_SET_VRING_ENABLE is not supported.
19964daa5054SStefano Garzarella      */
19974daa5054SStefano Garzarella     if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER &&
19984daa5054SStefano Garzarella         !virtio_has_feature(hdev->backend_features,
19994daa5054SStefano Garzarella                             VHOST_USER_F_PROTOCOL_FEATURES)) {
20004daa5054SStefano Garzarella         return 0;
20014daa5054SStefano Garzarella     }
20024daa5054SStefano Garzarella 
20034daa5054SStefano Garzarella     return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable);
20044daa5054SStefano Garzarella }
20054daa5054SStefano Garzarella 
20062c66de61SKevin Wolf /*
20072c66de61SKevin Wolf  * Host notifiers must be enabled at this point.
20082c66de61SKevin Wolf  *
20092c66de61SKevin Wolf  * If @vrings is true, this function will enable all vrings before starting the
20102c66de61SKevin Wolf  * device. If it is false, the vring initialization is left to be done by the
20112c66de61SKevin Wolf  * caller.
20122c66de61SKevin Wolf  */
20134daa5054SStefano Garzarella int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
20146e790746SPaolo Bonzini {
20156e790746SPaolo Bonzini     int i, r;
20166e790746SPaolo Bonzini 
20178695de0fSMarc-André Lureau     /* should only be called after backend is connected */
20188695de0fSMarc-André Lureau     assert(hdev->vhost_ops);
20198695de0fSMarc-André Lureau 
20204daa5054SStefano Garzarella     trace_vhost_dev_start(hdev, vdev->name, vrings);
2021a2761231SAlex Bennée 
2022c255488dSJonah Palmer     vdev->vhost_started = true;
20236e790746SPaolo Bonzini     hdev->started = true;
2024c471ad0eSJason Wang     hdev->vdev = vdev;
20256e790746SPaolo Bonzini 
20266e790746SPaolo Bonzini     r = vhost_dev_set_features(hdev, hdev->log_enabled);
20276e790746SPaolo Bonzini     if (r < 0) {
20286e790746SPaolo Bonzini         goto fail_features;
20296e790746SPaolo Bonzini     }
2030c471ad0eSJason Wang 
2031c471ad0eSJason Wang     if (vhost_dev_has_iommu(hdev)) {
2032375f74f4SJason Wang         memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
2033c471ad0eSJason Wang     }
2034c471ad0eSJason Wang 
203521e70425SMarc-André Lureau     r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
20366e790746SPaolo Bonzini     if (r < 0) {
20375d33ae4bSRoman Kagan         VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
20386e790746SPaolo Bonzini         goto fail_mem;
20396e790746SPaolo Bonzini     }
20406e790746SPaolo Bonzini     for (i = 0; i < hdev->nvqs; ++i) {
20416e790746SPaolo Bonzini         r = vhost_virtqueue_start(hdev,
20426e790746SPaolo Bonzini                                   vdev,
20436e790746SPaolo Bonzini                                   hdev->vqs + i,
20446e790746SPaolo Bonzini                                   hdev->vq_index + i);
20456e790746SPaolo Bonzini         if (r < 0) {
20466e790746SPaolo Bonzini             goto fail_vq;
20476e790746SPaolo Bonzini         }
20486e790746SPaolo Bonzini     }
20496e790746SPaolo Bonzini 
2050f9a09ca3SCindy Lu     r = event_notifier_init(
2051f9a09ca3SCindy Lu         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0);
2052f9a09ca3SCindy Lu     if (r < 0) {
205377ece20bSPrasad Pandit         VHOST_OPS_DEBUG(r, "event_notifier_init failed");
205477ece20bSPrasad Pandit         goto fail_vq;
2055f9a09ca3SCindy Lu     }
2056f9a09ca3SCindy Lu     event_notifier_test_and_clear(
2057f9a09ca3SCindy Lu         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
2058f9a09ca3SCindy Lu     if (!vdev->use_guest_notifier_mask) {
2059f9a09ca3SCindy Lu         vhost_config_mask(hdev, vdev, true);
2060f9a09ca3SCindy Lu     }
20616e790746SPaolo Bonzini     if (hdev->log_enabled) {
2062e05ca820SMichael S. Tsirkin         uint64_t log_base;
2063e05ca820SMichael S. Tsirkin 
20646e790746SPaolo Bonzini         hdev->log_size = vhost_get_log_size(hdev);
206551d59a64SSi-Wei Liu         hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
206651d59a64SSi-Wei Liu                                   hdev->log_size,
206715324404SMarc-André Lureau                                   vhost_dev_log_is_shared(hdev));
2068309750faSJason Wang         log_base = (uintptr_t)hdev->log->log;
2069c2bea314SMarc-André Lureau         r = hdev->vhost_ops->vhost_set_log_base(hdev,
20709a78a5ddSMarc-André Lureau                                                 hdev->log_size ? log_base : 0,
20719a78a5ddSMarc-André Lureau                                                 hdev->log);
20726e790746SPaolo Bonzini         if (r < 0) {
20735d33ae4bSRoman Kagan             VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
20746e790746SPaolo Bonzini             goto fail_log;
20756e790746SPaolo Bonzini         }
2076c5cd7e5fSSi-Wei Liu         vhost_dev_elect_mem_logger(hdev, true);
20776e790746SPaolo Bonzini     }
20784daa5054SStefano Garzarella     if (vrings) {
20794daa5054SStefano Garzarella         r = vhost_dev_set_vring_enable(hdev, true);
20804daa5054SStefano Garzarella         if (r) {
20814daa5054SStefano Garzarella             goto fail_log;
20824daa5054SStefano Garzarella         }
20834daa5054SStefano Garzarella     }
2084ca71db43SCindy Lu     if (hdev->vhost_ops->vhost_dev_start) {
2085ca71db43SCindy Lu         r = hdev->vhost_ops->vhost_dev_start(hdev, true);
2086ca71db43SCindy Lu         if (r) {
20874daa5054SStefano Garzarella             goto fail_start;
2088ca71db43SCindy Lu         }
2089ca71db43SCindy Lu     }
20903f63b4c6SJason Wang     if (vhost_dev_has_iommu(hdev) &&
20913f63b4c6SJason Wang         hdev->vhost_ops->vhost_set_iotlb_callback) {
2092c471ad0eSJason Wang         hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
2093c471ad0eSJason Wang 
2094c471ad0eSJason Wang         /* Update used ring information for IOTLB to work correctly;
2095c471ad0eSJason Wang          * the vhost-kernel code requires this. */
2096c471ad0eSJason Wang         for (i = 0; i < hdev->nvqs; ++i) {
2097c471ad0eSJason Wang             struct vhost_virtqueue *vq = hdev->vqs + i;
2098*571bdc97SPrasad Pandit             r = vhost_device_iotlb_miss(hdev, vq->used_phys, true);
2099*571bdc97SPrasad Pandit             if (r) {
2100*571bdc97SPrasad Pandit                 goto fail_iotlb;
2101*571bdc97SPrasad Pandit             }
2102c471ad0eSJason Wang         }
2103c471ad0eSJason Wang     }
2104f9a09ca3SCindy Lu     vhost_start_config_intr(hdev);
21056e790746SPaolo Bonzini     return 0;
2106*571bdc97SPrasad Pandit fail_iotlb:
2107*571bdc97SPrasad Pandit     if (vhost_dev_has_iommu(hdev) &&
2108*571bdc97SPrasad Pandit         hdev->vhost_ops->vhost_set_iotlb_callback) {
2109*571bdc97SPrasad Pandit         hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
2110*571bdc97SPrasad Pandit     }
2111*571bdc97SPrasad Pandit     if (hdev->vhost_ops->vhost_dev_start) {
2112*571bdc97SPrasad Pandit         hdev->vhost_ops->vhost_dev_start(hdev, false);
2113*571bdc97SPrasad Pandit     }
21144daa5054SStefano Garzarella fail_start:
21154daa5054SStefano Garzarella     if (vrings) {
21164daa5054SStefano Garzarella         vhost_dev_set_vring_enable(hdev, false);
21174daa5054SStefano Garzarella     }
21186e790746SPaolo Bonzini fail_log:
2119309750faSJason Wang     vhost_log_put(hdev, false);
21206e790746SPaolo Bonzini fail_vq:
21216e790746SPaolo Bonzini     while (--i >= 0) {
21226e790746SPaolo Bonzini         vhost_virtqueue_stop(hdev,
21236e790746SPaolo Bonzini                              vdev,
21246e790746SPaolo Bonzini                              hdev->vqs + i,
21256e790746SPaolo Bonzini                              hdev->vq_index + i);
21266e790746SPaolo Bonzini     }
2127c471ad0eSJason Wang 
21286e790746SPaolo Bonzini fail_mem:
21291e3ffb34SPrasad Pandit     if (vhost_dev_has_iommu(hdev)) {
21301e3ffb34SPrasad Pandit         memory_listener_unregister(&hdev->iommu_listener);
21311e3ffb34SPrasad Pandit     }
21326e790746SPaolo Bonzini fail_features:
2133c255488dSJonah Palmer     vdev->vhost_started = false;
21346e790746SPaolo Bonzini     hdev->started = false;
21356e790746SPaolo Bonzini     return r;
21366e790746SPaolo Bonzini }
21376e790746SPaolo Bonzini 
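/*
 * To summarize the start sequence above: negotiate features, install the
 * memory table, start every virtqueue, arm the config notifier, enable
 * dirty logging when requested, optionally enable the vrings, run the
 * backend's own start hook, prime the IOTLB with the used rings, and
 * finally start the config interrupt.
 *
 * Illustrative caller (hedged sketch of what virtio devices typically do
 * from their status callback; guest notifier setup is omitted):
 *
 *     r = vhost_dev_enable_notifiers(hdev, vdev);  // host notifiers first
 *     if (r < 0) {
 *         return r;
 *     }
 *     r = vhost_dev_start(hdev, vdev, true);
 *     if (r < 0) {
 *         vhost_dev_disable_notifiers(hdev, vdev);
 *         return r;
 *     }
 */
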
21386e790746SPaolo Bonzini /* Host notifiers must be enabled at this point. */
21394daa5054SStefano Garzarella void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
21406e790746SPaolo Bonzini {
21416e790746SPaolo Bonzini     int i;
21426e790746SPaolo Bonzini 
21438695de0fSMarc-André Lureau     /* should only be called after backend is connected */
21448695de0fSMarc-André Lureau     assert(hdev->vhost_ops);
2145f9a09ca3SCindy Lu     event_notifier_test_and_clear(
2146f9a09ca3SCindy Lu         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
2147f9a09ca3SCindy Lu     event_notifier_test_and_clear(&vdev->config_notifier);
214818f2971cSLi Feng     event_notifier_cleanup(
214918f2971cSLi Feng         &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
21508695de0fSMarc-André Lureau 
21514daa5054SStefano Garzarella     trace_vhost_dev_stop(hdev, vdev->name, vrings);
2152a2761231SAlex Bennée 
2153ca71db43SCindy Lu     if (hdev->vhost_ops->vhost_dev_start) {
2154ca71db43SCindy Lu         hdev->vhost_ops->vhost_dev_start(hdev, false);
2155ca71db43SCindy Lu     }
21564daa5054SStefano Garzarella     if (vrings) {
21574daa5054SStefano Garzarella         vhost_dev_set_vring_enable(hdev, false);
21584daa5054SStefano Garzarella     }
21596e790746SPaolo Bonzini     for (i = 0; i < hdev->nvqs; ++i) {
21606e790746SPaolo Bonzini         vhost_virtqueue_stop(hdev,
21616e790746SPaolo Bonzini                              vdev,
21626e790746SPaolo Bonzini                              hdev->vqs + i,
21636e790746SPaolo Bonzini                              hdev->vq_index + i);
21646e790746SPaolo Bonzini     }
2165c3716f26SEugenio Pérez     if (hdev->vhost_ops->vhost_reset_status) {
2166c3716f26SEugenio Pérez         hdev->vhost_ops->vhost_reset_status(hdev);
2167c3716f26SEugenio Pérez     }
21686e790746SPaolo Bonzini 
2169c471ad0eSJason Wang     if (vhost_dev_has_iommu(hdev)) {
21703f63b4c6SJason Wang         if (hdev->vhost_ops->vhost_set_iotlb_callback) {
2171c471ad0eSJason Wang             hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
21723f63b4c6SJason Wang         }
2173375f74f4SJason Wang         memory_listener_unregister(&hdev->iommu_listener);
2174c471ad0eSJason Wang     }
2175f9a09ca3SCindy Lu     vhost_stop_config_intr(hdev);
2176309750faSJason Wang     vhost_log_put(hdev, true);
21776e790746SPaolo Bonzini     hdev->started = false;
2178c255488dSJonah Palmer     vdev->vhost_started = false;
2179c471ad0eSJason Wang     hdev->vdev = NULL;
21806e790746SPaolo Bonzini }
2181950d94baSMarc-André Lureau 
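/*
 * Teardown mirrors vhost_dev_start() in reverse; a caller of the sketch
 * above would typically finish with (illustrative, not part of this file):
 *
 *     vhost_dev_stop(hdev, vdev, true);
 *     vhost_dev_disable_notifiers(hdev, vdev);
 */
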
2182950d94baSMarc-André Lureau int vhost_net_set_backend(struct vhost_dev *hdev,
2183950d94baSMarc-André Lureau                           struct vhost_vring_file *file)
2184950d94baSMarc-André Lureau {
2185950d94baSMarc-André Lureau     if (hdev->vhost_ops->vhost_net_set_backend) {
2186950d94baSMarc-André Lureau         return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
2187950d94baSMarc-André Lureau     }
2188950d94baSMarc-André Lureau 
21895d33ae4bSRoman Kagan     return -ENOSYS;
2190950d94baSMarc-André Lureau }
2191c0c4f147SStefan Hajnoczi 
2192c0c4f147SStefan Hajnoczi int vhost_reset_device(struct vhost_dev *hdev)
2193c0c4f147SStefan Hajnoczi {
2194c0c4f147SStefan Hajnoczi     if (hdev->vhost_ops->vhost_reset_device) {
2195c0c4f147SStefan Hajnoczi         return hdev->vhost_ops->vhost_reset_device(hdev);
2196c0c4f147SStefan Hajnoczi     }
2197c0c4f147SStefan Hajnoczi 
2198c0c4f147SStefan Hajnoczi     return -ENOSYS;
2199c0c4f147SStefan Hajnoczi }
2200cda83adcSHanna Czenczek 
2201cda83adcSHanna Czenczek bool vhost_supports_device_state(struct vhost_dev *dev)
2202cda83adcSHanna Czenczek {
2203cda83adcSHanna Czenczek     if (dev->vhost_ops->vhost_supports_device_state) {
2204cda83adcSHanna Czenczek         return dev->vhost_ops->vhost_supports_device_state(dev);
2205cda83adcSHanna Czenczek     }
2206cda83adcSHanna Czenczek 
2207cda83adcSHanna Czenczek     return false;
2208cda83adcSHanna Czenczek }
2209cda83adcSHanna Czenczek 
2210cda83adcSHanna Czenczek int vhost_set_device_state_fd(struct vhost_dev *dev,
2211cda83adcSHanna Czenczek                               VhostDeviceStateDirection direction,
2212cda83adcSHanna Czenczek                               VhostDeviceStatePhase phase,
2213cda83adcSHanna Czenczek                               int fd,
2214cda83adcSHanna Czenczek                               int *reply_fd,
2215cda83adcSHanna Czenczek                               Error **errp)
2216cda83adcSHanna Czenczek {
2217cda83adcSHanna Czenczek     if (dev->vhost_ops->vhost_set_device_state_fd) {
2218cda83adcSHanna Czenczek         return dev->vhost_ops->vhost_set_device_state_fd(dev, direction, phase,
2219cda83adcSHanna Czenczek                                                          fd, reply_fd, errp);
2220cda83adcSHanna Czenczek     }
2221cda83adcSHanna Czenczek 
2222cda83adcSHanna Czenczek     error_setg(errp,
2223cda83adcSHanna Czenczek                "vhost transport does not support migration state transfer");
2224cda83adcSHanna Czenczek     return -ENOSYS;
2225cda83adcSHanna Czenczek }
2226cda83adcSHanna Czenczek 
2227cda83adcSHanna Czenczek int vhost_check_device_state(struct vhost_dev *dev, Error **errp)
2228cda83adcSHanna Czenczek {
2229cda83adcSHanna Czenczek     if (dev->vhost_ops->vhost_check_device_state) {
2230cda83adcSHanna Czenczek         return dev->vhost_ops->vhost_check_device_state(dev, errp);
2231cda83adcSHanna Czenczek     }
2232cda83adcSHanna Czenczek 
2233cda83adcSHanna Czenczek     error_setg(errp,
2234cda83adcSHanna Czenczek                "vhost transport does not support migration state transfer");
2235cda83adcSHanna Czenczek     return -ENOSYS;
2236cda83adcSHanna Czenczek }
22374a00d5d7SHanna Czenczek 
22384a00d5d7SHanna Czenczek int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
22394a00d5d7SHanna Czenczek {
2240ff88dbecSZhao Liu     ERRP_GUARD();
22414a00d5d7SHanna Czenczek     /* Maximum chunk size in which to transfer the state */
22424a00d5d7SHanna Czenczek     const size_t chunk_size = 1 * 1024 * 1024;
22434a00d5d7SHanna Czenczek     g_autofree void *transfer_buf = NULL;
22444a00d5d7SHanna Czenczek     g_autoptr(GError) g_err = NULL;
22454a00d5d7SHanna Czenczek     int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
22464a00d5d7SHanna Czenczek     int ret;
22474a00d5d7SHanna Czenczek 
22484a00d5d7SHanna Czenczek     /* [0] for reading (our end), [1] for writing (back-end's end) */
22494a00d5d7SHanna Czenczek     if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
22504a00d5d7SHanna Czenczek         error_setg(errp, "Failed to set up state transfer pipe: %s",
22514a00d5d7SHanna Czenczek                    g_err->message);
22524a00d5d7SHanna Czenczek         ret = -EINVAL;
22534a00d5d7SHanna Czenczek         goto fail;
22544a00d5d7SHanna Czenczek     }
22554a00d5d7SHanna Czenczek 
22564a00d5d7SHanna Czenczek     read_fd = pipe_fds[0];
22574a00d5d7SHanna Czenczek     write_fd = pipe_fds[1];
22584a00d5d7SHanna Czenczek 
22594a00d5d7SHanna Czenczek     /*
22604a00d5d7SHanna Czenczek      * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
22614a00d5d7SHanna Czenczek      * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
22624a00d5d7SHanna Czenczek      * vhost-user, so just check that it is stopped at all.
22634a00d5d7SHanna Czenczek      * vhost-user, so just check that it is at least stopped.
22644a00d5d7SHanna Czenczek     assert(!dev->started);
22654a00d5d7SHanna Czenczek 
22664a00d5d7SHanna Czenczek     /* Transfer ownership of write_fd to the back-end */
22674a00d5d7SHanna Czenczek     ret = vhost_set_device_state_fd(dev,
22684a00d5d7SHanna Czenczek                                     VHOST_TRANSFER_STATE_DIRECTION_SAVE,
22694a00d5d7SHanna Czenczek                                     VHOST_TRANSFER_STATE_PHASE_STOPPED,
22704a00d5d7SHanna Czenczek                                     write_fd,
22714a00d5d7SHanna Czenczek                                     &reply_fd,
22724a00d5d7SHanna Czenczek                                     errp);
22734a00d5d7SHanna Czenczek     if (ret < 0) {
22744a00d5d7SHanna Czenczek         error_prepend(errp, "Failed to initiate state transfer: ");
22754a00d5d7SHanna Czenczek         goto fail;
22764a00d5d7SHanna Czenczek     }
22774a00d5d7SHanna Czenczek 
22784a00d5d7SHanna Czenczek     /* If the back-end wishes to use a different pipe, switch over */
22794a00d5d7SHanna Czenczek     if (reply_fd >= 0) {
22804a00d5d7SHanna Czenczek         close(read_fd);
22814a00d5d7SHanna Czenczek         read_fd = reply_fd;
22824a00d5d7SHanna Czenczek     }
22834a00d5d7SHanna Czenczek 
22844a00d5d7SHanna Czenczek     transfer_buf = g_malloc(chunk_size);
22854a00d5d7SHanna Czenczek 
22864a00d5d7SHanna Czenczek     while (true) {
22874a00d5d7SHanna Czenczek         ssize_t read_ret;
22884a00d5d7SHanna Czenczek 
22894a00d5d7SHanna Czenczek         read_ret = RETRY_ON_EINTR(read(read_fd, transfer_buf, chunk_size));
22904a00d5d7SHanna Czenczek         if (read_ret < 0) {
22914a00d5d7SHanna Czenczek             ret = -errno;
22924a00d5d7SHanna Czenczek             error_setg_errno(errp, -ret, "Failed to receive state");
22934a00d5d7SHanna Czenczek             goto fail;
22944a00d5d7SHanna Czenczek         }
22954a00d5d7SHanna Czenczek 
22964a00d5d7SHanna Czenczek         assert(read_ret <= chunk_size);
22974a00d5d7SHanna Czenczek         qemu_put_be32(f, read_ret);
22984a00d5d7SHanna Czenczek 
22994a00d5d7SHanna Czenczek         if (read_ret == 0) {
23004a00d5d7SHanna Czenczek             /* EOF */
23014a00d5d7SHanna Czenczek             break;
23024a00d5d7SHanna Czenczek         }
23034a00d5d7SHanna Czenczek 
23044a00d5d7SHanna Czenczek         qemu_put_buffer(f, transfer_buf, read_ret);
23054a00d5d7SHanna Czenczek     }
23064a00d5d7SHanna Czenczek 
23074a00d5d7SHanna Czenczek     /*
23084a00d5d7SHanna Czenczek      * The back-end will not really care, but be clean and close our end of
23094a00d5d7SHanna Czenczek      * the pipe before asking it whether the transfer was successful.
23104a00d5d7SHanna Czenczek      */
23114a00d5d7SHanna Czenczek     close(read_fd);
23124a00d5d7SHanna Czenczek     read_fd = -1;
23134a00d5d7SHanna Czenczek 
23144a00d5d7SHanna Czenczek     /* Also, verify that the device is still stopped */
23154a00d5d7SHanna Czenczek     assert(!dev->started);
23164a00d5d7SHanna Czenczek 
23174a00d5d7SHanna Czenczek     ret = vhost_check_device_state(dev, errp);
23184a00d5d7SHanna Czenczek     if (ret < 0) {
23194a00d5d7SHanna Czenczek         goto fail;
23204a00d5d7SHanna Czenczek     }
23214a00d5d7SHanna Czenczek 
23224a00d5d7SHanna Czenczek     ret = 0;
23234a00d5d7SHanna Czenczek fail:
23244a00d5d7SHanna Czenczek     if (read_fd >= 0) {
23254a00d5d7SHanna Czenczek         close(read_fd);
23264a00d5d7SHanna Czenczek     }
23274a00d5d7SHanna Czenczek 
23284a00d5d7SHanna Czenczek     return ret;
23294a00d5d7SHanna Czenczek }
23304a00d5d7SHanna Czenczek 
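/*
 * On the migration stream, the state saved above is framed as a sequence
 * of (be32 length, payload) chunks terminated by a zero length:
 *
 *     +------+-------+------+-------+-----+------------+
 *     | len0 | data0 | len1 | data1 | ... | 0 (32 bit) |
 *     +------+-------+------+-------+-----+------------+
 *
 * vhost_load_backend_state() below consumes exactly this framing.
 */
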
23314a00d5d7SHanna Czenczek int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
23324a00d5d7SHanna Czenczek {
2333ff88dbecSZhao Liu     ERRP_GUARD();
23344a00d5d7SHanna Czenczek     size_t transfer_buf_size = 0;
23354a00d5d7SHanna Czenczek     g_autofree void *transfer_buf = NULL;
23364a00d5d7SHanna Czenczek     g_autoptr(GError) g_err = NULL;
23374a00d5d7SHanna Czenczek     int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
23384a00d5d7SHanna Czenczek     int ret;
23394a00d5d7SHanna Czenczek 
23404a00d5d7SHanna Czenczek     /* [0] for reading (back-end's end), [1] for writing (our end) */
23414a00d5d7SHanna Czenczek     if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
23424a00d5d7SHanna Czenczek         error_setg(errp, "Failed to set up state transfer pipe: %s",
23434a00d5d7SHanna Czenczek                    g_err->message);
23444a00d5d7SHanna Czenczek         ret = -EINVAL;
23454a00d5d7SHanna Czenczek         goto fail;
23464a00d5d7SHanna Czenczek     }
23474a00d5d7SHanna Czenczek 
23484a00d5d7SHanna Czenczek     read_fd = pipe_fds[0];
23494a00d5d7SHanna Czenczek     write_fd = pipe_fds[1];
23504a00d5d7SHanna Czenczek 
23514a00d5d7SHanna Czenczek     /*
23524a00d5d7SHanna Czenczek      * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
23534a00d5d7SHanna Czenczek      * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
23544a00d5d7SHanna Czenczek      * vhost-user, so just check that it is at least stopped.
23554a00d5d7SHanna Czenczek      */
23564a00d5d7SHanna Czenczek     assert(!dev->started);
23574a00d5d7SHanna Czenczek 
23584a00d5d7SHanna Czenczek     /* Transfer ownership of read_fd to the back-end */
23594a00d5d7SHanna Czenczek     ret = vhost_set_device_state_fd(dev,
23604a00d5d7SHanna Czenczek                                     VHOST_TRANSFER_STATE_DIRECTION_LOAD,
23614a00d5d7SHanna Czenczek                                     VHOST_TRANSFER_STATE_PHASE_STOPPED,
23624a00d5d7SHanna Czenczek                                     read_fd,
23634a00d5d7SHanna Czenczek                                     &reply_fd,
23644a00d5d7SHanna Czenczek                                     errp);
23654a00d5d7SHanna Czenczek     if (ret < 0) {
23664a00d5d7SHanna Czenczek         error_prepend(errp, "Failed to initiate state transfer: ");
23674a00d5d7SHanna Czenczek         goto fail;
23684a00d5d7SHanna Czenczek     }
23694a00d5d7SHanna Czenczek 
23704a00d5d7SHanna Czenczek     /* If the back-end wishes to use a different pipe, switch over */
23714a00d5d7SHanna Czenczek     if (reply_fd >= 0) {
23724a00d5d7SHanna Czenczek         close(write_fd);
23734a00d5d7SHanna Czenczek         write_fd = reply_fd;
23744a00d5d7SHanna Czenczek     }
23754a00d5d7SHanna Czenczek 
23764a00d5d7SHanna Czenczek     while (true) {
23774a00d5d7SHanna Czenczek         size_t this_chunk_size = qemu_get_be32(f);
23784a00d5d7SHanna Czenczek         ssize_t write_ret;
23794a00d5d7SHanna Czenczek         const uint8_t *transfer_pointer;
23804a00d5d7SHanna Czenczek 
23814a00d5d7SHanna Czenczek         if (this_chunk_size == 0) {
23824a00d5d7SHanna Czenczek             /* End of state */
23834a00d5d7SHanna Czenczek             break;
23844a00d5d7SHanna Czenczek         }
23854a00d5d7SHanna Czenczek 
23864a00d5d7SHanna Czenczek         if (transfer_buf_size < this_chunk_size) {
23874a00d5d7SHanna Czenczek             transfer_buf = g_realloc(transfer_buf, this_chunk_size);
23884a00d5d7SHanna Czenczek             transfer_buf_size = this_chunk_size;
23894a00d5d7SHanna Czenczek         }
23904a00d5d7SHanna Czenczek 
23914a00d5d7SHanna Czenczek         if (qemu_get_buffer(f, transfer_buf, this_chunk_size) <
23924a00d5d7SHanna Czenczek                 this_chunk_size)
23934a00d5d7SHanna Czenczek         {
23944a00d5d7SHanna Czenczek             error_setg(errp, "Failed to read state");
23954a00d5d7SHanna Czenczek             ret = -EINVAL;
23964a00d5d7SHanna Czenczek             goto fail;
23974a00d5d7SHanna Czenczek         }
23984a00d5d7SHanna Czenczek 
23994a00d5d7SHanna Czenczek         transfer_pointer = transfer_buf;
24004a00d5d7SHanna Czenczek         while (this_chunk_size > 0) {
24014a00d5d7SHanna Czenczek             write_ret = RETRY_ON_EINTR(
24024a00d5d7SHanna Czenczek                 write(write_fd, transfer_pointer, this_chunk_size)
24034a00d5d7SHanna Czenczek             );
24044a00d5d7SHanna Czenczek             if (write_ret < 0) {
24054a00d5d7SHanna Czenczek                 ret = -errno;
24064a00d5d7SHanna Czenczek                 error_setg_errno(errp, -ret, "Failed to send state");
24074a00d5d7SHanna Czenczek                 goto fail;
24084a00d5d7SHanna Czenczek             } else if (write_ret == 0) {
24094a00d5d7SHanna Czenczek                 error_setg(errp, "Failed to send state: Connection is closed");
24104a00d5d7SHanna Czenczek                 ret = -ECONNRESET;
24114a00d5d7SHanna Czenczek                 goto fail;
24124a00d5d7SHanna Czenczek             }
24134a00d5d7SHanna Czenczek 
24144a00d5d7SHanna Czenczek             assert(write_ret <= this_chunk_size);
24154a00d5d7SHanna Czenczek             this_chunk_size -= write_ret;
24164a00d5d7SHanna Czenczek             transfer_pointer += write_ret;
24174a00d5d7SHanna Czenczek         }
24184a00d5d7SHanna Czenczek     }
24194a00d5d7SHanna Czenczek 
24204a00d5d7SHanna Czenczek     /*
24214a00d5d7SHanna Czenczek      * Close our end, thus ending the transfer, before asking the back-end
24224a00d5d7SHanna Czenczek      * whether the transfer was successful.
24234a00d5d7SHanna Czenczek      */
24244a00d5d7SHanna Czenczek     close(write_fd);
24254a00d5d7SHanna Czenczek     write_fd = -1;
24264a00d5d7SHanna Czenczek 
24274a00d5d7SHanna Czenczek     /* Also, verify that the device is still stopped */
24284a00d5d7SHanna Czenczek     assert(!dev->started);
24294a00d5d7SHanna Czenczek 
24304a00d5d7SHanna Czenczek     ret = vhost_check_device_state(dev, errp);
24314a00d5d7SHanna Czenczek     if (ret < 0) {
24324a00d5d7SHanna Czenczek         goto fail;
24334a00d5d7SHanna Czenczek     }
24344a00d5d7SHanna Czenczek 
24354a00d5d7SHanna Czenczek     ret = 0;
24364a00d5d7SHanna Czenczek fail:
24374a00d5d7SHanna Czenczek     if (write_fd >= 0) {
24384a00d5d7SHanna Czenczek         close(write_fd);
24394a00d5d7SHanna Czenczek     }
24404a00d5d7SHanna Czenczek 
24414a00d5d7SHanna Czenczek     return ret;
24424a00d5d7SHanna Czenczek }
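
/*
 * Illustrative migration glue (hypothetical handler and device type, not
 * part of this file): a device whose backend supports state transfer would
 * call the two helpers above from its save/load hooks, e.g.:
 *
 *     static int example_vhost_save_state(QEMUFile *f, void *opaque)
 *     {
 *         ExampleDev *d = opaque;          // hypothetical device type
 *         Error *local_err = NULL;
 *
 *         if (vhost_save_backend_state(&d->vhost_dev, f, &local_err) < 0) {
 *             error_report_err(local_err);
 *             return -1;
 *         }
 *         return 0;
 *     }
 */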
2443