/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/memfd.h"
#include "qemu/log.h"
#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/mem/memory-device.h"
#include "migration/blocker.h"
#include "migration/qemu-file-types.h"
#include "sysemu/dma.h"
#include "trace.h"

/* enabled until disconnected backend stabilizes */
#define _VHOST_DEBUG 1

#ifdef _VHOST_DEBUG
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
    do { \
        error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
                     strerror(-retval), -retval); \
    } while (0)
#else
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
    do { } while (0)
#endif

static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];

/* Memslots used by backends that support private memslots (without an fd). */
static unsigned int used_memslots;

/* Memslots used by backends that only support shared memslots (with an fd). */
static unsigned int used_shared_memslots;

static QLIST_HEAD(, vhost_dev) vhost_devices =
    QLIST_HEAD_INITIALIZER(vhost_devices);

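/*
 * Return the lowest memslot limit reported by any registered vhost device,
 * i.e. the number of memslots that every active vhost backend can support.
 */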
unsigned int vhost_get_max_memslots(void)
{
    unsigned int max = UINT_MAX;
    struct vhost_dev *hdev;

    QLIST_FOREACH(hdev, &vhost_devices, entry) {
        max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
    }
    return max;
}

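/*
 * Return the smallest number of free memslots across all registered vhost
 * devices, accounting for whether each backend consumes shared (fd-based)
 * or private memslots.
 */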
unsigned int vhost_get_free_memslots(void)
{
    unsigned int free = UINT_MAX;
    struct vhost_dev *hdev;

    QLIST_FOREACH(hdev, &vhost_devices, entry) {
        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
        unsigned int cur_free;

        if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
            hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
            cur_free = r - used_shared_memslots;
        } else {
            cur_free = r - used_memslots;
        }
        free = MIN(free, cur_free);
    }
    return free;
}

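/*
 * Walk the dirty log chunks that cover the intersection of the section's
 * [mfirst, mlast] range with [rfirst, rlast], atomically clear each dirty
 * chunk and mark the corresponding guest pages dirty in QEMU's bitmap.
 */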
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    vhost_log_chunk_t *dev_log = dev->log->log;

    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev_log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev_log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really need barrier semantics
         * but it's easier to use atomic_* than roll our own. */
        log = qatomic_xchg(from, 0);
        while (log) {
            int bit = ctzl(log);
            hwaddr page_addr;
            hwaddr section_offset;
            hwaddr mr_offset;
            page_addr = addr + bit * VHOST_LOG_PAGE;
            section_offset = page_addr - section->offset_within_address_space;
            mr_offset = section_offset + section->offset_within_region;
            memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}

bool vhost_dev_has_iommu(struct vhost_dev *dev)
{
    VirtIODevice *vdev = dev->vdev;

    /*
     * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend supports the
     * incremental memory mapping API via the IOTLB API. For platforms that
     * do not have an IOMMU, there is no need to enable this feature, which
     * may cause unnecessary IOTLB miss/update transactions.
     */
    if (vdev) {
        return virtio_bus_device_iommu_enabled(vdev) &&
            virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
    } else {
        return false;
    }
}

static inline bool vhost_dev_should_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops);
    assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);

    return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
}

static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
{
    VhostBackendType backend_type;

    assert(hdev->vhost_ops);

    backend_type = hdev->vhost_ops->backend_type;
    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(backend_type < VHOST_BACKEND_TYPE_MAX);

    if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
        if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
            QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
                              hdev, logdev_entry);
        } else {
            /*
             * The first vhost_device in the list is selected as the shared
             * logger to scan memory sections. Put new entry next to the head
             * to avoid inadvertent change to the underlying logger device.
             * This is done in order to get better cache locality and to avoid
             * performance churn on the hot path for log scanning. Even when
             * new devices come and go quickly, it wouldn't end up changing
             * the active leading logger device at all.
             */
            QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
                               hdev, logdev_entry);
        }
    } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
        QLIST_REMOVE(hdev, logdev_entry);
    }
}

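/*
 * Sync the dirty log for one memory region section, limited to the
 * [first, last] range: only the elected logger device scans all memory
 * regions, while every device syncs its own used rings (translating through
 * the IOMMU when one is in use).
 */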
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
                                   MemoryRegionSection *section,
                                   hwaddr first,
                                   hwaddr last)
{
    int i;
    hwaddr start_addr;
    hwaddr end_addr;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    start_addr = section->offset_within_address_space;
    end_addr = range_get_last(start_addr, int128_get64(section->size));
    start_addr = MAX(first, start_addr);
    end_addr = MIN(last, end_addr);

    if (vhost_dev_should_log(dev)) {
        for (i = 0; i < dev->mem->nregions; ++i) {
            struct vhost_memory_region *reg = dev->mem->regions + i;
            vhost_dev_sync_region(dev, section, start_addr, end_addr,
                                  reg->guest_phys_addr,
                                  range_get_last(reg->guest_phys_addr,
                                                 reg->memory_size));
        }
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;

        if (!vq->used_phys && !vq->used_size) {
            continue;
        }

        if (vhost_dev_has_iommu(dev)) {
            IOMMUTLBEntry iotlb;
            hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
            hwaddr phys, s, offset;

            while (used_size) {
                rcu_read_lock();
                iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
                                                      used_phys,
                                                      true,
                                                      MEMTXATTRS_UNSPECIFIED);
                rcu_read_unlock();

                if (!iotlb.target_as) {
                    qemu_log_mask(LOG_GUEST_ERROR, "translation "
                                  "failure for used_iova %"PRIx64"\n",
                                  used_phys);
                    return -EINVAL;
                }

                offset = used_phys & iotlb.addr_mask;
                phys = iotlb.translated_addr + offset;

                /*
                 * Distance from start of used ring until last byte of
                 * IOMMU page.
                 */
                s = iotlb.addr_mask - offset;
                /*
                 * Size of used ring, or of the part of it until end
                 * of IOMMU page. To avoid zero result, do the adding
                 * outside of MIN().
                 */
                s = MIN(s, used_size - 1) + 1;

                vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
                                      range_get_last(phys, s));
                used_size -= s;
                used_phys += s;
            }
        } else {
            vhost_dev_sync_region(dev, section, start_addr,
                                  end_addr, vq->used_phys,
                                  range_get_last(vq->used_phys, vq->used_size));
        }
    }
    return 0;
}

static void vhost_log_sync(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
}

static void vhost_log_sync_range(struct vhost_dev *dev,
                                 hwaddr first, hwaddr last)
{
    int i;
    /* FIXME: this is N^2 in number of sections */
    for (i = 0; i < dev->n_mem_sections; ++i) {
        MemoryRegionSection *section = &dev->mem_sections[i];
        vhost_sync_dirty_bitmap(dev, section, first, last);
    }
}

static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}

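/*
 * Bind dev->vhost_ops to the requested backend type (kernel, vhost-user or
 * vDPA), depending on which backends were compiled in. Returns 0 on success
 * and -1 for an unknown backend type.
 */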
static int vhost_set_backend_type(struct vhost_dev *dev,
                                  VhostBackendType backend_type)
{
    int r = 0;

    switch (backend_type) {
#ifdef CONFIG_VHOST_KERNEL
    case VHOST_BACKEND_TYPE_KERNEL:
        dev->vhost_ops = &kernel_ops;
        break;
#endif
#ifdef CONFIG_VHOST_USER
    case VHOST_BACKEND_TYPE_USER:
        dev->vhost_ops = &user_ops;
        break;
#endif
#ifdef CONFIG_VHOST_VDPA
    case VHOST_BACKEND_TYPE_VDPA:
        dev->vhost_ops = &vdpa_ops;
        break;
#endif
    default:
        error_report("Unknown vhost backend type");
        r = -1;
    }

    if (r == 0) {
        assert(dev->vhost_ops->backend_type == backend_type);
    }

    return r;
}

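/*
 * Allocate a dirty log of 'size' chunks, either as a sealed memfd that can
 * be shared with the backend process (share == true) or as plain heap
 * memory. Returns NULL if the memfd allocation fails.
 */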
static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
{
    Error *err = NULL;
    struct vhost_log *log;
    uint64_t logsize = size * sizeof(*(log->log));
    int fd = -1;

    log = g_new0(struct vhost_log, 1);
    if (share) {
        log->log = qemu_memfd_alloc("vhost-log", logsize,
                                    F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
                                    &fd, &err);
        if (err) {
            error_report_err(err);
            g_free(log);
            return NULL;
        }
        memset(log->log, 0, logsize);
    } else {
        log->log = g_malloc0(logsize);
    }

    log->size = size;
    log->refcnt = 1;
    log->fd = fd;

    return log;
}

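/*
 * Get a reference to the per-backend-type dirty log, reusing the cached one
 * when the requested size matches, and allocating (and caching) a new log
 * otherwise.
 */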
static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
                                       uint64_t size, bool share)
{
    struct vhost_log *log;

    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
    assert(backend_type < VHOST_BACKEND_TYPE_MAX);

    log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];

    if (!log || log->size != size) {
        log = vhost_log_alloc(size, share);
        if (share) {
            vhost_log_shm[backend_type] = log;
        } else {
            vhost_log[backend_type] = log;
        }
    } else {
        ++log->refcnt;
    }

    return log;
}

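/*
 * Drop the device's reference to its dirty log. When the last reference goes
 * away, optionally sync the range covered by the old log, free the log
 * memory and clear the per-backend cache entry. The device is also removed
 * from the logger election list.
 */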
static void vhost_log_put(struct vhost_dev *dev, bool sync)
{
    struct vhost_log *log = dev->log;
    VhostBackendType backend_type;

    if (!log) {
        return;
    }

    assert(dev->vhost_ops);
    backend_type = dev->vhost_ops->backend_type;

    if (backend_type == VHOST_BACKEND_TYPE_NONE ||
        backend_type >= VHOST_BACKEND_TYPE_MAX) {
        return;
    }

    --log->refcnt;
    if (log->refcnt == 0) {
        /* Sync only the range covered by the old log */
        if (dev->log_size && sync) {
            vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
        }

        if (vhost_log[backend_type] == log) {
            g_free(log->log);
            vhost_log[backend_type] = NULL;
        } else if (vhost_log_shm[backend_type] == log) {
            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
                            log->fd);
            vhost_log_shm[backend_type] = NULL;
        }

        g_free(log);
    }

    vhost_dev_elect_mem_logger(dev, false);
    dev->log = NULL;
    dev->log_size = 0;
}

static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
{
    return dev->vhost_ops->vhost_requires_shm_log &&
           dev->vhost_ops->vhost_requires_shm_log(dev);
}

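/*
 * Switch the device to a dirty log of the requested size: get (or allocate)
 * the new log, tell the backend about the new log base, and only then drop
 * the reference to the old log so that no logging is lost in between.
 */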
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
    struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
                                          size, vhost_dev_log_is_shared(dev));
    uint64_t log_base = (uintptr_t)log->log;
    int r;

    /* inform backend of log switching, this must be done before
       releasing the current log, to ensure no logging is lost */
    r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
    }

    vhost_log_put(dev, true);
    dev->log = log;
    dev->log_size = size;
}

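/*
 * When the device uses an IOMMU, addresses are handed to the backend
 * untranslated (the backend resolves them through the IOTLB), so "mapping"
 * is a no-op; otherwise the guest-physical address is mapped into QEMU's
 * address space and must later be released with vhost_memory_unmap().
 */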
static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
                              hwaddr *plen, bool is_write)
{
    if (!vhost_dev_has_iommu(dev)) {
        return cpu_physical_memory_map(addr, plen, is_write);
    } else {
        return (void *)(uintptr_t)addr;
    }
}

static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
                               hwaddr len, int is_write,
                               hwaddr access_len)
{
    if (!vhost_dev_has_iommu(dev)) {
        cpu_physical_memory_unmap(buffer, len, is_write, access_len);
    }
}

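/*
 * Check that one ring part (descriptor table, available or used ring) that
 * overlaps a memory region is still fully contained in it and still mapped
 * at the same host address. Returns 0 when there is no overlap or the
 * mapping is unchanged, -ENOMEM or -EBUSY otherwise.
 */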
static int vhost_verify_ring_part_mapping(void *ring_hva,
                                          uint64_t ring_gpa,
                                          uint64_t ring_size,
                                          void *reg_hva,
                                          uint64_t reg_gpa,
                                          uint64_t reg_size)
{
    uint64_t hva_ring_offset;
    uint64_t ring_last = range_get_last(ring_gpa, ring_size);
    uint64_t reg_last = range_get_last(reg_gpa, reg_size);

    if (ring_last < reg_gpa || ring_gpa > reg_last) {
        return 0;
    }
    /* check that the whole ring is mapped */
    if (ring_last > reg_last) {
        return -ENOMEM;
    }
    /* check that ring's MemoryRegion wasn't replaced */
    hva_ring_offset = ring_gpa - reg_gpa;
    if (ring_hva != reg_hva + hva_ring_offset) {
        return -EBUSY;
    }

    return 0;
}

static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      void *reg_hva,
                                      uint64_t reg_gpa,
                                      uint64_t reg_size)
{
    int i, j;
    int r = 0;
    const char *part_name[] = {
        "descriptor table",
        "available ring",
        "used ring"
    };

    if (vhost_dev_has_iommu(dev)) {
        return 0;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;

        if (vq->desc_phys == 0) {
            continue;
        }

        j = 0;
        r = vhost_verify_ring_part_mapping(
                vq->desc, vq->desc_phys, vq->desc_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }

        j++;
        r = vhost_verify_ring_part_mapping(
                vq->avail, vq->avail_phys, vq->avail_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }

        j++;
        r = vhost_verify_ring_part_mapping(
                vq->used, vq->used_phys, vq->used_size,
                reg_hva, reg_gpa, reg_size);
        if (r) {
            break;
        }
    }

    if (r == -ENOMEM) {
        error_report("Unable to map %s for ring %d", part_name[j], i);
    } else if (r == -EBUSY) {
        error_report("%s relocated for ring %d", part_name[j], i);
    }
    return r;
}

/*
 * vhost_section: identify sections needed for vhost access
 *
 * We only care about RAM sections here (where virtqueue and guest
 * internals accessed by virtio might live).
 */
static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
        uint8_t dirty_mask = memory_region_get_dirty_log_mask(mr);
        uint8_t handled_dirty;

        /*
         * Kernel based vhost doesn't handle any block which is doing
         * dirty-tracking other than migration, for which it has
         * specific logging support. However, for TCG the kernel never
         * gets involved anyway, so we can also ignore its
         * self-modifying code detection flags. However, a vhost-user
         * client could still confuse a TCG guest if it re-writes
         * executable memory that has already been translated.
         */
        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) |
            (1 << DIRTY_MEMORY_CODE);

        if (dirty_mask & ~handled_dirty) {
            trace_vhost_reject_section(mr->name, 1);
            return false;
        }

        /*
         * Some backends (like vhost-user) can only handle memory regions
         * that have an fd (can be mapped into a different process). Filter
         * the ones without an fd out, if requested.
         *
         * TODO: we might have to limit to MAP_SHARED as well.
         */
        if (memory_region_get_fd(section->mr) < 0 &&
            dev->vhost_ops->vhost_backend_no_private_memslots &&
            dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
            trace_vhost_reject_section(mr->name, 2);
            return false;
        }

        trace_vhost_section(mr->name);
        return true;
    } else {
        trace_vhost_reject_section(mr->name, 3);
        return false;
    }
}

static void vhost_begin(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    dev->tmp_sections = NULL;
    dev->n_tmp_sections = 0;
}

static void vhost_commit(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    MemoryRegionSection *old_sections;
    int n_old_sections;
    uint64_t log_size;
    size_t regions_size;
    int r;
    int i;
    bool changed = false;

    /* Note we can be called before the device is started, but then
     * starting the device calls set_mem_table, so we need to have
     * built the data structures.
     */
    old_sections = dev->mem_sections;
    n_old_sections = dev->n_mem_sections;
    dev->mem_sections = dev->tmp_sections;
    dev->n_mem_sections = dev->n_tmp_sections;

    if (dev->n_mem_sections != n_old_sections) {
        changed = true;
    } else {
        /* Same size, let's check the contents */
        for (i = 0; i < n_old_sections; i++) {
            if (!MemoryRegionSection_eq(&old_sections[i],
                                        &dev->mem_sections[i])) {
                changed = true;
                break;
            }
        }
    }

    trace_vhost_commit(dev->started, changed);
    if (!changed) {
        goto out;
    }

    /* Rebuild the regions list from the new sections list */
    regions_size = offsetof(struct vhost_memory, regions) +
                       dev->n_mem_sections * sizeof dev->mem->regions[0];
    dev->mem = g_realloc(dev->mem, regions_size);
    dev->mem->nregions = dev->n_mem_sections;

    if (dev->vhost_ops->vhost_backend_no_private_memslots &&
        dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
        used_shared_memslots = dev->mem->nregions;
    } else {
        used_memslots = dev->mem->nregions;
    }

    for (i = 0; i < dev->n_mem_sections; i++) {
        struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
        struct MemoryRegionSection *mrs = dev->mem_sections + i;

        cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
        cur_vmr->memory_size = int128_get64(mrs->size);
        cur_vmr->userspace_addr =
            (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
            mrs->offset_within_region;
        cur_vmr->flags_padding = 0;
    }

    if (!dev->started) {
        goto out;
    }

    for (i = 0; i < dev->mem->nregions; i++) {
        if (vhost_verify_ring_mappings(dev,
                       (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
                       dev->mem->regions[i].guest_phys_addr,
                       dev->mem->regions[i].memory_size)) {
            error_report("Verify ring failure on region %d", i);
            abort();
        }
    }

    if (!dev->log_enabled) {
        r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
        if (r < 0) {
            VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
        }
        goto out;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes of log space,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
    }
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }

out:
    /* Deref the old list of sections, this must happen _after_ the
     * vhost_set_mem_table to ensure the client isn't still using the
     * section we're about to unref.
     */
    while (n_old_sections--) {
        memory_region_unref(old_sections[n_old_sections].mr);
    }
    g_free(old_sections);
    return;
}

/* Adds the section data to the tmp_section structure.
 * It relies on the listener calling us in memory address order
 * and for each region (via the _add and _nop methods) to
 * join neighbours.
 */
static void vhost_region_add_section(struct vhost_dev *dev,
                                     MemoryRegionSection *section)
{
    bool need_add = true;
    uint64_t mrs_size = int128_get64(section->size);
    uint64_t mrs_gpa = section->offset_within_address_space;
    uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
                         section->offset_within_region;
    RAMBlock *mrs_rb = section->mr->ram_block;

    trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
                                   mrs_host);

    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
        /* Round the section to its page size */
        /* First align the start down to a page boundary */
        size_t mrs_page = qemu_ram_pagesize(mrs_rb);
        uint64_t alignage = mrs_host & (mrs_page - 1);
        if (alignage) {
            mrs_host -= alignage;
            mrs_size += alignage;
            mrs_gpa -= alignage;
        }
        /* Now align the size up to a page boundary */
        alignage = mrs_size & (mrs_page - 1);
        if (alignage) {
            mrs_size += mrs_page - alignage;
        }
        trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa,
                                               mrs_size, mrs_host);
    }

    if (dev->n_tmp_sections && !section->unmergeable) {
        /* Since we already have at least one section, let's see if
         * this extends it; since we're scanning in order, we only
         * have to look at the last one, and the FlatView that calls
         * us shouldn't have overlaps.
         */
        MemoryRegionSection *prev_sec = dev->tmp_sections +
                                        (dev->n_tmp_sections - 1);
        uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
        uint64_t prev_size = int128_get64(prev_sec->size);
        uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
        uint64_t prev_host_start =
            (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
            prev_sec->offset_within_region;
        uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);

        if (mrs_gpa <= (prev_gpa_end + 1)) {
            /* OK, looks like overlapping/intersecting - it's possible that
             * the rounding to page sizes has made them overlap, but they
             * should match up in the same RAMBlock if they do.
             */
            if (mrs_gpa < prev_gpa_start) {
                error_report("%s:Section '%s' rounded to %"PRIx64
                             " prior to previous '%s' %"PRIx64,
                             __func__, section->mr->name, mrs_gpa,
                             prev_sec->mr->name, prev_gpa_start);
                /* A way to cleanly fail here would be better */
                return;
            }
            /* Offset from the start of the previous GPA to this GPA */
            size_t offset = mrs_gpa - prev_gpa_start;

            if (prev_host_start + offset == mrs_host &&
                section->mr == prev_sec->mr && !prev_sec->unmergeable) {
                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
                need_add = false;
                prev_sec->offset_within_address_space =
                    MIN(prev_gpa_start, mrs_gpa);
                prev_sec->offset_within_region =
                    MIN(prev_host_start, mrs_host) -
                    (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
                prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
                                                             mrs_host));
                trace_vhost_region_add_section_merge(section->mr->name,
                                        int128_get64(prev_sec->size),
                                        prev_sec->offset_within_address_space,
                                        prev_sec->offset_within_region);
            } else {
                /* adjoining regions are fine, but overlapping ones with
                 * different blocks/offsets shouldn't happen
                 */
                if (mrs_gpa != prev_gpa_end + 1) {
                    error_report("%s: Overlapping but not coherent sections "
                                 "at %"PRIx64,
                                 __func__, mrs_gpa);
                    return;
                }
            }
        }
    }

    if (need_add) {
        ++dev->n_tmp_sections;
        dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
                                    dev->n_tmp_sections);
        dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
        /* The flatview isn't stable and we don't use it, making it NULL
         * means we can memcmp the list.
         */
        dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
        memory_region_ref(section->mr);
    }
}

/* Used for both add and nop callbacks */
static void vhost_region_addnop(MemoryListener *listener,
                                MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);

    if (!vhost_section(dev, section)) {
        return;
    }
    vhost_region_add_section(dev, section);
}

static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
    struct vhost_dev *hdev = iommu->hdev;
    hwaddr iova = iotlb->iova + iommu->iommu_offset;

    if (vhost_backend_invalidate_device_iotlb(hdev, iova,
                                              iotlb->addr_mask + 1)) {
        error_report("Fail to invalidate device iotlb");
    }
}

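/*
 * Register an IOMMU unmap notifier for each IOMMU memory region section so
 * that guest IOTLB invalidations are propagated to the vhost backend's
 * device IOTLB via vhost_iommu_unmap_notify().
 */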
static void vhost_iommu_region_add(MemoryListener *listener,
                                   MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         iommu_listener);
    struct vhost_iommu *iommu;
    Int128 end;
    int iommu_idx;
    IOMMUMemoryRegion *iommu_mr;

    if (!memory_region_is_iommu(section->mr)) {
        return;
    }

    iommu_mr = IOMMU_MEMORY_REGION(section->mr);

    iommu = g_malloc0(sizeof(*iommu));
    end = int128_add(int128_make64(section->offset_within_region),
                     section->size);
    end = int128_sub(end, int128_one());
    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                   MEMTXATTRS_UNSPECIFIED);
    iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
                        dev->vdev->device_iotlb_enabled ?
                        IOMMU_NOTIFIER_DEVIOTLB_UNMAP :
                        IOMMU_NOTIFIER_UNMAP,
                        section->offset_within_region,
                        int128_get64(end),
                        iommu_idx);
    iommu->mr = section->mr;
    iommu->iommu_offset = section->offset_within_address_space -
                          section->offset_within_region;
    iommu->hdev = dev;
    memory_region_register_iommu_notifier(section->mr, &iommu->n,
                                          &error_fatal);
    QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
    /* TODO: can replay help performance here? */
}

static void vhost_iommu_region_del(MemoryListener *listener,
                                   MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         iommu_listener);
    struct vhost_iommu *iommu;

    if (!memory_region_is_iommu(section->mr)) {
        return;
    }

    QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
        if (iommu->mr == section->mr &&
            iommu->n.start == section->offset_within_region) {
            memory_region_unregister_iommu_notifier(iommu->mr,
                                                    &iommu->n);
            QLIST_REMOVE(iommu, iommu_next);
            g_free(iommu);
            break;
        }
    }
}

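/*
 * Re-register all IOMMU notifiers of a started vhost device so that their
 * notifier flags match the current device_iotlb_enabled state of the virtio
 * device.
 */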
void vhost_toggle_device_iotlb(VirtIODevice *vdev)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    struct vhost_dev *dev;
    struct vhost_iommu *iommu;

    if (vdev->vhost_started) {
        dev = vdc->get_vhost(vdev);
    } else {
        return;
    }

    QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
        memory_region_unregister_iommu_notifier(iommu->mr, &iommu->n);
        iommu->n.notifier_flags = vdev->device_iotlb_enabled ?
                IOMMU_NOTIFIER_DEVIOTLB_UNMAP : IOMMU_NOTIFIER_UNMAP;
        memory_region_register_iommu_notifier(iommu->mr, &iommu->n,
                                              &error_fatal);
    }
}

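/*
 * Program the backend with the ring addresses of one virtqueue, either as
 * reported by the backend itself (when it provides vhost_vq_get_addr) or
 * from the locally mapped host addresses, and set the log flag as requested.
 */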
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr;
    int r;
    memset(&addr, 0, sizeof(struct vhost_vring_addr));

    if (dev->vhost_ops->vhost_vq_get_addr) {
        r = dev->vhost_ops->vhost_vq_get_addr(dev, &addr, vq);
        if (r < 0) {
            VHOST_OPS_DEBUG(r, "vhost_vq_get_addr failed");
            return r;
        }
    } else {
        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc;
        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail;
        addr.used_user_addr = (uint64_t)(unsigned long)vq->used;
    }
    addr.index = idx;
    addr.log_guest_addr = vq->used_phys;
    addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
    r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_vring_addr failed");
    }
    return r;
}

static int vhost_dev_set_features(struct vhost_dev *dev,
                                  bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1ULL << VHOST_F_LOG_ALL;
    }
    if (!vhost_dev_has_iommu(dev)) {
        features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);
    }
    if (dev->vhost_ops->vhost_force_iommu) {
        if (dev->vhost_ops->vhost_force_iommu(dev) == true) {
            features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
        }
    }
    r = dev->vhost_ops->vhost_set_features(dev, features);
    if (r < 0) {
        VHOST_OPS_DEBUG(r, "vhost_set_features failed");
        goto out;
    }
    if (dev->vhost_ops->vhost_set_backend_cap) {
        r = dev->vhost_ops->vhost_set_backend_cap(dev);
        if (r < 0) {
            VHOST_OPS_DEBUG(r, "vhost_set_backend_cap failed");
            goto out;
        }
    }

out:
    return r;
}

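/*
 * Enable or disable dirty logging on the backend: renegotiate features with
 * or without VHOST_F_LOG_ALL, update the ring addresses of every started
 * virtqueue with the matching log flag, and update the logger election for
 * this backend type. On failure, the previous addresses and features are
 * restored.
 */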
vhost_dev_set_log(struct vhost_dev * dev,bool enable_log)10196e790746SPaolo Bonzini static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
10206e790746SPaolo Bonzini {
1021162bba7fSMarc-André Lureau int r, i, idx;
10221e5a050fSDima Stepanov hwaddr addr;
10231e5a050fSDima Stepanov
10246e790746SPaolo Bonzini r = vhost_dev_set_features(dev, enable_log);
10256e790746SPaolo Bonzini if (r < 0) {
10266e790746SPaolo Bonzini goto err_features;
10276e790746SPaolo Bonzini }
10286e790746SPaolo Bonzini for (i = 0; i < dev->nvqs; ++i) {
102925a2a920SThibaut Collet idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
10301e5a050fSDima Stepanov addr = virtio_queue_get_desc_addr(dev->vdev, idx);
10311e5a050fSDima Stepanov if (!addr) {
10321e5a050fSDima Stepanov /*
10331e5a050fSDima Stepanov * The queue might not be ready for start. If this
10341e5a050fSDima Stepanov * is the case there is no reason to continue the process.
10351e5a050fSDima Stepanov * Similar logic is used by the vhost_virtqueue_start()
10361e5a050fSDima Stepanov * routine.
10371e5a050fSDima Stepanov */
10381e5a050fSDima Stepanov continue;
10391e5a050fSDima Stepanov }
104025a2a920SThibaut Collet r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
10416e790746SPaolo Bonzini enable_log);
10426e790746SPaolo Bonzini if (r < 0) {
10436e790746SPaolo Bonzini goto err_vq;
10446e790746SPaolo Bonzini }
10456e790746SPaolo Bonzini }
1046c5cd7e5fSSi-Wei Liu
1047c5cd7e5fSSi-Wei Liu /*
1048c5cd7e5fSSi-Wei Liu * At log start we elect this device as the logger that will scan the
1049c5cd7e5fSSi-Wei Liu * memory sections, while the other devices skip that work. This is
1050c5cd7e5fSSi-Wei Liu * possible because the log is shared amongst all vhost devices for a
1051c5cd7e5fSSi-Wei Liu * given type of backend.
1052c5cd7e5fSSi-Wei Liu */
1053c5cd7e5fSSi-Wei Liu vhost_dev_elect_mem_logger(dev, enable_log);
1054c5cd7e5fSSi-Wei Liu
10556e790746SPaolo Bonzini return 0;
10566e790746SPaolo Bonzini err_vq:
10576e790746SPaolo Bonzini for (; i >= 0; --i) {
105825a2a920SThibaut Collet idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
10599ce305c8SNi Xun addr = virtio_queue_get_desc_addr(dev->vdev, idx);
10609ce305c8SNi Xun if (!addr) {
10619ce305c8SNi Xun continue;
10629ce305c8SNi Xun }
1063162bba7fSMarc-André Lureau vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
10646e790746SPaolo Bonzini dev->log_enabled);
10656e790746SPaolo Bonzini }
1066162bba7fSMarc-André Lureau vhost_dev_set_features(dev, dev->log_enabled);
10676e790746SPaolo Bonzini err_features:
10686e790746SPaolo Bonzini return r;
10696e790746SPaolo Bonzini }
10706e790746SPaolo Bonzini
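/*
 * Enable or disable dirty memory logging in response to the memory
 * listener's log_global_start/stop callbacks (i.e. at migration start and
 * stop).  Allocates or releases the dirty log and pushes the new state to
 * the backend; a device that disconnected meanwhile is treated as success.
 */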
vhost_migration_log(MemoryListener * listener,bool enable)1071705f7f2fSRaphael Norwitz static int vhost_migration_log(MemoryListener *listener, bool enable)
10726e790746SPaolo Bonzini {
10736e790746SPaolo Bonzini struct vhost_dev *dev = container_of(listener, struct vhost_dev,
10746e790746SPaolo Bonzini memory_listener);
10756e790746SPaolo Bonzini int r;
1076705f7f2fSRaphael Norwitz if (enable == dev->log_enabled) {
10776e790746SPaolo Bonzini return 0;
10786e790746SPaolo Bonzini }
10796e790746SPaolo Bonzini if (!dev->started) {
10806e790746SPaolo Bonzini dev->log_enabled = enable;
10816e790746SPaolo Bonzini return 0;
10826e790746SPaolo Bonzini }
1083f5b22d06SDima Stepanov
1084f5b22d06SDima Stepanov r = 0;
10856e790746SPaolo Bonzini if (!enable) {
10866e790746SPaolo Bonzini r = vhost_dev_set_log(dev, false);
10876e790746SPaolo Bonzini if (r < 0) {
1088f5b22d06SDima Stepanov goto check_dev_state;
10896e790746SPaolo Bonzini }
1090309750faSJason Wang vhost_log_put(dev, false);
10916e790746SPaolo Bonzini } else {
10926e790746SPaolo Bonzini vhost_dev_log_resize(dev, vhost_get_log_size(dev));
10936e790746SPaolo Bonzini r = vhost_dev_set_log(dev, true);
10946e790746SPaolo Bonzini if (r < 0) {
1095f5b22d06SDima Stepanov goto check_dev_state;
10966e790746SPaolo Bonzini }
10976e790746SPaolo Bonzini }
1098f5b22d06SDima Stepanov
1099f5b22d06SDima Stepanov check_dev_state:
11006e790746SPaolo Bonzini dev->log_enabled = enable;
1101f5b22d06SDima Stepanov /*
1102f5b22d06SDima Stepanov * vhost-user-* devices could change their state during log
1103f5b22d06SDima Stepanov * initialization due to disconnect. So check dev state after
1104f5b22d06SDima Stepanov * vhost communication.
1105f5b22d06SDima Stepanov */
1106f5b22d06SDima Stepanov if (!dev->started) {
1107f5b22d06SDima Stepanov /*
1108f5b22d06SDima Stepanov * Since the device is in the stopped state, it is okay for
1109f5b22d06SDima Stepanov * migration. Return success.
1110f5b22d06SDima Stepanov */
1111f5b22d06SDima Stepanov r = 0;
1112f5b22d06SDima Stepanov }
1113f5b22d06SDima Stepanov if (r) {
1114cba42d61SMichael Tokarev /* An error occurred. */
1115f5b22d06SDima Stepanov dev->log_enabled = false;
1116f5b22d06SDima Stepanov }
1117f5b22d06SDima Stepanov
1118f5b22d06SDima Stepanov return r;
11196e790746SPaolo Bonzini }
11206e790746SPaolo Bonzini
vhost_log_global_start(MemoryListener * listener,Error ** errp)11213688fec8SCédric Le Goater static bool vhost_log_global_start(MemoryListener *listener, Error **errp)
11226e790746SPaolo Bonzini {
11236e790746SPaolo Bonzini int r;
11246e790746SPaolo Bonzini
11256e790746SPaolo Bonzini r = vhost_migration_log(listener, true);
11266e790746SPaolo Bonzini if (r < 0) {
11276e790746SPaolo Bonzini abort();
11286e790746SPaolo Bonzini }
11293688fec8SCédric Le Goater return true;
11306e790746SPaolo Bonzini }
11316e790746SPaolo Bonzini
vhost_log_global_stop(MemoryListener * listener)11326e790746SPaolo Bonzini static void vhost_log_global_stop(MemoryListener *listener)
11336e790746SPaolo Bonzini {
11346e790746SPaolo Bonzini int r;
11356e790746SPaolo Bonzini
11366e790746SPaolo Bonzini r = vhost_migration_log(listener, false);
11376e790746SPaolo Bonzini if (r < 0) {
11386e790746SPaolo Bonzini abort();
11396e790746SPaolo Bonzini }
11406e790746SPaolo Bonzini }
11416e790746SPaolo Bonzini
vhost_log_start(MemoryListener * listener,MemoryRegionSection * section,int old,int new)11426e790746SPaolo Bonzini static void vhost_log_start(MemoryListener *listener,
1143b2dfd71cSPaolo Bonzini MemoryRegionSection *section,
1144b2dfd71cSPaolo Bonzini int old, int new)
11456e790746SPaolo Bonzini {
11466e790746SPaolo Bonzini /* FIXME: implement */
11476e790746SPaolo Bonzini }
11486e790746SPaolo Bonzini
vhost_log_stop(MemoryListener * listener,MemoryRegionSection * section,int old,int new)11496e790746SPaolo Bonzini static void vhost_log_stop(MemoryListener *listener,
1150b2dfd71cSPaolo Bonzini MemoryRegionSection *section,
1151b2dfd71cSPaolo Bonzini int old, int new)
11526e790746SPaolo Bonzini {
11536e790746SPaolo Bonzini /* FIXME: implement */
11546e790746SPaolo Bonzini }
11556e790746SPaolo Bonzini
115646f70ff1SGreg Kurz /* The vhost driver natively knows how to handle the vrings of
115746f70ff1SGreg Kurz * non-cross-endian legacy devices and modern devices. Only legacy devices
115846f70ff1SGreg Kurz * exposed to a bi-endian guest may require the vhost driver to use a
115946f70ff1SGreg Kurz * specific endianness.
116046f70ff1SGreg Kurz */
vhost_needs_vring_endian(VirtIODevice * vdev)1161a122ab24SGreg Kurz static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
1162a122ab24SGreg Kurz {
1163e5848123SGreg Kurz if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1164e5848123SGreg Kurz return false;
1165e5848123SGreg Kurz }
1166e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN
116746f70ff1SGreg Kurz return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
1168a122ab24SGreg Kurz #else
116946f70ff1SGreg Kurz return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
1170a122ab24SGreg Kurz #endif
1171a122ab24SGreg Kurz }
1172a122ab24SGreg Kurz
vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev * dev,bool is_big_endian,int vhost_vq_index)117304b7a152SGreg Kurz static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
117404b7a152SGreg Kurz bool is_big_endian,
117504b7a152SGreg Kurz int vhost_vq_index)
117604b7a152SGreg Kurz {
11775d33ae4bSRoman Kagan int r;
117804b7a152SGreg Kurz struct vhost_vring_state s = {
117904b7a152SGreg Kurz .index = vhost_vq_index,
118004b7a152SGreg Kurz .num = is_big_endian
118104b7a152SGreg Kurz };
118204b7a152SGreg Kurz
11835d33ae4bSRoman Kagan r = dev->vhost_ops->vhost_set_vring_endian(dev, &s);
11845d33ae4bSRoman Kagan if (r < 0) {
11855d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_endian failed");
118604b7a152SGreg Kurz }
11875d33ae4bSRoman Kagan return r;
118804b7a152SGreg Kurz }
118904b7a152SGreg Kurz
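/*
 * Translate a guest physical address to the backend's userspace address
 * using the vhost memory table.  *len is set to the number of bytes left
 * up to the end of the containing region; returns -EFAULT when no region
 * covers the address.
 */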
vhost_memory_region_lookup(struct vhost_dev * hdev,uint64_t gpa,uint64_t * uaddr,uint64_t * len)1190c471ad0eSJason Wang static int vhost_memory_region_lookup(struct vhost_dev *hdev,
1191c471ad0eSJason Wang uint64_t gpa, uint64_t *uaddr,
1192c471ad0eSJason Wang uint64_t *len)
1193c471ad0eSJason Wang {
1194c471ad0eSJason Wang int i;
1195c471ad0eSJason Wang
1196c471ad0eSJason Wang for (i = 0; i < hdev->mem->nregions; i++) {
1197c471ad0eSJason Wang struct vhost_memory_region *reg = hdev->mem->regions + i;
1198c471ad0eSJason Wang
1199c471ad0eSJason Wang if (gpa >= reg->guest_phys_addr &&
1200c471ad0eSJason Wang reg->guest_phys_addr + reg->memory_size > gpa) {
1201c471ad0eSJason Wang *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
1202c471ad0eSJason Wang *len = reg->guest_phys_addr + reg->memory_size - gpa;
1203c471ad0eSJason Wang return 0;
1204c471ad0eSJason Wang }
1205c471ad0eSJason Wang }
1206c471ad0eSJason Wang
1207c471ad0eSJason Wang return -EFAULT;
1208c471ad0eSJason Wang }
1209c471ad0eSJason Wang
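/*
 * Handle an IOTLB miss reported by the backend: resolve the IOVA through
 * the device's DMA address space, translate the result to a backend
 * userspace address and push the mapping back with
 * vhost_backend_update_device_iotlb().
 */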
vhost_device_iotlb_miss(struct vhost_dev * dev,uint64_t iova,int write)1210fc58bd0dSMaxime Coquelin int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
1211c471ad0eSJason Wang {
1212c471ad0eSJason Wang IOMMUTLBEntry iotlb;
1213c471ad0eSJason Wang uint64_t uaddr, len;
1214fc58bd0dSMaxime Coquelin int ret = -EFAULT;
1215c471ad0eSJason Wang
12167a064bccSDr. David Alan Gilbert RCU_READ_LOCK_GUARD();
1217c471ad0eSJason Wang
1218ffcbbe72SPeter Xu trace_vhost_iotlb_miss(dev, 1);
1219ffcbbe72SPeter Xu
1220c471ad0eSJason Wang iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
12217446eb07SPeter Maydell iova, write,
12227446eb07SPeter Maydell MEMTXATTRS_UNSPECIFIED);
1223c471ad0eSJason Wang if (iotlb.target_as != NULL) {
1224fc58bd0dSMaxime Coquelin ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
1225fc58bd0dSMaxime Coquelin &uaddr, &len);
1226fc58bd0dSMaxime Coquelin if (ret) {
1227ffcbbe72SPeter Xu trace_vhost_iotlb_miss(dev, 3);
1228c471ad0eSJason Wang error_report("Failed to look up the translated address "
1229c471ad0eSJason Wang "%"PRIx64, iotlb.translated_addr);
1230c471ad0eSJason Wang goto out;
1231c471ad0eSJason Wang }
1232c471ad0eSJason Wang
1233c471ad0eSJason Wang len = MIN(iotlb.addr_mask + 1, len);
1234c471ad0eSJason Wang iova = iova & ~iotlb.addr_mask;
1235c471ad0eSJason Wang
1236020e571bSMaxime Coquelin ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
1237fc58bd0dSMaxime Coquelin len, iotlb.perm);
1238fc58bd0dSMaxime Coquelin if (ret) {
1239ffcbbe72SPeter Xu trace_vhost_iotlb_miss(dev, 4);
1240c471ad0eSJason Wang error_report("Failed to update device iotlb");
1241c471ad0eSJason Wang goto out;
1242c471ad0eSJason Wang }
1243c471ad0eSJason Wang }
1244ffcbbe72SPeter Xu
1245ffcbbe72SPeter Xu trace_vhost_iotlb_miss(dev, 2);
1246ffcbbe72SPeter Xu
1247c471ad0eSJason Wang out:
1248fc58bd0dSMaxime Coquelin return ret;
1249c471ad0eSJason Wang }
1250c471ad0eSJason Wang
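/*
 * Hand one virtqueue over to the backend: program the ring size and base
 * index, map the descriptor/avail/used rings into the backend, set the
 * ring addresses and wire up the kick eventfd.  A queue whose descriptor
 * address is still 0 is not ready yet and is skipped.
 */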
vhost_virtqueue_start(struct vhost_dev * dev,struct VirtIODevice * vdev,struct vhost_virtqueue * vq,unsigned idx)1251ff48b628SKangjie Xu int vhost_virtqueue_start(struct vhost_dev *dev,
12526e790746SPaolo Bonzini struct VirtIODevice *vdev,
12536e790746SPaolo Bonzini struct vhost_virtqueue *vq,
12546e790746SPaolo Bonzini unsigned idx)
12556e790746SPaolo Bonzini {
125696a3d98dSJason Wang BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
125796a3d98dSJason Wang VirtioBusState *vbus = VIRTIO_BUS(qbus);
125896a3d98dSJason Wang VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
12596e790746SPaolo Bonzini hwaddr s, l, a;
12606e790746SPaolo Bonzini int r;
126121e70425SMarc-André Lureau int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
12626e790746SPaolo Bonzini struct vhost_vring_file file = {
12636e790746SPaolo Bonzini .index = vhost_vq_index
12646e790746SPaolo Bonzini };
12656e790746SPaolo Bonzini struct vhost_vring_state state = {
12666e790746SPaolo Bonzini .index = vhost_vq_index
12676e790746SPaolo Bonzini };
12686e790746SPaolo Bonzini struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
12696e790746SPaolo Bonzini
1270fb20fbb7SJia He a = virtio_queue_get_desc_addr(vdev, idx);
1271fb20fbb7SJia He if (a == 0) {
1272fb20fbb7SJia He /* Queue might not be ready for start */
1273fb20fbb7SJia He return 0;
1274fb20fbb7SJia He }
12756e790746SPaolo Bonzini
12766e790746SPaolo Bonzini vq->num = state.num = virtio_queue_get_num(vdev, idx);
127721e70425SMarc-André Lureau r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
12786e790746SPaolo Bonzini if (r) {
12795d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
12805d33ae4bSRoman Kagan return r;
12816e790746SPaolo Bonzini }
12826e790746SPaolo Bonzini
12836e790746SPaolo Bonzini state.num = virtio_queue_get_last_avail_idx(vdev, idx);
128421e70425SMarc-André Lureau r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
12856e790746SPaolo Bonzini if (r) {
12865d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_base failed");
12875d33ae4bSRoman Kagan return r;
12886e790746SPaolo Bonzini }
12896e790746SPaolo Bonzini
1290e5848123SGreg Kurz if (vhost_needs_vring_endian(vdev)) {
129104b7a152SGreg Kurz r = vhost_virtqueue_set_vring_endian_legacy(dev,
129204b7a152SGreg Kurz virtio_is_big_endian(vdev),
129304b7a152SGreg Kurz vhost_vq_index);
129404b7a152SGreg Kurz if (r) {
12955d33ae4bSRoman Kagan return r;
129604b7a152SGreg Kurz }
129704b7a152SGreg Kurz }
129804b7a152SGreg Kurz
1299f1f9e6c5SGreg Kurz vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
1300fb20fbb7SJia He vq->desc_phys = a;
1301b897a474SPhilippe Mathieu-Daudé vq->desc = vhost_memory_map(dev, a, &l, false);
13026e790746SPaolo Bonzini if (!vq->desc || l != s) {
13036e790746SPaolo Bonzini r = -ENOMEM;
13046e790746SPaolo Bonzini goto fail_alloc_desc;
13056e790746SPaolo Bonzini }
1306f1f9e6c5SGreg Kurz vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
1307f1f9e6c5SGreg Kurz vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
1308b897a474SPhilippe Mathieu-Daudé vq->avail = vhost_memory_map(dev, a, &l, false);
13096e790746SPaolo Bonzini if (!vq->avail || l != s) {
13106e790746SPaolo Bonzini r = -ENOMEM;
13116e790746SPaolo Bonzini goto fail_alloc_avail;
13126e790746SPaolo Bonzini }
13136e790746SPaolo Bonzini vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
13146e790746SPaolo Bonzini vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
1315b897a474SPhilippe Mathieu-Daudé vq->used = vhost_memory_map(dev, a, &l, true);
13166e790746SPaolo Bonzini if (!vq->used || l != s) {
13176e790746SPaolo Bonzini r = -ENOMEM;
13186e790746SPaolo Bonzini goto fail_alloc_used;
13196e790746SPaolo Bonzini }
13206e790746SPaolo Bonzini
13216e790746SPaolo Bonzini r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
13226e790746SPaolo Bonzini if (r < 0) {
13236e790746SPaolo Bonzini goto fail_alloc;
13246e790746SPaolo Bonzini }
13256e790746SPaolo Bonzini
13266e790746SPaolo Bonzini file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
132721e70425SMarc-André Lureau r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
13286e790746SPaolo Bonzini if (r) {
13295d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_kick failed");
13306e790746SPaolo Bonzini goto fail_kick;
13316e790746SPaolo Bonzini }
13326e790746SPaolo Bonzini
13336e790746SPaolo Bonzini /* Clear and discard previous events if any. */
13346e790746SPaolo Bonzini event_notifier_test_and_clear(&vq->masked_notifier);
13356e790746SPaolo Bonzini
13365669655aSVictor Kaplansky /* Init vring in unmasked state, unless guest_notifier_mask
13375669655aSVictor Kaplansky * will do it later.
13385669655aSVictor Kaplansky */
13395669655aSVictor Kaplansky if (!vdev->use_guest_notifier_mask) {
13405669655aSVictor Kaplansky /* TODO: check and handle errors. */
13415669655aSVictor Kaplansky vhost_virtqueue_mask(dev, vdev, idx, false);
13425669655aSVictor Kaplansky }
13435669655aSVictor Kaplansky
134496a3d98dSJason Wang if (k->query_guest_notifiers &&
134596a3d98dSJason Wang k->query_guest_notifiers(qbus->parent) &&
134696a3d98dSJason Wang virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
134796a3d98dSJason Wang file.fd = -1;
134896a3d98dSJason Wang r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
134996a3d98dSJason Wang if (r) {
135096a3d98dSJason Wang goto fail_vector;
135196a3d98dSJason Wang }
135296a3d98dSJason Wang }
135396a3d98dSJason Wang
13546e790746SPaolo Bonzini return 0;
13556e790746SPaolo Bonzini
135696a3d98dSJason Wang fail_vector:
13576e790746SPaolo Bonzini fail_kick:
13586e790746SPaolo Bonzini fail_alloc:
1359c471ad0eSJason Wang vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
13606e790746SPaolo Bonzini 0, 0);
13616e790746SPaolo Bonzini fail_alloc_used:
1362c471ad0eSJason Wang vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
13636e790746SPaolo Bonzini 0, 0);
13646e790746SPaolo Bonzini fail_alloc_avail:
1365c471ad0eSJason Wang vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
13666e790746SPaolo Bonzini 0, 0);
13676e790746SPaolo Bonzini fail_alloc_desc:
13686e790746SPaolo Bonzini return r;
13696e790746SPaolo Bonzini }
13706e790746SPaolo Bonzini
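/*
 * Take a virtqueue back from the backend: fetch the last avail index so
 * virtio can resume from it, restore native vring endianness for legacy
 * cross-endian setups, and unmap the rings mapped by
 * vhost_virtqueue_start().
 */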
vhost_virtqueue_stop(struct vhost_dev * dev,struct VirtIODevice * vdev,struct vhost_virtqueue * vq,unsigned idx)1371e1f101d9SKangjie Xu void vhost_virtqueue_stop(struct vhost_dev *dev,
13726e790746SPaolo Bonzini struct VirtIODevice *vdev,
13736e790746SPaolo Bonzini struct vhost_virtqueue *vq,
13746e790746SPaolo Bonzini unsigned idx)
13756e790746SPaolo Bonzini {
137621e70425SMarc-André Lureau int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
13776e790746SPaolo Bonzini struct vhost_vring_state state = {
137804b7a152SGreg Kurz .index = vhost_vq_index,
13796e790746SPaolo Bonzini };
13806e790746SPaolo Bonzini int r;
1381fb20fbb7SJia He
1382fa4ae4beSYury Kotov if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
1383fb20fbb7SJia He /* Don't stop the virtqueue which might not have been started */
1384fb20fbb7SJia He return;
1385fb20fbb7SJia He }
1386fc57fd99SYuanhan Liu
138721e70425SMarc-André Lureau r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
13886e790746SPaolo Bonzini if (r < 0) {
13895d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
13902ae39a11SMaxime Coquelin /* Connection to the backend is broken, so let's sync internal
13912ae39a11SMaxime Coquelin * last avail idx to the device used idx.
13922ae39a11SMaxime Coquelin */
13932ae39a11SMaxime Coquelin virtio_queue_restore_last_avail_idx(vdev, idx);
1394499c5579SMarc-André Lureau } else {
13956e790746SPaolo Bonzini virtio_queue_set_last_avail_idx(vdev, idx, state.num);
1396499c5579SMarc-André Lureau }
13973561ba14SMichael S. Tsirkin virtio_queue_invalidate_signalled_used(vdev, idx);
1398aa94d521SYuri Benditovich virtio_queue_update_used_idx(vdev, idx);
139904b7a152SGreg Kurz
140004b7a152SGreg Kurz /* In the cross-endian case, we need to reset the vring endianness to
140104b7a152SGreg Kurz * native, since that is what legacy devices expect by default.
140204b7a152SGreg Kurz */
1403e5848123SGreg Kurz if (vhost_needs_vring_endian(vdev)) {
1404162bba7fSMarc-André Lureau vhost_virtqueue_set_vring_endian_legacy(dev,
140504b7a152SGreg Kurz !virtio_is_big_endian(vdev),
140604b7a152SGreg Kurz vhost_vq_index);
140704b7a152SGreg Kurz }
140804b7a152SGreg Kurz
1409c471ad0eSJason Wang vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
14106e790746SPaolo Bonzini 1, virtio_queue_get_used_size(vdev, idx));
1411c471ad0eSJason Wang vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
14126e790746SPaolo Bonzini 0, virtio_queue_get_avail_size(vdev, idx));
1413c471ad0eSJason Wang vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
14146e790746SPaolo Bonzini 0, virtio_queue_get_desc_size(vdev, idx));
14156e790746SPaolo Bonzini }
14166e790746SPaolo Bonzini
vhost_virtqueue_set_busyloop_timeout(struct vhost_dev * dev,int n,uint32_t timeout)141769e87b32SJason Wang static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
141869e87b32SJason Wang int n, uint32_t timeout)
141969e87b32SJason Wang {
142069e87b32SJason Wang int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
142169e87b32SJason Wang struct vhost_vring_state state = {
142269e87b32SJason Wang .index = vhost_vq_index,
142369e87b32SJason Wang .num = timeout,
142469e87b32SJason Wang };
142569e87b32SJason Wang int r;
142669e87b32SJason Wang
142769e87b32SJason Wang if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
142869e87b32SJason Wang return -EINVAL;
142969e87b32SJason Wang }
143069e87b32SJason Wang
143169e87b32SJason Wang r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
143269e87b32SJason Wang if (r) {
14335d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_busyloop_timeout failed");
143469e87b32SJason Wang return r;
143569e87b32SJason Wang }
143669e87b32SJason Wang
143769e87b32SJason Wang return 0;
143869e87b32SJason Wang }
143969e87b32SJason Wang
vhost_virtqueue_error_notifier(EventNotifier * n)1440ae50ae0bSKonstantin Khlebnikov static void vhost_virtqueue_error_notifier(EventNotifier *n)
1441ae50ae0bSKonstantin Khlebnikov {
1442ae50ae0bSKonstantin Khlebnikov struct vhost_virtqueue *vq = container_of(n, struct vhost_virtqueue,
1443ae50ae0bSKonstantin Khlebnikov error_notifier);
1444ae50ae0bSKonstantin Khlebnikov struct vhost_dev *dev = vq->dev;
1445ae50ae0bSKonstantin Khlebnikov int index = vq - dev->vqs;
1446ae50ae0bSKonstantin Khlebnikov
1447ae50ae0bSKonstantin Khlebnikov if (event_notifier_test_and_clear(n) && dev->vdev) {
1448ae50ae0bSKonstantin Khlebnikov VHOST_OPS_DEBUG(-EINVAL, "vhost vring error in virtqueue %d",
1449ae50ae0bSKonstantin Khlebnikov dev->vq_index + index);
1450ae50ae0bSKonstantin Khlebnikov }
1451ae50ae0bSKonstantin Khlebnikov }
1452ae50ae0bSKonstantin Khlebnikov
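/*
 * One-time, guest-independent virtqueue setup: create the masked call
 * notifier (and the error notifier when the backend supports
 * vhost_set_vring_err) and register their file descriptors with the
 * backend.
 */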
vhost_virtqueue_init(struct vhost_dev * dev,struct vhost_virtqueue * vq,int n)14536e790746SPaolo Bonzini static int vhost_virtqueue_init(struct vhost_dev *dev,
14546e790746SPaolo Bonzini struct vhost_virtqueue *vq, int n)
14556e790746SPaolo Bonzini {
145621e70425SMarc-André Lureau int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
14576e790746SPaolo Bonzini struct vhost_vring_file file = {
1458b931bfbfSChangchun Ouyang .index = vhost_vq_index,
14596e790746SPaolo Bonzini };
14606e790746SPaolo Bonzini int r = event_notifier_init(&vq->masked_notifier, 0);
14616e790746SPaolo Bonzini if (r < 0) {
14626e790746SPaolo Bonzini return r;
14636e790746SPaolo Bonzini }
14646e790746SPaolo Bonzini
1465ff5eb77bSSergio Lopez file.fd = event_notifier_get_wfd(&vq->masked_notifier);
146621e70425SMarc-André Lureau r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
14676e790746SPaolo Bonzini if (r) {
14685d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
14696e790746SPaolo Bonzini goto fail_call;
14706e790746SPaolo Bonzini }
1471c471ad0eSJason Wang
1472c471ad0eSJason Wang vq->dev = dev;
1473c471ad0eSJason Wang
1474ae50ae0bSKonstantin Khlebnikov if (dev->vhost_ops->vhost_set_vring_err) {
1475ae50ae0bSKonstantin Khlebnikov r = event_notifier_init(&vq->error_notifier, 0);
1476ae50ae0bSKonstantin Khlebnikov if (r < 0) {
1477ae50ae0bSKonstantin Khlebnikov goto fail_call;
1478ae50ae0bSKonstantin Khlebnikov }
1479ae50ae0bSKonstantin Khlebnikov
1480ae50ae0bSKonstantin Khlebnikov file.fd = event_notifier_get_fd(&vq->error_notifier);
1481ae50ae0bSKonstantin Khlebnikov r = dev->vhost_ops->vhost_set_vring_err(dev, &file);
1482ae50ae0bSKonstantin Khlebnikov if (r) {
1483ae50ae0bSKonstantin Khlebnikov VHOST_OPS_DEBUG(r, "vhost_set_vring_err failed");
1484ae50ae0bSKonstantin Khlebnikov goto fail_err;
1485ae50ae0bSKonstantin Khlebnikov }
1486ae50ae0bSKonstantin Khlebnikov
1487ae50ae0bSKonstantin Khlebnikov event_notifier_set_handler(&vq->error_notifier,
1488ae50ae0bSKonstantin Khlebnikov vhost_virtqueue_error_notifier);
1489ae50ae0bSKonstantin Khlebnikov }
1490ae50ae0bSKonstantin Khlebnikov
14916e790746SPaolo Bonzini return 0;
1492ae50ae0bSKonstantin Khlebnikov
1493ae50ae0bSKonstantin Khlebnikov fail_err:
1494ae50ae0bSKonstantin Khlebnikov event_notifier_cleanup(&vq->error_notifier);
14956e790746SPaolo Bonzini fail_call:
14966e790746SPaolo Bonzini event_notifier_cleanup(&vq->masked_notifier);
14976e790746SPaolo Bonzini return r;
14986e790746SPaolo Bonzini }
14996e790746SPaolo Bonzini
vhost_virtqueue_cleanup(struct vhost_virtqueue * vq)15006e790746SPaolo Bonzini static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
15016e790746SPaolo Bonzini {
15026e790746SPaolo Bonzini event_notifier_cleanup(&vq->masked_notifier);
1503ae50ae0bSKonstantin Khlebnikov if (vq->dev->vhost_ops->vhost_set_vring_err) {
1504ae50ae0bSKonstantin Khlebnikov event_notifier_set_handler(&vq->error_notifier, NULL);
1505ae50ae0bSKonstantin Khlebnikov event_notifier_cleanup(&vq->error_notifier);
1506ae50ae0bSKonstantin Khlebnikov }
15076e790746SPaolo Bonzini }
15086e790746SPaolo Bonzini
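/*
 * Initialize a vhost device: attach and initialize the backend, query its
 * feature and memslot limits, set up the per-virtqueue notifiers, install
 * the memory listeners and, when required, a migration blocker.  On any
 * failure the partially initialized state is undone via vhost_dev_cleanup().
 */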
vhost_dev_init(struct vhost_dev * hdev,void * opaque,VhostBackendType backend_type,uint32_t busyloop_timeout,Error ** errp)150981647a65SNikolay Nikolaev int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
1510a6945f22SKevin Wolf VhostBackendType backend_type, uint32_t busyloop_timeout,
1511a6945f22SKevin Wolf Error **errp)
15126e790746SPaolo Bonzini {
1513766aa0a6SDavid Hildenbrand unsigned int used, reserved, limit;
15146e790746SPaolo Bonzini uint64_t features;
1515a06db3ecSMarc-André Lureau int i, r, n_initialized_vqs = 0;
151681647a65SNikolay Nikolaev
1517c471ad0eSJason Wang hdev->vdev = NULL;
1518d2fc4402SMarc-André Lureau hdev->migration_blocker = NULL;
1519d2fc4402SMarc-André Lureau
15207cb8a9b9SMarc-André Lureau r = vhost_set_backend_type(hdev, backend_type);
15217cb8a9b9SMarc-André Lureau assert(r >= 0);
15221a1bfac9SNikolay Nikolaev
152328770ff9SKevin Wolf r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
15247cb8a9b9SMarc-André Lureau if (r < 0) {
15257cb8a9b9SMarc-André Lureau goto fail;
152624d1eb33SNikolay Nikolaev }
152724d1eb33SNikolay Nikolaev
152821e70425SMarc-André Lureau r = hdev->vhost_ops->vhost_set_owner(hdev);
15296e790746SPaolo Bonzini if (r < 0) {
1530f2a6e6c4SKevin Wolf error_setg_errno(errp, -r, "vhost_set_owner failed");
15316e790746SPaolo Bonzini goto fail;
15326e790746SPaolo Bonzini }
15336e790746SPaolo Bonzini
153421e70425SMarc-André Lureau r = hdev->vhost_ops->vhost_get_features(hdev, &features);
15356e790746SPaolo Bonzini if (r < 0) {
1536f2a6e6c4SKevin Wolf error_setg_errno(errp, -r, "vhost_get_features failed");
15376e790746SPaolo Bonzini goto fail;
15386e790746SPaolo Bonzini }
15396e790746SPaolo Bonzini
1540a2335113SDavid Hildenbrand limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
1541a2335113SDavid Hildenbrand if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
1542a2335113SDavid Hildenbrand memory_devices_memslot_auto_decision_active()) {
1543a2335113SDavid Hildenbrand error_setg(errp, "some memory device (like virtio-mem)"
1544a2335113SDavid Hildenbrand " decided how many memory slots to use based on the overall"
1545a2335113SDavid Hildenbrand " number of memory slots; this vhost backend would further"
1546a2335113SDavid Hildenbrand " restrict the overall number of memory slots");
1547a2335113SDavid Hildenbrand error_append_hint(errp, "Try plugging this vhost backend before"
1548a2335113SDavid Hildenbrand " plugging such memory devices.\n");
1549a2335113SDavid Hildenbrand r = -EINVAL;
1550a2335113SDavid Hildenbrand goto fail;
1551a2335113SDavid Hildenbrand }
1552a2335113SDavid Hildenbrand
1553a06db3ecSMarc-André Lureau for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
1554b931bfbfSChangchun Ouyang r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
15556e790746SPaolo Bonzini if (r < 0) {
1556a6945f22SKevin Wolf error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
1557a06db3ecSMarc-André Lureau goto fail;
15586e790746SPaolo Bonzini }
15596e790746SPaolo Bonzini }
156069e87b32SJason Wang
156169e87b32SJason Wang if (busyloop_timeout) {
156269e87b32SJason Wang for (i = 0; i < hdev->nvqs; ++i) {
156369e87b32SJason Wang r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
156469e87b32SJason Wang busyloop_timeout);
156569e87b32SJason Wang if (r < 0) {
1566f2a6e6c4SKevin Wolf error_setg_errno(errp, -r, "Failed to set busyloop timeout");
156769e87b32SJason Wang goto fail_busyloop;
156869e87b32SJason Wang }
156969e87b32SJason Wang }
157069e87b32SJason Wang }
157169e87b32SJason Wang
15726e790746SPaolo Bonzini hdev->features = features;
15736e790746SPaolo Bonzini
15746e790746SPaolo Bonzini hdev->memory_listener = (MemoryListener) {
1575142518bdSPeter Xu .name = "vhost",
15766e790746SPaolo Bonzini .begin = vhost_begin,
15776e790746SPaolo Bonzini .commit = vhost_commit,
1578938eeb64SDr. David Alan Gilbert .region_add = vhost_region_addnop,
1579938eeb64SDr. David Alan Gilbert .region_nop = vhost_region_addnop,
15806e790746SPaolo Bonzini .log_start = vhost_log_start,
15816e790746SPaolo Bonzini .log_stop = vhost_log_stop,
15826e790746SPaolo Bonzini .log_sync = vhost_log_sync,
15836e790746SPaolo Bonzini .log_global_start = vhost_log_global_start,
15846e790746SPaolo Bonzini .log_global_stop = vhost_log_global_stop,
15858be0461dSIsaku Yamahata .priority = MEMORY_LISTENER_PRIORITY_DEV_BACKEND
15866e790746SPaolo Bonzini };
1587d2fc4402SMarc-André Lureau
1588375f74f4SJason Wang hdev->iommu_listener = (MemoryListener) {
1589142518bdSPeter Xu .name = "vhost-iommu",
1590375f74f4SJason Wang .region_add = vhost_iommu_region_add,
1591375f74f4SJason Wang .region_del = vhost_iommu_region_del,
1592375f74f4SJason Wang };
1593c471ad0eSJason Wang
1594d2fc4402SMarc-André Lureau if (hdev->migration_blocker == NULL) {
15959a2ba823SCornelia Huck if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
15967145872eSMichael S. Tsirkin error_setg(&hdev->migration_blocker,
15977145872eSMichael S. Tsirkin "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
1598648abbfbSMarc-André Lureau } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) {
159931190ed7SMarc-André Lureau error_setg(&hdev->migration_blocker,
160031190ed7SMarc-André Lureau "Migration disabled: failed to allocate shared memory");
1601d2fc4402SMarc-André Lureau }
1602d2fc4402SMarc-André Lureau }
1603d2fc4402SMarc-André Lureau
1604d2fc4402SMarc-André Lureau if (hdev->migration_blocker != NULL) {
160589415796SSteve Sistare r = migrate_add_blocker_normal(&hdev->migration_blocker, errp);
1606436c831aSMarkus Armbruster if (r < 0) {
1607fe44dc91SAshijeet Acharya goto fail_busyloop;
1608fe44dc91SAshijeet Acharya }
16097145872eSMichael S. Tsirkin }
1610d2fc4402SMarc-André Lureau
16116e790746SPaolo Bonzini hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
16126e790746SPaolo Bonzini hdev->n_mem_sections = 0;
16136e790746SPaolo Bonzini hdev->mem_sections = NULL;
16146e790746SPaolo Bonzini hdev->log = NULL;
16156e790746SPaolo Bonzini hdev->log_size = 0;
16166e790746SPaolo Bonzini hdev->log_enabled = false;
16176e790746SPaolo Bonzini hdev->started = false;
16186e790746SPaolo Bonzini memory_listener_register(&hdev->memory_listener, &address_space_memory);
16195be5f9beSMarc-André Lureau QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
16209e2a2a3eSJay Zhou
1621552b2522SDavid Hildenbrand /*
1622552b2522SDavid Hildenbrand * The listener we registered properly updated the corresponding counter.
1623552b2522SDavid Hildenbrand * So we can trust that these values are accurate.
1624552b2522SDavid Hildenbrand */
1625552b2522SDavid Hildenbrand if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
1626552b2522SDavid Hildenbrand hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
1627552b2522SDavid Hildenbrand used = used_shared_memslots;
1628552b2522SDavid Hildenbrand } else {
1629552b2522SDavid Hildenbrand used = used_memslots;
1630552b2522SDavid Hildenbrand }
1631766aa0a6SDavid Hildenbrand /*
1632766aa0a6SDavid Hildenbrand * We assume that all reserved memslots actually require a real memslot
1633766aa0a6SDavid Hildenbrand * in our vhost backend. This might not be true, for example, if the
1634766aa0a6SDavid Hildenbrand * memslot would be ROM. If ever relevant, we can optimize for that --
1635766aa0a6SDavid Hildenbrand * but we'll need additional information about the reservations.
1636766aa0a6SDavid Hildenbrand */
1637766aa0a6SDavid Hildenbrand reserved = memory_devices_get_reserved_memslots();
1638766aa0a6SDavid Hildenbrand if (used + reserved > limit) {
1639766aa0a6SDavid Hildenbrand error_setg(errp, "vhost backend memory slots limit (%d) is less"
1640766aa0a6SDavid Hildenbrand " than current number of used (%d) and reserved (%d)"
1641766aa0a6SDavid Hildenbrand " memory slots for memory devices.", limit, used, reserved);
1642f2a6e6c4SKevin Wolf r = -EINVAL;
16439e2a2a3eSJay Zhou goto fail_busyloop;
16449e2a2a3eSJay Zhou }
16459e2a2a3eSJay Zhou
16466e790746SPaolo Bonzini return 0;
1647a06db3ecSMarc-André Lureau
164869e87b32SJason Wang fail_busyloop:
16491d8d014eSStefan Hajnoczi if (busyloop_timeout) {
165069e87b32SJason Wang while (--i >= 0) {
165169e87b32SJason Wang vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
165269e87b32SJason Wang }
16531d8d014eSStefan Hajnoczi }
16546e790746SPaolo Bonzini fail:
1655a06db3ecSMarc-André Lureau hdev->nvqs = n_initialized_vqs;
1656a06db3ecSMarc-André Lureau vhost_dev_cleanup(hdev);
16576e790746SPaolo Bonzini return r;
16586e790746SPaolo Bonzini }
16596e790746SPaolo Bonzini
vhost_dev_cleanup(struct vhost_dev * hdev)16606e790746SPaolo Bonzini void vhost_dev_cleanup(struct vhost_dev *hdev)
16616e790746SPaolo Bonzini {
16626e790746SPaolo Bonzini int i;
1663e0547b59SMarc-André Lureau
1664a2761231SAlex Bennée trace_vhost_dev_cleanup(hdev);
1665a2761231SAlex Bennée
16666e790746SPaolo Bonzini for (i = 0; i < hdev->nvqs; ++i) {
16676e790746SPaolo Bonzini vhost_virtqueue_cleanup(hdev->vqs + i);
16686e790746SPaolo Bonzini }
16695be5f9beSMarc-André Lureau if (hdev->mem) {
16705be5f9beSMarc-André Lureau /* those are only safe after successful init */
16716e790746SPaolo Bonzini memory_listener_unregister(&hdev->memory_listener);
16725be5f9beSMarc-André Lureau QLIST_REMOVE(hdev, entry);
16735be5f9beSMarc-André Lureau }
1674c8a7fc51SSteve Sistare migrate_del_blocker(&hdev->migration_blocker);
16756e790746SPaolo Bonzini g_free(hdev->mem);
16766e790746SPaolo Bonzini g_free(hdev->mem_sections);
1677e0547b59SMarc-André Lureau if (hdev->vhost_ops) {
167824d1eb33SNikolay Nikolaev hdev->vhost_ops->vhost_backend_cleanup(hdev);
1679e0547b59SMarc-André Lureau }
16807b527247SMarc-André Lureau assert(!hdev->log);
1681e0547b59SMarc-André Lureau
1682e0547b59SMarc-André Lureau memset(hdev, 0, sizeof(struct vhost_dev));
16836e790746SPaolo Bonzini }
16846e790746SPaolo Bonzini
vhost_dev_disable_notifiers_nvqs(struct vhost_dev * hdev,VirtIODevice * vdev,unsigned int nvqs)16856166799fSzuoboqun void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
168692099aa4SLaurent Vivier VirtIODevice *vdev,
168792099aa4SLaurent Vivier unsigned int nvqs)
168892099aa4SLaurent Vivier {
168992099aa4SLaurent Vivier BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
169092099aa4SLaurent Vivier int i, r;
169192099aa4SLaurent Vivier
169292099aa4SLaurent Vivier /*
169392099aa4SLaurent Vivier * Batch all the host notifiers in a single transaction to avoid
169492099aa4SLaurent Vivier * quadratic time complexity in address_space_update_ioeventfds().
169592099aa4SLaurent Vivier */
169692099aa4SLaurent Vivier memory_region_transaction_begin();
169792099aa4SLaurent Vivier
169892099aa4SLaurent Vivier for (i = 0; i < nvqs; ++i) {
169992099aa4SLaurent Vivier r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
170092099aa4SLaurent Vivier false);
170192099aa4SLaurent Vivier if (r < 0) {
170292099aa4SLaurent Vivier error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
170392099aa4SLaurent Vivier }
170492099aa4SLaurent Vivier assert(r >= 0);
170592099aa4SLaurent Vivier }
170692099aa4SLaurent Vivier
170792099aa4SLaurent Vivier /*
170892099aa4SLaurent Vivier * The transaction expects the ioeventfds to be open when it
170992099aa4SLaurent Vivier * commits. Do it now, before the cleanup loop.
171092099aa4SLaurent Vivier */
171192099aa4SLaurent Vivier memory_region_transaction_commit();
171292099aa4SLaurent Vivier
171392099aa4SLaurent Vivier for (i = 0; i < nvqs; ++i) {
171492099aa4SLaurent Vivier virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
171592099aa4SLaurent Vivier }
171692099aa4SLaurent Vivier virtio_device_release_ioeventfd(vdev);
171792099aa4SLaurent Vivier }
171892099aa4SLaurent Vivier
17196e790746SPaolo Bonzini /* Stop processing guest IO notifications in qemu.
17206e790746SPaolo Bonzini * Start processing them in vhost in the kernel.
17216e790746SPaolo Bonzini */
vhost_dev_enable_notifiers(struct vhost_dev * hdev,VirtIODevice * vdev)17226e790746SPaolo Bonzini int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
17236e790746SPaolo Bonzini {
17241c819449SKONRAD Frederic BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
17258771589bSLongpeng int i, r;
17264afba631SMarc-André Lureau
1727310837deSPaolo Bonzini /* We will pass the notifiers to the kernel; make sure that QEMU
1728310837deSPaolo Bonzini * doesn't interfere.
1729310837deSPaolo Bonzini */
1730310837deSPaolo Bonzini r = virtio_device_grab_ioeventfd(vdev);
1731310837deSPaolo Bonzini if (r < 0) {
17324afba631SMarc-André Lureau error_report("binding does not support host notifiers");
17338771589bSLongpeng return r;
17346e790746SPaolo Bonzini }
17356e790746SPaolo Bonzini
17360fdc6b85SLongpeng /*
17370fdc6b85SLongpeng * Batch all the host notifiers in a single transaction to avoid
17380fdc6b85SLongpeng * quadratic time complexity in address_space_update_ioeventfds().
17390fdc6b85SLongpeng */
17400fdc6b85SLongpeng memory_region_transaction_begin();
17410fdc6b85SLongpeng
17426e790746SPaolo Bonzini for (i = 0; i < hdev->nvqs; ++i) {
1743b1f0a33dSCornelia Huck r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1744b1f0a33dSCornelia Huck true);
17456e790746SPaolo Bonzini if (r < 0) {
17464afba631SMarc-André Lureau error_report("vhost VQ %d notifier binding failed: %d", i, -r);
17470fdc6b85SLongpeng memory_region_transaction_commit();
174892099aa4SLaurent Vivier vhost_dev_disable_notifiers_nvqs(hdev, vdev, i);
17498771589bSLongpeng return r;
17506e790746SPaolo Bonzini }
17516e790746SPaolo Bonzini }
17526e790746SPaolo Bonzini
17530fdc6b85SLongpeng memory_region_transaction_commit();
17540fdc6b85SLongpeng
17556e790746SPaolo Bonzini return 0;
17566e790746SPaolo Bonzini }
17576e790746SPaolo Bonzini
17586e790746SPaolo Bonzini /* Stop processing guest IO notifications in vhost.
17596e790746SPaolo Bonzini * Start processing them in qemu.
17606e790746SPaolo Bonzini * This might actually run the qemu handlers right away,
17616e790746SPaolo Bonzini * so virtio in qemu must be completely set up when this is called.
17626e790746SPaolo Bonzini */
vhost_dev_disable_notifiers(struct vhost_dev * hdev,VirtIODevice * vdev)17636e790746SPaolo Bonzini void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
17646e790746SPaolo Bonzini {
176592099aa4SLaurent Vivier vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs);
17666e790746SPaolo Bonzini }
17676e790746SPaolo Bonzini
17686e790746SPaolo Bonzini /* Test and clear event pending status.
17696e790746SPaolo Bonzini * Should be called after unmask to avoid losing events.
17706e790746SPaolo Bonzini */
vhost_virtqueue_pending(struct vhost_dev * hdev,int n)17716e790746SPaolo Bonzini bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
17726e790746SPaolo Bonzini {
17736e790746SPaolo Bonzini struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
17746e790746SPaolo Bonzini assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
17756e790746SPaolo Bonzini return event_notifier_test_and_clear(&vq->masked_notifier);
17766e790746SPaolo Bonzini }
17776e790746SPaolo Bonzini
17786e790746SPaolo Bonzini /* Mask/unmask events from this vq. */
vhost_virtqueue_mask(struct vhost_dev * hdev,VirtIODevice * vdev,int n,bool mask)17796e790746SPaolo Bonzini void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
17806e790746SPaolo Bonzini bool mask)
17816e790746SPaolo Bonzini {
17826e790746SPaolo Bonzini struct VirtQueue *vvq = virtio_get_queue(vdev, n);
17836e790746SPaolo Bonzini int r, index = n - hdev->vq_index;
1784fc57fd99SYuanhan Liu struct vhost_vring_file file;
17856e790746SPaolo Bonzini
17868695de0fSMarc-André Lureau /* should only be called after backend is connected */
17878695de0fSMarc-André Lureau assert(hdev->vhost_ops);
17888695de0fSMarc-André Lureau
17896e790746SPaolo Bonzini if (mask) {
17905669655aSVictor Kaplansky assert(vdev->use_guest_notifier_mask);
1791ff5eb77bSSergio Lopez file.fd = event_notifier_get_wfd(&hdev->vqs[index].masked_notifier);
17926e790746SPaolo Bonzini } else {
1793ff5eb77bSSergio Lopez file.fd = event_notifier_get_wfd(virtio_queue_get_guest_notifier(vvq));
17946e790746SPaolo Bonzini }
1795fc57fd99SYuanhan Liu
179621e70425SMarc-André Lureau file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
179721e70425SMarc-André Lureau r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
1798162bba7fSMarc-André Lureau if (r < 0) {
1799f9a09ca3SCindy Lu error_report("vhost_set_vring_call failed %d", -r);
1800f9a09ca3SCindy Lu }
1801f9a09ca3SCindy Lu }
1802f9a09ca3SCindy Lu
vhost_config_pending(struct vhost_dev * hdev)1803f9a09ca3SCindy Lu bool vhost_config_pending(struct vhost_dev *hdev)
1804f9a09ca3SCindy Lu {
1805f9a09ca3SCindy Lu assert(hdev->vhost_ops);
1806f9a09ca3SCindy Lu if ((hdev->started == false) ||
1807f9a09ca3SCindy Lu (hdev->vhost_ops->vhost_set_config_call == NULL)) {
1808f9a09ca3SCindy Lu return false;
1809f9a09ca3SCindy Lu }
1810f9a09ca3SCindy Lu
1811f9a09ca3SCindy Lu EventNotifier *notifier =
1812f9a09ca3SCindy Lu &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
1813f9a09ca3SCindy Lu return event_notifier_test_and_clear(notifier);
1814f9a09ca3SCindy Lu }
1815f9a09ca3SCindy Lu
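/*
 * Mask or unmask the config-change interrupt: point the backend's config
 * call fd either at the masked notifier or at the device's config
 * notifier, mirroring what vhost_virtqueue_mask() does for vring calls.
 */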
vhost_config_mask(struct vhost_dev * hdev,VirtIODevice * vdev,bool mask)1816f9a09ca3SCindy Lu void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
1817f9a09ca3SCindy Lu {
1818f9a09ca3SCindy Lu int fd;
1819f9a09ca3SCindy Lu int r;
1820f9a09ca3SCindy Lu EventNotifier *notifier =
1821f9a09ca3SCindy Lu &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
1822f9a09ca3SCindy Lu EventNotifier *config_notifier = &vdev->config_notifier;
1823f9a09ca3SCindy Lu assert(hdev->vhost_ops);
1824f9a09ca3SCindy Lu
1825f9a09ca3SCindy Lu if ((hdev->started == false) ||
1826f9a09ca3SCindy Lu (hdev->vhost_ops->vhost_set_config_call == NULL)) {
1827f9a09ca3SCindy Lu return;
1828f9a09ca3SCindy Lu }
1829f9a09ca3SCindy Lu if (mask) {
1830f9a09ca3SCindy Lu assert(vdev->use_guest_notifier_mask);
1831f9a09ca3SCindy Lu fd = event_notifier_get_fd(notifier);
1832f9a09ca3SCindy Lu } else {
1833f9a09ca3SCindy Lu fd = event_notifier_get_fd(config_notifier);
1834f9a09ca3SCindy Lu }
1835f9a09ca3SCindy Lu r = hdev->vhost_ops->vhost_set_config_call(hdev, fd);
1836f9a09ca3SCindy Lu if (r < 0) {
1837f9a09ca3SCindy Lu error_report("vhost_set_config_call failed %d", -r);
1838f9a09ca3SCindy Lu }
1839f9a09ca3SCindy Lu }
1840f9a09ca3SCindy Lu
vhost_stop_config_intr(struct vhost_dev * dev)1841f9a09ca3SCindy Lu static void vhost_stop_config_intr(struct vhost_dev *dev)
1842f9a09ca3SCindy Lu {
1843f9a09ca3SCindy Lu int fd = -1;
1844f9a09ca3SCindy Lu assert(dev->vhost_ops);
1845f9a09ca3SCindy Lu if (dev->vhost_ops->vhost_set_config_call) {
1846f9a09ca3SCindy Lu dev->vhost_ops->vhost_set_config_call(dev, fd);
1847f9a09ca3SCindy Lu }
1848f9a09ca3SCindy Lu }
1849f9a09ca3SCindy Lu
vhost_start_config_intr(struct vhost_dev * dev)1850f9a09ca3SCindy Lu static void vhost_start_config_intr(struct vhost_dev *dev)
1851f9a09ca3SCindy Lu {
1852f9a09ca3SCindy Lu int r;
1853f9a09ca3SCindy Lu
1854f9a09ca3SCindy Lu assert(dev->vhost_ops);
1855f9a09ca3SCindy Lu int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
1856f9a09ca3SCindy Lu if (dev->vhost_ops->vhost_set_config_call) {
1857f9a09ca3SCindy Lu r = dev->vhost_ops->vhost_set_config_call(dev, fd);
1858f9a09ca3SCindy Lu if (!r) {
1859f9a09ca3SCindy Lu event_notifier_set(&dev->vdev->config_notifier);
1860f9a09ca3SCindy Lu }
1861162bba7fSMarc-André Lureau }
18626e790746SPaolo Bonzini }
18636e790746SPaolo Bonzini
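/*
 * Filter a proposed feature set against the backend: every bit listed in
 * @feature_bits that the backend did not advertise is cleared from
 * @features.
 */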
vhost_get_features(struct vhost_dev * hdev,const int * feature_bits,uint64_t features)18649a2ba823SCornelia Huck uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
18659a2ba823SCornelia Huck uint64_t features)
18662e6d46d7SNikolay Nikolaev {
18672e6d46d7SNikolay Nikolaev const int *bit = feature_bits;
18682e6d46d7SNikolay Nikolaev while (*bit != VHOST_INVALID_FEATURE_BIT) {
18699a2ba823SCornelia Huck uint64_t bit_mask = (1ULL << *bit);
18702e6d46d7SNikolay Nikolaev if (!(hdev->features & bit_mask)) {
18712e6d46d7SNikolay Nikolaev features &= ~bit_mask;
18722e6d46d7SNikolay Nikolaev }
18732e6d46d7SNikolay Nikolaev bit++;
18742e6d46d7SNikolay Nikolaev }
18752e6d46d7SNikolay Nikolaev return features;
18762e6d46d7SNikolay Nikolaev }
18772e6d46d7SNikolay Nikolaev
vhost_ack_features(struct vhost_dev * hdev,const int * feature_bits,uint64_t features)18782e6d46d7SNikolay Nikolaev void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
18799a2ba823SCornelia Huck uint64_t features)
18802e6d46d7SNikolay Nikolaev {
18812e6d46d7SNikolay Nikolaev const int *bit = feature_bits;
18822e6d46d7SNikolay Nikolaev while (*bit != VHOST_INVALID_FEATURE_BIT) {
18839a2ba823SCornelia Huck uint64_t bit_mask = (1ULL << *bit);
18842e6d46d7SNikolay Nikolaev if (features & bit_mask) {
18852e6d46d7SNikolay Nikolaev hdev->acked_features |= bit_mask;
18862e6d46d7SNikolay Nikolaev }
18872e6d46d7SNikolay Nikolaev bit++;
18882e6d46d7SNikolay Nikolaev }
18892e6d46d7SNikolay Nikolaev }
18902e6d46d7SNikolay Nikolaev
vhost_dev_get_config(struct vhost_dev * hdev,uint8_t * config,uint32_t config_len,Error ** errp)18914c3e257bSChangpeng Liu int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
189250de5138SKevin Wolf uint32_t config_len, Error **errp)
18934c3e257bSChangpeng Liu {
18944c3e257bSChangpeng Liu assert(hdev->vhost_ops);
18954c3e257bSChangpeng Liu
18964c3e257bSChangpeng Liu if (hdev->vhost_ops->vhost_get_config) {
189766647ed4SMarkus Armbruster return hdev->vhost_ops->vhost_get_config(hdev, config, config_len,
189866647ed4SMarkus Armbruster errp);
18994c3e257bSChangpeng Liu }
19004c3e257bSChangpeng Liu
190150de5138SKevin Wolf error_setg(errp, "vhost_get_config not implemented");
19025d33ae4bSRoman Kagan return -ENOSYS;
19034c3e257bSChangpeng Liu }
19044c3e257bSChangpeng Liu
vhost_dev_set_config(struct vhost_dev * hdev,const uint8_t * data,uint32_t offset,uint32_t size,uint32_t flags)19054c3e257bSChangpeng Liu int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
19064c3e257bSChangpeng Liu uint32_t offset, uint32_t size, uint32_t flags)
19074c3e257bSChangpeng Liu {
19084c3e257bSChangpeng Liu assert(hdev->vhost_ops);
19094c3e257bSChangpeng Liu
19104c3e257bSChangpeng Liu if (hdev->vhost_ops->vhost_set_config) {
19114c3e257bSChangpeng Liu return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
19124c3e257bSChangpeng Liu size, flags);
19134c3e257bSChangpeng Liu }
19144c3e257bSChangpeng Liu
19155d33ae4bSRoman Kagan return -ENOSYS;
19164c3e257bSChangpeng Liu }
19174c3e257bSChangpeng Liu
vhost_dev_set_config_notifier(struct vhost_dev * hdev,const VhostDevConfigOps * ops)19184c3e257bSChangpeng Liu void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
19194c3e257bSChangpeng Liu const VhostDevConfigOps *ops)
19204c3e257bSChangpeng Liu {
19214c3e257bSChangpeng Liu hdev->config_ops = ops;
19224c3e257bSChangpeng Liu }
19234c3e257bSChangpeng Liu
vhost_dev_free_inflight(struct vhost_inflight * inflight)19245ad204bfSXie Yongji void vhost_dev_free_inflight(struct vhost_inflight *inflight)
19255ad204bfSXie Yongji {
19260ac2e635SLi Feng if (inflight && inflight->addr) {
19275ad204bfSXie Yongji qemu_memfd_free(inflight->addr, inflight->size, inflight->fd);
19285ad204bfSXie Yongji inflight->addr = NULL;
19295ad204bfSXie Yongji inflight->fd = -1;
19305ad204bfSXie Yongji }
19315ad204bfSXie Yongji }
19325ad204bfSXie Yongji
vhost_dev_prepare_inflight(struct vhost_dev * hdev,VirtIODevice * vdev)19331b0063b3SJin Yu int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
19341b0063b3SJin Yu {
19351b0063b3SJin Yu int r;
19361b0063b3SJin Yu
19371b0063b3SJin Yu if (hdev->vhost_ops->vhost_get_inflight_fd == NULL ||
19381b0063b3SJin Yu hdev->vhost_ops->vhost_set_inflight_fd == NULL) {
19391b0063b3SJin Yu return 0;
19401b0063b3SJin Yu }
19411b0063b3SJin Yu
19421b0063b3SJin Yu hdev->vdev = vdev;
19431b0063b3SJin Yu
19441b0063b3SJin Yu r = vhost_dev_set_features(hdev, hdev->log_enabled);
19451b0063b3SJin Yu if (r < 0) {
19465d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_dev_prepare_inflight failed");
19471b0063b3SJin Yu return r;
19481b0063b3SJin Yu }
19491b0063b3SJin Yu
19501b0063b3SJin Yu return 0;
19511b0063b3SJin Yu }
19521b0063b3SJin Yu
vhost_dev_set_inflight(struct vhost_dev * dev,struct vhost_inflight * inflight)19535ad204bfSXie Yongji int vhost_dev_set_inflight(struct vhost_dev *dev,
19545ad204bfSXie Yongji struct vhost_inflight *inflight)
19555ad204bfSXie Yongji {
19565ad204bfSXie Yongji int r;
19575ad204bfSXie Yongji
19585ad204bfSXie Yongji if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) {
19595ad204bfSXie Yongji r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
19605ad204bfSXie Yongji if (r) {
19615d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_inflight_fd failed");
19625d33ae4bSRoman Kagan return r;
19635ad204bfSXie Yongji }
19645ad204bfSXie Yongji }
19655ad204bfSXie Yongji
19665ad204bfSXie Yongji return 0;
19675ad204bfSXie Yongji }
19685ad204bfSXie Yongji
vhost_dev_get_inflight(struct vhost_dev * dev,uint16_t queue_size,struct vhost_inflight * inflight)19695ad204bfSXie Yongji int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
19705ad204bfSXie Yongji struct vhost_inflight *inflight)
19715ad204bfSXie Yongji {
19725ad204bfSXie Yongji int r;
19735ad204bfSXie Yongji
19745ad204bfSXie Yongji if (dev->vhost_ops->vhost_get_inflight_fd) {
19755ad204bfSXie Yongji r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
19765ad204bfSXie Yongji if (r) {
19775d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_get_inflight_fd failed");
19785d33ae4bSRoman Kagan return r;
19795ad204bfSXie Yongji }
19805ad204bfSXie Yongji }
19815ad204bfSXie Yongji
19825ad204bfSXie Yongji return 0;
19835ad204bfSXie Yongji }
19845ad204bfSXie Yongji
vhost_dev_set_vring_enable(struct vhost_dev * hdev,int enable)19854daa5054SStefano Garzarella static int vhost_dev_set_vring_enable(struct vhost_dev *hdev, int enable)
19864daa5054SStefano Garzarella {
19874daa5054SStefano Garzarella if (!hdev->vhost_ops->vhost_set_vring_enable) {
19884daa5054SStefano Garzarella return 0;
19894daa5054SStefano Garzarella }
19904daa5054SStefano Garzarella
19914daa5054SStefano Garzarella /*
19924daa5054SStefano Garzarella * For vhost-user devices, if VHOST_USER_F_PROTOCOL_FEATURES has not
19934daa5054SStefano Garzarella * been negotiated, the rings start directly in the enabled state, and
19944daa5054SStefano Garzarella * .vhost_set_vring_enable callback will fail since
19954daa5054SStefano Garzarella * VHOST_USER_SET_VRING_ENABLE is not supported.
19964daa5054SStefano Garzarella */
19974daa5054SStefano Garzarella if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER &&
19984daa5054SStefano Garzarella !virtio_has_feature(hdev->backend_features,
19994daa5054SStefano Garzarella VHOST_USER_F_PROTOCOL_FEATURES)) {
20004daa5054SStefano Garzarella return 0;
20014daa5054SStefano Garzarella }
20024daa5054SStefano Garzarella
20034daa5054SStefano Garzarella return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable);
20044daa5054SStefano Garzarella }
20054daa5054SStefano Garzarella
20062c66de61SKevin Wolf /*
20072c66de61SKevin Wolf * Host notifiers must be enabled at this point.
20082c66de61SKevin Wolf *
20092c66de61SKevin Wolf * If @vrings is true, this function will enable all vrings before starting the
20102c66de61SKevin Wolf * device. If it is false, the vring initialization is left to be done by the
20112c66de61SKevin Wolf * caller.
20122c66de61SKevin Wolf */
vhost_dev_start(struct vhost_dev * hdev,VirtIODevice * vdev,bool vrings)20134daa5054SStefano Garzarella int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
20146e790746SPaolo Bonzini {
20156e790746SPaolo Bonzini int i, r;
20166e790746SPaolo Bonzini
20178695de0fSMarc-André Lureau /* should only be called after backend is connected */
20188695de0fSMarc-André Lureau assert(hdev->vhost_ops);
20198695de0fSMarc-André Lureau
20204daa5054SStefano Garzarella trace_vhost_dev_start(hdev, vdev->name, vrings);
2021a2761231SAlex Bennée
2022c255488dSJonah Palmer vdev->vhost_started = true;
20236e790746SPaolo Bonzini hdev->started = true;
2024c471ad0eSJason Wang hdev->vdev = vdev;
20256e790746SPaolo Bonzini
20266e790746SPaolo Bonzini r = vhost_dev_set_features(hdev, hdev->log_enabled);
20276e790746SPaolo Bonzini if (r < 0) {
20286e790746SPaolo Bonzini goto fail_features;
20296e790746SPaolo Bonzini }
2030c471ad0eSJason Wang
2031c471ad0eSJason Wang if (vhost_dev_has_iommu(hdev)) {
2032375f74f4SJason Wang memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
2033c471ad0eSJason Wang }
2034c471ad0eSJason Wang
203521e70425SMarc-André Lureau r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
20366e790746SPaolo Bonzini if (r < 0) {
20375d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
20386e790746SPaolo Bonzini goto fail_mem;
20396e790746SPaolo Bonzini }
20406e790746SPaolo Bonzini for (i = 0; i < hdev->nvqs; ++i) {
20416e790746SPaolo Bonzini r = vhost_virtqueue_start(hdev,
20426e790746SPaolo Bonzini vdev,
20436e790746SPaolo Bonzini hdev->vqs + i,
20446e790746SPaolo Bonzini hdev->vq_index + i);
20456e790746SPaolo Bonzini if (r < 0) {
20466e790746SPaolo Bonzini goto fail_vq;
20476e790746SPaolo Bonzini }
20486e790746SPaolo Bonzini }
20496e790746SPaolo Bonzini
2050f9a09ca3SCindy Lu r = event_notifier_init(
2051f9a09ca3SCindy Lu &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0);
2052f9a09ca3SCindy Lu if (r < 0) {
205377ece20bSPrasad Pandit VHOST_OPS_DEBUG(r, "event_notifier_init failed");
205477ece20bSPrasad Pandit goto fail_vq;
2055f9a09ca3SCindy Lu }
2056f9a09ca3SCindy Lu event_notifier_test_and_clear(
2057f9a09ca3SCindy Lu &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
2058f9a09ca3SCindy Lu if (!vdev->use_guest_notifier_mask) {
2059f9a09ca3SCindy Lu vhost_config_mask(hdev, vdev, true);
2060f9a09ca3SCindy Lu }
20616e790746SPaolo Bonzini if (hdev->log_enabled) {
2062e05ca820SMichael S. Tsirkin uint64_t log_base;
2063e05ca820SMichael S. Tsirkin
20646e790746SPaolo Bonzini hdev->log_size = vhost_get_log_size(hdev);
206551d59a64SSi-Wei Liu hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
206651d59a64SSi-Wei Liu hdev->log_size,
206715324404SMarc-André Lureau vhost_dev_log_is_shared(hdev));
2068309750faSJason Wang log_base = (uintptr_t)hdev->log->log;
2069c2bea314SMarc-André Lureau r = hdev->vhost_ops->vhost_set_log_base(hdev,
20709a78a5ddSMarc-André Lureau hdev->log_size ? log_base : 0,
20719a78a5ddSMarc-André Lureau hdev->log);
20726e790746SPaolo Bonzini if (r < 0) {
20735d33ae4bSRoman Kagan VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
20746e790746SPaolo Bonzini goto fail_log;
20756e790746SPaolo Bonzini }
2076c5cd7e5fSSi-Wei Liu vhost_dev_elect_mem_logger(hdev, true);
20776e790746SPaolo Bonzini }
20784daa5054SStefano Garzarella if (vrings) {
20794daa5054SStefano Garzarella r = vhost_dev_set_vring_enable(hdev, true);
20804daa5054SStefano Garzarella if (r) {
20814daa5054SStefano Garzarella goto fail_log;
20824daa5054SStefano Garzarella }
20834daa5054SStefano Garzarella }
2084ca71db43SCindy Lu if (hdev->vhost_ops->vhost_dev_start) {
2085ca71db43SCindy Lu r = hdev->vhost_ops->vhost_dev_start(hdev, true);
2086ca71db43SCindy Lu if (r) {
20874daa5054SStefano Garzarella goto fail_start;
2088ca71db43SCindy Lu }
2089ca71db43SCindy Lu }
20903f63b4c6SJason Wang if (vhost_dev_has_iommu(hdev) &&
20913f63b4c6SJason Wang hdev->vhost_ops->vhost_set_iotlb_callback) {
2092c471ad0eSJason Wang hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
2093c471ad0eSJason Wang
2094c471ad0eSJason Wang /* Update used ring information for IOTLB to work correctly;
2095c471ad0eSJason Wang * the vhost-kernel code requires this. */
2096c471ad0eSJason Wang for (i = 0; i < hdev->nvqs; ++i) {
2097c471ad0eSJason Wang struct vhost_virtqueue *vq = hdev->vqs + i;
2098*571bdc97SPrasad Pandit r = vhost_device_iotlb_miss(hdev, vq->used_phys, true);
2099*571bdc97SPrasad Pandit if (r) {
2100*571bdc97SPrasad Pandit goto fail_iotlb;
2101*571bdc97SPrasad Pandit }
2102c471ad0eSJason Wang }
2103c471ad0eSJason Wang }
2104f9a09ca3SCindy Lu vhost_start_config_intr(hdev);
21056e790746SPaolo Bonzini return 0;
2106*571bdc97SPrasad Pandit fail_iotlb:
2107*571bdc97SPrasad Pandit if (vhost_dev_has_iommu(hdev) &&
2108*571bdc97SPrasad Pandit hdev->vhost_ops->vhost_set_iotlb_callback) {
2109*571bdc97SPrasad Pandit hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
2110*571bdc97SPrasad Pandit }
2111*571bdc97SPrasad Pandit if (hdev->vhost_ops->vhost_dev_start) {
2112*571bdc97SPrasad Pandit hdev->vhost_ops->vhost_dev_start(hdev, false);
2113*571bdc97SPrasad Pandit }
21144daa5054SStefano Garzarella fail_start:
21154daa5054SStefano Garzarella if (vrings) {
21164daa5054SStefano Garzarella vhost_dev_set_vring_enable(hdev, false);
21174daa5054SStefano Garzarella }
21186e790746SPaolo Bonzini fail_log:
2119309750faSJason Wang vhost_log_put(hdev, false);
21206e790746SPaolo Bonzini fail_vq:
21216e790746SPaolo Bonzini while (--i >= 0) {
21226e790746SPaolo Bonzini vhost_virtqueue_stop(hdev,
21236e790746SPaolo Bonzini vdev,
21246e790746SPaolo Bonzini hdev->vqs + i,
21256e790746SPaolo Bonzini hdev->vq_index + i);
21266e790746SPaolo Bonzini }
2127c471ad0eSJason Wang
21286e790746SPaolo Bonzini fail_mem:
21291e3ffb34SPrasad Pandit if (vhost_dev_has_iommu(hdev)) {
21301e3ffb34SPrasad Pandit memory_listener_unregister(&hdev->iommu_listener);
21311e3ffb34SPrasad Pandit }
21326e790746SPaolo Bonzini fail_features:
2133c255488dSJonah Palmer vdev->vhost_started = false;
21346e790746SPaolo Bonzini hdev->started = false;
21356e790746SPaolo Bonzini return r;
21366e790746SPaolo Bonzini }
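/*
 * A minimal caller sketch (hypothetical device state "s", error handling
 * trimmed; it is up to each device how it wires this into its virtio status
 * handling):
 *
 *     r = vhost_dev_enable_notifiers(&s->dev, vdev);
 *     if (r == 0) {
 *         r = vhost_dev_start(&s->dev, vdev, true);
 *     }
 *     ...
 *     vhost_dev_stop(&s->dev, vdev, true);
 *     vhost_dev_disable_notifiers(&s->dev, vdev);
 */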
21376e790746SPaolo Bonzini
21386e790746SPaolo Bonzini /* Host notifiers must be enabled at this point. */
21394daa5054SStefano Garzarella void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
21406e790746SPaolo Bonzini {
21416e790746SPaolo Bonzini int i;
21426e790746SPaolo Bonzini
21438695de0fSMarc-André Lureau /* should only be called after backend is connected */
21448695de0fSMarc-André Lureau assert(hdev->vhost_ops);
2145f9a09ca3SCindy Lu event_notifier_test_and_clear(
2146f9a09ca3SCindy Lu &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
2147f9a09ca3SCindy Lu event_notifier_test_and_clear(&vdev->config_notifier);
214818f2971cSLi Feng event_notifier_cleanup(
214918f2971cSLi Feng &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
21508695de0fSMarc-André Lureau
21514daa5054SStefano Garzarella trace_vhost_dev_stop(hdev, vdev->name, vrings);
2152a2761231SAlex Bennée
2153ca71db43SCindy Lu if (hdev->vhost_ops->vhost_dev_start) {
2154ca71db43SCindy Lu hdev->vhost_ops->vhost_dev_start(hdev, false);
2155ca71db43SCindy Lu }
21564daa5054SStefano Garzarella if (vrings) {
21574daa5054SStefano Garzarella vhost_dev_set_vring_enable(hdev, false);
21584daa5054SStefano Garzarella }
21596e790746SPaolo Bonzini for (i = 0; i < hdev->nvqs; ++i) {
21606e790746SPaolo Bonzini vhost_virtqueue_stop(hdev,
21616e790746SPaolo Bonzini vdev,
21626e790746SPaolo Bonzini hdev->vqs + i,
21636e790746SPaolo Bonzini hdev->vq_index + i);
21646e790746SPaolo Bonzini }
2165c3716f26SEugenio Pérez if (hdev->vhost_ops->vhost_reset_status) {
2166c3716f26SEugenio Pérez hdev->vhost_ops->vhost_reset_status(hdev);
2167c3716f26SEugenio Pérez }
21686e790746SPaolo Bonzini
2169c471ad0eSJason Wang if (vhost_dev_has_iommu(hdev)) {
21703f63b4c6SJason Wang if (hdev->vhost_ops->vhost_set_iotlb_callback) {
2171c471ad0eSJason Wang hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
21723f63b4c6SJason Wang }
2173375f74f4SJason Wang memory_listener_unregister(&hdev->iommu_listener);
2174c471ad0eSJason Wang }
2175f9a09ca3SCindy Lu vhost_stop_config_intr(hdev);
2176309750faSJason Wang vhost_log_put(hdev, true);
21776e790746SPaolo Bonzini hdev->started = false;
2178c255488dSJonah Palmer vdev->vhost_started = false;
2179c471ad0eSJason Wang hdev->vdev = NULL;
21806e790746SPaolo Bonzini }
2181950d94baSMarc-André Lureau
2182950d94baSMarc-André Lureau int vhost_net_set_backend(struct vhost_dev *hdev,
2183950d94baSMarc-André Lureau struct vhost_vring_file *file)
2184950d94baSMarc-André Lureau {
2185950d94baSMarc-André Lureau if (hdev->vhost_ops->vhost_net_set_backend) {
2186950d94baSMarc-André Lureau return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
2187950d94baSMarc-André Lureau }
2188950d94baSMarc-André Lureau
21895d33ae4bSRoman Kagan return -ENOSYS;
2190950d94baSMarc-André Lureau }
2191c0c4f147SStefan Hajnoczi
2192c0c4f147SStefan Hajnoczi int vhost_reset_device(struct vhost_dev *hdev)
2193c0c4f147SStefan Hajnoczi {
2194c0c4f147SStefan Hajnoczi if (hdev->vhost_ops->vhost_reset_device) {
2195c0c4f147SStefan Hajnoczi return hdev->vhost_ops->vhost_reset_device(hdev);
2196c0c4f147SStefan Hajnoczi }
2197c0c4f147SStefan Hajnoczi
2198c0c4f147SStefan Hajnoczi return -ENOSYS;
2199c0c4f147SStefan Hajnoczi }
2200cda83adcSHanna Czenczek
2201cda83adcSHanna Czenczek bool vhost_supports_device_state(struct vhost_dev *dev)
2202cda83adcSHanna Czenczek {
2203cda83adcSHanna Czenczek if (dev->vhost_ops->vhost_supports_device_state) {
2204cda83adcSHanna Czenczek return dev->vhost_ops->vhost_supports_device_state(dev);
2205cda83adcSHanna Czenczek }
2206cda83adcSHanna Czenczek
2207cda83adcSHanna Czenczek return false;
2208cda83adcSHanna Czenczek }
2209cda83adcSHanna Czenczek
2210cda83adcSHanna Czenczek int vhost_set_device_state_fd(struct vhost_dev *dev,
2211cda83adcSHanna Czenczek VhostDeviceStateDirection direction,
2212cda83adcSHanna Czenczek VhostDeviceStatePhase phase,
2213cda83adcSHanna Czenczek int fd,
2214cda83adcSHanna Czenczek int *reply_fd,
2215cda83adcSHanna Czenczek Error **errp)
2216cda83adcSHanna Czenczek {
2217cda83adcSHanna Czenczek if (dev->vhost_ops->vhost_set_device_state_fd) {
2218cda83adcSHanna Czenczek return dev->vhost_ops->vhost_set_device_state_fd(dev, direction, phase,
2219cda83adcSHanna Czenczek fd, reply_fd, errp);
2220cda83adcSHanna Czenczek }
2221cda83adcSHanna Czenczek
2222cda83adcSHanna Czenczek error_setg(errp,
2223cda83adcSHanna Czenczek "vhost transport does not support migration state transfer");
2224cda83adcSHanna Czenczek return -ENOSYS;
2225cda83adcSHanna Czenczek }
2226cda83adcSHanna Czenczek
2227cda83adcSHanna Czenczek int vhost_check_device_state(struct vhost_dev *dev, Error **errp)
2228cda83adcSHanna Czenczek {
2229cda83adcSHanna Czenczek if (dev->vhost_ops->vhost_check_device_state) {
2230cda83adcSHanna Czenczek return dev->vhost_ops->vhost_check_device_state(dev, errp);
2231cda83adcSHanna Czenczek }
2232cda83adcSHanna Czenczek
2233cda83adcSHanna Czenczek error_setg(errp,
2234cda83adcSHanna Czenczek "vhost transport does not support migration state transfer");
2235cda83adcSHanna Czenczek return -ENOSYS;
2236cda83adcSHanna Czenczek }
22374a00d5d7SHanna Czenczek
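/*
 * Wire format produced by vhost_save_backend_state() and consumed by
 * vhost_load_backend_state() in the migration stream: a sequence of
 * (be32 length, <length> opaque bytes of back-end state) chunks, terminated
 * by a zero length.
 */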
22384a00d5d7SHanna Czenczek int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
22394a00d5d7SHanna Czenczek {
2240ff88dbecSZhao Liu ERRP_GUARD();
22414a00d5d7SHanna Czenczek /* Maximum chunk size in which to transfer the state */
22424a00d5d7SHanna Czenczek const size_t chunk_size = 1 * 1024 * 1024;
22434a00d5d7SHanna Czenczek g_autofree void *transfer_buf = NULL;
22444a00d5d7SHanna Czenczek g_autoptr(GError) g_err = NULL;
22454a00d5d7SHanna Czenczek int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
22464a00d5d7SHanna Czenczek int ret;
22474a00d5d7SHanna Czenczek
22484a00d5d7SHanna Czenczek /* [0] for reading (our end), [1] for writing (back-end's end) */
22494a00d5d7SHanna Czenczek if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
22504a00d5d7SHanna Czenczek error_setg(errp, "Failed to set up state transfer pipe: %s",
22514a00d5d7SHanna Czenczek g_err->message);
22524a00d5d7SHanna Czenczek ret = -EINVAL;
22534a00d5d7SHanna Czenczek goto fail;
22544a00d5d7SHanna Czenczek }
22554a00d5d7SHanna Czenczek
22564a00d5d7SHanna Czenczek read_fd = pipe_fds[0];
22574a00d5d7SHanna Czenczek write_fd = pipe_fds[1];
22584a00d5d7SHanna Czenczek
22594a00d5d7SHanna Czenczek /*
22604a00d5d7SHanna Czenczek * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
22614a00d5d7SHanna Czenczek * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
22624a00d5d7SHanna Czenczek * vhost-user, so just check that it is stopped at all.
22634a00d5d7SHanna Czenczek */
22644a00d5d7SHanna Czenczek assert(!dev->started);
22654a00d5d7SHanna Czenczek
22664a00d5d7SHanna Czenczek /* Transfer ownership of write_fd to the back-end */
22674a00d5d7SHanna Czenczek ret = vhost_set_device_state_fd(dev,
22684a00d5d7SHanna Czenczek VHOST_TRANSFER_STATE_DIRECTION_SAVE,
22694a00d5d7SHanna Czenczek VHOST_TRANSFER_STATE_PHASE_STOPPED,
22704a00d5d7SHanna Czenczek write_fd,
22714a00d5d7SHanna Czenczek &reply_fd,
22724a00d5d7SHanna Czenczek errp);
22734a00d5d7SHanna Czenczek if (ret < 0) {
22744a00d5d7SHanna Czenczek error_prepend(errp, "Failed to initiate state transfer: ");
22754a00d5d7SHanna Czenczek goto fail;
22764a00d5d7SHanna Czenczek }
22774a00d5d7SHanna Czenczek
22784a00d5d7SHanna Czenczek /* If the back-end wishes to use a different pipe, switch over */
22794a00d5d7SHanna Czenczek if (reply_fd >= 0) {
22804a00d5d7SHanna Czenczek close(read_fd);
22814a00d5d7SHanna Czenczek read_fd = reply_fd;
22824a00d5d7SHanna Czenczek }
22834a00d5d7SHanna Czenczek
22844a00d5d7SHanna Czenczek transfer_buf = g_malloc(chunk_size);
22854a00d5d7SHanna Czenczek
22864a00d5d7SHanna Czenczek while (true) {
22874a00d5d7SHanna Czenczek ssize_t read_ret;
22884a00d5d7SHanna Czenczek
22894a00d5d7SHanna Czenczek read_ret = RETRY_ON_EINTR(read(read_fd, transfer_buf, chunk_size));
22904a00d5d7SHanna Czenczek if (read_ret < 0) {
22914a00d5d7SHanna Czenczek ret = -errno;
22924a00d5d7SHanna Czenczek error_setg_errno(errp, -ret, "Failed to receive state");
22934a00d5d7SHanna Czenczek goto fail;
22944a00d5d7SHanna Czenczek }
22954a00d5d7SHanna Czenczek
22964a00d5d7SHanna Czenczek assert(read_ret <= chunk_size);
22974a00d5d7SHanna Czenczek qemu_put_be32(f, read_ret);
22984a00d5d7SHanna Czenczek
22994a00d5d7SHanna Czenczek if (read_ret == 0) {
23004a00d5d7SHanna Czenczek /* EOF */
23014a00d5d7SHanna Czenczek break;
23024a00d5d7SHanna Czenczek }
23034a00d5d7SHanna Czenczek
23044a00d5d7SHanna Czenczek qemu_put_buffer(f, transfer_buf, read_ret);
23054a00d5d7SHanna Czenczek }
23064a00d5d7SHanna Czenczek
23074a00d5d7SHanna Czenczek /*
23084a00d5d7SHanna Czenczek * The back-end will not really care, but be clean and close our end of the
23094a00d5d7SHanna Czenczek * pipe before inquiring the back-end about whether the transfer was successful
23104a00d5d7SHanna Czenczek */
23114a00d5d7SHanna Czenczek close(read_fd);
23124a00d5d7SHanna Czenczek read_fd = -1;
23134a00d5d7SHanna Czenczek
23144a00d5d7SHanna Czenczek /* Also, verify that the device is still stopped */
23154a00d5d7SHanna Czenczek assert(!dev->started);
23164a00d5d7SHanna Czenczek
23174a00d5d7SHanna Czenczek ret = vhost_check_device_state(dev, errp);
23184a00d5d7SHanna Czenczek if (ret < 0) {
23194a00d5d7SHanna Czenczek goto fail;
23204a00d5d7SHanna Czenczek }
23214a00d5d7SHanna Czenczek
23224a00d5d7SHanna Czenczek ret = 0;
23234a00d5d7SHanna Czenczek fail:
23244a00d5d7SHanna Czenczek if (read_fd >= 0) {
23254a00d5d7SHanna Czenczek close(read_fd);
23264a00d5d7SHanna Czenczek }
23274a00d5d7SHanna Czenczek
23284a00d5d7SHanna Czenczek return ret;
23294a00d5d7SHanna Czenczek }
23304a00d5d7SHanna Czenczek
23314a00d5d7SHanna Czenczek int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
23324a00d5d7SHanna Czenczek {
2333ff88dbecSZhao Liu ERRP_GUARD();
23344a00d5d7SHanna Czenczek size_t transfer_buf_size = 0;
23354a00d5d7SHanna Czenczek g_autofree void *transfer_buf = NULL;
23364a00d5d7SHanna Czenczek g_autoptr(GError) g_err = NULL;
23374a00d5d7SHanna Czenczek int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
23384a00d5d7SHanna Czenczek int ret;
23394a00d5d7SHanna Czenczek
23404a00d5d7SHanna Czenczek /* [0] for reading (back-end's end), [1] for writing (our end) */
23414a00d5d7SHanna Czenczek if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
23424a00d5d7SHanna Czenczek error_setg(errp, "Failed to set up state transfer pipe: %s",
23434a00d5d7SHanna Czenczek g_err->message);
23444a00d5d7SHanna Czenczek ret = -EINVAL;
23454a00d5d7SHanna Czenczek goto fail;
23464a00d5d7SHanna Czenczek }
23474a00d5d7SHanna Czenczek
23484a00d5d7SHanna Czenczek read_fd = pipe_fds[0];
23494a00d5d7SHanna Czenczek write_fd = pipe_fds[1];
23504a00d5d7SHanna Czenczek
23514a00d5d7SHanna Czenczek /*
23524a00d5d7SHanna Czenczek * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
23534a00d5d7SHanna Czenczek * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
23544a00d5d7SHanna Czenczek * vhost-user, so just check that it is stopped at all.
23554a00d5d7SHanna Czenczek */
23564a00d5d7SHanna Czenczek assert(!dev->started);
23574a00d5d7SHanna Czenczek
23584a00d5d7SHanna Czenczek /* Transfer ownership of read_fd to the back-end */
23594a00d5d7SHanna Czenczek ret = vhost_set_device_state_fd(dev,
23604a00d5d7SHanna Czenczek VHOST_TRANSFER_STATE_DIRECTION_LOAD,
23614a00d5d7SHanna Czenczek VHOST_TRANSFER_STATE_PHASE_STOPPED,
23624a00d5d7SHanna Czenczek read_fd,
23634a00d5d7SHanna Czenczek &reply_fd,
23644a00d5d7SHanna Czenczek errp);
23654a00d5d7SHanna Czenczek if (ret < 0) {
23664a00d5d7SHanna Czenczek error_prepend(errp, "Failed to initiate state transfer: ");
23674a00d5d7SHanna Czenczek goto fail;
23684a00d5d7SHanna Czenczek }
23694a00d5d7SHanna Czenczek
23704a00d5d7SHanna Czenczek /* If the back-end wishes to use a different pipe, switch over */
23714a00d5d7SHanna Czenczek if (reply_fd >= 0) {
23724a00d5d7SHanna Czenczek close(write_fd);
23734a00d5d7SHanna Czenczek write_fd = reply_fd;
23744a00d5d7SHanna Czenczek }
23754a00d5d7SHanna Czenczek
23764a00d5d7SHanna Czenczek while (true) {
23774a00d5d7SHanna Czenczek size_t this_chunk_size = qemu_get_be32(f);
23784a00d5d7SHanna Czenczek ssize_t write_ret;
23794a00d5d7SHanna Czenczek const uint8_t *transfer_pointer;
23804a00d5d7SHanna Czenczek
23814a00d5d7SHanna Czenczek if (this_chunk_size == 0) {
23824a00d5d7SHanna Czenczek /* End of state */
23834a00d5d7SHanna Czenczek break;
23844a00d5d7SHanna Czenczek }
23854a00d5d7SHanna Czenczek
23864a00d5d7SHanna Czenczek if (transfer_buf_size < this_chunk_size) {
23874a00d5d7SHanna Czenczek transfer_buf = g_realloc(transfer_buf, this_chunk_size);
23884a00d5d7SHanna Czenczek transfer_buf_size = this_chunk_size;
23894a00d5d7SHanna Czenczek }
23904a00d5d7SHanna Czenczek
23914a00d5d7SHanna Czenczek if (qemu_get_buffer(f, transfer_buf, this_chunk_size) <
23924a00d5d7SHanna Czenczek this_chunk_size)
23934a00d5d7SHanna Czenczek {
23944a00d5d7SHanna Czenczek error_setg(errp, "Failed to read state");
23954a00d5d7SHanna Czenczek ret = -EINVAL;
23964a00d5d7SHanna Czenczek goto fail;
23974a00d5d7SHanna Czenczek }
23984a00d5d7SHanna Czenczek
23994a00d5d7SHanna Czenczek transfer_pointer = transfer_buf;
24004a00d5d7SHanna Czenczek while (this_chunk_size > 0) {
24014a00d5d7SHanna Czenczek write_ret = RETRY_ON_EINTR(
24024a00d5d7SHanna Czenczek write(write_fd, transfer_pointer, this_chunk_size)
24034a00d5d7SHanna Czenczek );
24044a00d5d7SHanna Czenczek if (write_ret < 0) {
24054a00d5d7SHanna Czenczek ret = -errno;
24064a00d5d7SHanna Czenczek error_setg_errno(errp, -ret, "Failed to send state");
24074a00d5d7SHanna Czenczek goto fail;
24084a00d5d7SHanna Czenczek } else if (write_ret == 0) {
24094a00d5d7SHanna Czenczek error_setg(errp, "Failed to send state: Connection is closed");
24104a00d5d7SHanna Czenczek ret = -ECONNRESET;
24114a00d5d7SHanna Czenczek goto fail;
24124a00d5d7SHanna Czenczek }
24134a00d5d7SHanna Czenczek
24144a00d5d7SHanna Czenczek assert(write_ret <= this_chunk_size);
24154a00d5d7SHanna Czenczek this_chunk_size -= write_ret;
24164a00d5d7SHanna Czenczek transfer_pointer += write_ret;
24174a00d5d7SHanna Czenczek }
24184a00d5d7SHanna Czenczek }
24194a00d5d7SHanna Czenczek
24204a00d5d7SHanna Czenczek /*
24214a00d5d7SHanna Czenczek * Close our end, thus ending the transfer, before inquiring the back-end
24224a00d5d7SHanna Czenczek * about whether the transfer was successful
24234a00d5d7SHanna Czenczek */
24244a00d5d7SHanna Czenczek close(write_fd);
24254a00d5d7SHanna Czenczek write_fd = -1;
24264a00d5d7SHanna Czenczek
24274a00d5d7SHanna Czenczek /* Also, verify that the device is still stopped */
24284a00d5d7SHanna Czenczek assert(!dev->started);
24294a00d5d7SHanna Czenczek
24304a00d5d7SHanna Czenczek ret = vhost_check_device_state(dev, errp);
24314a00d5d7SHanna Czenczek if (ret < 0) {
24324a00d5d7SHanna Czenczek goto fail;
24334a00d5d7SHanna Czenczek }
24344a00d5d7SHanna Czenczek
24354a00d5d7SHanna Czenczek ret = 0;
24364a00d5d7SHanna Czenczek fail:
24374a00d5d7SHanna Czenczek if (write_fd >= 0) {
24384a00d5d7SHanna Czenczek close(write_fd);
24394a00d5d7SHanna Czenczek }
24404a00d5d7SHanna Czenczek
24414a00d5d7SHanna Czenczek return ret;
24424a00d5d7SHanna Czenczek }
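/*
 * A minimal usage sketch (hypothetical names "s", "f" and "local_err"; how a
 * device hooks these helpers into its migration code is device-specific):
 *
 *     Error *local_err = NULL;
 *     int ret;
 *
 *     if (vhost_supports_device_state(&s->vhost_dev)) {
 *         ret = vhost_save_backend_state(&s->vhost_dev, f, &local_err);
 *     }
 *     ...
 *     ret = vhost_load_backend_state(&s->vhost_dev, f, &local_err);
 *     if (ret < 0) {
 *         error_report_err(local_err);
 *     }
 */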
2443