xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision 06831001)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "io/channel-socket.h"
20 #include "sysemu/kvm.h"
21 #include "qemu/error-report.h"
22 #include "qemu/main-loop.h"
23 #include "qemu/sockets.h"
24 #include "sysemu/runstate.h"
25 #include "sysemu/cryptodev.h"
26 #include "migration/migration.h"
27 #include "migration/postcopy-ram.h"
28 #include "trace.h"
29 #include "exec/ramblock.h"
30 
31 #include <sys/ioctl.h>
32 #include <sys/socket.h>
33 #include <sys/un.h>
34 
35 #include "standard-headers/linux/vhost_types.h"
36 
37 #ifdef CONFIG_LINUX
38 #include <linux/userfaultfd.h>
39 #endif
40 
41 #define VHOST_MEMORY_BASELINE_NREGIONS    8
42 #define VHOST_USER_F_PROTOCOL_FEATURES 30
43 #define VHOST_USER_BACKEND_MAX_FDS     8
44 
45 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
46 #include "hw/ppc/spapr.h"
47 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
48 
49 #else
50 #define VHOST_USER_MAX_RAM_SLOTS 512
51 #endif
52 
53 /*
54  * Maximum size of virtio device config space
55  */
56 #define VHOST_USER_MAX_CONFIG_SIZE 256
57 
58 enum VhostUserProtocolFeature {
59     VHOST_USER_PROTOCOL_F_MQ = 0,
60     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
61     VHOST_USER_PROTOCOL_F_RARP = 2,
62     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
63     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
64     VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
65     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
66     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
67     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
68     VHOST_USER_PROTOCOL_F_CONFIG = 9,
69     VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
70     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
71     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
72     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
73     /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
74     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
75     VHOST_USER_PROTOCOL_F_STATUS = 16,
76     VHOST_USER_PROTOCOL_F_MAX
77 };
78 
79 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
80 
81 typedef enum VhostUserRequest {
82     VHOST_USER_NONE = 0,
83     VHOST_USER_GET_FEATURES = 1,
84     VHOST_USER_SET_FEATURES = 2,
85     VHOST_USER_SET_OWNER = 3,
86     VHOST_USER_RESET_OWNER = 4,
87     VHOST_USER_SET_MEM_TABLE = 5,
88     VHOST_USER_SET_LOG_BASE = 6,
89     VHOST_USER_SET_LOG_FD = 7,
90     VHOST_USER_SET_VRING_NUM = 8,
91     VHOST_USER_SET_VRING_ADDR = 9,
92     VHOST_USER_SET_VRING_BASE = 10,
93     VHOST_USER_GET_VRING_BASE = 11,
94     VHOST_USER_SET_VRING_KICK = 12,
95     VHOST_USER_SET_VRING_CALL = 13,
96     VHOST_USER_SET_VRING_ERR = 14,
97     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
98     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
99     VHOST_USER_GET_QUEUE_NUM = 17,
100     VHOST_USER_SET_VRING_ENABLE = 18,
101     VHOST_USER_SEND_RARP = 19,
102     VHOST_USER_NET_SET_MTU = 20,
103     VHOST_USER_SET_BACKEND_REQ_FD = 21,
104     VHOST_USER_IOTLB_MSG = 22,
105     VHOST_USER_SET_VRING_ENDIAN = 23,
106     VHOST_USER_GET_CONFIG = 24,
107     VHOST_USER_SET_CONFIG = 25,
108     VHOST_USER_CREATE_CRYPTO_SESSION = 26,
109     VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
110     VHOST_USER_POSTCOPY_ADVISE  = 28,
111     VHOST_USER_POSTCOPY_LISTEN  = 29,
112     VHOST_USER_POSTCOPY_END     = 30,
113     VHOST_USER_GET_INFLIGHT_FD = 31,
114     VHOST_USER_SET_INFLIGHT_FD = 32,
115     VHOST_USER_GPU_SET_SOCKET = 33,
116     VHOST_USER_RESET_DEVICE = 34,
117     /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
118     VHOST_USER_GET_MAX_MEM_SLOTS = 36,
119     VHOST_USER_ADD_MEM_REG = 37,
120     VHOST_USER_REM_MEM_REG = 38,
121     VHOST_USER_SET_STATUS = 39,
122     VHOST_USER_GET_STATUS = 40,
123     VHOST_USER_MAX
124 } VhostUserRequest;
125 
126 typedef enum VhostUserSlaveRequest {
127     VHOST_USER_BACKEND_NONE = 0,
128     VHOST_USER_BACKEND_IOTLB_MSG = 1,
129     VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
130     VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
131     VHOST_USER_BACKEND_MAX
132 }  VhostUserSlaveRequest;
133 
134 typedef struct VhostUserMemoryRegion {
135     uint64_t guest_phys_addr;
136     uint64_t memory_size;
137     uint64_t userspace_addr;
138     uint64_t mmap_offset;
139 } VhostUserMemoryRegion;
140 
141 typedef struct VhostUserMemory {
142     uint32_t nregions;
143     uint32_t padding;
144     VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
145 } VhostUserMemory;
146 
147 typedef struct VhostUserMemRegMsg {
148     uint64_t padding;
149     VhostUserMemoryRegion region;
150 } VhostUserMemRegMsg;
151 
152 typedef struct VhostUserLog {
153     uint64_t mmap_size;
154     uint64_t mmap_offset;
155 } VhostUserLog;
156 
157 typedef struct VhostUserConfig {
158     uint32_t offset;
159     uint32_t size;
160     uint32_t flags;
161     uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
162 } VhostUserConfig;
163 
164 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
165 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
166 
167 typedef struct VhostUserCryptoSession {
168     /* session id for success, -1 on errors */
169     int64_t session_id;
170     CryptoDevBackendSymSessionInfo session_setup_data;
171     uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
172     uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
173 } VhostUserCryptoSession;
174 
175 static VhostUserConfig c __attribute__ ((unused));
176 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
177                                    + sizeof(c.size) \
178                                    + sizeof(c.flags))
179 
180 typedef struct VhostUserVringArea {
181     uint64_t u64;
182     uint64_t size;
183     uint64_t offset;
184 } VhostUserVringArea;
185 
186 typedef struct VhostUserInflight {
187     uint64_t mmap_size;
188     uint64_t mmap_offset;
189     uint16_t num_queues;
190     uint16_t queue_size;
191 } VhostUserInflight;
192 
193 typedef struct {
194     VhostUserRequest request;
195 
196 #define VHOST_USER_VERSION_MASK     (0x3)
197 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
198 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
199     uint32_t flags;
200     uint32_t size; /* the following payload size */
201 } QEMU_PACKED VhostUserHeader;
202 
203 typedef union {
204 #define VHOST_USER_VRING_IDX_MASK   (0xff)
205 #define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
206         uint64_t u64;
207         struct vhost_vring_state state;
208         struct vhost_vring_addr addr;
209         VhostUserMemory memory;
210         VhostUserMemRegMsg mem_reg;
211         VhostUserLog log;
212         struct vhost_iotlb_msg iotlb;
213         VhostUserConfig config;
214         VhostUserCryptoSession session;
215         VhostUserVringArea area;
216         VhostUserInflight inflight;
217 } VhostUserPayload;
218 
219 typedef struct VhostUserMsg {
220     VhostUserHeader hdr;
221     VhostUserPayload payload;
222 } QEMU_PACKED VhostUserMsg;
223 
224 static VhostUserMsg m __attribute__ ((unused));
225 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
226 
227 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
228 
229 /* The version of the protocol we support */
230 #define VHOST_USER_VERSION    (0x1)
231 
232 struct vhost_user {
233     struct vhost_dev *dev;
234     /* Shared between vhost devs of the same virtio device */
235     VhostUserState *user;
236     QIOChannel *slave_ioc;
237     GSource *slave_src;
238     NotifierWithReturn postcopy_notifier;
239     struct PostCopyFD  postcopy_fd;
240     uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
241     /* Length of the region_rb and region_rb_offset arrays */
242     size_t             region_rb_len;
243     /* RAMBlock associated with a given region */
244     RAMBlock         **region_rb;
245     /*
246      * The offset from the start of the RAMBlock to the start of the
247      * vhost region.
248      */
249     ram_addr_t        *region_rb_offset;
250 
251     /* True once we've entered postcopy_listen */
252     bool               postcopy_listen;
253 
254     /* Our current regions */
255     int num_shadow_regions;
256     struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
257 };
258 
259 struct scrub_regions {
260     struct vhost_memory_region *region;
261     int reg_idx;
262     int fd_idx;
263 };
264 
265 static bool ioeventfd_enabled(void)
266 {
267     return !kvm_enabled() || kvm_eventfds_enabled();
268 }
269 
270 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
271 {
272     struct vhost_user *u = dev->opaque;
273     CharBackend *chr = u->user->chr;
274     uint8_t *p = (uint8_t *) msg;
275     int r, size = VHOST_USER_HDR_SIZE;
276 
277     r = qemu_chr_fe_read_all(chr, p, size);
278     if (r != size) {
279         int saved_errno = errno;
280         error_report("Failed to read msg header. Read %d instead of %d."
281                      " Original request %d.", r, size, msg->hdr.request);
282         return r < 0 ? -saved_errno : -EIO;
283     }
284 
285     /* validate received flags */
286     if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
287         error_report("Failed to read msg header."
288                 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
289                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
290         return -EPROTO;
291     }
292 
293     trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
294 
295     return 0;
296 }
297 
298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
299 {
300     struct vhost_user *u = dev->opaque;
301     CharBackend *chr = u->user->chr;
302     uint8_t *p = (uint8_t *) msg;
303     int r, size;
304 
305     r = vhost_user_read_header(dev, msg);
306     if (r < 0) {
307         return r;
308     }
309 
310     /* validate message size is sane */
311     if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
312         error_report("Failed to read msg header."
313                 " Size %d exceeds the maximum %zu.", msg->hdr.size,
314                 VHOST_USER_PAYLOAD_SIZE);
315         return -EPROTO;
316     }
317 
318     if (msg->hdr.size) {
319         p += VHOST_USER_HDR_SIZE;
320         size = msg->hdr.size;
321         r = qemu_chr_fe_read_all(chr, p, size);
322         if (r != size) {
323             int saved_errno = errno;
324             error_report("Failed to read msg payload."
325                          " Read %d instead of %d.", r, msg->hdr.size);
326             return r < 0 ? -saved_errno : -EIO;
327         }
328     }
329 
330     return 0;
331 }
332 
333 static int process_message_reply(struct vhost_dev *dev,
334                                  const VhostUserMsg *msg)
335 {
336     int ret;
337     VhostUserMsg msg_reply;
338 
339     if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
340         return 0;
341     }
342 
343     ret = vhost_user_read(dev, &msg_reply);
344     if (ret < 0) {
345         return ret;
346     }
347 
348     if (msg_reply.hdr.request != msg->hdr.request) {
349         error_report("Received unexpected msg type. "
350                      "Expected %d received %d",
351                      msg->hdr.request, msg_reply.hdr.request);
352         return -EPROTO;
353     }
354 
355     return msg_reply.payload.u64 ? -EIO : 0;
356 }
357 
358 static bool vhost_user_one_time_request(VhostUserRequest request)
359 {
360     switch (request) {
361     case VHOST_USER_SET_OWNER:
362     case VHOST_USER_RESET_OWNER:
363     case VHOST_USER_SET_MEM_TABLE:
364     case VHOST_USER_GET_QUEUE_NUM:
365     case VHOST_USER_NET_SET_MTU:
366     case VHOST_USER_ADD_MEM_REG:
367     case VHOST_USER_REM_MEM_REG:
368         return true;
369     default:
370         return false;
371     }
372 }
373 
374 /* most non-init callers ignore the error */
375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
376                             int *fds, int fd_num)
377 {
378     struct vhost_user *u = dev->opaque;
379     CharBackend *chr = u->user->chr;
380     int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
381 
382     /*
383      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
384      * we just need send it once in the first time. For later such
385      * request, we just ignore it.
386      */
387     if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
388         msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
389         return 0;
390     }
391 
392     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
393         error_report("Failed to set msg fds.");
394         return -EINVAL;
395     }
396 
397     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
398     if (ret != size) {
399         int saved_errno = errno;
400         error_report("Failed to write msg."
401                      " Wrote %d instead of %d.", ret, size);
402         return ret < 0 ? -saved_errno : -EIO;
403     }
404 
405     trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
406 
407     return 0;
408 }
409 
410 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
411 {
412     VhostUserMsg msg = {
413         .hdr.request = VHOST_USER_GPU_SET_SOCKET,
414         .hdr.flags = VHOST_USER_VERSION,
415     };
416 
417     return vhost_user_write(dev, &msg, &fd, 1);
418 }
419 
420 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
421                                    struct vhost_log *log)
422 {
423     int fds[VHOST_USER_MAX_RAM_SLOTS];
424     size_t fd_num = 0;
425     bool shmfd = virtio_has_feature(dev->protocol_features,
426                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
427     int ret;
428     VhostUserMsg msg = {
429         .hdr.request = VHOST_USER_SET_LOG_BASE,
430         .hdr.flags = VHOST_USER_VERSION,
431         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
432         .payload.log.mmap_offset = 0,
433         .hdr.size = sizeof(msg.payload.log),
434     };
435 
436     /* Send only once with first queue pair */
437     if (dev->vq_index != 0) {
438         return 0;
439     }
440 
441     if (shmfd && log->fd != -1) {
442         fds[fd_num++] = log->fd;
443     }
444 
445     ret = vhost_user_write(dev, &msg, fds, fd_num);
446     if (ret < 0) {
447         return ret;
448     }
449 
450     if (shmfd) {
451         msg.hdr.size = 0;
452         ret = vhost_user_read(dev, &msg);
453         if (ret < 0) {
454             return ret;
455         }
456 
457         if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
458             error_report("Received unexpected msg type. "
459                          "Expected %d received %d",
460                          VHOST_USER_SET_LOG_BASE, msg.hdr.request);
461             return -EPROTO;
462         }
463     }
464 
465     return 0;
466 }
467 
468 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
469                                             int *fd)
470 {
471     MemoryRegion *mr;
472 
473     assert((uintptr_t)addr == addr);
474     mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
475     *fd = memory_region_get_fd(mr);
476     *offset += mr->ram_block->fd_offset;
477 
478     return mr;
479 }
480 
481 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
482                                        struct vhost_memory_region *src,
483                                        uint64_t mmap_offset)
484 {
485     assert(src != NULL && dst != NULL);
486     dst->userspace_addr = src->userspace_addr;
487     dst->memory_size = src->memory_size;
488     dst->guest_phys_addr = src->guest_phys_addr;
489     dst->mmap_offset = mmap_offset;
490 }
491 
492 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
493                                              struct vhost_dev *dev,
494                                              VhostUserMsg *msg,
495                                              int *fds, size_t *fd_num,
496                                              bool track_ramblocks)
497 {
498     int i, fd;
499     ram_addr_t offset;
500     MemoryRegion *mr;
501     struct vhost_memory_region *reg;
502     VhostUserMemoryRegion region_buffer;
503 
504     msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
505 
506     for (i = 0; i < dev->mem->nregions; ++i) {
507         reg = dev->mem->regions + i;
508 
509         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
510         if (fd > 0) {
511             if (track_ramblocks) {
512                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
513                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
514                                                       reg->memory_size,
515                                                       reg->guest_phys_addr,
516                                                       reg->userspace_addr,
517                                                       offset);
518                 u->region_rb_offset[i] = offset;
519                 u->region_rb[i] = mr->ram_block;
520             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
521                 error_report("Failed preparing vhost-user memory table msg");
522                 return -ENOBUFS;
523             }
524             vhost_user_fill_msg_region(&region_buffer, reg, offset);
525             msg->payload.memory.regions[*fd_num] = region_buffer;
526             fds[(*fd_num)++] = fd;
527         } else if (track_ramblocks) {
528             u->region_rb_offset[i] = 0;
529             u->region_rb[i] = NULL;
530         }
531     }
532 
533     msg->payload.memory.nregions = *fd_num;
534 
535     if (!*fd_num) {
536         error_report("Failed initializing vhost-user memory map, "
537                      "consider using -object memory-backend-file share=on");
538         return -EINVAL;
539     }
540 
541     msg->hdr.size = sizeof(msg->payload.memory.nregions);
542     msg->hdr.size += sizeof(msg->payload.memory.padding);
543     msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
544 
545     return 0;
546 }
547 
548 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
549                              struct vhost_memory_region *vdev_reg)
550 {
551     return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
552         shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
553         shadow_reg->memory_size == vdev_reg->memory_size;
554 }
555 
556 static void scrub_shadow_regions(struct vhost_dev *dev,
557                                  struct scrub_regions *add_reg,
558                                  int *nr_add_reg,
559                                  struct scrub_regions *rem_reg,
560                                  int *nr_rem_reg, uint64_t *shadow_pcb,
561                                  bool track_ramblocks)
562 {
563     struct vhost_user *u = dev->opaque;
564     bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
565     struct vhost_memory_region *reg, *shadow_reg;
566     int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
567     ram_addr_t offset;
568     MemoryRegion *mr;
569     bool matching;
570 
571     /*
572      * Find memory regions present in our shadow state which are not in
573      * the device's current memory state.
574      *
575      * Mark regions in both the shadow and device state as "found".
576      */
577     for (i = 0; i < u->num_shadow_regions; i++) {
578         shadow_reg = &u->shadow_regions[i];
579         matching = false;
580 
581         for (j = 0; j < dev->mem->nregions; j++) {
582             reg = &dev->mem->regions[j];
583 
584             mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
585 
586             if (reg_equal(shadow_reg, reg)) {
587                 matching = true;
588                 found[j] = true;
589                 if (track_ramblocks) {
590                     /*
591                      * Reset postcopy client bases, region_rb, and
592                      * region_rb_offset in case regions are removed.
593                      */
594                     if (fd > 0) {
595                         u->region_rb_offset[j] = offset;
596                         u->region_rb[j] = mr->ram_block;
597                         shadow_pcb[j] = u->postcopy_client_bases[i];
598                     } else {
599                         u->region_rb_offset[j] = 0;
600                         u->region_rb[j] = NULL;
601                     }
602                 }
603                 break;
604             }
605         }
606 
607         /*
608          * If the region was not found in the current device memory state
609          * create an entry for it in the removed list.
610          */
611         if (!matching) {
612             rem_reg[rm_idx].region = shadow_reg;
613             rem_reg[rm_idx++].reg_idx = i;
614         }
615     }
616 
617     /*
618      * For regions not marked "found", create entries in the added list.
619      *
620      * Note their indexes in the device memory state and the indexes of their
621      * file descriptors.
622      */
623     for (i = 0; i < dev->mem->nregions; i++) {
624         reg = &dev->mem->regions[i];
625         vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
626         if (fd > 0) {
627             ++fd_num;
628         }
629 
630         /*
631          * If the region was in both the shadow and device state we don't
632          * need to send a VHOST_USER_ADD_MEM_REG message for it.
633          */
634         if (found[i]) {
635             continue;
636         }
637 
638         add_reg[add_idx].region = reg;
639         add_reg[add_idx].reg_idx = i;
640         add_reg[add_idx++].fd_idx = fd_num;
641     }
642     *nr_rem_reg = rm_idx;
643     *nr_add_reg = add_idx;
644 
645     return;
646 }
647 
648 static int send_remove_regions(struct vhost_dev *dev,
649                                struct scrub_regions *remove_reg,
650                                int nr_rem_reg, VhostUserMsg *msg,
651                                bool reply_supported)
652 {
653     struct vhost_user *u = dev->opaque;
654     struct vhost_memory_region *shadow_reg;
655     int i, fd, shadow_reg_idx, ret;
656     ram_addr_t offset;
657     VhostUserMemoryRegion region_buffer;
658 
659     /*
660      * The regions in remove_reg appear in the same order they do in the
661      * shadow table. Therefore we can minimize memory copies by iterating
662      * through remove_reg backwards.
663      */
664     for (i = nr_rem_reg - 1; i >= 0; i--) {
665         shadow_reg = remove_reg[i].region;
666         shadow_reg_idx = remove_reg[i].reg_idx;
667 
668         vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
669 
670         if (fd > 0) {
671             msg->hdr.request = VHOST_USER_REM_MEM_REG;
672             vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
673             msg->payload.mem_reg.region = region_buffer;
674 
675             ret = vhost_user_write(dev, msg, NULL, 0);
676             if (ret < 0) {
677                 return ret;
678             }
679 
680             if (reply_supported) {
681                 ret = process_message_reply(dev, msg);
682                 if (ret) {
683                     return ret;
684                 }
685             }
686         }
687 
688         /*
689          * At this point we know the backend has unmapped the region. It is now
690          * safe to remove it from the shadow table.
691          */
692         memmove(&u->shadow_regions[shadow_reg_idx],
693                 &u->shadow_regions[shadow_reg_idx + 1],
694                 sizeof(struct vhost_memory_region) *
695                 (u->num_shadow_regions - shadow_reg_idx - 1));
696         u->num_shadow_regions--;
697     }
698 
699     return 0;
700 }
701 
702 static int send_add_regions(struct vhost_dev *dev,
703                             struct scrub_regions *add_reg, int nr_add_reg,
704                             VhostUserMsg *msg, uint64_t *shadow_pcb,
705                             bool reply_supported, bool track_ramblocks)
706 {
707     struct vhost_user *u = dev->opaque;
708     int i, fd, ret, reg_idx, reg_fd_idx;
709     struct vhost_memory_region *reg;
710     MemoryRegion *mr;
711     ram_addr_t offset;
712     VhostUserMsg msg_reply;
713     VhostUserMemoryRegion region_buffer;
714 
715     for (i = 0; i < nr_add_reg; i++) {
716         reg = add_reg[i].region;
717         reg_idx = add_reg[i].reg_idx;
718         reg_fd_idx = add_reg[i].fd_idx;
719 
720         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
721 
722         if (fd > 0) {
723             if (track_ramblocks) {
724                 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
725                                                       reg->memory_size,
726                                                       reg->guest_phys_addr,
727                                                       reg->userspace_addr,
728                                                       offset);
729                 u->region_rb_offset[reg_idx] = offset;
730                 u->region_rb[reg_idx] = mr->ram_block;
731             }
732             msg->hdr.request = VHOST_USER_ADD_MEM_REG;
733             vhost_user_fill_msg_region(&region_buffer, reg, offset);
734             msg->payload.mem_reg.region = region_buffer;
735 
736             ret = vhost_user_write(dev, msg, &fd, 1);
737             if (ret < 0) {
738                 return ret;
739             }
740 
741             if (track_ramblocks) {
742                 uint64_t reply_gpa;
743 
744                 ret = vhost_user_read(dev, &msg_reply);
745                 if (ret < 0) {
746                     return ret;
747                 }
748 
749                 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
750 
751                 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
752                     error_report("%s: Received unexpected msg type."
753                                  "Expected %d received %d", __func__,
754                                  VHOST_USER_ADD_MEM_REG,
755                                  msg_reply.hdr.request);
756                     return -EPROTO;
757                 }
758 
759                 /*
760                  * We're using the same structure, just reusing one of the
761                  * fields, so it should be the same size.
762                  */
763                 if (msg_reply.hdr.size != msg->hdr.size) {
764                     error_report("%s: Unexpected size for postcopy reply "
765                                  "%d vs %d", __func__, msg_reply.hdr.size,
766                                  msg->hdr.size);
767                     return -EPROTO;
768                 }
769 
770                 /* Get the postcopy client base from the backend's reply. */
771                 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
772                     shadow_pcb[reg_idx] =
773                         msg_reply.payload.mem_reg.region.userspace_addr;
774                     trace_vhost_user_set_mem_table_postcopy(
775                         msg_reply.payload.mem_reg.region.userspace_addr,
776                         msg->payload.mem_reg.region.userspace_addr,
777                         reg_fd_idx, reg_idx);
778                 } else {
779                     error_report("%s: invalid postcopy reply for region. "
780                                  "Got guest physical address %" PRIX64 ", expected "
781                                  "%" PRIX64, __func__, reply_gpa,
782                                  dev->mem->regions[reg_idx].guest_phys_addr);
783                     return -EPROTO;
784                 }
785             } else if (reply_supported) {
786                 ret = process_message_reply(dev, msg);
787                 if (ret) {
788                     return ret;
789                 }
790             }
791         } else if (track_ramblocks) {
792             u->region_rb_offset[reg_idx] = 0;
793             u->region_rb[reg_idx] = NULL;
794         }
795 
796         /*
797          * At this point, we know the backend has mapped in the new
798          * region, if the region has a valid file descriptor.
799          *
800          * The region should now be added to the shadow table.
801          */
802         u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
803             reg->guest_phys_addr;
804         u->shadow_regions[u->num_shadow_regions].userspace_addr =
805             reg->userspace_addr;
806         u->shadow_regions[u->num_shadow_regions].memory_size =
807             reg->memory_size;
808         u->num_shadow_regions++;
809     }
810 
811     return 0;
812 }
813 
814 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
815                                          VhostUserMsg *msg,
816                                          bool reply_supported,
817                                          bool track_ramblocks)
818 {
819     struct vhost_user *u = dev->opaque;
820     struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
821     struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
822     uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
823     int nr_add_reg, nr_rem_reg;
824     int ret;
825 
826     msg->hdr.size = sizeof(msg->payload.mem_reg);
827 
828     /* Find the regions which need to be removed or added. */
829     scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
830                          shadow_pcb, track_ramblocks);
831 
832     if (nr_rem_reg) {
833         ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
834                                   reply_supported);
835         if (ret < 0) {
836             goto err;
837         }
838     }
839 
840     if (nr_add_reg) {
841         ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
842                                reply_supported, track_ramblocks);
843         if (ret < 0) {
844             goto err;
845         }
846     }
847 
848     if (track_ramblocks) {
849         memcpy(u->postcopy_client_bases, shadow_pcb,
850                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
851         /*
852          * Now we've registered this with the postcopy code, we ack to the
853          * client, because now we're in the position to be able to deal with
854          * any faults it generates.
855          */
856         /* TODO: Use this for failure cases as well with a bad value. */
857         msg->hdr.size = sizeof(msg->payload.u64);
858         msg->payload.u64 = 0; /* OK */
859 
860         ret = vhost_user_write(dev, msg, NULL, 0);
861         if (ret < 0) {
862             return ret;
863         }
864     }
865 
866     return 0;
867 
868 err:
869     if (track_ramblocks) {
870         memcpy(u->postcopy_client_bases, shadow_pcb,
871                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
872     }
873 
874     return ret;
875 }
876 
877 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
878                                              struct vhost_memory *mem,
879                                              bool reply_supported,
880                                              bool config_mem_slots)
881 {
882     struct vhost_user *u = dev->opaque;
883     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
884     size_t fd_num = 0;
885     VhostUserMsg msg_reply;
886     int region_i, msg_i;
887     int ret;
888 
889     VhostUserMsg msg = {
890         .hdr.flags = VHOST_USER_VERSION,
891     };
892 
893     if (u->region_rb_len < dev->mem->nregions) {
894         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
895         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
896                                       dev->mem->nregions);
897         memset(&(u->region_rb[u->region_rb_len]), '\0',
898                sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
899         memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
900                sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
901         u->region_rb_len = dev->mem->nregions;
902     }
903 
904     if (config_mem_slots) {
905         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
906         if (ret < 0) {
907             return ret;
908         }
909     } else {
910         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
911                                                 true);
912         if (ret < 0) {
913             return ret;
914         }
915 
916         ret = vhost_user_write(dev, &msg, fds, fd_num);
917         if (ret < 0) {
918             return ret;
919         }
920 
921         ret = vhost_user_read(dev, &msg_reply);
922         if (ret < 0) {
923             return ret;
924         }
925 
926         if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
927             error_report("%s: Received unexpected msg type."
928                          "Expected %d received %d", __func__,
929                          VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
930             return -EPROTO;
931         }
932 
933         /*
934          * We're using the same structure, just reusing one of the
935          * fields, so it should be the same size.
936          */
937         if (msg_reply.hdr.size != msg.hdr.size) {
938             error_report("%s: Unexpected size for postcopy reply "
939                          "%d vs %d", __func__, msg_reply.hdr.size,
940                          msg.hdr.size);
941             return -EPROTO;
942         }
943 
944         memset(u->postcopy_client_bases, 0,
945                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
946 
947         /*
948          * They're in the same order as the regions that were sent
949          * but some of the regions were skipped (above) if they
950          * didn't have fd's
951          */
952         for (msg_i = 0, region_i = 0;
953              region_i < dev->mem->nregions;
954              region_i++) {
955             if (msg_i < fd_num &&
956                 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
957                 dev->mem->regions[region_i].guest_phys_addr) {
958                 u->postcopy_client_bases[region_i] =
959                     msg_reply.payload.memory.regions[msg_i].userspace_addr;
960                 trace_vhost_user_set_mem_table_postcopy(
961                     msg_reply.payload.memory.regions[msg_i].userspace_addr,
962                     msg.payload.memory.regions[msg_i].userspace_addr,
963                     msg_i, region_i);
964                 msg_i++;
965             }
966         }
967         if (msg_i != fd_num) {
968             error_report("%s: postcopy reply not fully consumed "
969                          "%d vs %zd",
970                          __func__, msg_i, fd_num);
971             return -EIO;
972         }
973 
974         /*
975          * Now we've registered this with the postcopy code, we ack to the
976          * client, because now we're in the position to be able to deal
977          * with any faults it generates.
978          */
979         /* TODO: Use this for failure cases as well with a bad value. */
980         msg.hdr.size = sizeof(msg.payload.u64);
981         msg.payload.u64 = 0; /* OK */
982         ret = vhost_user_write(dev, &msg, NULL, 0);
983         if (ret < 0) {
984             return ret;
985         }
986     }
987 
988     return 0;
989 }
990 
991 static int vhost_user_set_mem_table(struct vhost_dev *dev,
992                                     struct vhost_memory *mem)
993 {
994     struct vhost_user *u = dev->opaque;
995     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
996     size_t fd_num = 0;
997     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
998     bool reply_supported = virtio_has_feature(dev->protocol_features,
999                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1000     bool config_mem_slots =
1001         virtio_has_feature(dev->protocol_features,
1002                            VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1003     int ret;
1004 
1005     if (do_postcopy) {
1006         /*
1007          * Postcopy has enough differences that it's best done in it's own
1008          * version
1009          */
1010         return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1011                                                  config_mem_slots);
1012     }
1013 
1014     VhostUserMsg msg = {
1015         .hdr.flags = VHOST_USER_VERSION,
1016     };
1017 
1018     if (reply_supported) {
1019         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1020     }
1021 
1022     if (config_mem_slots) {
1023         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1024         if (ret < 0) {
1025             return ret;
1026         }
1027     } else {
1028         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1029                                                 false);
1030         if (ret < 0) {
1031             return ret;
1032         }
1033 
1034         ret = vhost_user_write(dev, &msg, fds, fd_num);
1035         if (ret < 0) {
1036             return ret;
1037         }
1038 
1039         if (reply_supported) {
1040             return process_message_reply(dev, &msg);
1041         }
1042     }
1043 
1044     return 0;
1045 }
1046 
1047 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1048                                        struct vhost_vring_state *ring)
1049 {
1050     bool cross_endian = virtio_has_feature(dev->protocol_features,
1051                                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1052     VhostUserMsg msg = {
1053         .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1054         .hdr.flags = VHOST_USER_VERSION,
1055         .payload.state = *ring,
1056         .hdr.size = sizeof(msg.payload.state),
1057     };
1058 
1059     if (!cross_endian) {
1060         error_report("vhost-user trying to send unhandled ioctl");
1061         return -ENOTSUP;
1062     }
1063 
1064     return vhost_user_write(dev, &msg, NULL, 0);
1065 }
1066 
1067 static int vhost_set_vring(struct vhost_dev *dev,
1068                            unsigned long int request,
1069                            struct vhost_vring_state *ring)
1070 {
1071     VhostUserMsg msg = {
1072         .hdr.request = request,
1073         .hdr.flags = VHOST_USER_VERSION,
1074         .payload.state = *ring,
1075         .hdr.size = sizeof(msg.payload.state),
1076     };
1077 
1078     return vhost_user_write(dev, &msg, NULL, 0);
1079 }
1080 
1081 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1082                                     struct vhost_vring_state *ring)
1083 {
1084     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1085 }
1086 
1087 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1088 {
1089     assert(n && n->unmap_addr);
1090     munmap(n->unmap_addr, qemu_real_host_page_size());
1091     n->unmap_addr = NULL;
1092 }
1093 
1094 /*
1095  * clean-up function for notifier, will finally free the structure
1096  * under rcu.
1097  */
1098 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1099                                             VirtIODevice *vdev)
1100 {
1101     if (n->addr) {
1102         if (vdev) {
1103             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1104         }
1105         assert(!n->unmap_addr);
1106         n->unmap_addr = n->addr;
1107         n->addr = NULL;
1108         call_rcu(n, vhost_user_host_notifier_free, rcu);
1109     }
1110 }
1111 
1112 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1113                                      struct vhost_vring_state *ring)
1114 {
1115     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1116 }
1117 
1118 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1119 {
1120     int i;
1121 
1122     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1123         return -EINVAL;
1124     }
1125 
1126     for (i = 0; i < dev->nvqs; ++i) {
1127         int ret;
1128         struct vhost_vring_state state = {
1129             .index = dev->vq_index + i,
1130             .num   = enable,
1131         };
1132 
1133         ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1134         if (ret < 0) {
1135             /*
1136              * Restoring the previous state is likely infeasible, as well as
1137              * proceeding regardless the error, so just bail out and hope for
1138              * the device-level recovery.
1139              */
1140             return ret;
1141         }
1142     }
1143 
1144     return 0;
1145 }
1146 
1147 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1148                                              int idx)
1149 {
1150     if (idx >= u->notifiers->len) {
1151         return NULL;
1152     }
1153     return g_ptr_array_index(u->notifiers, idx);
1154 }
1155 
1156 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1157                                      struct vhost_vring_state *ring)
1158 {
1159     int ret;
1160     VhostUserMsg msg = {
1161         .hdr.request = VHOST_USER_GET_VRING_BASE,
1162         .hdr.flags = VHOST_USER_VERSION,
1163         .payload.state = *ring,
1164         .hdr.size = sizeof(msg.payload.state),
1165     };
1166     struct vhost_user *u = dev->opaque;
1167 
1168     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1169     if (n) {
1170         vhost_user_host_notifier_remove(n, dev->vdev);
1171     }
1172 
1173     ret = vhost_user_write(dev, &msg, NULL, 0);
1174     if (ret < 0) {
1175         return ret;
1176     }
1177 
1178     ret = vhost_user_read(dev, &msg);
1179     if (ret < 0) {
1180         return ret;
1181     }
1182 
1183     if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1184         error_report("Received unexpected msg type. Expected %d received %d",
1185                      VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1186         return -EPROTO;
1187     }
1188 
1189     if (msg.hdr.size != sizeof(msg.payload.state)) {
1190         error_report("Received bad msg size.");
1191         return -EPROTO;
1192     }
1193 
1194     *ring = msg.payload.state;
1195 
1196     return 0;
1197 }
1198 
1199 static int vhost_set_vring_file(struct vhost_dev *dev,
1200                                 VhostUserRequest request,
1201                                 struct vhost_vring_file *file)
1202 {
1203     int fds[VHOST_USER_MAX_RAM_SLOTS];
1204     size_t fd_num = 0;
1205     VhostUserMsg msg = {
1206         .hdr.request = request,
1207         .hdr.flags = VHOST_USER_VERSION,
1208         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1209         .hdr.size = sizeof(msg.payload.u64),
1210     };
1211 
1212     if (ioeventfd_enabled() && file->fd > 0) {
1213         fds[fd_num++] = file->fd;
1214     } else {
1215         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1216     }
1217 
1218     return vhost_user_write(dev, &msg, fds, fd_num);
1219 }
1220 
1221 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1222                                      struct vhost_vring_file *file)
1223 {
1224     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1225 }
1226 
1227 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1228                                      struct vhost_vring_file *file)
1229 {
1230     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1231 }
1232 
1233 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1234                                     struct vhost_vring_file *file)
1235 {
1236     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1237 }
1238 
1239 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1240 {
1241     int ret;
1242     VhostUserMsg msg = {
1243         .hdr.request = request,
1244         .hdr.flags = VHOST_USER_VERSION,
1245     };
1246 
1247     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1248         return 0;
1249     }
1250 
1251     ret = vhost_user_write(dev, &msg, NULL, 0);
1252     if (ret < 0) {
1253         return ret;
1254     }
1255 
1256     ret = vhost_user_read(dev, &msg);
1257     if (ret < 0) {
1258         return ret;
1259     }
1260 
1261     if (msg.hdr.request != request) {
1262         error_report("Received unexpected msg type. Expected %d received %d",
1263                      request, msg.hdr.request);
1264         return -EPROTO;
1265     }
1266 
1267     if (msg.hdr.size != sizeof(msg.payload.u64)) {
1268         error_report("Received bad msg size.");
1269         return -EPROTO;
1270     }
1271 
1272     *u64 = msg.payload.u64;
1273 
1274     return 0;
1275 }
1276 
1277 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1278 {
1279     if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1280         return -EPROTO;
1281     }
1282 
1283     return 0;
1284 }
1285 
1286 static int enforce_reply(struct vhost_dev *dev,
1287                          const VhostUserMsg *msg)
1288 {
1289     uint64_t dummy;
1290 
1291     if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1292         return process_message_reply(dev, msg);
1293     }
1294 
1295    /*
1296     * We need to wait for a reply but the backend does not
1297     * support replies for the command we just sent.
1298     * Send VHOST_USER_GET_FEATURES which makes all backends
1299     * send a reply.
1300     */
1301     return vhost_user_get_features(dev, &dummy);
1302 }
1303 
1304 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1305                                      struct vhost_vring_addr *addr)
1306 {
1307     int ret;
1308     VhostUserMsg msg = {
1309         .hdr.request = VHOST_USER_SET_VRING_ADDR,
1310         .hdr.flags = VHOST_USER_VERSION,
1311         .payload.addr = *addr,
1312         .hdr.size = sizeof(msg.payload.addr),
1313     };
1314 
1315     bool reply_supported = virtio_has_feature(dev->protocol_features,
1316                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1317 
1318     /*
1319      * wait for a reply if logging is enabled to make sure
1320      * backend is actually logging changes
1321      */
1322     bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1323 
1324     if (reply_supported && wait_for_reply) {
1325         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1326     }
1327 
1328     ret = vhost_user_write(dev, &msg, NULL, 0);
1329     if (ret < 0) {
1330         return ret;
1331     }
1332 
1333     if (wait_for_reply) {
1334         return enforce_reply(dev, &msg);
1335     }
1336 
1337     return 0;
1338 }
1339 
1340 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1341                               bool wait_for_reply)
1342 {
1343     VhostUserMsg msg = {
1344         .hdr.request = request,
1345         .hdr.flags = VHOST_USER_VERSION,
1346         .payload.u64 = u64,
1347         .hdr.size = sizeof(msg.payload.u64),
1348     };
1349     int ret;
1350 
1351     if (wait_for_reply) {
1352         bool reply_supported = virtio_has_feature(dev->protocol_features,
1353                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);
1354         if (reply_supported) {
1355             msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1356         }
1357     }
1358 
1359     ret = vhost_user_write(dev, &msg, NULL, 0);
1360     if (ret < 0) {
1361         return ret;
1362     }
1363 
1364     if (wait_for_reply) {
1365         return enforce_reply(dev, &msg);
1366     }
1367 
1368     return 0;
1369 }
1370 
1371 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1372 {
1373     return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1374 }
1375 
1376 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1377 {
1378     uint64_t value;
1379     int ret;
1380 
1381     ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1382     if (ret < 0) {
1383         return ret;
1384     }
1385     *status = value;
1386 
1387     return 0;
1388 }
1389 
1390 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1391 {
1392     uint8_t s;
1393     int ret;
1394 
1395     ret = vhost_user_get_status(dev, &s);
1396     if (ret < 0) {
1397         return ret;
1398     }
1399 
1400     if ((s & status) == status) {
1401         return 0;
1402     }
1403     s |= status;
1404 
1405     return vhost_user_set_status(dev, s);
1406 }
1407 
1408 static int vhost_user_set_features(struct vhost_dev *dev,
1409                                    uint64_t features)
1410 {
1411     /*
1412      * wait for a reply if logging is enabled to make sure
1413      * backend is actually logging changes
1414      */
1415     bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1416     int ret;
1417 
1418     /*
1419      * We need to include any extra backend only feature bits that
1420      * might be needed by our device. Currently this includes the
1421      * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1422      * features.
1423      */
1424     ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1425                               features | dev->backend_features,
1426                               log_enabled);
1427 
1428     if (virtio_has_feature(dev->protocol_features,
1429                            VHOST_USER_PROTOCOL_F_STATUS)) {
1430         if (!ret) {
1431             return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1432         }
1433     }
1434 
1435     return ret;
1436 }
1437 
1438 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1439                                             uint64_t features)
1440 {
1441     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1442                               false);
1443 }
1444 
1445 static int vhost_user_set_owner(struct vhost_dev *dev)
1446 {
1447     VhostUserMsg msg = {
1448         .hdr.request = VHOST_USER_SET_OWNER,
1449         .hdr.flags = VHOST_USER_VERSION,
1450     };
1451 
1452     return vhost_user_write(dev, &msg, NULL, 0);
1453 }
1454 
1455 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1456                                        uint64_t *max_memslots)
1457 {
1458     uint64_t backend_max_memslots;
1459     int err;
1460 
1461     err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1462                              &backend_max_memslots);
1463     if (err < 0) {
1464         return err;
1465     }
1466 
1467     *max_memslots = backend_max_memslots;
1468 
1469     return 0;
1470 }
1471 
1472 static int vhost_user_reset_device(struct vhost_dev *dev)
1473 {
1474     VhostUserMsg msg = {
1475         .hdr.flags = VHOST_USER_VERSION,
1476     };
1477 
1478     msg.hdr.request = virtio_has_feature(dev->protocol_features,
1479                                          VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1480         ? VHOST_USER_RESET_DEVICE
1481         : VHOST_USER_RESET_OWNER;
1482 
1483     return vhost_user_write(dev, &msg, NULL, 0);
1484 }
1485 
1486 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1487 {
1488     if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1489         return -ENOSYS;
1490     }
1491 
1492     return dev->config_ops->vhost_dev_config_notifier(dev);
1493 }
1494 
1495 /*
1496  * Fetch or create the notifier for a given idx. Newly created
1497  * notifiers are added to the pointer array that tracks them.
1498  */
1499 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1500                                                        int idx)
1501 {
1502     VhostUserHostNotifier *n = NULL;
1503     if (idx >= u->notifiers->len) {
1504         g_ptr_array_set_size(u->notifiers, idx + 1);
1505     }
1506 
1507     n = g_ptr_array_index(u->notifiers, idx);
1508     if (!n) {
1509         /*
1510          * In case notification arrive out-of-order,
1511          * make room for current index.
1512          */
1513         g_ptr_array_remove_index(u->notifiers, idx);
1514         n = g_new0(VhostUserHostNotifier, 1);
1515         n->idx = idx;
1516         g_ptr_array_insert(u->notifiers, idx, n);
1517         trace_vhost_user_create_notifier(idx, n);
1518     }
1519 
1520     return n;
1521 }
1522 
1523 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1524                                                        VhostUserVringArea *area,
1525                                                        int fd)
1526 {
1527     int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1528     size_t page_size = qemu_real_host_page_size();
1529     struct vhost_user *u = dev->opaque;
1530     VhostUserState *user = u->user;
1531     VirtIODevice *vdev = dev->vdev;
1532     VhostUserHostNotifier *n;
1533     void *addr;
1534     char *name;
1535 
1536     if (!virtio_has_feature(dev->protocol_features,
1537                             VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1538         vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1539         return -EINVAL;
1540     }
1541 
1542     /*
1543      * Fetch notifier and invalidate any old data before setting up
1544      * new mapped address.
1545      */
1546     n = fetch_or_create_notifier(user, queue_idx);
1547     vhost_user_host_notifier_remove(n, vdev);
1548 
1549     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1550         return 0;
1551     }
1552 
1553     /* Sanity check. */
1554     if (area->size != page_size) {
1555         return -EINVAL;
1556     }
1557 
1558     addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1559                 fd, area->offset);
1560     if (addr == MAP_FAILED) {
1561         return -EFAULT;
1562     }
1563 
1564     name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1565                            user, queue_idx);
1566     if (!n->mr.ram) { /* Don't init again after suspend. */
1567         memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1568                                           page_size, addr);
1569     } else {
1570         n->mr.ram_block->host = addr;
1571     }
1572     g_free(name);
1573 
1574     if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1575         object_unparent(OBJECT(&n->mr));
1576         munmap(addr, page_size);
1577         return -ENXIO;
1578     }
1579 
1580     n->addr = addr;
1581 
1582     return 0;
1583 }
1584 
1585 static void close_slave_channel(struct vhost_user *u)
1586 {
1587     g_source_destroy(u->slave_src);
1588     g_source_unref(u->slave_src);
1589     u->slave_src = NULL;
1590     object_unref(OBJECT(u->slave_ioc));
1591     u->slave_ioc = NULL;
1592 }
1593 
1594 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
1595                            gpointer opaque)
1596 {
1597     struct vhost_dev *dev = opaque;
1598     struct vhost_user *u = dev->opaque;
1599     VhostUserHeader hdr = { 0, };
1600     VhostUserPayload payload = { 0, };
1601     Error *local_err = NULL;
1602     gboolean rc = G_SOURCE_CONTINUE;
1603     int ret = 0;
1604     struct iovec iov;
1605     g_autofree int *fd = NULL;
1606     size_t fdsize = 0;
1607     int i;
1608 
1609     /* Read header */
1610     iov.iov_base = &hdr;
1611     iov.iov_len = VHOST_USER_HDR_SIZE;
1612 
1613     if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1614         error_report_err(local_err);
1615         goto err;
1616     }
1617 
1618     if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1619         error_report("Failed to read msg header."
1620                 " Size %d exceeds the maximum %zu.", hdr.size,
1621                 VHOST_USER_PAYLOAD_SIZE);
1622         goto err;
1623     }
1624 
1625     /* Read payload */
1626     if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1627         error_report_err(local_err);
1628         goto err;
1629     }
1630 
1631     switch (hdr.request) {
1632     case VHOST_USER_BACKEND_IOTLB_MSG:
1633         ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1634         break;
1635     case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1636         ret = vhost_user_slave_handle_config_change(dev);
1637         break;
1638     case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1639         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1640                                                           fd ? fd[0] : -1);
1641         break;
1642     default:
1643         error_report("Received unexpected msg type: %d.", hdr.request);
1644         ret = -EINVAL;
1645     }
1646 
1647     /*
1648      * REPLY_ACK feature handling. Other reply types has to be managed
1649      * directly in their request handlers.
1650      */
1651     if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1652         struct iovec iovec[2];
1653 
1654 
1655         hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1656         hdr.flags |= VHOST_USER_REPLY_MASK;
1657 
1658         payload.u64 = !!ret;
1659         hdr.size = sizeof(payload.u64);
1660 
1661         iovec[0].iov_base = &hdr;
1662         iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1663         iovec[1].iov_base = &payload;
1664         iovec[1].iov_len = hdr.size;
1665 
1666         if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1667             error_report_err(local_err);
1668             goto err;
1669         }
1670     }
1671 
1672     goto fdcleanup;
1673 
1674 err:
1675     close_slave_channel(u);
1676     rc = G_SOURCE_REMOVE;
1677 
1678 fdcleanup:
1679     if (fd) {
1680         for (i = 0; i < fdsize; i++) {
1681             close(fd[i]);
1682         }
1683     }
1684     return rc;
1685 }
1686 
1687 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1688 {
1689     VhostUserMsg msg = {
1690         .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1691         .hdr.flags = VHOST_USER_VERSION,
1692     };
1693     struct vhost_user *u = dev->opaque;
1694     int sv[2], ret = 0;
1695     bool reply_supported = virtio_has_feature(dev->protocol_features,
1696                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1697     Error *local_err = NULL;
1698     QIOChannel *ioc;
1699 
1700     if (!virtio_has_feature(dev->protocol_features,
1701                             VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1702         return 0;
1703     }
1704 
1705     if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1706         int saved_errno = errno;
1707         error_report("socketpair() failed");
1708         return -saved_errno;
1709     }
1710 
1711     ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1712     if (!ioc) {
1713         error_report_err(local_err);
1714         return -ECONNREFUSED;
1715     }
1716     u->slave_ioc = ioc;
1717     u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
1718                                                 G_IO_IN | G_IO_HUP,
1719                                                 slave_read, dev, NULL, NULL);
1720 
1721     if (reply_supported) {
1722         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1723     }
1724 
1725     ret = vhost_user_write(dev, &msg, &sv[1], 1);
1726     if (ret) {
1727         goto out;
1728     }
1729 
1730     if (reply_supported) {
1731         ret = process_message_reply(dev, &msg);
1732     }
1733 
1734 out:
1735     close(sv[1]);
1736     if (ret) {
1737         close_slave_channel(u);
1738     }
1739 
1740     return ret;
1741 }
1742 
1743 #ifdef CONFIG_LINUX
1744 /*
1745  * Called back from the postcopy fault thread when a fault is received on our
1746  * ufd.
1747  * TODO: This is Linux specific
1748  */
1749 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1750                                              void *ufd)
1751 {
1752     struct vhost_dev *dev = pcfd->data;
1753     struct vhost_user *u = dev->opaque;
1754     struct uffd_msg *msg = ufd;
1755     uint64_t faultaddr = msg->arg.pagefault.address;
1756     RAMBlock *rb = NULL;
1757     uint64_t rb_offset;
1758     int i;
1759 
1760     trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1761                                             dev->mem->nregions);
1762     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1763         trace_vhost_user_postcopy_fault_handler_loop(i,
1764                 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1765         if (faultaddr >= u->postcopy_client_bases[i]) {
1766             /* Ofset of the fault address in the vhost region */
1767             uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1768             if (region_offset < dev->mem->regions[i].memory_size) {
1769                 rb_offset = region_offset + u->region_rb_offset[i];
1770                 trace_vhost_user_postcopy_fault_handler_found(i,
1771                         region_offset, rb_offset);
1772                 rb = u->region_rb[i];
1773                 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1774                                                     rb_offset);
1775             }
1776         }
1777     }
1778     error_report("%s: Failed to find region for fault %" PRIx64,
1779                  __func__, faultaddr);
1780     return -1;
1781 }
1782 
1783 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1784                                      uint64_t offset)
1785 {
1786     struct vhost_dev *dev = pcfd->data;
1787     struct vhost_user *u = dev->opaque;
1788     int i;
1789 
1790     trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1791 
1792     if (!u) {
1793         return 0;
1794     }
1795     /* Translate the offset into an address in the clients address space */
1796     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1797         if (u->region_rb[i] == rb &&
1798             offset >= u->region_rb_offset[i] &&
1799             offset < (u->region_rb_offset[i] +
1800                       dev->mem->regions[i].memory_size)) {
1801             uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1802                                    u->postcopy_client_bases[i];
1803             trace_vhost_user_postcopy_waker_found(client_addr);
1804             return postcopy_wake_shared(pcfd, client_addr, rb);
1805         }
1806     }
1807 
1808     trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1809     return 0;
1810 }
1811 #endif
1812 
1813 /*
1814  * Called at the start of an inbound postcopy on reception of the
1815  * 'advise' command.
1816  */
1817 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1818 {
1819 #ifdef CONFIG_LINUX
1820     struct vhost_user *u = dev->opaque;
1821     CharBackend *chr = u->user->chr;
1822     int ufd;
1823     int ret;
1824     VhostUserMsg msg = {
1825         .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1826         .hdr.flags = VHOST_USER_VERSION,
1827     };
1828 
1829     ret = vhost_user_write(dev, &msg, NULL, 0);
1830     if (ret < 0) {
1831         error_setg(errp, "Failed to send postcopy_advise to vhost");
1832         return ret;
1833     }
1834 
1835     ret = vhost_user_read(dev, &msg);
1836     if (ret < 0) {
1837         error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1838         return ret;
1839     }
1840 
1841     if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1842         error_setg(errp, "Unexpected msg type. Expected %d received %d",
1843                      VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1844         return -EPROTO;
1845     }
1846 
1847     if (msg.hdr.size) {
1848         error_setg(errp, "Received bad msg size.");
1849         return -EPROTO;
1850     }
1851     ufd = qemu_chr_fe_get_msgfd(chr);
1852     if (ufd < 0) {
1853         error_setg(errp, "%s: Failed to get ufd", __func__);
1854         return -EIO;
1855     }
1856     qemu_socket_set_nonblock(ufd);
1857 
1858     /* register ufd with userfault thread */
1859     u->postcopy_fd.fd = ufd;
1860     u->postcopy_fd.data = dev;
1861     u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1862     u->postcopy_fd.waker = vhost_user_postcopy_waker;
1863     u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1864     postcopy_register_shared_ufd(&u->postcopy_fd);
1865     return 0;
1866 #else
1867     error_setg(errp, "Postcopy not supported on non-Linux systems");
1868     return -ENOSYS;
1869 #endif
1870 }
1871 
1872 /*
1873  * Called at the switch to postcopy on reception of the 'listen' command.
1874  */
1875 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1876 {
1877     struct vhost_user *u = dev->opaque;
1878     int ret;
1879     VhostUserMsg msg = {
1880         .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1881         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1882     };
1883     u->postcopy_listen = true;
1884 
1885     trace_vhost_user_postcopy_listen();
1886 
1887     ret = vhost_user_write(dev, &msg, NULL, 0);
1888     if (ret < 0) {
1889         error_setg(errp, "Failed to send postcopy_listen to vhost");
1890         return ret;
1891     }
1892 
1893     ret = process_message_reply(dev, &msg);
1894     if (ret) {
1895         error_setg(errp, "Failed to receive reply to postcopy_listen");
1896         return ret;
1897     }
1898 
1899     return 0;
1900 }
1901 
1902 /*
1903  * Called at the end of postcopy
1904  */
1905 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1906 {
1907     VhostUserMsg msg = {
1908         .hdr.request = VHOST_USER_POSTCOPY_END,
1909         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1910     };
1911     int ret;
1912     struct vhost_user *u = dev->opaque;
1913 
1914     trace_vhost_user_postcopy_end_entry();
1915 
1916     ret = vhost_user_write(dev, &msg, NULL, 0);
1917     if (ret < 0) {
1918         error_setg(errp, "Failed to send postcopy_end to vhost");
1919         return ret;
1920     }
1921 
1922     ret = process_message_reply(dev, &msg);
1923     if (ret) {
1924         error_setg(errp, "Failed to receive reply to postcopy_end");
1925         return ret;
1926     }
1927     postcopy_unregister_shared_ufd(&u->postcopy_fd);
1928     close(u->postcopy_fd.fd);
1929     u->postcopy_fd.handler = NULL;
1930 
1931     trace_vhost_user_postcopy_end_exit();
1932 
1933     return 0;
1934 }
1935 
1936 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1937                                         void *opaque)
1938 {
1939     struct PostcopyNotifyData *pnd = opaque;
1940     struct vhost_user *u = container_of(notifier, struct vhost_user,
1941                                          postcopy_notifier);
1942     struct vhost_dev *dev = u->dev;
1943 
1944     switch (pnd->reason) {
1945     case POSTCOPY_NOTIFY_PROBE:
1946         if (!virtio_has_feature(dev->protocol_features,
1947                                 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1948             /* TODO: Get the device name into this error somehow */
1949             error_setg(pnd->errp,
1950                        "vhost-user backend not capable of postcopy");
1951             return -ENOENT;
1952         }
1953         break;
1954 
1955     case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1956         return vhost_user_postcopy_advise(dev, pnd->errp);
1957 
1958     case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1959         return vhost_user_postcopy_listen(dev, pnd->errp);
1960 
1961     case POSTCOPY_NOTIFY_INBOUND_END:
1962         return vhost_user_postcopy_end(dev, pnd->errp);
1963 
1964     default:
1965         /* We ignore notifications we don't know */
1966         break;
1967     }
1968 
1969     return 0;
1970 }
1971 
1972 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1973                                    Error **errp)
1974 {
1975     uint64_t features, ram_slots;
1976     struct vhost_user *u;
1977     VhostUserState *vus = (VhostUserState *) opaque;
1978     int err;
1979 
1980     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1981 
1982     u = g_new0(struct vhost_user, 1);
1983     u->user = vus;
1984     u->dev = dev;
1985     dev->opaque = u;
1986 
1987     err = vhost_user_get_features(dev, &features);
1988     if (err < 0) {
1989         error_setg_errno(errp, -err, "vhost_backend_init failed");
1990         return err;
1991     }
1992 
1993     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1994         bool supports_f_config = vus->supports_config ||
1995             (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
1996         uint64_t protocol_features;
1997 
1998         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1999 
2000         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2001                                  &protocol_features);
2002         if (err < 0) {
2003             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2004             return -EPROTO;
2005         }
2006 
2007         /*
2008          * We will use all the protocol features we support - although
2009          * we suppress F_CONFIG if we know QEMUs internal code can not support
2010          * it.
2011          */
2012         protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2013 
2014         if (supports_f_config) {
2015             if (!virtio_has_feature(protocol_features,
2016                                     VHOST_USER_PROTOCOL_F_CONFIG)) {
2017                 error_setg(errp, "vhost-user device expecting "
2018                            "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2019                            "not support it.");
2020                 return -EPROTO;
2021             }
2022         } else {
2023             if (virtio_has_feature(protocol_features,
2024                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
2025                 warn_report("vhost-user backend supports "
2026                             "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2027                 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2028             }
2029         }
2030 
2031         /* final set of protocol features */
2032         dev->protocol_features = protocol_features;
2033         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2034         if (err < 0) {
2035             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2036             return -EPROTO;
2037         }
2038 
2039         /* query the max queues we support if backend supports Multiple Queue */
2040         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2041             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2042                                      &dev->max_queues);
2043             if (err < 0) {
2044                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2045                 return -EPROTO;
2046             }
2047         } else {
2048             dev->max_queues = 1;
2049         }
2050 
2051         if (dev->num_queues && dev->max_queues < dev->num_queues) {
2052             error_setg(errp, "The maximum number of queues supported by the "
2053                        "backend is %" PRIu64, dev->max_queues);
2054             return -EINVAL;
2055         }
2056 
2057         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2058                 !(virtio_has_feature(dev->protocol_features,
2059                     VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2060                  virtio_has_feature(dev->protocol_features,
2061                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2062             error_setg(errp, "IOMMU support requires reply-ack and "
2063                        "slave-req protocol features.");
2064             return -EINVAL;
2065         }
2066 
2067         /* get max memory regions if backend supports configurable RAM slots */
2068         if (!virtio_has_feature(dev->protocol_features,
2069                                 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2070             u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2071         } else {
2072             err = vhost_user_get_max_memslots(dev, &ram_slots);
2073             if (err < 0) {
2074                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2075                 return -EPROTO;
2076             }
2077 
2078             if (ram_slots < u->user->memory_slots) {
2079                 error_setg(errp, "The backend specified a max ram slots limit "
2080                            "of %" PRIu64", when the prior validated limit was "
2081                            "%d. This limit should never decrease.", ram_slots,
2082                            u->user->memory_slots);
2083                 return -EINVAL;
2084             }
2085 
2086             u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2087         }
2088     }
2089 
2090     if (dev->migration_blocker == NULL &&
2091         !virtio_has_feature(dev->protocol_features,
2092                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2093         error_setg(&dev->migration_blocker,
2094                    "Migration disabled: vhost-user backend lacks "
2095                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2096     }
2097 
2098     if (dev->vq_index == 0) {
2099         err = vhost_setup_slave_channel(dev);
2100         if (err < 0) {
2101             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2102             return -EPROTO;
2103         }
2104     }
2105 
2106     u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2107     postcopy_add_notifier(&u->postcopy_notifier);
2108 
2109     return 0;
2110 }
2111 
2112 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2113 {
2114     struct vhost_user *u;
2115 
2116     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2117 
2118     u = dev->opaque;
2119     if (u->postcopy_notifier.notify) {
2120         postcopy_remove_notifier(&u->postcopy_notifier);
2121         u->postcopy_notifier.notify = NULL;
2122     }
2123     u->postcopy_listen = false;
2124     if (u->postcopy_fd.handler) {
2125         postcopy_unregister_shared_ufd(&u->postcopy_fd);
2126         close(u->postcopy_fd.fd);
2127         u->postcopy_fd.handler = NULL;
2128     }
2129     if (u->slave_ioc) {
2130         close_slave_channel(u);
2131     }
2132     g_free(u->region_rb);
2133     u->region_rb = NULL;
2134     g_free(u->region_rb_offset);
2135     u->region_rb_offset = NULL;
2136     u->region_rb_len = 0;
2137     g_free(u);
2138     dev->opaque = 0;
2139 
2140     return 0;
2141 }
2142 
2143 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2144 {
2145     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2146 
2147     return idx;
2148 }
2149 
2150 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2151 {
2152     struct vhost_user *u = dev->opaque;
2153 
2154     return u->user->memory_slots;
2155 }
2156 
2157 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2158 {
2159     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2160 
2161     return virtio_has_feature(dev->protocol_features,
2162                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2163 }
2164 
2165 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2166 {
2167     VhostUserMsg msg = { };
2168 
2169     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2170 
2171     /* If guest supports GUEST_ANNOUNCE do nothing */
2172     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2173         return 0;
2174     }
2175 
2176     /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
2177     if (virtio_has_feature(dev->protocol_features,
2178                            VHOST_USER_PROTOCOL_F_RARP)) {
2179         msg.hdr.request = VHOST_USER_SEND_RARP;
2180         msg.hdr.flags = VHOST_USER_VERSION;
2181         memcpy((char *)&msg.payload.u64, mac_addr, 6);
2182         msg.hdr.size = sizeof(msg.payload.u64);
2183 
2184         return vhost_user_write(dev, &msg, NULL, 0);
2185     }
2186     return -ENOTSUP;
2187 }
2188 
2189 static bool vhost_user_can_merge(struct vhost_dev *dev,
2190                                  uint64_t start1, uint64_t size1,
2191                                  uint64_t start2, uint64_t size2)
2192 {
2193     ram_addr_t offset;
2194     int mfd, rfd;
2195 
2196     (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2197     (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2198 
2199     return mfd == rfd;
2200 }
2201 
2202 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2203 {
2204     VhostUserMsg msg;
2205     bool reply_supported = virtio_has_feature(dev->protocol_features,
2206                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2207     int ret;
2208 
2209     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2210         return 0;
2211     }
2212 
2213     msg.hdr.request = VHOST_USER_NET_SET_MTU;
2214     msg.payload.u64 = mtu;
2215     msg.hdr.size = sizeof(msg.payload.u64);
2216     msg.hdr.flags = VHOST_USER_VERSION;
2217     if (reply_supported) {
2218         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2219     }
2220 
2221     ret = vhost_user_write(dev, &msg, NULL, 0);
2222     if (ret < 0) {
2223         return ret;
2224     }
2225 
2226     /* If reply_ack supported, slave has to ack specified MTU is valid */
2227     if (reply_supported) {
2228         return process_message_reply(dev, &msg);
2229     }
2230 
2231     return 0;
2232 }
2233 
2234 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2235                                             struct vhost_iotlb_msg *imsg)
2236 {
2237     int ret;
2238     VhostUserMsg msg = {
2239         .hdr.request = VHOST_USER_IOTLB_MSG,
2240         .hdr.size = sizeof(msg.payload.iotlb),
2241         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2242         .payload.iotlb = *imsg,
2243     };
2244 
2245     ret = vhost_user_write(dev, &msg, NULL, 0);
2246     if (ret < 0) {
2247         return ret;
2248     }
2249 
2250     return process_message_reply(dev, &msg);
2251 }
2252 
2253 
2254 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2255 {
2256     /* No-op as the receive channel is not dedicated to IOTLB messages. */
2257 }
2258 
2259 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2260                                  uint32_t config_len, Error **errp)
2261 {
2262     int ret;
2263     VhostUserMsg msg = {
2264         .hdr.request = VHOST_USER_GET_CONFIG,
2265         .hdr.flags = VHOST_USER_VERSION,
2266         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2267     };
2268 
2269     if (!virtio_has_feature(dev->protocol_features,
2270                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2271         error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2272         return -EINVAL;
2273     }
2274 
2275     assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2276 
2277     msg.payload.config.offset = 0;
2278     msg.payload.config.size = config_len;
2279     ret = vhost_user_write(dev, &msg, NULL, 0);
2280     if (ret < 0) {
2281         error_setg_errno(errp, -ret, "vhost_get_config failed");
2282         return ret;
2283     }
2284 
2285     ret = vhost_user_read(dev, &msg);
2286     if (ret < 0) {
2287         error_setg_errno(errp, -ret, "vhost_get_config failed");
2288         return ret;
2289     }
2290 
2291     if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2292         error_setg(errp,
2293                    "Received unexpected msg type. Expected %d received %d",
2294                    VHOST_USER_GET_CONFIG, msg.hdr.request);
2295         return -EPROTO;
2296     }
2297 
2298     if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2299         error_setg(errp, "Received bad msg size.");
2300         return -EPROTO;
2301     }
2302 
2303     memcpy(config, msg.payload.config.region, config_len);
2304 
2305     return 0;
2306 }
2307 
2308 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2309                                  uint32_t offset, uint32_t size, uint32_t flags)
2310 {
2311     int ret;
2312     uint8_t *p;
2313     bool reply_supported = virtio_has_feature(dev->protocol_features,
2314                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2315 
2316     VhostUserMsg msg = {
2317         .hdr.request = VHOST_USER_SET_CONFIG,
2318         .hdr.flags = VHOST_USER_VERSION,
2319         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2320     };
2321 
2322     if (!virtio_has_feature(dev->protocol_features,
2323                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2324         return -ENOTSUP;
2325     }
2326 
2327     if (reply_supported) {
2328         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2329     }
2330 
2331     if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2332         return -EINVAL;
2333     }
2334 
2335     msg.payload.config.offset = offset,
2336     msg.payload.config.size = size,
2337     msg.payload.config.flags = flags,
2338     p = msg.payload.config.region;
2339     memcpy(p, data, size);
2340 
2341     ret = vhost_user_write(dev, &msg, NULL, 0);
2342     if (ret < 0) {
2343         return ret;
2344     }
2345 
2346     if (reply_supported) {
2347         return process_message_reply(dev, &msg);
2348     }
2349 
2350     return 0;
2351 }
2352 
2353 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2354                                             void *session_info,
2355                                             uint64_t *session_id)
2356 {
2357     int ret;
2358     bool crypto_session = virtio_has_feature(dev->protocol_features,
2359                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2360     CryptoDevBackendSymSessionInfo *sess_info = session_info;
2361     VhostUserMsg msg = {
2362         .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2363         .hdr.flags = VHOST_USER_VERSION,
2364         .hdr.size = sizeof(msg.payload.session),
2365     };
2366 
2367     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2368 
2369     if (!crypto_session) {
2370         error_report("vhost-user trying to send unhandled ioctl");
2371         return -ENOTSUP;
2372     }
2373 
2374     memcpy(&msg.payload.session.session_setup_data, sess_info,
2375               sizeof(CryptoDevBackendSymSessionInfo));
2376     if (sess_info->key_len) {
2377         memcpy(&msg.payload.session.key, sess_info->cipher_key,
2378                sess_info->key_len);
2379     }
2380     if (sess_info->auth_key_len > 0) {
2381         memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2382                sess_info->auth_key_len);
2383     }
2384     ret = vhost_user_write(dev, &msg, NULL, 0);
2385     if (ret < 0) {
2386         error_report("vhost_user_write() return %d, create session failed",
2387                      ret);
2388         return ret;
2389     }
2390 
2391     ret = vhost_user_read(dev, &msg);
2392     if (ret < 0) {
2393         error_report("vhost_user_read() return %d, create session failed",
2394                      ret);
2395         return ret;
2396     }
2397 
2398     if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2399         error_report("Received unexpected msg type. Expected %d received %d",
2400                      VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2401         return -EPROTO;
2402     }
2403 
2404     if (msg.hdr.size != sizeof(msg.payload.session)) {
2405         error_report("Received bad msg size.");
2406         return -EPROTO;
2407     }
2408 
2409     if (msg.payload.session.session_id < 0) {
2410         error_report("Bad session id: %" PRId64 "",
2411                               msg.payload.session.session_id);
2412         return -EINVAL;
2413     }
2414     *session_id = msg.payload.session.session_id;
2415 
2416     return 0;
2417 }
2418 
2419 static int
2420 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2421 {
2422     int ret;
2423     bool crypto_session = virtio_has_feature(dev->protocol_features,
2424                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2425     VhostUserMsg msg = {
2426         .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2427         .hdr.flags = VHOST_USER_VERSION,
2428         .hdr.size = sizeof(msg.payload.u64),
2429     };
2430     msg.payload.u64 = session_id;
2431 
2432     if (!crypto_session) {
2433         error_report("vhost-user trying to send unhandled ioctl");
2434         return -ENOTSUP;
2435     }
2436 
2437     ret = vhost_user_write(dev, &msg, NULL, 0);
2438     if (ret < 0) {
2439         error_report("vhost_user_write() return %d, close session failed",
2440                      ret);
2441         return ret;
2442     }
2443 
2444     return 0;
2445 }
2446 
2447 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2448                                           MemoryRegionSection *section)
2449 {
2450     return memory_region_get_fd(section->mr) >= 0;
2451 }
2452 
2453 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2454                                       uint16_t queue_size,
2455                                       struct vhost_inflight *inflight)
2456 {
2457     void *addr;
2458     int fd;
2459     int ret;
2460     struct vhost_user *u = dev->opaque;
2461     CharBackend *chr = u->user->chr;
2462     VhostUserMsg msg = {
2463         .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2464         .hdr.flags = VHOST_USER_VERSION,
2465         .payload.inflight.num_queues = dev->nvqs,
2466         .payload.inflight.queue_size = queue_size,
2467         .hdr.size = sizeof(msg.payload.inflight),
2468     };
2469 
2470     if (!virtio_has_feature(dev->protocol_features,
2471                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2472         return 0;
2473     }
2474 
2475     ret = vhost_user_write(dev, &msg, NULL, 0);
2476     if (ret < 0) {
2477         return ret;
2478     }
2479 
2480     ret = vhost_user_read(dev, &msg);
2481     if (ret < 0) {
2482         return ret;
2483     }
2484 
2485     if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2486         error_report("Received unexpected msg type. "
2487                      "Expected %d received %d",
2488                      VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2489         return -EPROTO;
2490     }
2491 
2492     if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2493         error_report("Received bad msg size.");
2494         return -EPROTO;
2495     }
2496 
2497     if (!msg.payload.inflight.mmap_size) {
2498         return 0;
2499     }
2500 
2501     fd = qemu_chr_fe_get_msgfd(chr);
2502     if (fd < 0) {
2503         error_report("Failed to get mem fd");
2504         return -EIO;
2505     }
2506 
2507     addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2508                 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2509 
2510     if (addr == MAP_FAILED) {
2511         error_report("Failed to mmap mem fd");
2512         close(fd);
2513         return -EFAULT;
2514     }
2515 
2516     inflight->addr = addr;
2517     inflight->fd = fd;
2518     inflight->size = msg.payload.inflight.mmap_size;
2519     inflight->offset = msg.payload.inflight.mmap_offset;
2520     inflight->queue_size = queue_size;
2521 
2522     return 0;
2523 }
2524 
2525 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2526                                       struct vhost_inflight *inflight)
2527 {
2528     VhostUserMsg msg = {
2529         .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2530         .hdr.flags = VHOST_USER_VERSION,
2531         .payload.inflight.mmap_size = inflight->size,
2532         .payload.inflight.mmap_offset = inflight->offset,
2533         .payload.inflight.num_queues = dev->nvqs,
2534         .payload.inflight.queue_size = inflight->queue_size,
2535         .hdr.size = sizeof(msg.payload.inflight),
2536     };
2537 
2538     if (!virtio_has_feature(dev->protocol_features,
2539                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2540         return 0;
2541     }
2542 
2543     return vhost_user_write(dev, &msg, &inflight->fd, 1);
2544 }
2545 
2546 static void vhost_user_state_destroy(gpointer data)
2547 {
2548     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2549     if (n) {
2550         vhost_user_host_notifier_remove(n, NULL);
2551         object_unparent(OBJECT(&n->mr));
2552         /*
2553          * We can't free until vhost_user_host_notifier_remove has
2554          * done it's thing so schedule the free with RCU.
2555          */
2556         g_free_rcu(n, rcu);
2557     }
2558 }
2559 
2560 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2561 {
2562     if (user->chr) {
2563         error_setg(errp, "Cannot initialize vhost-user state");
2564         return false;
2565     }
2566     user->chr = chr;
2567     user->memory_slots = 0;
2568     user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2569                                            &vhost_user_state_destroy);
2570     return true;
2571 }
2572 
2573 void vhost_user_cleanup(VhostUserState *user)
2574 {
2575     if (!user->chr) {
2576         return;
2577     }
2578     memory_region_transaction_begin();
2579     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2580     memory_region_transaction_commit();
2581     user->chr = NULL;
2582 }
2583 
2584 
2585 typedef struct {
2586     vu_async_close_fn cb;
2587     DeviceState *dev;
2588     CharBackend *cd;
2589     struct vhost_dev *vhost;
2590 } VhostAsyncCallback;
2591 
2592 static void vhost_user_async_close_bh(void *opaque)
2593 {
2594     VhostAsyncCallback *data = opaque;
2595     struct vhost_dev *vhost = data->vhost;
2596 
2597     /*
2598      * If the vhost_dev has been cleared in the meantime there is
2599      * nothing left to do as some other path has completed the
2600      * cleanup.
2601      */
2602     if (vhost->vdev) {
2603         data->cb(data->dev);
2604     }
2605 
2606     g_free(data);
2607 }
2608 
2609 /*
2610  * We only schedule the work if the machine is running. If suspended
2611  * we want to keep all the in-flight data as is for migration
2612  * purposes.
2613  */
2614 void vhost_user_async_close(DeviceState *d,
2615                             CharBackend *chardev, struct vhost_dev *vhost,
2616                             vu_async_close_fn cb)
2617 {
2618     if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2619         /*
2620          * A close event may happen during a read/write, but vhost
2621          * code assumes the vhost_dev remains setup, so delay the
2622          * stop & clear.
2623          */
2624         AioContext *ctx = qemu_get_current_aio_context();
2625         VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2626 
2627         /* Save data for the callback */
2628         data->cb = cb;
2629         data->dev = d;
2630         data->cd = chardev;
2631         data->vhost = vhost;
2632 
2633         /* Disable any further notifications on the chardev */
2634         qemu_chr_fe_set_handlers(chardev,
2635                                  NULL, NULL, NULL, NULL, NULL, NULL,
2636                                  false);
2637 
2638         aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2639 
2640         /*
2641          * Move vhost device to the stopped state. The vhost-user device
2642          * will be clean up and disconnected in BH. This can be useful in
2643          * the vhost migration code. If disconnect was caught there is an
2644          * option for the general vhost code to get the dev state without
2645          * knowing its type (in this case vhost-user).
2646          *
2647          * Note if the vhost device is fully cleared by the time we
2648          * execute the bottom half we won't continue with the cleanup.
2649          */
2650         vhost->started = false;
2651     }
2652 }
2653 
2654 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2655 {
2656     if (!virtio_has_feature(dev->protocol_features,
2657                             VHOST_USER_PROTOCOL_F_STATUS)) {
2658         return 0;
2659     }
2660 
2661     /* Set device status only for last queue pair */
2662     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2663         return 0;
2664     }
2665 
2666     if (started) {
2667         return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2668                                           VIRTIO_CONFIG_S_DRIVER |
2669                                           VIRTIO_CONFIG_S_DRIVER_OK);
2670     } else {
2671         return 0;
2672     }
2673 }
2674 
2675 static void vhost_user_reset_status(struct vhost_dev *dev)
2676 {
2677     /* Set device status only for last queue pair */
2678     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2679         return;
2680     }
2681 
2682     if (virtio_has_feature(dev->protocol_features,
2683                            VHOST_USER_PROTOCOL_F_STATUS)) {
2684         vhost_user_set_status(dev, 0);
2685     }
2686 }
2687 
2688 const VhostOps user_ops = {
2689         .backend_type = VHOST_BACKEND_TYPE_USER,
2690         .vhost_backend_init = vhost_user_backend_init,
2691         .vhost_backend_cleanup = vhost_user_backend_cleanup,
2692         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2693         .vhost_set_log_base = vhost_user_set_log_base,
2694         .vhost_set_mem_table = vhost_user_set_mem_table,
2695         .vhost_set_vring_addr = vhost_user_set_vring_addr,
2696         .vhost_set_vring_endian = vhost_user_set_vring_endian,
2697         .vhost_set_vring_num = vhost_user_set_vring_num,
2698         .vhost_set_vring_base = vhost_user_set_vring_base,
2699         .vhost_get_vring_base = vhost_user_get_vring_base,
2700         .vhost_set_vring_kick = vhost_user_set_vring_kick,
2701         .vhost_set_vring_call = vhost_user_set_vring_call,
2702         .vhost_set_vring_err = vhost_user_set_vring_err,
2703         .vhost_set_features = vhost_user_set_features,
2704         .vhost_get_features = vhost_user_get_features,
2705         .vhost_set_owner = vhost_user_set_owner,
2706         .vhost_reset_device = vhost_user_reset_device,
2707         .vhost_get_vq_index = vhost_user_get_vq_index,
2708         .vhost_set_vring_enable = vhost_user_set_vring_enable,
2709         .vhost_requires_shm_log = vhost_user_requires_shm_log,
2710         .vhost_migration_done = vhost_user_migration_done,
2711         .vhost_backend_can_merge = vhost_user_can_merge,
2712         .vhost_net_set_mtu = vhost_user_net_set_mtu,
2713         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2714         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2715         .vhost_get_config = vhost_user_get_config,
2716         .vhost_set_config = vhost_user_set_config,
2717         .vhost_crypto_create_session = vhost_user_crypto_create_session,
2718         .vhost_crypto_close_session = vhost_user_crypto_close_session,
2719         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2720         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2721         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2722         .vhost_dev_start = vhost_user_dev_start,
2723         .vhost_reset_status = vhost_user_reset_status,
2724 };
2725