xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision 19473e51cc1b019b1987261e1af8bc8b4a858f12)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-backend.h"
15 #include "hw/virtio/virtio-net.h"
16 #include "chardev/char-fe.h"
17 #include "sysemu/kvm.h"
18 #include "qemu/error-report.h"
19 #include "qemu/sockets.h"
20 #include "sysemu/cryptodev.h"
21 #include "migration/migration.h"
22 #include "migration/postcopy-ram.h"
23 #include "trace.h"
24 
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 #include <sys/un.h>
28 #include <linux/vhost.h>
29 #include <linux/userfaultfd.h>
30 
31 #define VHOST_MEMORY_MAX_NREGIONS    8
32 #define VHOST_USER_F_PROTOCOL_FEATURES 30
33 
34 /*
35  * Maximum size of virtio device config space
36  */
37 #define VHOST_USER_MAX_CONFIG_SIZE 256
38 
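/*
 * Optional protocol feature bits.  These are only negotiated (via
 * VHOST_USER_GET/SET_PROTOCOL_FEATURES in vhost_user_init() below) when the
 * backend also offers VHOST_USER_F_PROTOCOL_FEATURES among its virtio
 * feature bits; VHOST_USER_PROTOCOL_FEATURE_MASK covers every bit this code
 * knows about.
 */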
39 enum VhostUserProtocolFeature {
40     VHOST_USER_PROTOCOL_F_MQ = 0,
41     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
42     VHOST_USER_PROTOCOL_F_RARP = 2,
43     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
44     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
45     VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
46     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
47     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
48     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
49     VHOST_USER_PROTOCOL_F_CONFIG = 9,
50     VHOST_USER_PROTOCOL_F_MAX
51 };
52 
53 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
54 
55 typedef enum VhostUserRequest {
56     VHOST_USER_NONE = 0,
57     VHOST_USER_GET_FEATURES = 1,
58     VHOST_USER_SET_FEATURES = 2,
59     VHOST_USER_SET_OWNER = 3,
60     VHOST_USER_RESET_OWNER = 4,
61     VHOST_USER_SET_MEM_TABLE = 5,
62     VHOST_USER_SET_LOG_BASE = 6,
63     VHOST_USER_SET_LOG_FD = 7,
64     VHOST_USER_SET_VRING_NUM = 8,
65     VHOST_USER_SET_VRING_ADDR = 9,
66     VHOST_USER_SET_VRING_BASE = 10,
67     VHOST_USER_GET_VRING_BASE = 11,
68     VHOST_USER_SET_VRING_KICK = 12,
69     VHOST_USER_SET_VRING_CALL = 13,
70     VHOST_USER_SET_VRING_ERR = 14,
71     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
72     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
73     VHOST_USER_GET_QUEUE_NUM = 17,
74     VHOST_USER_SET_VRING_ENABLE = 18,
75     VHOST_USER_SEND_RARP = 19,
76     VHOST_USER_NET_SET_MTU = 20,
77     VHOST_USER_SET_SLAVE_REQ_FD = 21,
78     VHOST_USER_IOTLB_MSG = 22,
79     VHOST_USER_SET_VRING_ENDIAN = 23,
80     VHOST_USER_GET_CONFIG = 24,
81     VHOST_USER_SET_CONFIG = 25,
82     VHOST_USER_CREATE_CRYPTO_SESSION = 26,
83     VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
84     VHOST_USER_POSTCOPY_ADVISE  = 28,
85     VHOST_USER_POSTCOPY_LISTEN  = 29,
86     VHOST_USER_POSTCOPY_END     = 30,
87     VHOST_USER_MAX
88 } VhostUserRequest;
89 
90 typedef enum VhostUserSlaveRequest {
91     VHOST_USER_SLAVE_NONE = 0,
92     VHOST_USER_SLAVE_IOTLB_MSG = 1,
93     VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
94     VHOST_USER_SLAVE_MAX
95 }  VhostUserSlaveRequest;
96 
97 typedef struct VhostUserMemoryRegion {
98     uint64_t guest_phys_addr;
99     uint64_t memory_size;
100     uint64_t userspace_addr;
101     uint64_t mmap_offset;
102 } VhostUserMemoryRegion;
103 
104 typedef struct VhostUserMemory {
105     uint32_t nregions;
106     uint32_t padding;
107     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
108 } VhostUserMemory;
109 
110 typedef struct VhostUserLog {
111     uint64_t mmap_size;
112     uint64_t mmap_offset;
113 } VhostUserLog;
114 
115 typedef struct VhostUserConfig {
116     uint32_t offset;
117     uint32_t size;
118     uint32_t flags;
119     uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
120 } VhostUserConfig;
121 
122 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
123 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
124 
125 typedef struct VhostUserCryptoSession {
126     /* session id on success, -1 on error */
127     int64_t session_id;
128     CryptoDevBackendSymSessionInfo session_setup_data;
129     uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
130     uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
131 } VhostUserCryptoSession;
132 
133 static VhostUserConfig c __attribute__ ((unused));
134 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
135                                    + sizeof(c.size) \
136                                    + sizeof(c.flags))
137 
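/*
 * On the wire every message is a packed VhostUserHeader (request, flags,
 * size) immediately followed by hdr.size bytes taken from VhostUserPayload.
 * File descriptors never travel in the payload; they are attached to the
 * socket message as SCM_RIGHTS ancillary data (see vhost_user_write() and
 * slave_read() below).  For example, a SET_VRING_NUM request for ring 0
 * with 256 descriptors is sent with .hdr.request = VHOST_USER_SET_VRING_NUM,
 * .hdr.size = sizeof(struct vhost_vring_state) and
 * .payload.state = { .index = 0, .num = 256 }.
 */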
138 typedef struct {
139     VhostUserRequest request;
140 
141 #define VHOST_USER_VERSION_MASK     (0x3)
142 #define VHOST_USER_REPLY_MASK       (0x1<<2)
143 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
144     uint32_t flags;
145     uint32_t size; /* the following payload size */
146 } QEMU_PACKED VhostUserHeader;
147 
148 typedef union {
149 #define VHOST_USER_VRING_IDX_MASK   (0xff)
150 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
151         uint64_t u64;
152         struct vhost_vring_state state;
153         struct vhost_vring_addr addr;
154         VhostUserMemory memory;
155         VhostUserLog log;
156         struct vhost_iotlb_msg iotlb;
157         VhostUserConfig config;
158         VhostUserCryptoSession session;
159 } VhostUserPayload;
160 
161 typedef struct VhostUserMsg {
162     VhostUserHeader hdr;
163     VhostUserPayload payload;
164 } QEMU_PACKED VhostUserMsg;
165 
166 static VhostUserMsg m __attribute__ ((unused));
167 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
168 
169 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
170 
171 /* The version of the protocol we support */
172 #define VHOST_USER_VERSION    (0x1)
173 
174 struct vhost_user {
175     struct vhost_dev *dev;
176     CharBackend *chr;
177     int slave_fd;
178     NotifierWithReturn postcopy_notifier;
179     struct PostCopyFD  postcopy_fd;
180     uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
181     /* Length of the region_rb and region_rb_offset arrays */
182     size_t             region_rb_len;
183     /* RAMBlock associated with a given region */
184     RAMBlock         **region_rb;
185     /* The offset from the start of the RAMBlock to the start of the
186      * vhost region.
187      */
188     ram_addr_t        *region_rb_offset;
189 
190     /* True once we've entered postcopy_listen */
191     bool               postcopy_listen;
192 };
193 
194 static bool ioeventfd_enabled(void)
195 {
196     return kvm_enabled() && kvm_eventfds_enabled();
197 }
198 
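/*
 * Read one reply from the backend: first the fixed-size header, whose flags
 * (REPLY | version) and advertised payload size are validated, then hdr.size
 * bytes of payload into the same buffer.  Returns 0 on success, -1 on error.
 */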
199 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
200 {
201     struct vhost_user *u = dev->opaque;
202     CharBackend *chr = u->chr;
203     uint8_t *p = (uint8_t *) msg;
204     int r, size = VHOST_USER_HDR_SIZE;
205 
206     r = qemu_chr_fe_read_all(chr, p, size);
207     if (r != size) {
208         error_report("Failed to read msg header. Read %d instead of %d."
209                      " Original request %d.", r, size, msg->hdr.request);
210         goto fail;
211     }
212 
213     /* validate received flags */
214     if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
215         error_report("Failed to read msg header."
216                 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
217                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
218         goto fail;
219     }
220 
221     /* validate message size is sane */
222     if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
223         error_report("Failed to read msg header."
224                 " Size %d exceeds the maximum %zu.", msg->hdr.size,
225                 VHOST_USER_PAYLOAD_SIZE);
226         goto fail;
227     }
228 
229     if (msg->hdr.size) {
230         p += VHOST_USER_HDR_SIZE;
231         size = msg->hdr.size;
232         r = qemu_chr_fe_read_all(chr, p, size);
233         if (r != size) {
234             error_report("Failed to read msg payload."
235                          " Read %d instead of %d.", r, msg->hdr.size);
236             goto fail;
237         }
238     }
239 
240     return 0;
241 
242 fail:
243     return -1;
244 }
245 
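/*
 * For requests sent with VHOST_USER_NEED_REPLY_MASK set, wait for the
 * matching reply and treat payload.u64 as the status: zero means success,
 * anything else is reported as failure.  Requests sent without the flag
 * succeed immediately.
 */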
246 static int process_message_reply(struct vhost_dev *dev,
247                                  const VhostUserMsg *msg)
248 {
249     VhostUserMsg msg_reply;
250 
251     if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
252         return 0;
253     }
254 
255     if (vhost_user_read(dev, &msg_reply) < 0) {
256         return -1;
257     }
258 
259     if (msg_reply.hdr.request != msg->hdr.request) {
260         error_report("Received unexpected msg type. "
261                      "Expected %d received %d",
262                      msg->hdr.request, msg_reply.hdr.request);
263         return -1;
264     }
265 
266     return msg_reply.payload.u64 ? -1 : 0;
267 }
268 
269 static bool vhost_user_one_time_request(VhostUserRequest request)
270 {
271     switch (request) {
272     case VHOST_USER_SET_OWNER:
273     case VHOST_USER_RESET_OWNER:
274     case VHOST_USER_SET_MEM_TABLE:
275     case VHOST_USER_GET_QUEUE_NUM:
276     case VHOST_USER_NET_SET_MTU:
277         return true;
278     default:
279         return false;
280     }
281 }
282 
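/*
 * Send one request to the backend.  Any fds in @fds are attached to the
 * message as ancillary data via qemu_chr_fe_set_msgfds() before header and
 * payload are written with a single qemu_chr_fe_write_all() call.
 */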
283 /* most non-init callers ignore the error */
284 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
285                             int *fds, int fd_num)
286 {
287     struct vhost_user *u = dev->opaque;
288     CharBackend *chr = u->chr;
289     int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
290 
291     /*
292      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
293      * we only need to send it once, for the first queue pair. Later
294      * calls for the other queue pairs are silently ignored.
295      */
296     if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
297         msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
298         return 0;
299     }
300 
301     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
302         error_report("Failed to set msg fds.");
303         return -1;
304     }
305 
306     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
307     if (ret != size) {
308         error_report("Failed to write msg."
309                      " Wrote %d instead of %d.", ret, size);
310         return -1;
311     }
312 
313     return 0;
314 }
315 
316 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
317                                    struct vhost_log *log)
318 {
319     int fds[VHOST_MEMORY_MAX_NREGIONS];
320     size_t fd_num = 0;
321     bool shmfd = virtio_has_feature(dev->protocol_features,
322                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
323     VhostUserMsg msg = {
324         .hdr.request = VHOST_USER_SET_LOG_BASE,
325         .hdr.flags = VHOST_USER_VERSION,
326         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
327         .payload.log.mmap_offset = 0,
328         .hdr.size = sizeof(msg.payload.log),
329     };
330 
331     if (shmfd && log->fd != -1) {
332         fds[fd_num++] = log->fd;
333     }
334 
335     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
336         return -1;
337     }
338 
339     if (shmfd) {
340         msg.hdr.size = 0;
341         if (vhost_user_read(dev, &msg) < 0) {
342             return -1;
343         }
344 
345         if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
346             error_report("Received unexpected msg type. "
347                          "Expected %d received %d",
348                          VHOST_USER_SET_LOG_BASE, msg.hdr.request);
349             return -1;
350         }
351     }
352 
353     return 0;
354 }
355 
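/*
 * Postcopy variant of SET_MEM_TABLE.  Besides sending the region table (one
 * fd per region), it remembers each region's RAMBlock and offset, waits for
 * the backend to echo the table back with the userspace addresses at which
 * it mapped every region (stored in postcopy_client_bases, so that later
 * userfaults can be translated), and finally acks the exchange.
 */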
356 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
357                                              struct vhost_memory *mem)
358 {
359     struct vhost_user *u = dev->opaque;
360     int fds[VHOST_MEMORY_MAX_NREGIONS];
361     int i, fd;
362     size_t fd_num = 0;
363     bool reply_supported = virtio_has_feature(dev->protocol_features,
364                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
365     VhostUserMsg msg_reply;
366     int region_i, msg_i;
367 
368     VhostUserMsg msg = {
369         .hdr.request = VHOST_USER_SET_MEM_TABLE,
370         .hdr.flags = VHOST_USER_VERSION,
371     };
372 
373     if (reply_supported) {
374         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
375     }
376 
377     if (u->region_rb_len < dev->mem->nregions) {
378         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
379         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
380                                       dev->mem->nregions);
381         memset(&(u->region_rb[u->region_rb_len]), '\0',
382                sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
383         memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
384                sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
385         u->region_rb_len = dev->mem->nregions;
386     }
387 
388     for (i = 0; i < dev->mem->nregions; ++i) {
389         struct vhost_memory_region *reg = dev->mem->regions + i;
390         ram_addr_t offset;
391         MemoryRegion *mr;
392 
393         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
394         mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
395                                      &offset);
396         fd = memory_region_get_fd(mr);
397         if (fd > 0) {
398             trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
399                                                   reg->memory_size,
400                                                   reg->guest_phys_addr,
401                                                   reg->userspace_addr, offset);
402             u->region_rb_offset[i] = offset;
403             u->region_rb[i] = mr->ram_block;
404             msg.payload.memory.regions[fd_num].userspace_addr =
405                 reg->userspace_addr;
406             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
407             msg.payload.memory.regions[fd_num].guest_phys_addr =
408                 reg->guest_phys_addr;
409             msg.payload.memory.regions[fd_num].mmap_offset = offset;
410             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
411             fds[fd_num++] = fd;
412         } else {
413             u->region_rb_offset[i] = 0;
414             u->region_rb[i] = NULL;
415         }
416     }
417 
418     msg.payload.memory.nregions = fd_num;
419 
420     if (!fd_num) {
421         error_report("Failed initializing vhost-user memory map, "
422                      "consider using -object memory-backend-file share=on");
423         return -1;
424     }
425 
426     msg.hdr.size = sizeof(msg.payload.memory.nregions);
427     msg.hdr.size += sizeof(msg.payload.memory.padding);
428     msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
429 
430     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
431         return -1;
432     }
433 
434     if (vhost_user_read(dev, &msg_reply) < 0) {
435         return -1;
436     }
437 
438     if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
439         error_report("%s: Received unexpected msg type. "
440                      "Expected %d received %d", __func__,
441                      VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
442         return -1;
443     }
444     /* We're using the same structure, just reusing one of the
445      * fields, so it should be the same size.
446      */
447     if (msg_reply.hdr.size != msg.hdr.size) {
448         error_report("%s: Unexpected size for postcopy reply "
449                      "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
450         return -1;
451     }
452 
453     memset(u->postcopy_client_bases, 0,
454            sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
455 
456     /* The reply's regions are in the same order as the ones we sent,
457      * but any region that did not have an fd was skipped (above) and
458      * so is absent from the reply.
459      */
460     for (msg_i = 0, region_i = 0;
461          region_i < dev->mem->nregions;
462          region_i++) {
463         if (msg_i < fd_num &&
464             msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
465             dev->mem->regions[region_i].guest_phys_addr) {
466             u->postcopy_client_bases[region_i] =
467                 msg_reply.payload.memory.regions[msg_i].userspace_addr;
468             trace_vhost_user_set_mem_table_postcopy(
469                 msg_reply.payload.memory.regions[msg_i].userspace_addr,
470                 msg.payload.memory.regions[msg_i].userspace_addr,
471                 msg_i, region_i);
472             msg_i++;
473         }
474     }
475     if (msg_i != fd_num) {
476         error_report("%s: postcopy reply not fully consumed "
477                      "%d vs %zd",
478                      __func__, msg_i, fd_num);
479         return -1;
480     }
481     /* Now that we've registered this with the postcopy code, ack to the
482      * client: from this point on we are able to handle any faults it
483      * generates.
484      */
485     /* TODO: Use this for failure cases as well with a bad value */
486     msg.hdr.size = sizeof(msg.payload.u64);
487     msg.payload.u64 = 0; /* OK */
488     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
489         return -1;
490     }
491 
492     if (reply_supported) {
493         return process_message_reply(dev, &msg);
494     }
495 
496     return 0;
497 }
498 
499 static int vhost_user_set_mem_table(struct vhost_dev *dev,
500                                     struct vhost_memory *mem)
501 {
502     struct vhost_user *u = dev->opaque;
503     int fds[VHOST_MEMORY_MAX_NREGIONS];
504     int i, fd;
505     size_t fd_num = 0;
506     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
507     bool reply_supported = virtio_has_feature(dev->protocol_features,
508                                           VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
509                                           !do_postcopy;
510 
511     if (do_postcopy) {
512         /* Postcopy has enough differences that it's best done in its own
513          * version.
514          */
515         return vhost_user_set_mem_table_postcopy(dev, mem);
516     }
517 
518     VhostUserMsg msg = {
519         .hdr.request = VHOST_USER_SET_MEM_TABLE,
520         .hdr.flags = VHOST_USER_VERSION,
521     };
522 
523     if (reply_supported) {
524         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
525     }
526 
527     for (i = 0; i < dev->mem->nregions; ++i) {
528         struct vhost_memory_region *reg = dev->mem->regions + i;
529         ram_addr_t offset;
530         MemoryRegion *mr;
531 
532         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
533         mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
534                                      &offset);
535         fd = memory_region_get_fd(mr);
536         if (fd > 0) {
537             if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
538                 error_report("Failed preparing vhost-user memory table msg");
539                 return -1;
540             }
541             msg.payload.memory.regions[fd_num].userspace_addr =
542                 reg->userspace_addr;
543             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
544             msg.payload.memory.regions[fd_num].guest_phys_addr =
545                 reg->guest_phys_addr;
546             msg.payload.memory.regions[fd_num].mmap_offset = offset;
547             fds[fd_num++] = fd;
548         }
549     }
550 
551     msg.payload.memory.nregions = fd_num;
552 
553     if (!fd_num) {
554         error_report("Failed initializing vhost-user memory map, "
555                      "consider using -object memory-backend-file share=on");
556         return -1;
557     }
558 
559     msg.hdr.size = sizeof(msg.payload.memory.nregions);
560     msg.hdr.size += sizeof(msg.payload.memory.padding);
561     msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
562 
563     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
564         return -1;
565     }
566 
567     if (reply_supported) {
568         return process_message_reply(dev, &msg);
569     }
570 
571     return 0;
572 }
573 
574 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
575                                      struct vhost_vring_addr *addr)
576 {
577     VhostUserMsg msg = {
578         .hdr.request = VHOST_USER_SET_VRING_ADDR,
579         .hdr.flags = VHOST_USER_VERSION,
580         .payload.addr = *addr,
581         .hdr.size = sizeof(msg.payload.addr),
582     };
583 
584     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
585         return -1;
586     }
587 
588     return 0;
589 }
590 
591 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
592                                        struct vhost_vring_state *ring)
593 {
594     bool cross_endian = virtio_has_feature(dev->protocol_features,
595                                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
596     VhostUserMsg msg = {
597         .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
598         .hdr.flags = VHOST_USER_VERSION,
599         .payload.state = *ring,
600         .hdr.size = sizeof(msg.payload.state),
601     };
602 
603     if (!cross_endian) {
604         error_report("vhost-user trying to send unhandled ioctl");
605         return -1;
606     }
607 
608     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
609         return -1;
610     }
611 
612     return 0;
613 }
614 
615 static int vhost_set_vring(struct vhost_dev *dev,
616                            unsigned long int request,
617                            struct vhost_vring_state *ring)
618 {
619     VhostUserMsg msg = {
620         .hdr.request = request,
621         .hdr.flags = VHOST_USER_VERSION,
622         .payload.state = *ring,
623         .hdr.size = sizeof(msg.payload.state),
624     };
625 
626     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
627         return -1;
628     }
629 
630     return 0;
631 }
632 
633 static int vhost_user_set_vring_num(struct vhost_dev *dev,
634                                     struct vhost_vring_state *ring)
635 {
636     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
637 }
638 
639 static int vhost_user_set_vring_base(struct vhost_dev *dev,
640                                      struct vhost_vring_state *ring)
641 {
642     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
643 }
644 
645 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
646 {
647     int i;
648 
649     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
650         return -1;
651     }
652 
653     for (i = 0; i < dev->nvqs; ++i) {
654         struct vhost_vring_state state = {
655             .index = dev->vq_index + i,
656             .num   = enable,
657         };
658 
659         vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
660     }
661 
662     return 0;
663 }
664 
665 static int vhost_user_get_vring_base(struct vhost_dev *dev,
666                                      struct vhost_vring_state *ring)
667 {
668     VhostUserMsg msg = {
669         .hdr.request = VHOST_USER_GET_VRING_BASE,
670         .hdr.flags = VHOST_USER_VERSION,
671         .payload.state = *ring,
672         .hdr.size = sizeof(msg.payload.state),
673     };
674 
675     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
676         return -1;
677     }
678 
679     if (vhost_user_read(dev, &msg) < 0) {
680         return -1;
681     }
682 
683     if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
684         error_report("Received unexpected msg type. Expected %d received %d",
685                      VHOST_USER_GET_VRING_BASE, msg.hdr.request);
686         return -1;
687     }
688 
689     if (msg.hdr.size != sizeof(msg.payload.state)) {
690         error_report("Received bad msg size.");
691         return -1;
692     }
693 
694     *ring = msg.payload.state;
695 
696     return 0;
697 }
698 
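/*
 * Common helper for SET_VRING_KICK/CALL: the vring index goes in the low
 * bits of payload.u64 and the eventfd is passed as ancillary data.  When
 * ioeventfds are unusable or the fd is invalid, VHOST_USER_VRING_NOFD_MASK
 * is set instead and no fd is attached.
 */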
699 static int vhost_set_vring_file(struct vhost_dev *dev,
700                                 VhostUserRequest request,
701                                 struct vhost_vring_file *file)
702 {
703     int fds[VHOST_MEMORY_MAX_NREGIONS];
704     size_t fd_num = 0;
705     VhostUserMsg msg = {
706         .hdr.request = request,
707         .hdr.flags = VHOST_USER_VERSION,
708         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
709         .hdr.size = sizeof(msg.payload.u64),
710     };
711 
712     if (ioeventfd_enabled() && file->fd > 0) {
713         fds[fd_num++] = file->fd;
714     } else {
715         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
716     }
717 
718     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
719         return -1;
720     }
721 
722     return 0;
723 }
724 
725 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
726                                      struct vhost_vring_file *file)
727 {
728     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
729 }
730 
731 static int vhost_user_set_vring_call(struct vhost_dev *dev,
732                                      struct vhost_vring_file *file)
733 {
734     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
735 }
736 
737 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
738 {
739     VhostUserMsg msg = {
740         .hdr.request = request,
741         .hdr.flags = VHOST_USER_VERSION,
742         .payload.u64 = u64,
743         .hdr.size = sizeof(msg.payload.u64),
744     };
745 
746     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
747         return -1;
748     }
749 
750     return 0;
751 }
752 
753 static int vhost_user_set_features(struct vhost_dev *dev,
754                                    uint64_t features)
755 {
756     return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
757 }
758 
759 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
760                                             uint64_t features)
761 {
762     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
763 }
764 
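/*
 * Generic request/reply helper for messages whose reply carries a single
 * u64 (GET_FEATURES, GET_PROTOCOL_FEATURES, GET_QUEUE_NUM, ...).  One-time
 * requests are silently skipped for all but the first queue pair.
 */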
765 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
766 {
767     VhostUserMsg msg = {
768         .hdr.request = request,
769         .hdr.flags = VHOST_USER_VERSION,
770     };
771 
772     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
773         return 0;
774     }
775 
776     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
777         return -1;
778     }
779 
780     if (vhost_user_read(dev, &msg) < 0) {
781         return -1;
782     }
783 
784     if (msg.hdr.request != request) {
785         error_report("Received unexpected msg type. Expected %d received %d",
786                      request, msg.hdr.request);
787         return -1;
788     }
789 
790     if (msg.hdr.size != sizeof(msg.payload.u64)) {
791         error_report("Received bad msg size.");
792         return -1;
793     }
794 
795     *u64 = msg.payload.u64;
796 
797     return 0;
798 }
799 
800 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
801 {
802     return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
803 }
804 
805 static int vhost_user_set_owner(struct vhost_dev *dev)
806 {
807     VhostUserMsg msg = {
808         .hdr.request = VHOST_USER_SET_OWNER,
809         .hdr.flags = VHOST_USER_VERSION,
810     };
811 
812     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
813         return -1;
814     }
815 
816     return 0;
817 }
818 
819 static int vhost_user_reset_device(struct vhost_dev *dev)
820 {
821     VhostUserMsg msg = {
822         .hdr.request = VHOST_USER_RESET_OWNER,
823         .hdr.flags = VHOST_USER_VERSION,
824     };
825 
826     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
827         return -1;
828     }
829 
830     return 0;
831 }
832 
833 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
834 {
835     int ret = -1;
836 
837     if (!dev->config_ops) {
838         return -1;
839     }
840 
841     if (dev->config_ops->vhost_dev_config_notifier) {
842         ret = dev->config_ops->vhost_dev_config_notifier(dev);
843     }
844 
845     return ret;
846 }
847 
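/*
 * Handler for messages initiated by the backend on the slave channel.  It
 * reads the header (plus an optional SCM_RIGHTS fd) with recvmsg(), reads
 * the payload, dispatches IOTLB and config-change requests, and, when
 * VHOST_USER_NEED_REPLY_MASK was set, writes back a u64 status reply.
 * On any error the slave channel is torn down.
 */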
848 static void slave_read(void *opaque)
849 {
850     struct vhost_dev *dev = opaque;
851     struct vhost_user *u = dev->opaque;
852     VhostUserHeader hdr = { 0, };
853     VhostUserPayload payload = { 0, };
854     int size, ret = 0;
855     struct iovec iov;
856     struct msghdr msgh;
857     int fd = -1;
858     char control[CMSG_SPACE(sizeof(fd))];
859     struct cmsghdr *cmsg;
860     size_t fdsize;
861 
862     memset(&msgh, 0, sizeof(msgh));
863     msgh.msg_iov = &iov;
864     msgh.msg_iovlen = 1;
865     msgh.msg_control = control;
866     msgh.msg_controllen = sizeof(control);
867 
868     /* Read header */
869     iov.iov_base = &hdr;
870     iov.iov_len = VHOST_USER_HDR_SIZE;
871 
872     size = recvmsg(u->slave_fd, &msgh, 0);
873     if (size != VHOST_USER_HDR_SIZE) {
874         error_report("Failed to read from slave.");
875         goto err;
876     }
877 
878     if (msgh.msg_flags & MSG_CTRUNC) {
879         error_report("Truncated message.");
880         goto err;
881     }
882 
883     for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
884          cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
885             if (cmsg->cmsg_level == SOL_SOCKET &&
886                 cmsg->cmsg_type == SCM_RIGHTS) {
887                     fdsize = cmsg->cmsg_len - CMSG_LEN(0);
888                     memcpy(&fd, CMSG_DATA(cmsg), fdsize);
889                     break;
890             }
891     }
892 
893     if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
894         error_report("Failed to read msg header."
895                 " Size %d exceeds the maximum %zu.", hdr.size,
896                 VHOST_USER_PAYLOAD_SIZE);
897         goto err;
898     }
899 
900     /* Read payload */
901     size = read(u->slave_fd, &payload, hdr.size);
902     if (size != hdr.size) {
903         error_report("Failed to read payload from slave.");
904         goto err;
905     }
906 
907     switch (hdr.request) {
908     case VHOST_USER_SLAVE_IOTLB_MSG:
909         ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
910         break;
911     case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
912         ret = vhost_user_slave_handle_config_change(dev);
913         break;
914     default:
915         error_report("Received unexpected msg type.");
916         if (fd != -1) {
917             close(fd);
918         }
919         ret = -EINVAL;
920     }
921 
922     /* Message handlers need to make sure that fd will be consumed. */
923     fd = -1;
924 
925     /*
926      * REPLY_ACK feature handling. Other reply types have to be managed
927      * directly in their request handlers.
928      */
929     if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
930         struct iovec iovec[2];
931 
932 
933         hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
934         hdr.flags |= VHOST_USER_REPLY_MASK;
935 
936         payload.u64 = !!ret;
937         hdr.size = sizeof(payload.u64);
938 
939         iovec[0].iov_base = &hdr;
940         iovec[0].iov_len = VHOST_USER_HDR_SIZE;
941         iovec[1].iov_base = &payload;
942         iovec[1].iov_len = hdr.size;
943 
944         size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
945         if (size != VHOST_USER_HDR_SIZE + hdr.size) {
946             error_report("Failed to send msg reply to slave.");
947             goto err;
948         }
949     }
950 
951     return;
952 
953 err:
954     qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
955     close(u->slave_fd);
956     u->slave_fd = -1;
957     if (fd != -1) {
958         close(fd);
959     }
960     return;
961 }
962 
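/*
 * Set up the slave channel used for backend-initiated requests: create a
 * socketpair, keep one end locally (slave_fd, serviced by slave_read()) and
 * hand the other end to the backend with VHOST_USER_SET_SLAVE_REQ_FD.  Only
 * done when the SLAVE_REQ protocol feature was negotiated.
 */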
963 static int vhost_setup_slave_channel(struct vhost_dev *dev)
964 {
965     VhostUserMsg msg = {
966         .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
967         .hdr.flags = VHOST_USER_VERSION,
968     };
969     struct vhost_user *u = dev->opaque;
970     int sv[2], ret = 0;
971     bool reply_supported = virtio_has_feature(dev->protocol_features,
972                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
973 
974     if (!virtio_has_feature(dev->protocol_features,
975                             VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
976         return 0;
977     }
978 
979     if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
980         error_report("socketpair() failed");
981         return -1;
982     }
983 
984     u->slave_fd = sv[0];
985     qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
986 
987     if (reply_supported) {
988         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
989     }
990 
991     ret = vhost_user_write(dev, &msg, &sv[1], 1);
992     if (ret) {
993         goto out;
994     }
995 
996     if (reply_supported) {
997         ret = process_message_reply(dev, &msg);
998     }
999 
1000 out:
1001     close(sv[1]);
1002     if (ret) {
1003         qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1004         close(u->slave_fd);
1005         u->slave_fd = -1;
1006     }
1007 
1008     return ret;
1009 }
1010 
1011 /*
1012  * Called back from the postcopy fault thread when a fault is received on our
1013  * ufd.
1014  * TODO: This is Linux specific
1015  */
1016 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1017                                              void *ufd)
1018 {
1019     struct vhost_dev *dev = pcfd->data;
1020     struct vhost_user *u = dev->opaque;
1021     struct uffd_msg *msg = ufd;
1022     uint64_t faultaddr = msg->arg.pagefault.address;
1023     RAMBlock *rb = NULL;
1024     uint64_t rb_offset;
1025     int i;
1026 
1027     trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1028                                             dev->mem->nregions);
1029     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1030         trace_vhost_user_postcopy_fault_handler_loop(i,
1031                 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1032         if (faultaddr >= u->postcopy_client_bases[i]) {
1033             /* Offset of the fault address in the vhost region */
1034             uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1035             if (region_offset < dev->mem->regions[i].memory_size) {
1036                 rb_offset = region_offset + u->region_rb_offset[i];
1037                 trace_vhost_user_postcopy_fault_handler_found(i,
1038                         region_offset, rb_offset);
1039                 rb = u->region_rb[i];
1040                 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1041                                                     rb_offset);
1042             }
1043         }
1044     }
1045     error_report("%s: Failed to find region for fault %" PRIx64,
1046                  __func__, faultaddr);
1047     return -1;
1048 }
1049 
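/*
 * Called by the postcopy code once a page has arrived in @rb at @offset:
 * translate that back into the backend's address space using the bases
 * recorded during SET_MEM_TABLE and wake any waiter via
 * postcopy_wake_shared().
 */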
1050 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1051                                      uint64_t offset)
1052 {
1053     struct vhost_dev *dev = pcfd->data;
1054     struct vhost_user *u = dev->opaque;
1055     int i;
1056 
1057     trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1058 
1059     if (!u) {
1060         return 0;
1061     }
1062     /* Translate the offset into an address in the client's address space */
1063     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1064         if (u->region_rb[i] == rb &&
1065             offset >= u->region_rb_offset[i] &&
1066             offset < (u->region_rb_offset[i] +
1067                       dev->mem->regions[i].memory_size)) {
1068             uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1069                                    u->postcopy_client_bases[i];
1070             trace_vhost_user_postcopy_waker_found(client_addr);
1071             return postcopy_wake_shared(pcfd, client_addr, rb);
1072         }
1073     }
1074 
1075     trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1076     return 0;
1077 }
1078 
1079 /*
1080  * Called at the start of an inbound postcopy on reception of the
1081  * 'advise' command.
1082  */
1083 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1084 {
1085     struct vhost_user *u = dev->opaque;
1086     CharBackend *chr = u->chr;
1087     int ufd;
1088     VhostUserMsg msg = {
1089         .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1090         .hdr.flags = VHOST_USER_VERSION,
1091     };
1092 
1093     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1094         error_setg(errp, "Failed to send postcopy_advise to vhost");
1095         return -1;
1096     }
1097 
1098     if (vhost_user_read(dev, &msg) < 0) {
1099         error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1100         return -1;
1101     }
1102 
1103     if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1104         error_setg(errp, "Unexpected msg type. Expected %d received %d",
1105                      VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1106         return -1;
1107     }
1108 
1109     if (msg.hdr.size) {
1110         error_setg(errp, "Received bad msg size.");
1111         return -1;
1112     }
1113     ufd = qemu_chr_fe_get_msgfd(chr);
1114     if (ufd < 0) {
1115         error_setg(errp, "%s: Failed to get ufd", __func__);
1116         return -1;
1117     }
1118     qemu_set_nonblock(ufd);
1119 
1120     /* register ufd with userfault thread */
1121     u->postcopy_fd.fd = ufd;
1122     u->postcopy_fd.data = dev;
1123     u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1124     u->postcopy_fd.waker = vhost_user_postcopy_waker;
1125     u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1126     postcopy_register_shared_ufd(&u->postcopy_fd);
1127     return 0;
1128 }
1129 
1130 /*
1131  * Called at the switch to postcopy on reception of the 'listen' command.
1132  */
1133 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1134 {
1135     struct vhost_user *u = dev->opaque;
1136     int ret;
1137     VhostUserMsg msg = {
1138         .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1139         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1140     };
1141     u->postcopy_listen = true;
1142     trace_vhost_user_postcopy_listen();
1143     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1144         error_setg(errp, "Failed to send postcopy_listen to vhost");
1145         return -1;
1146     }
1147 
1148     ret = process_message_reply(dev, &msg);
1149     if (ret) {
1150         error_setg(errp, "Failed to receive reply to postcopy_listen");
1151         return ret;
1152     }
1153 
1154     return 0;
1155 }
1156 
1157 /*
1158  * Called at the end of postcopy
1159  */
1160 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1161 {
1162     VhostUserMsg msg = {
1163         .hdr.request = VHOST_USER_POSTCOPY_END,
1164         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1165     };
1166     int ret;
1167     struct vhost_user *u = dev->opaque;
1168 
1169     trace_vhost_user_postcopy_end_entry();
1170     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1171         error_setg(errp, "Failed to send postcopy_end to vhost");
1172         return -1;
1173     }
1174 
1175     ret = process_message_reply(dev, &msg);
1176     if (ret) {
1177         error_setg(errp, "Failed to receive reply to postcopy_end");
1178         return ret;
1179     }
1180     postcopy_unregister_shared_ufd(&u->postcopy_fd);
1181     u->postcopy_fd.handler = NULL;
1182 
1183     trace_vhost_user_postcopy_end_exit();
1184 
1185     return 0;
1186 }
1187 
1188 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1189                                         void *opaque)
1190 {
1191     struct PostcopyNotifyData *pnd = opaque;
1192     struct vhost_user *u = container_of(notifier, struct vhost_user,
1193                                          postcopy_notifier);
1194     struct vhost_dev *dev = u->dev;
1195 
1196     switch (pnd->reason) {
1197     case POSTCOPY_NOTIFY_PROBE:
1198         if (!virtio_has_feature(dev->protocol_features,
1199                                 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1200             /* TODO: Get the device name into this error somehow */
1201             error_setg(pnd->errp,
1202                        "vhost-user backend not capable of postcopy");
1203             return -ENOENT;
1204         }
1205         break;
1206 
1207     case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1208         return vhost_user_postcopy_advise(dev, pnd->errp);
1209 
1210     case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1211         return vhost_user_postcopy_listen(dev, pnd->errp);
1212 
1213     case POSTCOPY_NOTIFY_INBOUND_END:
1214         return vhost_user_postcopy_end(dev, pnd->errp);
1215 
1216     default:
1217         /* We ignore notifications we don't know */
1218         break;
1219     }
1220 
1221     return 0;
1222 }
1223 
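/*
 * Backend initialisation handshake: fetch the virtio features and, if
 * VHOST_USER_F_PROTOCOL_FEATURES is offered, negotiate the protocol
 * features (masked by VHOST_USER_PROTOCOL_FEATURE_MASK), query the maximum
 * number of queues when MQ is offered, and sanity-check the IOMMU and
 * CONFIG requirements.  Finally set up the slave channel and register the
 * postcopy notifier.
 */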
1224 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
1225 {
1226     uint64_t features, protocol_features;
1227     struct vhost_user *u;
1228     int err;
1229 
1230     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1231 
1232     u = g_new0(struct vhost_user, 1);
1233     u->chr = opaque;
1234     u->slave_fd = -1;
1235     u->dev = dev;
1236     dev->opaque = u;
1237 
1238     err = vhost_user_get_features(dev, &features);
1239     if (err < 0) {
1240         return err;
1241     }
1242 
1243     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1244         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1245 
1246         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1247                                  &protocol_features);
1248         if (err < 0) {
1249             return err;
1250         }
1251 
1252         dev->protocol_features =
1253             protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1254 
1255         if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1256             /* Don't acknowledge CONFIG feature if device doesn't support it */
1257             dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1258         } else if (!(protocol_features &
1259                     (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1260             error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1261                     "but backend does not support it.");
1262             return -1;
1263         }
1264 
1265         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1266         if (err < 0) {
1267             return err;
1268         }
1269 
1270         /* Query the maximum number of queues if the backend supports multiqueue */
1271         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1272             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1273                                      &dev->max_queues);
1274             if (err < 0) {
1275                 return err;
1276             }
1277         }
1278 
1279         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1280                 !(virtio_has_feature(dev->protocol_features,
1281                     VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1282                  virtio_has_feature(dev->protocol_features,
1283                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1284             error_report("IOMMU support requires reply-ack and "
1285                          "slave-req protocol features.");
1286             return -1;
1287         }
1288     }
1289 
1290     if (dev->migration_blocker == NULL &&
1291         !virtio_has_feature(dev->protocol_features,
1292                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1293         error_setg(&dev->migration_blocker,
1294                    "Migration disabled: vhost-user backend lacks "
1295                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1296     }
1297 
1298     err = vhost_setup_slave_channel(dev);
1299     if (err < 0) {
1300         return err;
1301     }
1302 
1303     u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1304     postcopy_add_notifier(&u->postcopy_notifier);
1305 
1306     return 0;
1307 }
1308 
1309 static int vhost_user_cleanup(struct vhost_dev *dev)
1310 {
1311     struct vhost_user *u;
1312 
1313     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1314 
1315     u = dev->opaque;
1316     if (u->postcopy_notifier.notify) {
1317         postcopy_remove_notifier(&u->postcopy_notifier);
1318         u->postcopy_notifier.notify = NULL;
1319     }
1320     if (u->slave_fd >= 0) {
1321         qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1322         close(u->slave_fd);
1323         u->slave_fd = -1;
1324     }
1325     g_free(u->region_rb);
1326     u->region_rb = NULL;
1327     g_free(u->region_rb_offset);
1328     u->region_rb_offset = NULL;
1329     u->region_rb_len = 0;
1330     g_free(u);
1331     dev->opaque = 0;
1332 
1333     return 0;
1334 }
1335 
1336 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1337 {
1338     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1339 
1340     return idx;
1341 }
1342 
1343 static int vhost_user_memslots_limit(struct vhost_dev *dev)
1344 {
1345     return VHOST_MEMORY_MAX_NREGIONS;
1346 }
1347 
1348 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1349 {
1350     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1351 
1352     return virtio_has_feature(dev->protocol_features,
1353                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1354 }
1355 
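/*
 * Called after migration: if the guest cannot announce itself (no
 * VIRTIO_NET_F_GUEST_ANNOUNCE) and the backend offers the RARP protocol
 * feature, ask it to send a RARP for the given MAC address via
 * VHOST_USER_SEND_RARP.
 */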
1356 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1357 {
1358     VhostUserMsg msg = { };
1359 
1360     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1361 
1362     /* If guest supports GUEST_ANNOUNCE do nothing */
1363     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1364         return 0;
1365     }
1366 
1367     /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1368     if (virtio_has_feature(dev->protocol_features,
1369                            VHOST_USER_PROTOCOL_F_RARP)) {
1370         msg.hdr.request = VHOST_USER_SEND_RARP;
1371         msg.hdr.flags = VHOST_USER_VERSION;
1372         memcpy((char *)&msg.payload.u64, mac_addr, 6);
1373         msg.hdr.size = sizeof(msg.payload.u64);
1374 
1375         return vhost_user_write(dev, &msg, NULL, 0);
1376     }
1377     return -1;
1378 }
1379 
1380 static bool vhost_user_can_merge(struct vhost_dev *dev,
1381                                  uint64_t start1, uint64_t size1,
1382                                  uint64_t start2, uint64_t size2)
1383 {
1384     ram_addr_t offset;
1385     int mfd, rfd;
1386     MemoryRegion *mr;
1387 
1388     mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1389     mfd = memory_region_get_fd(mr);
1390 
1391     mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1392     rfd = memory_region_get_fd(mr);
1393 
1394     return mfd == rfd;
1395 }
1396 
1397 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1398 {
1399     VhostUserMsg msg;
1400     bool reply_supported = virtio_has_feature(dev->protocol_features,
1401                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1402 
1403     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1404         return 0;
1405     }
1406 
1407     msg.hdr.request = VHOST_USER_NET_SET_MTU;
1408     msg.payload.u64 = mtu;
1409     msg.hdr.size = sizeof(msg.payload.u64);
1410     msg.hdr.flags = VHOST_USER_VERSION;
1411     if (reply_supported) {
1412         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1413     }
1414 
1415     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1416         return -1;
1417     }
1418 
1419     /* If reply_ack is supported, the slave must ack that the specified MTU is valid */
1420     if (reply_supported) {
1421         return process_message_reply(dev, &msg);
1422     }
1423 
1424     return 0;
1425 }
1426 
1427 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1428                                             struct vhost_iotlb_msg *imsg)
1429 {
1430     VhostUserMsg msg = {
1431         .hdr.request = VHOST_USER_IOTLB_MSG,
1432         .hdr.size = sizeof(msg.payload.iotlb),
1433         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1434         .payload.iotlb = *imsg,
1435     };
1436 
1437     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1438         return -EFAULT;
1439     }
1440 
1441     return process_message_reply(dev, &msg);
1442 }
1443 
1444 
1445 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
1446 {
1447     /* No-op as the receive channel is not dedicated to IOTLB messages. */
1448 }
1449 
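/*
 * Device config space accessors (GET_CONFIG/SET_CONFIG): the payload starts
 * with an offset/size/flags header followed by up to
 * VHOST_USER_MAX_CONFIG_SIZE bytes of config data; both directions require
 * the CONFIG protocol feature.
 */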
1450 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1451                                  uint32_t config_len)
1452 {
1453     VhostUserMsg msg = {
1454         .hdr.request = VHOST_USER_GET_CONFIG,
1455         .hdr.flags = VHOST_USER_VERSION,
1456         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1457     };
1458 
1459     if (!virtio_has_feature(dev->protocol_features,
1460                 VHOST_USER_PROTOCOL_F_CONFIG)) {
1461         return -1;
1462     }
1463 
1464     if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1465         return -1;
1466     }
1467 
1468     msg.payload.config.offset = 0;
1469     msg.payload.config.size = config_len;
1470     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1471         return -1;
1472     }
1473 
1474     if (vhost_user_read(dev, &msg) < 0) {
1475         return -1;
1476     }
1477 
1478     if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1479         error_report("Received unexpected msg type. Expected %d received %d",
1480                      VHOST_USER_GET_CONFIG, msg.hdr.request);
1481         return -1;
1482     }
1483 
1484     if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1485         error_report("Received bad msg size.");
1486         return -1;
1487     }
1488 
1489     memcpy(config, msg.payload.config.region, config_len);
1490 
1491     return 0;
1492 }
1493 
1494 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1495                                  uint32_t offset, uint32_t size, uint32_t flags)
1496 {
1497     uint8_t *p;
1498     bool reply_supported = virtio_has_feature(dev->protocol_features,
1499                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1500 
1501     VhostUserMsg msg = {
1502         .hdr.request = VHOST_USER_SET_CONFIG,
1503         .hdr.flags = VHOST_USER_VERSION,
1504         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1505     };
1506 
1507     if (!virtio_has_feature(dev->protocol_features,
1508                 VHOST_USER_PROTOCOL_F_CONFIG)) {
1509         return -1;
1510     }
1511 
1512     if (reply_supported) {
1513         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1514     }
1515 
1516     if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1517         return -1;
1518     }
1519 
1520     msg.payload.config.offset = offset,
1521     msg.payload.config.size = size,
1522     msg.payload.config.flags = flags,
1523     p = msg.payload.config.region;
1524     memcpy(p, data, size);
1525 
1526     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1527         return -1;
1528     }
1529 
1530     if (reply_supported) {
1531         return process_message_reply(dev, &msg);
1532     }
1533 
1534     return 0;
1535 }
1536 
1537 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1538                                             void *session_info,
1539                                             uint64_t *session_id)
1540 {
1541     bool crypto_session = virtio_has_feature(dev->protocol_features,
1542                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1543     CryptoDevBackendSymSessionInfo *sess_info = session_info;
1544     VhostUserMsg msg = {
1545         .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1546         .hdr.flags = VHOST_USER_VERSION,
1547         .hdr.size = sizeof(msg.payload.session),
1548     };
1549 
1550     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1551 
1552     if (!crypto_session) {
1553         error_report("vhost-user trying to send unhandled ioctl");
1554         return -1;
1555     }
1556 
1557     memcpy(&msg.payload.session.session_setup_data, sess_info,
1558               sizeof(CryptoDevBackendSymSessionInfo));
1559     if (sess_info->key_len) {
1560         memcpy(&msg.payload.session.key, sess_info->cipher_key,
1561                sess_info->key_len);
1562     }
1563     if (sess_info->auth_key_len > 0) {
1564         memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1565                sess_info->auth_key_len);
1566     }
1567     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1568         error_report("vhost_user_write() return -1, create session failed");
1569         return -1;
1570     }
1571 
1572     if (vhost_user_read(dev, &msg) < 0) {
1573         error_report("vhost_user_read() return -1, create session failed");
1574         return -1;
1575     }
1576 
1577     if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1578         error_report("Received unexpected msg type. Expected %d received %d",
1579                      VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1580         return -1;
1581     }
1582 
1583     if (msg.hdr.size != sizeof(msg.payload.session)) {
1584         error_report("Received bad msg size.");
1585         return -1;
1586     }
1587 
1588     if (msg.payload.session.session_id < 0) {
1589         error_report("Bad session id: %" PRId64 "",
1590                               msg.payload.session.session_id);
1591         return -1;
1592     }
1593     *session_id = msg.payload.session.session_id;
1594 
1595     return 0;
1596 }
1597 
1598 static int
1599 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1600 {
1601     bool crypto_session = virtio_has_feature(dev->protocol_features,
1602                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1603     VhostUserMsg msg = {
1604         .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1605         .hdr.flags = VHOST_USER_VERSION,
1606         .hdr.size = sizeof(msg.payload.u64),
1607     };
1608     msg.payload.u64 = session_id;
1609 
1610     if (!crypto_session) {
1611         error_report("vhost-user trying to send unhandled ioctl");
1612         return -1;
1613     }
1614 
1615     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1616         error_report("vhost_user_write() return -1, close session failed");
1617         return -1;
1618     }
1619 
1620     return 0;
1621 }
1622 
1623 const VhostOps user_ops = {
1624         .backend_type = VHOST_BACKEND_TYPE_USER,
1625         .vhost_backend_init = vhost_user_init,
1626         .vhost_backend_cleanup = vhost_user_cleanup,
1627         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
1628         .vhost_set_log_base = vhost_user_set_log_base,
1629         .vhost_set_mem_table = vhost_user_set_mem_table,
1630         .vhost_set_vring_addr = vhost_user_set_vring_addr,
1631         .vhost_set_vring_endian = vhost_user_set_vring_endian,
1632         .vhost_set_vring_num = vhost_user_set_vring_num,
1633         .vhost_set_vring_base = vhost_user_set_vring_base,
1634         .vhost_get_vring_base = vhost_user_get_vring_base,
1635         .vhost_set_vring_kick = vhost_user_set_vring_kick,
1636         .vhost_set_vring_call = vhost_user_set_vring_call,
1637         .vhost_set_features = vhost_user_set_features,
1638         .vhost_get_features = vhost_user_get_features,
1639         .vhost_set_owner = vhost_user_set_owner,
1640         .vhost_reset_device = vhost_user_reset_device,
1641         .vhost_get_vq_index = vhost_user_get_vq_index,
1642         .vhost_set_vring_enable = vhost_user_set_vring_enable,
1643         .vhost_requires_shm_log = vhost_user_requires_shm_log,
1644         .vhost_migration_done = vhost_user_migration_done,
1645         .vhost_backend_can_merge = vhost_user_can_merge,
1646         .vhost_net_set_mtu = vhost_user_net_set_mtu,
1647         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
1648         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
1649         .vhost_get_config = vhost_user_get_config,
1650         .vhost_set_config = vhost_user_set_config,
1651         .vhost_crypto_create_session = vhost_user_crypto_create_session,
1652         .vhost_crypto_close_session = vhost_user_crypto_close_session,
1653 };
1654