xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision 6598f0cd)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "sysemu/kvm.h"
20 #include "qemu/error-report.h"
21 #include "qemu/sockets.h"
22 #include "sysemu/cryptodev.h"
23 #include "migration/migration.h"
24 #include "migration/postcopy-ram.h"
25 #include "trace.h"
26 
27 #include <sys/ioctl.h>
28 #include <sys/socket.h>
29 #include <sys/un.h>
30 #include <linux/vhost.h>
31 #include <linux/userfaultfd.h>
32 
33 #define VHOST_MEMORY_MAX_NREGIONS    8
34 #define VHOST_USER_F_PROTOCOL_FEATURES 30
35 #define VHOST_USER_SLAVE_MAX_FDS     8
36 
37 /*
38  * Maximum size of virtio device config space
39  */
40 #define VHOST_USER_MAX_CONFIG_SIZE 256
41 
42 enum VhostUserProtocolFeature {
43     VHOST_USER_PROTOCOL_F_MQ = 0,
44     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
45     VHOST_USER_PROTOCOL_F_RARP = 2,
46     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
47     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
48     VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
49     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
50     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
51     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
52     VHOST_USER_PROTOCOL_F_CONFIG = 9,
53     VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
54     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
55     VHOST_USER_PROTOCOL_F_MAX
56 };
57 
58 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
59 
60 typedef enum VhostUserRequest {
61     VHOST_USER_NONE = 0,
62     VHOST_USER_GET_FEATURES = 1,
63     VHOST_USER_SET_FEATURES = 2,
64     VHOST_USER_SET_OWNER = 3,
65     VHOST_USER_RESET_OWNER = 4,
66     VHOST_USER_SET_MEM_TABLE = 5,
67     VHOST_USER_SET_LOG_BASE = 6,
68     VHOST_USER_SET_LOG_FD = 7,
69     VHOST_USER_SET_VRING_NUM = 8,
70     VHOST_USER_SET_VRING_ADDR = 9,
71     VHOST_USER_SET_VRING_BASE = 10,
72     VHOST_USER_GET_VRING_BASE = 11,
73     VHOST_USER_SET_VRING_KICK = 12,
74     VHOST_USER_SET_VRING_CALL = 13,
75     VHOST_USER_SET_VRING_ERR = 14,
76     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
77     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
78     VHOST_USER_GET_QUEUE_NUM = 17,
79     VHOST_USER_SET_VRING_ENABLE = 18,
80     VHOST_USER_SEND_RARP = 19,
81     VHOST_USER_NET_SET_MTU = 20,
82     VHOST_USER_SET_SLAVE_REQ_FD = 21,
83     VHOST_USER_IOTLB_MSG = 22,
84     VHOST_USER_SET_VRING_ENDIAN = 23,
85     VHOST_USER_GET_CONFIG = 24,
86     VHOST_USER_SET_CONFIG = 25,
87     VHOST_USER_CREATE_CRYPTO_SESSION = 26,
88     VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
89     VHOST_USER_POSTCOPY_ADVISE  = 28,
90     VHOST_USER_POSTCOPY_LISTEN  = 29,
91     VHOST_USER_POSTCOPY_END     = 30,
92     VHOST_USER_MAX
93 } VhostUserRequest;
94 
95 typedef enum VhostUserSlaveRequest {
96     VHOST_USER_SLAVE_NONE = 0,
97     VHOST_USER_SLAVE_IOTLB_MSG = 1,
98     VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
99     VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
100     VHOST_USER_SLAVE_MAX
101 }  VhostUserSlaveRequest;
102 
103 typedef struct VhostUserMemoryRegion {
104     uint64_t guest_phys_addr;
105     uint64_t memory_size;
106     uint64_t userspace_addr;
107     uint64_t mmap_offset;
108 } VhostUserMemoryRegion;
109 
110 typedef struct VhostUserMemory {
111     uint32_t nregions;
112     uint32_t padding;
113     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
114 } VhostUserMemory;
115 
116 typedef struct VhostUserLog {
117     uint64_t mmap_size;
118     uint64_t mmap_offset;
119 } VhostUserLog;
120 
121 typedef struct VhostUserConfig {
122     uint32_t offset;
123     uint32_t size;
124     uint32_t flags;
125     uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
126 } VhostUserConfig;
127 
128 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
129 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
130 
131 typedef struct VhostUserCryptoSession {
132     /* session id on success, -1 on error */
133     int64_t session_id;
134     CryptoDevBackendSymSessionInfo session_setup_data;
135     uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
136     uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
137 } VhostUserCryptoSession;
138 
139 static VhostUserConfig c __attribute__ ((unused));
140 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
141                                    + sizeof(c.size) \
142                                    + sizeof(c.flags))
143 
144 typedef struct VhostUserVringArea {
145     uint64_t u64;
146     uint64_t size;
147     uint64_t offset;
148 } VhostUserVringArea;
149 
150 typedef struct {
151     VhostUserRequest request;
152 
153 #define VHOST_USER_VERSION_MASK     (0x3)
154 #define VHOST_USER_REPLY_MASK       (0x1<<2)
155 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
156     uint32_t flags;
157     uint32_t size; /* the following payload size */
158 } QEMU_PACKED VhostUserHeader;
159 
160 typedef union {
161 #define VHOST_USER_VRING_IDX_MASK   (0xff)
162 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
163         uint64_t u64;
164         struct vhost_vring_state state;
165         struct vhost_vring_addr addr;
166         VhostUserMemory memory;
167         VhostUserLog log;
168         struct vhost_iotlb_msg iotlb;
169         VhostUserConfig config;
170         VhostUserCryptoSession session;
171         VhostUserVringArea area;
172 } VhostUserPayload;
173 
174 typedef struct VhostUserMsg {
175     VhostUserHeader hdr;
176     VhostUserPayload payload;
177 } QEMU_PACKED VhostUserMsg;
178 
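    /*
     * On the wire a vhost-user message is the packed 12-byte header
     * (request, flags, size) followed by hdr.size bytes of payload;
     * any file descriptors are passed as SCM_RIGHTS ancillary data on
     * the same socket.
     */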
179 static VhostUserMsg m __attribute__ ((unused));
180 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
181 
182 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
183 
184 /* The version of the protocol we support */
185 #define VHOST_USER_VERSION    (0x1)
186 
187 struct vhost_user {
188     struct vhost_dev *dev;
189     /* Shared between vhost devs of the same virtio device */
190     VhostUserState *user;
191     int slave_fd;
192     NotifierWithReturn postcopy_notifier;
193     struct PostCopyFD  postcopy_fd;
194     uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
195     /* Length of the region_rb and region_rb_offset arrays */
196     size_t             region_rb_len;
197     /* RAMBlock associated with a given region */
198     RAMBlock         **region_rb;
199     /* The offset from the start of the RAMBlock to the start of the
200      * vhost region.
201      */
202     ram_addr_t        *region_rb_offset;
203 
204     /* True once we've entered postcopy_listen */
205     bool               postcopy_listen;
206 };
207 
208 static bool ioeventfd_enabled(void)
209 {
210     return kvm_enabled() && kvm_eventfds_enabled();
211 }
212 
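    /*
     * Read one reply from the backend: the fixed-size header first, then,
     * after validating the flags and the advertised payload size, the
     * payload itself.  Returns 0 on success, -1 on error.
     */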
213 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
214 {
215     struct vhost_user *u = dev->opaque;
216     CharBackend *chr = u->user->chr;
217     uint8_t *p = (uint8_t *) msg;
218     int r, size = VHOST_USER_HDR_SIZE;
219 
220     r = qemu_chr_fe_read_all(chr, p, size);
221     if (r != size) {
222         error_report("Failed to read msg header. Read %d instead of %d."
223                      " Original request %d.", r, size, msg->hdr.request);
224         goto fail;
225     }
226 
227     /* validate received flags */
228     if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
229         error_report("Failed to read msg header."
230                 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
231                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
232         goto fail;
233     }
234 
235     /* validate message size is sane */
236     if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
237         error_report("Failed to read msg header."
238                 " Size %d exceeds the maximum %zu.", msg->hdr.size,
239                 VHOST_USER_PAYLOAD_SIZE);
240         goto fail;
241     }
242 
243     if (msg->hdr.size) {
244         p += VHOST_USER_HDR_SIZE;
245         size = msg->hdr.size;
246         r = qemu_chr_fe_read_all(chr, p, size);
247         if (r != size) {
248             error_report("Failed to read msg payload."
249                          " Read %d instead of %d.", r, msg->hdr.size);
250             goto fail;
251         }
252     }
253 
254     return 0;
255 
256 fail:
257     return -1;
258 }
259 
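    /*
     * If the request was sent with VHOST_USER_NEED_REPLY_MASK set
     * (REPLY_ACK), read the backend's reply, check it matches the request
     * and treat a non-zero u64 payload as failure.  Requests sent without
     * the flag succeed trivially.
     */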
260 static int process_message_reply(struct vhost_dev *dev,
261                                  const VhostUserMsg *msg)
262 {
263     VhostUserMsg msg_reply;
264 
265     if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
266         return 0;
267     }
268 
269     if (vhost_user_read(dev, &msg_reply) < 0) {
270         return -1;
271     }
272 
273     if (msg_reply.hdr.request != msg->hdr.request) {
274         error_report("Received unexpected msg type."
275                      "Expected %d received %d",
276                      msg->hdr.request, msg_reply.hdr.request);
277         return -1;
278     }
279 
280     return msg_reply.payload.u64 ? -1 : 0;
281 }
282 
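    /*
     * Requests that affect the whole device rather than a single vring
     * only need to be sent once, by the vhost_dev owning vq_index 0.
     */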
283 static bool vhost_user_one_time_request(VhostUserRequest request)
284 {
285     switch (request) {
286     case VHOST_USER_SET_OWNER:
287     case VHOST_USER_RESET_OWNER:
288     case VHOST_USER_SET_MEM_TABLE:
289     case VHOST_USER_GET_QUEUE_NUM:
290     case VHOST_USER_NET_SET_MTU:
291         return true;
292     default:
293         return false;
294     }
295 }
296 
297 /* most non-init callers ignore the error */
298 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
299                             int *fds, int fd_num)
300 {
301     struct vhost_user *u = dev->opaque;
302     CharBackend *chr = u->user->chr;
303     int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
304 
305     /*
306      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
307      * we only need to send it once, the first time; any later such
308      * request is simply ignored.
309      */
310     if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
311         msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
312         return 0;
313     }
314 
315     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
316         error_report("Failed to set msg fds.");
317         return -1;
318     }
319 
320     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
321     if (ret != size) {
322         error_report("Failed to write msg."
323                      " Wrote %d instead of %d.", ret, size);
324         return -1;
325     }
326 
327     return 0;
328 }
329 
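    /*
     * Set the dirty-log base.  When the LOG_SHMFD protocol feature has
     * been negotiated the log's fd is passed along with the message and
     * we wait for the backend to acknowledge the new log before returning.
     */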
330 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
331                                    struct vhost_log *log)
332 {
333     int fds[VHOST_MEMORY_MAX_NREGIONS];
334     size_t fd_num = 0;
335     bool shmfd = virtio_has_feature(dev->protocol_features,
336                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
337     VhostUserMsg msg = {
338         .hdr.request = VHOST_USER_SET_LOG_BASE,
339         .hdr.flags = VHOST_USER_VERSION,
340         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
341         .payload.log.mmap_offset = 0,
342         .hdr.size = sizeof(msg.payload.log),
343     };
344 
345     if (shmfd && log->fd != -1) {
346         fds[fd_num++] = log->fd;
347     }
348 
349     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
350         return -1;
351     }
352 
353     if (shmfd) {
354         msg.hdr.size = 0;
355         if (vhost_user_read(dev, &msg) < 0) {
356             return -1;
357         }
358 
359         if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
360             error_report("Received unexpected msg type. "
361                          "Expected %d received %d",
362                          VHOST_USER_SET_LOG_BASE, msg.hdr.request);
363             return -1;
364         }
365     }
366 
367     return 0;
368 }
369 
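    /*
     * Postcopy variant of SET_MEM_TABLE: send every fd-backed region to
     * the backend, read back its reply to learn the addresses at which it
     * mapped each region (recorded in postcopy_client_bases for fault
     * handling), and finally ack so the backend may start generating
     * faults.
     */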
370 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
371                                              struct vhost_memory *mem)
372 {
373     struct vhost_user *u = dev->opaque;
374     int fds[VHOST_MEMORY_MAX_NREGIONS];
375     int i, fd;
376     size_t fd_num = 0;
377     bool reply_supported = virtio_has_feature(dev->protocol_features,
378                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
379     VhostUserMsg msg_reply;
380     int region_i, msg_i;
381 
382     VhostUserMsg msg = {
383         .hdr.request = VHOST_USER_SET_MEM_TABLE,
384         .hdr.flags = VHOST_USER_VERSION,
385     };
386 
387     if (reply_supported) {
388         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
389     }
390 
391     if (u->region_rb_len < dev->mem->nregions) {
392         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
393         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
394                                       dev->mem->nregions);
395         memset(&(u->region_rb[u->region_rb_len]), '\0',
396                sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
397         memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
398                sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
399         u->region_rb_len = dev->mem->nregions;
400     }
401 
402     for (i = 0; i < dev->mem->nregions; ++i) {
403         struct vhost_memory_region *reg = dev->mem->regions + i;
404         ram_addr_t offset;
405         MemoryRegion *mr;
406 
407         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
408         mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
409                                      &offset);
410         fd = memory_region_get_fd(mr);
411         if (fd > 0) {
412             trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
413                                                   reg->memory_size,
414                                                   reg->guest_phys_addr,
415                                                   reg->userspace_addr, offset);
416             u->region_rb_offset[i] = offset;
417             u->region_rb[i] = mr->ram_block;
418             msg.payload.memory.regions[fd_num].userspace_addr =
419                 reg->userspace_addr;
420             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
421             msg.payload.memory.regions[fd_num].guest_phys_addr =
422                 reg->guest_phys_addr;
423             msg.payload.memory.regions[fd_num].mmap_offset = offset;
424             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
425             fds[fd_num++] = fd;
426         } else {
427             u->region_rb_offset[i] = 0;
428             u->region_rb[i] = NULL;
429         }
430     }
431 
432     msg.payload.memory.nregions = fd_num;
433 
434     if (!fd_num) {
435         error_report("Failed initializing vhost-user memory map, "
436                      "consider using -object memory-backend-file share=on");
437         return -1;
438     }
439 
440     msg.hdr.size = sizeof(msg.payload.memory.nregions);
441     msg.hdr.size += sizeof(msg.payload.memory.padding);
442     msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
443 
444     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
445         return -1;
446     }
447 
448     if (vhost_user_read(dev, &msg_reply) < 0) {
449         return -1;
450     }
451 
452     if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
453         error_report("%s: Received unexpected msg type."
454                      "Expected %d received %d", __func__,
455                      VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
456         return -1;
457     }
458     /* We're using the same structure, just reusing one of the
459      * fields, so it should be the same size.
460      */
461     if (msg_reply.hdr.size != msg.hdr.size) {
462         error_report("%s: Unexpected size for postcopy reply "
463                      "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
464         return -1;
465     }
466 
467     memset(u->postcopy_client_bases, 0,
468            sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
469 
470     /* They're in the same order as the regions that were sent,
471      * but some of the regions were skipped (above) if they
472      * didn't have fds.
473      */
474     for (msg_i = 0, region_i = 0;
475          region_i < dev->mem->nregions;
476         region_i++) {
477         if (msg_i < fd_num &&
478             msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
479             dev->mem->regions[region_i].guest_phys_addr) {
480             u->postcopy_client_bases[region_i] =
481                 msg_reply.payload.memory.regions[msg_i].userspace_addr;
482             trace_vhost_user_set_mem_table_postcopy(
483                 msg_reply.payload.memory.regions[msg_i].userspace_addr,
484                 msg.payload.memory.regions[msg_i].userspace_addr,
485                 msg_i, region_i);
486             msg_i++;
487         }
488     }
489     if (msg_i != fd_num) {
490         error_report("%s: postcopy reply not fully consumed "
491                      "%d vs %zd",
492                      __func__, msg_i, fd_num);
493         return -1;
494     }
495     /* Now that we've registered this with the postcopy code, we ack to
496      * the client, because we're now in a position to deal with any
497      * faults it generates.
498      */
499     /* TODO: Use this for failure cases as well with a bad value */
500     msg.hdr.size = sizeof(msg.payload.u64);
501     msg.payload.u64 = 0; /* OK */
502     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
503         return -1;
504     }
505 
506     if (reply_supported) {
507         return process_message_reply(dev, &msg);
508     }
509 
510     return 0;
511 }
512 
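    /*
     * Build and send the memory table.  Only regions backed by an fd
     * (e.g. a memory-backend-file with share=on) can be passed to the
     * backend; in postcopy listen mode the dedicated variant above is
     * used instead.
     */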
513 static int vhost_user_set_mem_table(struct vhost_dev *dev,
514                                     struct vhost_memory *mem)
515 {
516     struct vhost_user *u = dev->opaque;
517     int fds[VHOST_MEMORY_MAX_NREGIONS];
518     int i, fd;
519     size_t fd_num = 0;
520     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
521     bool reply_supported = virtio_has_feature(dev->protocol_features,
522                                           VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
523                                           !do_postcopy;
524 
525     if (do_postcopy) {
526         /* Postcopy has enough differences that it's best done in its own
527          * version.
528          */
529         return vhost_user_set_mem_table_postcopy(dev, mem);
530     }
531 
532     VhostUserMsg msg = {
533         .hdr.request = VHOST_USER_SET_MEM_TABLE,
534         .hdr.flags = VHOST_USER_VERSION,
535     };
536 
537     if (reply_supported) {
538         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
539     }
540 
541     for (i = 0; i < dev->mem->nregions; ++i) {
542         struct vhost_memory_region *reg = dev->mem->regions + i;
543         ram_addr_t offset;
544         MemoryRegion *mr;
545 
546         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
547         mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
548                                      &offset);
549         fd = memory_region_get_fd(mr);
550         if (fd > 0) {
551             if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
552                 error_report("Failed preparing vhost-user memory table msg");
553                 return -1;
554             }
555             msg.payload.memory.regions[fd_num].userspace_addr =
556                 reg->userspace_addr;
557             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
558             msg.payload.memory.regions[fd_num].guest_phys_addr =
559                 reg->guest_phys_addr;
560             msg.payload.memory.regions[fd_num].mmap_offset = offset;
561             fds[fd_num++] = fd;
562         }
563     }
564 
565     msg.payload.memory.nregions = fd_num;
566 
567     if (!fd_num) {
568         error_report("Failed initializing vhost-user memory map, "
569                      "consider using -object memory-backend-file share=on");
570         return -1;
571     }
572 
573     msg.hdr.size = sizeof(msg.payload.memory.nregions);
574     msg.hdr.size += sizeof(msg.payload.memory.padding);
575     msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
576 
577     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
578         return -1;
579     }
580 
581     if (reply_supported) {
582         return process_message_reply(dev, &msg);
583     }
584 
585     return 0;
586 }
587 
588 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
589                                      struct vhost_vring_addr *addr)
590 {
591     VhostUserMsg msg = {
592         .hdr.request = VHOST_USER_SET_VRING_ADDR,
593         .hdr.flags = VHOST_USER_VERSION,
594         .payload.addr = *addr,
595         .hdr.size = sizeof(msg.payload.addr),
596     };
597 
598     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
599         return -1;
600     }
601 
602     return 0;
603 }
604 
605 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
606                                        struct vhost_vring_state *ring)
607 {
608     bool cross_endian = virtio_has_feature(dev->protocol_features,
609                                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
610     VhostUserMsg msg = {
611         .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
612         .hdr.flags = VHOST_USER_VERSION,
613         .payload.state = *ring,
614         .hdr.size = sizeof(msg.payload.state),
615     };
616 
617     if (!cross_endian) {
618         error_report("vhost-user trying to send unhandled ioctl");
619         return -1;
620     }
621 
622     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
623         return -1;
624     }
625 
626     return 0;
627 }
628 
629 static int vhost_set_vring(struct vhost_dev *dev,
630                            unsigned long int request,
631                            struct vhost_vring_state *ring)
632 {
633     VhostUserMsg msg = {
634         .hdr.request = request,
635         .hdr.flags = VHOST_USER_VERSION,
636         .payload.state = *ring,
637         .hdr.size = sizeof(msg.payload.state),
638     };
639 
640     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
641         return -1;
642     }
643 
644     return 0;
645 }
646 
647 static int vhost_user_set_vring_num(struct vhost_dev *dev,
648                                     struct vhost_vring_state *ring)
649 {
650     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
651 }
652 
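    /*
     * Re-attach the host-notifier MMIO region for a queue if the backend
     * provided one earlier but it is not currently installed; the matching
     * _remove() helper below detaches it again.
     */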
653 static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
654                                              int queue_idx)
655 {
656     struct vhost_user *u = dev->opaque;
657     VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
658     VirtIODevice *vdev = dev->vdev;
659 
660     if (n->addr && !n->set) {
661         virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
662         n->set = true;
663     }
664 }
665 
666 static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
667                                             int queue_idx)
668 {
669     struct vhost_user *u = dev->opaque;
670     VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
671     VirtIODevice *vdev = dev->vdev;
672 
673     if (n->addr && n->set) {
674         virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
675         n->set = false;
676     }
677 }
678 
679 static int vhost_user_set_vring_base(struct vhost_dev *dev,
680                                      struct vhost_vring_state *ring)
681 {
682     vhost_user_host_notifier_restore(dev, ring->index);
683 
684     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
685 }
686 
687 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
688 {
689     int i;
690 
691     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
692         return -1;
693     }
694 
695     for (i = 0; i < dev->nvqs; ++i) {
696         struct vhost_vring_state state = {
697             .index = dev->vq_index + i,
698             .num   = enable,
699         };
700 
701         vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
702     }
703 
704     return 0;
705 }
706 
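    /*
     * Fetch the vring base (last avail index) from the backend; in the
     * vhost-user protocol this also makes the backend stop processing the
     * ring, so the host notifier is detached first.
     */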
707 static int vhost_user_get_vring_base(struct vhost_dev *dev,
708                                      struct vhost_vring_state *ring)
709 {
710     VhostUserMsg msg = {
711         .hdr.request = VHOST_USER_GET_VRING_BASE,
712         .hdr.flags = VHOST_USER_VERSION,
713         .payload.state = *ring,
714         .hdr.size = sizeof(msg.payload.state),
715     };
716 
717     vhost_user_host_notifier_remove(dev, ring->index);
718 
719     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
720         return -1;
721     }
722 
723     if (vhost_user_read(dev, &msg) < 0) {
724         return -1;
725     }
726 
727     if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
728         error_report("Received unexpected msg type. Expected %d received %d",
729                      VHOST_USER_GET_VRING_BASE, msg.hdr.request);
730         return -1;
731     }
732 
733     if (msg.hdr.size != sizeof(msg.payload.state)) {
734         error_report("Received bad msg size.");
735         return -1;
736     }
737 
738     *ring = msg.payload.state;
739 
740     return 0;
741 }
742 
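    /*
     * Pass an eventfd (kick or call) for a ring.  When ioeventfd is not
     * usable or no valid fd was given, VHOST_USER_VRING_NOFD_MASK is set
     * instead to tell the backend that no fd accompanies the message.
     */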
743 static int vhost_set_vring_file(struct vhost_dev *dev,
744                                 VhostUserRequest request,
745                                 struct vhost_vring_file *file)
746 {
747     int fds[VHOST_MEMORY_MAX_NREGIONS];
748     size_t fd_num = 0;
749     VhostUserMsg msg = {
750         .hdr.request = request,
751         .hdr.flags = VHOST_USER_VERSION,
752         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
753         .hdr.size = sizeof(msg.payload.u64),
754     };
755 
756     if (ioeventfd_enabled() && file->fd > 0) {
757         fds[fd_num++] = file->fd;
758     } else {
759         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
760     }
761 
762     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
763         return -1;
764     }
765 
766     return 0;
767 }
768 
769 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
770                                      struct vhost_vring_file *file)
771 {
772     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
773 }
774 
775 static int vhost_user_set_vring_call(struct vhost_dev *dev,
776                                      struct vhost_vring_file *file)
777 {
778     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
779 }
780 
781 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
782 {
783     VhostUserMsg msg = {
784         .hdr.request = request,
785         .hdr.flags = VHOST_USER_VERSION,
786         .payload.u64 = u64,
787         .hdr.size = sizeof(msg.payload.u64),
788     };
789 
790     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
791         return -1;
792     }
793 
794     return 0;
795 }
796 
797 static int vhost_user_set_features(struct vhost_dev *dev,
798                                    uint64_t features)
799 {
800     return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
801 }
802 
803 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
804                                             uint64_t features)
805 {
806     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
807 }
808 
809 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
810 {
811     VhostUserMsg msg = {
812         .hdr.request = request,
813         .hdr.flags = VHOST_USER_VERSION,
814     };
815 
816     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
817         return 0;
818     }
819 
820     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
821         return -1;
822     }
823 
824     if (vhost_user_read(dev, &msg) < 0) {
825         return -1;
826     }
827 
828     if (msg.hdr.request != request) {
829         error_report("Received unexpected msg type. Expected %d received %d",
830                      request, msg.hdr.request);
831         return -1;
832     }
833 
834     if (msg.hdr.size != sizeof(msg.payload.u64)) {
835         error_report("Received bad msg size.");
836         return -1;
837     }
838 
839     *u64 = msg.payload.u64;
840 
841     return 0;
842 }
843 
844 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
845 {
846     return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
847 }
848 
849 static int vhost_user_set_owner(struct vhost_dev *dev)
850 {
851     VhostUserMsg msg = {
852         .hdr.request = VHOST_USER_SET_OWNER,
853         .hdr.flags = VHOST_USER_VERSION,
854     };
855 
856     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
857         return -1;
858     }
859 
860     return 0;
861 }
862 
863 static int vhost_user_reset_device(struct vhost_dev *dev)
864 {
865     VhostUserMsg msg = {
866         .hdr.request = VHOST_USER_RESET_OWNER,
867         .hdr.flags = VHOST_USER_VERSION,
868     };
869 
870     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
871         return -1;
872     }
873 
874     return 0;
875 }
876 
877 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
878 {
879     int ret = -1;
880 
881     if (!dev->config_ops) {
882         return -1;
883     }
884 
885     if (dev->config_ops->vhost_dev_config_notifier) {
886         ret = dev->config_ops->vhost_dev_config_notifier(dev);
887     }
888 
889     return ret;
890 }
891 
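    /*
     * Handle a host-notifier request from the backend: drop any notifier
     * previously installed for the queue, then mmap the page described by
     * (fd, area->offset), wrap it in a ram-device memory region and
     * install it as the host notifier for that queue.
     */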
892 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
893                                                        VhostUserVringArea *area,
894                                                        int fd)
895 {
896     int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
897     size_t page_size = qemu_real_host_page_size;
898     struct vhost_user *u = dev->opaque;
899     VhostUserState *user = u->user;
900     VirtIODevice *vdev = dev->vdev;
901     VhostUserHostNotifier *n;
902     void *addr;
903     char *name;
904 
905     if (!virtio_has_feature(dev->protocol_features,
906                             VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
907         vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
908         return -1;
909     }
910 
911     n = &user->notifier[queue_idx];
912 
913     if (n->addr) {
914         virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
915         object_unparent(OBJECT(&n->mr));
916         munmap(n->addr, page_size);
917         n->addr = NULL;
918     }
919 
920     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
921         return 0;
922     }
923 
924     /* Sanity check. */
925     if (area->size != page_size) {
926         return -1;
927     }
928 
929     addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
930                 fd, area->offset);
931     if (addr == MAP_FAILED) {
932         return -1;
933     }
934 
935     name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
936                            user, queue_idx);
937     memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
938                                       page_size, addr);
939     g_free(name);
940 
941     if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
942         munmap(addr, page_size);
943         return -1;
944     }
945 
946     n->addr = addr;
947     n->set = true;
948 
949     return 0;
950 }
951 
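    /*
     * Handler for the slave channel: read a request header plus any
     * SCM_RIGHTS fds from the backend, dispatch it (IOTLB message, config
     * change, host notifier), close the fds that were not consumed and,
     * when VHOST_USER_NEED_REPLY_MASK was set, send back a success/failure
     * reply.
     */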
952 static void slave_read(void *opaque)
953 {
954     struct vhost_dev *dev = opaque;
955     struct vhost_user *u = dev->opaque;
956     VhostUserHeader hdr = { 0, };
957     VhostUserPayload payload = { 0, };
958     int size, ret = 0;
959     struct iovec iov;
960     struct msghdr msgh;
961     int fd[VHOST_USER_SLAVE_MAX_FDS];
962     char control[CMSG_SPACE(sizeof(fd))];
963     struct cmsghdr *cmsg;
964     int i, fdsize = 0;
965 
966     memset(&msgh, 0, sizeof(msgh));
967     msgh.msg_iov = &iov;
968     msgh.msg_iovlen = 1;
969     msgh.msg_control = control;
970     msgh.msg_controllen = sizeof(control);
971 
972     memset(fd, -1, sizeof(fd));
973 
974     /* Read header */
975     iov.iov_base = &hdr;
976     iov.iov_len = VHOST_USER_HDR_SIZE;
977 
978     size = recvmsg(u->slave_fd, &msgh, 0);
979     if (size != VHOST_USER_HDR_SIZE) {
980         error_report("Failed to read from slave.");
981         goto err;
982     }
983 
984     if (msgh.msg_flags & MSG_CTRUNC) {
985         error_report("Truncated message.");
986         goto err;
987     }
988 
989     for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
990          cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
991             if (cmsg->cmsg_level == SOL_SOCKET &&
992                 cmsg->cmsg_type == SCM_RIGHTS) {
993                     fdsize = cmsg->cmsg_len - CMSG_LEN(0);
994                     memcpy(fd, CMSG_DATA(cmsg), fdsize);
995                     break;
996             }
997     }
998 
999     if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1000         error_report("Failed to read msg header."
1001                 " Size %d exceeds the maximum %zu.", hdr.size,
1002                 VHOST_USER_PAYLOAD_SIZE);
1003         goto err;
1004     }
1005 
1006     /* Read payload */
1007     size = read(u->slave_fd, &payload, hdr.size);
1008     if (size != hdr.size) {
1009         error_report("Failed to read payload from slave.");
1010         goto err;
1011     }
1012 
1013     switch (hdr.request) {
1014     case VHOST_USER_SLAVE_IOTLB_MSG:
1015         ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1016         break;
1017     case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1018         ret = vhost_user_slave_handle_config_change(dev);
1019         break;
1020     case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1021         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1022                                                           fd[0]);
1023         break;
1024     default:
1025         error_report("Received unexpected msg type.");
1026         ret = -EINVAL;
1027     }
1028 
1029     /* Close the remaining file descriptors. */
1030     for (i = 0; i < fdsize; i++) {
1031         if (fd[i] != -1) {
1032             close(fd[i]);
1033         }
1034     }
1035 
1036     /*
1037      * REPLY_ACK feature handling. Other reply types have to be managed
1038      * directly in their request handlers.
1039      */
1040     if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1041         struct iovec iovec[2];
1042 
1043 
1044         hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1045         hdr.flags |= VHOST_USER_REPLY_MASK;
1046 
1047         payload.u64 = !!ret;
1048         hdr.size = sizeof(payload.u64);
1049 
1050         iovec[0].iov_base = &hdr;
1051         iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1052         iovec[1].iov_base = &payload;
1053         iovec[1].iov_len = hdr.size;
1054 
1055         size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1056         if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1057             error_report("Failed to send msg reply to slave.");
1058             goto err;
1059         }
1060     }
1061 
1062     return;
1063 
1064 err:
1065     qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1066     close(u->slave_fd);
1067     u->slave_fd = -1;
1068     for (i = 0; i < fdsize; i++) {
1069         if (fd[i] != -1) {
1070             close(fd[i]);
1071         }
1072     }
1073     return;
1074 }
1075 
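     /*
      * Create the channel used for backend-initiated requests: one end of
      * a socketpair stays with us (slave_fd, watched by slave_read), the
      * other is handed to the backend with VHOST_USER_SET_SLAVE_REQ_FD.
      * Only done when the SLAVE_REQ protocol feature was negotiated.
      */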
1076 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1077 {
1078     VhostUserMsg msg = {
1079         .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1080         .hdr.flags = VHOST_USER_VERSION,
1081     };
1082     struct vhost_user *u = dev->opaque;
1083     int sv[2], ret = 0;
1084     bool reply_supported = virtio_has_feature(dev->protocol_features,
1085                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1086 
1087     if (!virtio_has_feature(dev->protocol_features,
1088                             VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1089         return 0;
1090     }
1091 
1092     if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1093         error_report("socketpair() failed");
1094         return -1;
1095     }
1096 
1097     u->slave_fd = sv[0];
1098     qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1099 
1100     if (reply_supported) {
1101         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1102     }
1103 
1104     ret = vhost_user_write(dev, &msg, &sv[1], 1);
1105     if (ret) {
1106         goto out;
1107     }
1108 
1109     if (reply_supported) {
1110         ret = process_message_reply(dev, &msg);
1111     }
1112 
1113 out:
1114     close(sv[1]);
1115     if (ret) {
1116         qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1117         close(u->slave_fd);
1118         u->slave_fd = -1;
1119     }
1120 
1121     return ret;
1122 }
1123 
1124 /*
1125  * Called back from the postcopy fault thread when a fault is received on our
1126  * ufd.
1127  * TODO: This is Linux specific
1128  */
1129 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1130                                              void *ufd)
1131 {
1132     struct vhost_dev *dev = pcfd->data;
1133     struct vhost_user *u = dev->opaque;
1134     struct uffd_msg *msg = ufd;
1135     uint64_t faultaddr = msg->arg.pagefault.address;
1136     RAMBlock *rb = NULL;
1137     uint64_t rb_offset;
1138     int i;
1139 
1140     trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1141                                             dev->mem->nregions);
1142     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1143         trace_vhost_user_postcopy_fault_handler_loop(i,
1144                 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1145         if (faultaddr >= u->postcopy_client_bases[i]) {
1146             /* Offset of the fault address in the vhost region */
1147             uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1148             if (region_offset < dev->mem->regions[i].memory_size) {
1149                 rb_offset = region_offset + u->region_rb_offset[i];
1150                 trace_vhost_user_postcopy_fault_handler_found(i,
1151                         region_offset, rb_offset);
1152                 rb = u->region_rb[i];
1153                 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1154                                                     rb_offset);
1155             }
1156         }
1157     }
1158     error_report("%s: Failed to find region for fault %" PRIx64,
1159                  __func__, faultaddr);
1160     return -1;
1161 }
1162 
1163 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1164                                      uint64_t offset)
1165 {
1166     struct vhost_dev *dev = pcfd->data;
1167     struct vhost_user *u = dev->opaque;
1168     int i;
1169 
1170     trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1171 
1172     if (!u) {
1173         return 0;
1174     }
1175     /* Translate the offset into an address in the client's address space */
1176     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1177         if (u->region_rb[i] == rb &&
1178             offset >= u->region_rb_offset[i] &&
1179             offset < (u->region_rb_offset[i] +
1180                       dev->mem->regions[i].memory_size)) {
1181             uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1182                                    u->postcopy_client_bases[i];
1183             trace_vhost_user_postcopy_waker_found(client_addr);
1184             return postcopy_wake_shared(pcfd, client_addr, rb);
1185         }
1186     }
1187 
1188     trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1189     return 0;
1190 }
1191 
1192 /*
1193  * Called at the start of an inbound postcopy on reception of the
1194  * 'advise' command.
1195  */
1196 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1197 {
1198     struct vhost_user *u = dev->opaque;
1199     CharBackend *chr = u->user->chr;
1200     int ufd;
1201     VhostUserMsg msg = {
1202         .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1203         .hdr.flags = VHOST_USER_VERSION,
1204     };
1205 
1206     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1207         error_setg(errp, "Failed to send postcopy_advise to vhost");
1208         return -1;
1209     }
1210 
1211     if (vhost_user_read(dev, &msg) < 0) {
1212         error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1213         return -1;
1214     }
1215 
1216     if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1217         error_setg(errp, "Unexpected msg type. Expected %d received %d",
1218                      VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1219         return -1;
1220     }
1221 
1222     if (msg.hdr.size) {
1223         error_setg(errp, "Received bad msg size.");
1224         return -1;
1225     }
1226     ufd = qemu_chr_fe_get_msgfd(chr);
1227     if (ufd < 0) {
1228         error_setg(errp, "%s: Failed to get ufd", __func__);
1229         return -1;
1230     }
1231     qemu_set_nonblock(ufd);
1232 
1233     /* register ufd with userfault thread */
1234     u->postcopy_fd.fd = ufd;
1235     u->postcopy_fd.data = dev;
1236     u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1237     u->postcopy_fd.waker = vhost_user_postcopy_waker;
1238     u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1239     postcopy_register_shared_ufd(&u->postcopy_fd);
1240     return 0;
1241 }
1242 
1243 /*
1244  * Called at the switch to postcopy on reception of the 'listen' command.
1245  */
1246 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1247 {
1248     struct vhost_user *u = dev->opaque;
1249     int ret;
1250     VhostUserMsg msg = {
1251         .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1252         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1253     };
1254     u->postcopy_listen = true;
1255     trace_vhost_user_postcopy_listen();
1256     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1257         error_setg(errp, "Failed to send postcopy_listen to vhost");
1258         return -1;
1259     }
1260 
1261     ret = process_message_reply(dev, &msg);
1262     if (ret) {
1263         error_setg(errp, "Failed to receive reply to postcopy_listen");
1264         return ret;
1265     }
1266 
1267     return 0;
1268 }
1269 
1270 /*
1271  * Called at the end of postcopy
1272  */
1273 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1274 {
1275     VhostUserMsg msg = {
1276         .hdr.request = VHOST_USER_POSTCOPY_END,
1277         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1278     };
1279     int ret;
1280     struct vhost_user *u = dev->opaque;
1281 
1282     trace_vhost_user_postcopy_end_entry();
1283     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1284         error_setg(errp, "Failed to send postcopy_end to vhost");
1285         return -1;
1286     }
1287 
1288     ret = process_message_reply(dev, &msg);
1289     if (ret) {
1290         error_setg(errp, "Failed to receive reply to postcopy_end");
1291         return ret;
1292     }
1293     postcopy_unregister_shared_ufd(&u->postcopy_fd);
1294     u->postcopy_fd.handler = NULL;
1295 
1296     trace_vhost_user_postcopy_end_exit();
1297 
1298     return 0;
1299 }
1300 
1301 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1302                                         void *opaque)
1303 {
1304     struct PostcopyNotifyData *pnd = opaque;
1305     struct vhost_user *u = container_of(notifier, struct vhost_user,
1306                                          postcopy_notifier);
1307     struct vhost_dev *dev = u->dev;
1308 
1309     switch (pnd->reason) {
1310     case POSTCOPY_NOTIFY_PROBE:
1311         if (!virtio_has_feature(dev->protocol_features,
1312                                 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1313             /* TODO: Get the device name into this error somehow */
1314             error_setg(pnd->errp,
1315                        "vhost-user backend not capable of postcopy");
1316             return -ENOENT;
1317         }
1318         break;
1319 
1320     case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1321         return vhost_user_postcopy_advise(dev, pnd->errp);
1322 
1323     case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1324         return vhost_user_postcopy_listen(dev, pnd->errp);
1325 
1326     case POSTCOPY_NOTIFY_INBOUND_END:
1327         return vhost_user_postcopy_end(dev, pnd->errp);
1328 
1329     default:
1330         /* We ignore notifications we don't know */
1331         break;
1332     }
1333 
1334     return 0;
1335 }
1336 
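     /*
      * Backend init: negotiate base and protocol features, validate the
      * CONFIG and IOMMU related requirements, add a migration blocker when
      * shared-memory dirty logging is unavailable, then set up the slave
      * channel and register the postcopy notifier.
      */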
1337 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
1338 {
1339     uint64_t features, protocol_features;
1340     struct vhost_user *u;
1341     int err;
1342 
1343     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1344 
1345     u = g_new0(struct vhost_user, 1);
1346     u->user = opaque;
1347     u->slave_fd = -1;
1348     u->dev = dev;
1349     dev->opaque = u;
1350 
1351     err = vhost_user_get_features(dev, &features);
1352     if (err < 0) {
1353         return err;
1354     }
1355 
1356     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1357         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1358 
1359         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1360                                  &protocol_features);
1361         if (err < 0) {
1362             return err;
1363         }
1364 
1365         dev->protocol_features =
1366             protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1367 
1368         if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1369             /* Don't acknowledge CONFIG feature if device doesn't support it */
1370             dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1371         } else if (!(protocol_features &
1372                     (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1373             error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1374                     "but backend does not support it.");
1375             return -1;
1376         }
1377 
1378         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1379         if (err < 0) {
1380             return err;
1381         }
1382 
1383         /* query the max queues we support if the backend supports multiqueue */
1384         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1385             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1386                                      &dev->max_queues);
1387             if (err < 0) {
1388                 return err;
1389             }
1390         }
1391 
1392         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1393                 !(virtio_has_feature(dev->protocol_features,
1394                     VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1395                  virtio_has_feature(dev->protocol_features,
1396                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1397             error_report("IOMMU support requires reply-ack and "
1398                          "slave-req protocol features.");
1399             return -1;
1400         }
1401     }
1402 
1403     if (dev->migration_blocker == NULL &&
1404         !virtio_has_feature(dev->protocol_features,
1405                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1406         error_setg(&dev->migration_blocker,
1407                    "Migration disabled: vhost-user backend lacks "
1408                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1409     }
1410 
1411     err = vhost_setup_slave_channel(dev);
1412     if (err < 0) {
1413         return err;
1414     }
1415 
1416     u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1417     postcopy_add_notifier(&u->postcopy_notifier);
1418 
1419     return 0;
1420 }
1421 
1422 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1423 {
1424     struct vhost_user *u;
1425 
1426     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1427 
1428     u = dev->opaque;
1429     if (u->postcopy_notifier.notify) {
1430         postcopy_remove_notifier(&u->postcopy_notifier);
1431         u->postcopy_notifier.notify = NULL;
1432     }
1433     if (u->slave_fd >= 0) {
1434         qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1435         close(u->slave_fd);
1436         u->slave_fd = -1;
1437     }
1438     g_free(u->region_rb);
1439     u->region_rb = NULL;
1440     g_free(u->region_rb_offset);
1441     u->region_rb_offset = NULL;
1442     u->region_rb_len = 0;
1443     g_free(u);
1444     dev->opaque = 0;
1445 
1446     return 0;
1447 }
1448 
1449 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1450 {
1451     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1452 
1453     return idx;
1454 }
1455 
1456 static int vhost_user_memslots_limit(struct vhost_dev *dev)
1457 {
1458     return VHOST_MEMORY_MAX_NREGIONS;
1459 }
1460 
1461 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1462 {
1463     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1464 
1465     return virtio_has_feature(dev->protocol_features,
1466                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1467 }
1468 
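     /*
      * Called once migration is done.  If the guest cannot announce itself
      * (no GUEST_ANNOUNCE), ask the backend to broadcast a RARP for the
      * given MAC address, provided the RARP protocol feature is supported.
      */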
1469 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1470 {
1471     VhostUserMsg msg = { };
1472 
1473     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1474 
1475     /* If the guest supports GUEST_ANNOUNCE, do nothing */
1476     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1477         return 0;
1478     }
1479 
1480     /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
1481     if (virtio_has_feature(dev->protocol_features,
1482                            VHOST_USER_PROTOCOL_F_RARP)) {
1483         msg.hdr.request = VHOST_USER_SEND_RARP;
1484         msg.hdr.flags = VHOST_USER_VERSION;
1485         memcpy((char *)&msg.payload.u64, mac_addr, 6);
1486         msg.hdr.size = sizeof(msg.payload.u64);
1487 
1488         return vhost_user_write(dev, &msg, NULL, 0);
1489     }
1490     return -1;
1491 }
1492 
1493 static bool vhost_user_can_merge(struct vhost_dev *dev,
1494                                  uint64_t start1, uint64_t size1,
1495                                  uint64_t start2, uint64_t size2)
1496 {
1497     ram_addr_t offset;
1498     int mfd, rfd;
1499     MemoryRegion *mr;
1500 
1501     mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1502     mfd = memory_region_get_fd(mr);
1503 
1504     mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1505     rfd = memory_region_get_fd(mr);
1506 
1507     return mfd == rfd;
1508 }
1509 
1510 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1511 {
1512     VhostUserMsg msg;
1513     bool reply_supported = virtio_has_feature(dev->protocol_features,
1514                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1515 
1516     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1517         return 0;
1518     }
1519 
1520     msg.hdr.request = VHOST_USER_NET_SET_MTU;
1521     msg.payload.u64 = mtu;
1522     msg.hdr.size = sizeof(msg.payload.u64);
1523     msg.hdr.flags = VHOST_USER_VERSION;
1524     if (reply_supported) {
1525         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1526     }
1527 
1528     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1529         return -1;
1530     }
1531 
1532     /* If reply_ack is supported, the slave must ack that the MTU is valid */
1533     if (reply_supported) {
1534         return process_message_reply(dev, &msg);
1535     }
1536 
1537     return 0;
1538 }
1539 
1540 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1541                                             struct vhost_iotlb_msg *imsg)
1542 {
1543     VhostUserMsg msg = {
1544         .hdr.request = VHOST_USER_IOTLB_MSG,
1545         .hdr.size = sizeof(msg.payload.iotlb),
1546         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1547         .payload.iotlb = *imsg,
1548     };
1549 
1550     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1551         return -EFAULT;
1552     }
1553 
1554     return process_message_reply(dev, &msg);
1555 }
1556 
1557 
1558 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
1559 {
1560     /* No-op as the receive channel is not dedicated to IOTLB messages. */
1561 }
1562 
1563 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1564                                  uint32_t config_len)
1565 {
1566     VhostUserMsg msg = {
1567         .hdr.request = VHOST_USER_GET_CONFIG,
1568         .hdr.flags = VHOST_USER_VERSION,
1569         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1570     };
1571 
1572     if (!virtio_has_feature(dev->protocol_features,
1573                 VHOST_USER_PROTOCOL_F_CONFIG)) {
1574         return -1;
1575     }
1576 
1577     if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1578         return -1;
1579     }
1580 
1581     msg.payload.config.offset = 0;
1582     msg.payload.config.size = config_len;
1583     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1584         return -1;
1585     }
1586 
1587     if (vhost_user_read(dev, &msg) < 0) {
1588         return -1;
1589     }
1590 
1591     if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1592         error_report("Received unexpected msg type. Expected %d received %d",
1593                      VHOST_USER_GET_CONFIG, msg.hdr.request);
1594         return -1;
1595     }
1596 
1597     if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1598         error_report("Received bad msg size.");
1599         return -1;
1600     }
1601 
1602     memcpy(config, msg.payload.config.region, config_len);
1603 
1604     return 0;
1605 }
1606 
1607 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1608                                  uint32_t offset, uint32_t size, uint32_t flags)
1609 {
1610     uint8_t *p;
1611     bool reply_supported = virtio_has_feature(dev->protocol_features,
1612                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1613 
1614     VhostUserMsg msg = {
1615         .hdr.request = VHOST_USER_SET_CONFIG,
1616         .hdr.flags = VHOST_USER_VERSION,
1617         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1618     };
1619 
1620     if (!virtio_has_feature(dev->protocol_features,
1621                 VHOST_USER_PROTOCOL_F_CONFIG)) {
1622         return -1;
1623     }
1624 
1625     if (reply_supported) {
1626         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1627     }
1628 
1629     if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1630         return -1;
1631     }
1632 
1633     msg.payload.config.offset = offset,
1634     msg.payload.config.size = size,
1635     msg.payload.config.flags = flags,
1636     p = msg.payload.config.region;
1637     memcpy(p, data, size);
1638 
1639     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1640         return -1;
1641     }
1642 
1643     if (reply_supported) {
1644         return process_message_reply(dev, &msg);
1645     }
1646 
1647     return 0;
1648 }
1649 
1650 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1651                                             void *session_info,
1652                                             uint64_t *session_id)
1653 {
1654     bool crypto_session = virtio_has_feature(dev->protocol_features,
1655                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1656     CryptoDevBackendSymSessionInfo *sess_info = session_info;
1657     VhostUserMsg msg = {
1658         .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1659         .hdr.flags = VHOST_USER_VERSION,
1660         .hdr.size = sizeof(msg.payload.session),
1661     };
1662 
1663     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1664 
1665     if (!crypto_session) {
1666         error_report("vhost-user trying to send unhandled ioctl");
1667         return -1;
1668     }
1669 
1670     memcpy(&msg.payload.session.session_setup_data, sess_info,
1671               sizeof(CryptoDevBackendSymSessionInfo));
1672     if (sess_info->key_len) {
1673         memcpy(&msg.payload.session.key, sess_info->cipher_key,
1674                sess_info->key_len);
1675     }
1676     if (sess_info->auth_key_len > 0) {
1677         memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1678                sess_info->auth_key_len);
1679     }
1680     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1681         error_report("vhost_user_write() returned -1, create session failed");
1682         return -1;
1683     }
1684 
1685     if (vhost_user_read(dev, &msg) < 0) {
1686         error_report("vhost_user_read() returned -1, create session failed");
1687         return -1;
1688     }
1689 
1690     if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1691         error_report("Received unexpected msg type. Expected %d received %d",
1692                      VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1693         return -1;
1694     }
1695 
1696     if (msg.hdr.size != sizeof(msg.payload.session)) {
1697         error_report("Received bad msg size.");
1698         return -1;
1699     }
1700 
1701     if (msg.payload.session.session_id < 0) {
1702         error_report("Bad session id: %" PRId64 "",
1703                               msg.payload.session.session_id);
1704         return -1;
1705     }
1706     *session_id = msg.payload.session.session_id;
1707 
1708     return 0;
1709 }
1710 
1711 static int
1712 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1713 {
1714     bool crypto_session = virtio_has_feature(dev->protocol_features,
1715                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1716     VhostUserMsg msg = {
1717         .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1718         .hdr.flags = VHOST_USER_VERSION,
1719         .hdr.size = sizeof(msg.payload.u64),
1720     };
1721     msg.payload.u64 = session_id;
1722 
1723     if (!crypto_session) {
1724         error_report("vhost-user trying to send unhandled ioctl");
1725         return -1;
1726     }
1727 
1728     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1729         error_report("vhost_user_write() returned -1, close session failed");
1730         return -1;
1731     }
1732 
1733     return 0;
1734 }
1735 
1736 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
1737                                           MemoryRegionSection *section)
1738 {
1739     bool result;
1740 
1741     result = memory_region_get_fd(section->mr) >= 0;
1742 
1743     return result;
1744 }
1745 
1746 VhostUserState *vhost_user_init(void)
1747 {
1748     VhostUserState *user = g_new0(struct VhostUserState, 1);
1749 
1750     return user;
1751 }
1752 
1753 void vhost_user_cleanup(VhostUserState *user)
1754 {
1755     int i;
1756 
1757     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1758         if (user->notifier[i].addr) {
1759             object_unparent(OBJECT(&user->notifier[i].mr));
1760             munmap(user->notifier[i].addr, qemu_real_host_page_size);
1761             user->notifier[i].addr = NULL;
1762         }
1763     }
1764 }
1765 
1766 const VhostOps user_ops = {
1767         .backend_type = VHOST_BACKEND_TYPE_USER,
1768         .vhost_backend_init = vhost_user_backend_init,
1769         .vhost_backend_cleanup = vhost_user_backend_cleanup,
1770         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
1771         .vhost_set_log_base = vhost_user_set_log_base,
1772         .vhost_set_mem_table = vhost_user_set_mem_table,
1773         .vhost_set_vring_addr = vhost_user_set_vring_addr,
1774         .vhost_set_vring_endian = vhost_user_set_vring_endian,
1775         .vhost_set_vring_num = vhost_user_set_vring_num,
1776         .vhost_set_vring_base = vhost_user_set_vring_base,
1777         .vhost_get_vring_base = vhost_user_get_vring_base,
1778         .vhost_set_vring_kick = vhost_user_set_vring_kick,
1779         .vhost_set_vring_call = vhost_user_set_vring_call,
1780         .vhost_set_features = vhost_user_set_features,
1781         .vhost_get_features = vhost_user_get_features,
1782         .vhost_set_owner = vhost_user_set_owner,
1783         .vhost_reset_device = vhost_user_reset_device,
1784         .vhost_get_vq_index = vhost_user_get_vq_index,
1785         .vhost_set_vring_enable = vhost_user_set_vring_enable,
1786         .vhost_requires_shm_log = vhost_user_requires_shm_log,
1787         .vhost_migration_done = vhost_user_migration_done,
1788         .vhost_backend_can_merge = vhost_user_can_merge,
1789         .vhost_net_set_mtu = vhost_user_net_set_mtu,
1790         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
1791         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
1792         .vhost_get_config = vhost_user_get_config,
1793         .vhost_set_config = vhost_user_set_config,
1794         .vhost_crypto_create_session = vhost_user_crypto_create_session,
1795         .vhost_crypto_close_session = vhost_user_crypto_close_session,
1796         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
1797 };
1798