xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision 47dc0ec576c3d8748985e34d273c8dc0465c5ddb)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-backend.h"
15 #include "hw/virtio/virtio-net.h"
16 #include "sysemu/char.h"
17 #include "sysemu/kvm.h"
18 #include "qemu/error-report.h"
19 #include "qemu/sockets.h"
20 #include "migration/migration.h"
21 
22 #include <sys/ioctl.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <linux/vhost.h>
26 
/* Capacity of the region table (and of the fd arrays) used in this file. */
#define VHOST_MEMORY_MAX_NREGIONS      8

/* Feature bit tested before any protocol-feature request is issued. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
29 
/*
 * Bit positions of the vhost-user protocol features known to this file.
 * VHOST_USER_PROTOCOL_F_MAX is not a feature: it is one past the highest
 * known bit and is only used to build the mask below.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/*
 * Mask covering every protocol feature bit listed above.
 *
 * The shift is done on a 64-bit unsigned constant, like the other feature
 * bit tests in this file, because the mask is applied to a uint64_t feature
 * word; an int shift would overflow once the enum reaches bit 31.
 */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1ULL << VHOST_USER_PROTOCOL_F_MAX) - 1)
39 
/*
 * Request codes stored in VhostUserMsg.request.
 * The numeric values are written out explicitly because they travel on the
 * wire; VHOST_USER_MAX is one past the last request known here.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_MAX
} VhostUserRequest;
63 
/* One entry of the region table sent with VHOST_USER_SET_MEM_TABLE. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from vhost_memory_region */
    uint64_t memory_size;       /* copied from vhost_memory_region */
    uint64_t userspace_addr;    /* copied from vhost_memory_region */
    uint64_t mmap_offset;       /* offset reported by memory_region_from_host */
} VhostUserMemoryRegion;
70 
71 typedef struct VhostUserMemory {
72     uint32_t nregions;
73     uint32_t padding;
74     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
75 } VhostUserMemory;
76 
/* Payload of VHOST_USER_SET_LOG_BASE. */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* size of the log area, in bytes */
    uint64_t mmap_offset;   /* always sent as 0 by this file */
} VhostUserLog;
81 
82 typedef struct VhostUserMsg {
83     VhostUserRequest request;
84 
85 #define VHOST_USER_VERSION_MASK     (0x3)
86 #define VHOST_USER_REPLY_MASK       (0x1<<2)
87     uint32_t flags;
88     uint32_t size; /* the following payload size */
89     union {
90 #define VHOST_USER_VRING_IDX_MASK   (0xff)
91 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
92         uint64_t u64;
93         struct vhost_vring_state state;
94         struct vhost_vring_addr addr;
95         VhostUserMemory memory;
96         VhostUserLog log;
97     } payload;
98 } QEMU_PACKED VhostUserMsg;
99 
100 static VhostUserMsg m __attribute__ ((unused));
101 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
102                             + sizeof(m.flags) \
103                             + sizeof(m.size))
104 
105 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
106 
107 /* The version of the protocol we support */
108 #define VHOST_USER_VERSION    (0x1)
109 
/* True only when KVM is enabled and KVM reports eventfds as enabled. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
114 
115 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
116 {
117     CharDriverState *chr = dev->opaque;
118     uint8_t *p = (uint8_t *) msg;
119     int r, size = VHOST_USER_HDR_SIZE;
120 
121     r = qemu_chr_fe_read_all(chr, p, size);
122     if (r != size) {
123         error_report("Failed to read msg header. Read %d instead of %d."
124                      " Original request %d.", r, size, msg->request);
125         goto fail;
126     }
127 
128     /* validate received flags */
129     if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
130         error_report("Failed to read msg header."
131                 " Flags 0x%x instead of 0x%x.", msg->flags,
132                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
133         goto fail;
134     }
135 
136     /* validate message size is sane */
137     if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
138         error_report("Failed to read msg header."
139                 " Size %d exceeds the maximum %zu.", msg->size,
140                 VHOST_USER_PAYLOAD_SIZE);
141         goto fail;
142     }
143 
144     if (msg->size) {
145         p += VHOST_USER_HDR_SIZE;
146         size = msg->size;
147         r = qemu_chr_fe_read_all(chr, p, size);
148         if (r != size) {
149             error_report("Failed to read msg payload."
150                          " Read %d instead of %d.", r, msg->size);
151             goto fail;
152         }
153     }
154 
155     return 0;
156 
157 fail:
158     return -1;
159 }
160 
161 static bool vhost_user_one_time_request(VhostUserRequest request)
162 {
163     switch (request) {
164     case VHOST_USER_SET_OWNER:
165     case VHOST_USER_RESET_OWNER:
166     case VHOST_USER_SET_MEM_TABLE:
167     case VHOST_USER_GET_QUEUE_NUM:
168         return true;
169     default:
170         return false;
171     }
172 }
173 
174 /* most non-init callers ignore the error */
175 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
176                             int *fds, int fd_num)
177 {
178     CharDriverState *chr = dev->opaque;
179     int ret, size = VHOST_USER_HDR_SIZE + msg->size;
180 
181     /*
182      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
183      * we just need send it once in the first time. For later such
184      * request, we just ignore it.
185      */
186     if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
187         return 0;
188     }
189 
190     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
191         error_report("Failed to set msg fds.");
192         return -1;
193     }
194 
195     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
196     if (ret != size) {
197         error_report("Failed to write msg."
198                      " Wrote %d instead of %d.", ret, size);
199         return -1;
200     }
201 
202     return 0;
203 }
204 
205 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
206                                    struct vhost_log *log)
207 {
208     int fds[VHOST_MEMORY_MAX_NREGIONS];
209     size_t fd_num = 0;
210     bool shmfd = virtio_has_feature(dev->protocol_features,
211                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
212     VhostUserMsg msg = {
213         .request = VHOST_USER_SET_LOG_BASE,
214         .flags = VHOST_USER_VERSION,
215         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
216         .payload.log.mmap_offset = 0,
217         .size = sizeof(msg.payload.log),
218     };
219 
220     if (shmfd && log->fd != -1) {
221         fds[fd_num++] = log->fd;
222     }
223 
224     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
225         return -1;
226     }
227 
228     if (shmfd) {
229         msg.size = 0;
230         if (vhost_user_read(dev, &msg) < 0) {
231             return -1;
232         }
233 
234         if (msg.request != VHOST_USER_SET_LOG_BASE) {
235             error_report("Received unexpected msg type. "
236                          "Expected %d received %d",
237                          VHOST_USER_SET_LOG_BASE, msg.request);
238             return -1;
239         }
240     }
241 
242     return 0;
243 }
244 
245 static int vhost_user_set_mem_table(struct vhost_dev *dev,
246                                     struct vhost_memory *mem)
247 {
248     int fds[VHOST_MEMORY_MAX_NREGIONS];
249     int i, fd;
250     size_t fd_num = 0;
251     VhostUserMsg msg = {
252         .request = VHOST_USER_SET_MEM_TABLE,
253         .flags = VHOST_USER_VERSION,
254     };
255 
256     for (i = 0; i < dev->mem->nregions; ++i) {
257         struct vhost_memory_region *reg = dev->mem->regions + i;
258         ram_addr_t offset;
259         MemoryRegion *mr;
260 
261         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
262         mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
263                                      &offset);
264         fd = memory_region_get_fd(mr);
265         if (fd > 0) {
266             msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
267             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
268             msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
269             msg.payload.memory.regions[fd_num].mmap_offset = offset;
270             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
271             fds[fd_num++] = fd;
272         }
273     }
274 
275     msg.payload.memory.nregions = fd_num;
276 
277     if (!fd_num) {
278         error_report("Failed initializing vhost-user memory map, "
279                      "consider using -object memory-backend-file share=on");
280         return -1;
281     }
282 
283     msg.size = sizeof(msg.payload.memory.nregions);
284     msg.size += sizeof(msg.payload.memory.padding);
285     msg.size += fd_num * sizeof(VhostUserMemoryRegion);
286 
287     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
288         return -1;
289     }
290 
291     return 0;
292 }
293 
294 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
295                                      struct vhost_vring_addr *addr)
296 {
297     VhostUserMsg msg = {
298         .request = VHOST_USER_SET_VRING_ADDR,
299         .flags = VHOST_USER_VERSION,
300         .payload.addr = *addr,
301         .size = sizeof(msg.payload.addr),
302     };
303 
304     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
305         return -1;
306     }
307 
308     return 0;
309 }
310 
/*
 * Not handled by this backend: nothing is sent, an error is reported and
 * -1 is returned unconditionally.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");
    return unsupported;
}
317 
318 static int vhost_set_vring(struct vhost_dev *dev,
319                            unsigned long int request,
320                            struct vhost_vring_state *ring)
321 {
322     VhostUserMsg msg = {
323         .request = request,
324         .flags = VHOST_USER_VERSION,
325         .payload.state = *ring,
326         .size = sizeof(msg.payload.state),
327     };
328 
329     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
330         return -1;
331     }
332 
333     return 0;
334 }
335 
336 static int vhost_user_set_vring_num(struct vhost_dev *dev,
337                                     struct vhost_vring_state *ring)
338 {
339     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
340 }
341 
342 static int vhost_user_set_vring_base(struct vhost_dev *dev,
343                                      struct vhost_vring_state *ring)
344 {
345     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
346 }
347 
348 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
349 {
350     int i;
351 
352     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
353         return -1;
354     }
355 
356     for (i = 0; i < dev->nvqs; ++i) {
357         struct vhost_vring_state state = {
358             .index = dev->vq_index + i,
359             .num   = enable,
360         };
361 
362         vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
363     }
364 
365     return 0;
366 }
367 
368 static int vhost_user_get_vring_base(struct vhost_dev *dev,
369                                      struct vhost_vring_state *ring)
370 {
371     VhostUserMsg msg = {
372         .request = VHOST_USER_GET_VRING_BASE,
373         .flags = VHOST_USER_VERSION,
374         .payload.state = *ring,
375         .size = sizeof(msg.payload.state),
376     };
377 
378     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
379         return -1;
380     }
381 
382     if (vhost_user_read(dev, &msg) < 0) {
383         return -1;
384     }
385 
386     if (msg.request != VHOST_USER_GET_VRING_BASE) {
387         error_report("Received unexpected msg type. Expected %d received %d",
388                      VHOST_USER_GET_VRING_BASE, msg.request);
389         return -1;
390     }
391 
392     if (msg.size != sizeof(msg.payload.state)) {
393         error_report("Received bad msg size.");
394         return -1;
395     }
396 
397     *ring = msg.payload.state;
398 
399     return 0;
400 }
401 
402 static int vhost_set_vring_file(struct vhost_dev *dev,
403                                 VhostUserRequest request,
404                                 struct vhost_vring_file *file)
405 {
406     int fds[VHOST_MEMORY_MAX_NREGIONS];
407     size_t fd_num = 0;
408     VhostUserMsg msg = {
409         .request = request,
410         .flags = VHOST_USER_VERSION,
411         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
412         .size = sizeof(msg.payload.u64),
413     };
414 
415     if (ioeventfd_enabled() && file->fd > 0) {
416         fds[fd_num++] = file->fd;
417     } else {
418         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
419     }
420 
421     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
422         return -1;
423     }
424 
425     return 0;
426 }
427 
428 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
429                                      struct vhost_vring_file *file)
430 {
431     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
432 }
433 
434 static int vhost_user_set_vring_call(struct vhost_dev *dev,
435                                      struct vhost_vring_file *file)
436 {
437     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
438 }
439 
440 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
441 {
442     VhostUserMsg msg = {
443         .request = request,
444         .flags = VHOST_USER_VERSION,
445         .payload.u64 = u64,
446         .size = sizeof(msg.payload.u64),
447     };
448 
449     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
450         return -1;
451     }
452 
453     return 0;
454 }
455 
456 static int vhost_user_set_features(struct vhost_dev *dev,
457                                    uint64_t features)
458 {
459     return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
460 }
461 
462 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
463                                             uint64_t features)
464 {
465     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
466 }
467 
468 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
469 {
470     VhostUserMsg msg = {
471         .request = request,
472         .flags = VHOST_USER_VERSION,
473     };
474 
475     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
476         return 0;
477     }
478 
479     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
480         return -1;
481     }
482 
483     if (vhost_user_read(dev, &msg) < 0) {
484         return -1;
485     }
486 
487     if (msg.request != request) {
488         error_report("Received unexpected msg type. Expected %d received %d",
489                      request, msg.request);
490         return -1;
491     }
492 
493     if (msg.size != sizeof(msg.payload.u64)) {
494         error_report("Received bad msg size.");
495         return -1;
496     }
497 
498     *u64 = msg.payload.u64;
499 
500     return 0;
501 }
502 
503 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
504 {
505     return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
506 }
507 
508 static int vhost_user_set_owner(struct vhost_dev *dev)
509 {
510     VhostUserMsg msg = {
511         .request = VHOST_USER_SET_OWNER,
512         .flags = VHOST_USER_VERSION,
513     };
514 
515     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
516         return -1;
517     }
518 
519     return 0;
520 }
521 
522 static int vhost_user_reset_device(struct vhost_dev *dev)
523 {
524     VhostUserMsg msg = {
525         .request = VHOST_USER_RESET_OWNER,
526         .flags = VHOST_USER_VERSION,
527     };
528 
529     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
530         return -1;
531     }
532 
533     return 0;
534 }
535 
536 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
537 {
538     uint64_t features;
539     int err;
540 
541     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
542 
543     dev->opaque = opaque;
544 
545     err = vhost_user_get_features(dev, &features);
546     if (err < 0) {
547         return err;
548     }
549 
550     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
551         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
552 
553         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
554                                  &features);
555         if (err < 0) {
556             return err;
557         }
558 
559         dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
560         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
561         if (err < 0) {
562             return err;
563         }
564 
565         /* query the max queues we support if backend supports Multiple Queue */
566         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
567             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
568                                      &dev->max_queues);
569             if (err < 0) {
570                 return err;
571             }
572         }
573     }
574 
575     if (dev->migration_blocker == NULL &&
576         !virtio_has_feature(dev->protocol_features,
577                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
578         error_setg(&dev->migration_blocker,
579                    "Migration disabled: vhost-user backend lacks "
580                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
581     }
582 
583     return 0;
584 }
585 
586 static int vhost_user_cleanup(struct vhost_dev *dev)
587 {
588     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
589 
590     dev->opaque = 0;
591 
592     return 0;
593 }
594 
595 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
596 {
597     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
598 
599     return idx;
600 }
601 
602 static int vhost_user_memslots_limit(struct vhost_dev *dev)
603 {
604     return VHOST_MEMORY_MAX_NREGIONS;
605 }
606 
607 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
608 {
609     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
610 
611     return virtio_has_feature(dev->protocol_features,
612                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
613 }
614 
615 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
616 {
617     VhostUserMsg msg = { 0 };
618 
619     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
620 
621     /* If guest supports GUEST_ANNOUNCE do nothing */
622     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
623         return 0;
624     }
625 
626     /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
627     if (virtio_has_feature(dev->protocol_features,
628                            VHOST_USER_PROTOCOL_F_RARP)) {
629         msg.request = VHOST_USER_SEND_RARP;
630         msg.flags = VHOST_USER_VERSION;
631         memcpy((char *)&msg.payload.u64, mac_addr, 6);
632         msg.size = sizeof(msg.payload.u64);
633 
634         return vhost_user_write(dev, &msg, NULL, 0);
635     }
636     return -1;
637 }
638 
639 static bool vhost_user_can_merge(struct vhost_dev *dev,
640                                  uint64_t start1, uint64_t size1,
641                                  uint64_t start2, uint64_t size2)
642 {
643     ram_addr_t offset;
644     int mfd, rfd;
645     MemoryRegion *mr;
646 
647     mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
648     mfd = memory_region_get_fd(mr);
649 
650     mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
651     rfd = memory_region_get_fd(mr);
652 
653     return mfd == rfd;
654 }
655 
656 const VhostOps user_ops = {
657         .backend_type = VHOST_BACKEND_TYPE_USER,
658         .vhost_backend_init = vhost_user_init,
659         .vhost_backend_cleanup = vhost_user_cleanup,
660         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
661         .vhost_set_log_base = vhost_user_set_log_base,
662         .vhost_set_mem_table = vhost_user_set_mem_table,
663         .vhost_set_vring_addr = vhost_user_set_vring_addr,
664         .vhost_set_vring_endian = vhost_user_set_vring_endian,
665         .vhost_set_vring_num = vhost_user_set_vring_num,
666         .vhost_set_vring_base = vhost_user_set_vring_base,
667         .vhost_get_vring_base = vhost_user_get_vring_base,
668         .vhost_set_vring_kick = vhost_user_set_vring_kick,
669         .vhost_set_vring_call = vhost_user_set_vring_call,
670         .vhost_set_features = vhost_user_set_features,
671         .vhost_get_features = vhost_user_get_features,
672         .vhost_set_owner = vhost_user_set_owner,
673         .vhost_reset_device = vhost_user_reset_device,
674         .vhost_get_vq_index = vhost_user_get_vq_index,
675         .vhost_set_vring_enable = vhost_user_set_vring_enable,
676         .vhost_requires_shm_log = vhost_user_requires_shm_log,
677         .vhost_migration_done = vhost_user_migration_done,
678         .vhost_backend_can_merge = vhost_user_can_merge,
679 };
680