xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision 2ce68e4c)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "hw/virtio/vhost.h"
12 #include "hw/virtio/vhost-backend.h"
13 #include "sysemu/char.h"
14 #include "sysemu/kvm.h"
15 #include "qemu/error-report.h"
16 #include "qemu/sockets.h"
17 #include "exec/ram_addr.h"
18 
19 #include <fcntl.h>
20 #include <unistd.h>
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
23 #include <sys/un.h>
24 #include <linux/vhost.h>
25 
/* Capacity of the region table (and fd array) of a SET_MEM_TABLE message. */
#define VHOST_MEMORY_MAX_NREGIONS 8

/* Bit number in the feature word advertising protocol-feature support. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30

/* Protocol feature bits kept after VHOST_USER_GET_PROTOCOL_FEATURES. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL

/* Protocol feature bit number: multiple queues (MQ). */
#define VHOST_USER_PROTOCOL_F_MQ 0
31 
/*
 * Request codes placed in VhostUserMsg.request.
 *
 * The numeric values are spelled out explicitly because they are what is
 * written into the message; VHOST_USER_MAX is one past the last valid
 * request and is used to size the ioctl translation table.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,

    /* device-wide feature / ownership / memory / log requests */
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_DEVICE          = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,

    /* per-vring requests */
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,

    /* requests with no entry in the ioctl translation table */
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,

    VHOST_USER_MAX
} VhostUserRequest;
54 
/*
 * One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE.
 * The sender fills the first three fields from the matching
 * vhost_memory_region.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from the vhost memory region */
    uint64_t memory_size;       /* copied from the vhost memory region */
    uint64_t userspace_addr;    /* copied from the vhost memory region */
    uint64_t mmap_offset;       /* userspace_addr minus the RAM block's
                                 * host pointer */
} VhostUserMemoryRegion;
61 
62 typedef struct VhostUserMemory {
63     uint32_t nregions;
64     uint32_t padding;
65     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
66 } VhostUserMemory;
67 
68 typedef struct VhostUserMsg {
69     VhostUserRequest request;
70 
71 #define VHOST_USER_VERSION_MASK     (0x3)
72 #define VHOST_USER_REPLY_MASK       (0x1<<2)
73     uint32_t flags;
74     uint32_t size; /* the following payload size */
75     union {
76 #define VHOST_USER_VRING_IDX_MASK   (0xff)
77 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
78         uint64_t u64;
79         struct vhost_vring_state state;
80         struct vhost_vring_addr addr;
81         VhostUserMemory memory;
82     };
83 } QEMU_PACKED VhostUserMsg;
84 
85 static VhostUserMsg m __attribute__ ((unused));
86 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
87                             + sizeof(m.flags) \
88                             + sizeof(m.size))
89 
90 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
91 
92 /* The version of the protocol we support */
93 #define VHOST_USER_VERSION    (0x1)
94 
/*
 * Report whether eventfds may be handed to the backend: true only when
 * KVM is enabled and KVM eventfds are enabled.
 */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
99 
100 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
101     -1,                     /* VHOST_USER_NONE */
102     VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
103     VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
104     VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
105     VHOST_RESET_DEVICE,      /* VHOST_USER_RESET_DEVICE */
106     VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
107     VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
108     VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
109     VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
110     VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
111     VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
112     VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
113     VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
114     VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
115     VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
116 };
117 
118 static VhostUserRequest vhost_user_request_translate(unsigned long int request)
119 {
120     VhostUserRequest idx;
121 
122     for (idx = 0; idx < VHOST_USER_MAX; idx++) {
123         if (ioctl_to_vhost_user_request[idx] == request) {
124             break;
125         }
126     }
127 
128     return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
129 }
130 
131 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
132 {
133     CharDriverState *chr = dev->opaque;
134     uint8_t *p = (uint8_t *) msg;
135     int r, size = VHOST_USER_HDR_SIZE;
136 
137     r = qemu_chr_fe_read_all(chr, p, size);
138     if (r != size) {
139         error_report("Failed to read msg header. Read %d instead of %d.", r,
140                 size);
141         goto fail;
142     }
143 
144     /* validate received flags */
145     if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
146         error_report("Failed to read msg header."
147                 " Flags 0x%x instead of 0x%x.", msg->flags,
148                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
149         goto fail;
150     }
151 
152     /* validate message size is sane */
153     if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
154         error_report("Failed to read msg header."
155                 " Size %d exceeds the maximum %zu.", msg->size,
156                 VHOST_USER_PAYLOAD_SIZE);
157         goto fail;
158     }
159 
160     if (msg->size) {
161         p += VHOST_USER_HDR_SIZE;
162         size = msg->size;
163         r = qemu_chr_fe_read_all(chr, p, size);
164         if (r != size) {
165             error_report("Failed to read msg payload."
166                          " Read %d instead of %d.", r, msg->size);
167             goto fail;
168         }
169     }
170 
171     return 0;
172 
173 fail:
174     return -1;
175 }
176 
177 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
178                             int *fds, int fd_num)
179 {
180     CharDriverState *chr = dev->opaque;
181     int size = VHOST_USER_HDR_SIZE + msg->size;
182 
183     if (fd_num) {
184         qemu_chr_fe_set_msgfds(chr, fds, fd_num);
185     }
186 
187     return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
188             0 : -1;
189 }
190 
191 static bool vhost_user_one_time_request(VhostUserRequest request)
192 {
193     switch (request) {
194     case VHOST_USER_SET_OWNER:
195     case VHOST_USER_RESET_DEVICE:
196     case VHOST_USER_SET_MEM_TABLE:
197     case VHOST_USER_GET_QUEUE_NUM:
198         return true;
199     default:
200         return false;
201     }
202 }
203 
204 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
205         void *arg)
206 {
207     VhostUserMsg msg;
208     VhostUserRequest msg_request;
209     struct vhost_vring_file *file = 0;
210     int need_reply = 0;
211     int fds[VHOST_MEMORY_MAX_NREGIONS];
212     int i, fd;
213     size_t fd_num = 0;
214 
215     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
216 
217     /* only translate vhost ioctl requests */
218     if (request > VHOST_USER_MAX) {
219         msg_request = vhost_user_request_translate(request);
220     } else {
221         msg_request = request;
222     }
223 
224     /*
225      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
226      * we just need send it once in the first time. For later such
227      * request, we just ignore it.
228      */
229     if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) {
230         return 0;
231     }
232 
233     msg.request = msg_request;
234     msg.flags = VHOST_USER_VERSION;
235     msg.size = 0;
236 
237     switch (msg_request) {
238     case VHOST_USER_GET_FEATURES:
239     case VHOST_USER_GET_PROTOCOL_FEATURES:
240     case VHOST_USER_GET_QUEUE_NUM:
241         need_reply = 1;
242         break;
243 
244     case VHOST_USER_SET_FEATURES:
245     case VHOST_USER_SET_LOG_BASE:
246     case VHOST_USER_SET_PROTOCOL_FEATURES:
247         msg.u64 = *((__u64 *) arg);
248         msg.size = sizeof(m.u64);
249         break;
250 
251     case VHOST_USER_SET_OWNER:
252     case VHOST_USER_RESET_DEVICE:
253         break;
254 
255     case VHOST_USER_SET_MEM_TABLE:
256         for (i = 0; i < dev->mem->nregions; ++i) {
257             struct vhost_memory_region *reg = dev->mem->regions + i;
258             ram_addr_t ram_addr;
259 
260             assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
261             qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
262             fd = qemu_get_ram_fd(ram_addr);
263             if (fd > 0) {
264                 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
265                 msg.memory.regions[fd_num].memory_size  = reg->memory_size;
266                 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
267                 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
268                     (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
269                 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
270                 fds[fd_num++] = fd;
271             }
272         }
273 
274         msg.memory.nregions = fd_num;
275 
276         if (!fd_num) {
277             error_report("Failed initializing vhost-user memory map, "
278                     "consider using -object memory-backend-file share=on");
279             return -1;
280         }
281 
282         msg.size = sizeof(m.memory.nregions);
283         msg.size += sizeof(m.memory.padding);
284         msg.size += fd_num * sizeof(VhostUserMemoryRegion);
285 
286         break;
287 
288     case VHOST_USER_SET_LOG_FD:
289         fds[fd_num++] = *((int *) arg);
290         break;
291 
292     case VHOST_USER_SET_VRING_NUM:
293     case VHOST_USER_SET_VRING_BASE:
294     case VHOST_USER_SET_VRING_ENABLE:
295         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
296         msg.size = sizeof(m.state);
297         break;
298 
299     case VHOST_USER_GET_VRING_BASE:
300         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
301         msg.size = sizeof(m.state);
302         need_reply = 1;
303         break;
304 
305     case VHOST_USER_SET_VRING_ADDR:
306         memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
307         msg.size = sizeof(m.addr);
308         break;
309 
310     case VHOST_USER_SET_VRING_KICK:
311     case VHOST_USER_SET_VRING_CALL:
312     case VHOST_USER_SET_VRING_ERR:
313         file = arg;
314         msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
315         msg.size = sizeof(m.u64);
316         if (ioeventfd_enabled() && file->fd > 0) {
317             fds[fd_num++] = file->fd;
318         } else {
319             msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
320         }
321         break;
322     default:
323         error_report("vhost-user trying to send unhandled ioctl");
324         return -1;
325         break;
326     }
327 
328     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
329         return 0;
330     }
331 
332     if (need_reply) {
333         if (vhost_user_read(dev, &msg) < 0) {
334             return 0;
335         }
336 
337         if (msg_request != msg.request) {
338             error_report("Received unexpected msg type."
339                     " Expected %d received %d", msg_request, msg.request);
340             return -1;
341         }
342 
343         switch (msg_request) {
344         case VHOST_USER_GET_FEATURES:
345         case VHOST_USER_GET_PROTOCOL_FEATURES:
346         case VHOST_USER_GET_QUEUE_NUM:
347             if (msg.size != sizeof(m.u64)) {
348                 error_report("Received bad msg size.");
349                 return -1;
350             }
351             *((__u64 *) arg) = msg.u64;
352             break;
353         case VHOST_USER_GET_VRING_BASE:
354             if (msg.size != sizeof(m.state)) {
355                 error_report("Received bad msg size.");
356                 return -1;
357             }
358             memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
359             break;
360         default:
361             error_report("Received unexpected msg type.");
362             return -1;
363             break;
364         }
365     }
366 
367     return 0;
368 }
369 
370 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
371 {
372     unsigned long long features;
373     int err;
374 
375     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
376 
377     dev->opaque = opaque;
378 
379     err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features);
380     if (err < 0) {
381         return err;
382     }
383 
384     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
385         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
386 
387         err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features);
388         if (err < 0) {
389             return err;
390         }
391 
392         dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
393         err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
394                               &dev->protocol_features);
395         if (err < 0) {
396             return err;
397         }
398 
399         /* query the max queues we support if backend supports Multiple Queue */
400         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
401             err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues);
402             if (err < 0) {
403                 return err;
404             }
405         }
406     }
407 
408     return 0;
409 }
410 
411 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
412 {
413     struct vhost_vring_state state = {
414         .index = dev->vq_index,
415         .num   = enable,
416     };
417 
418     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
419 
420     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
421         return -1;
422     }
423 
424     return vhost_user_call(dev, VHOST_USER_SET_VRING_ENABLE, &state);
425 }
426 
427 static int vhost_user_cleanup(struct vhost_dev *dev)
428 {
429     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
430 
431     dev->opaque = 0;
432 
433     return 0;
434 }
435 
436 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
437 {
438     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
439 
440     return idx;
441 }
442 
443 static int vhost_user_memslots_limit(struct vhost_dev *dev)
444 {
445     return VHOST_MEMORY_MAX_NREGIONS;
446 }
447 
448 const VhostOps user_ops = {
449         .backend_type = VHOST_BACKEND_TYPE_USER,
450         .vhost_call = vhost_user_call,
451         .vhost_backend_init = vhost_user_init,
452         .vhost_backend_cleanup = vhost_user_cleanup,
453         .vhost_backend_get_vq_index = vhost_user_get_vq_index,
454         .vhost_backend_set_vring_enable = vhost_user_set_vring_enable,
455         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
456 };
457