xref: /openbmc/qemu/hw/virtio/vhost-user.c (revision d1f8b30ec8dde0318fd1b98d24a64926feae9625)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "hw/virtio/vhost.h"
12 #include "hw/virtio/vhost-backend.h"
13 #include "sysemu/char.h"
14 #include "sysemu/kvm.h"
15 #include "qemu/error-report.h"
16 #include "qemu/sockets.h"
17 #include "exec/ram_addr.h"
18 
19 #include <fcntl.h>
20 #include <unistd.h>
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
23 #include <sys/un.h>
24 #include <linux/vhost.h>
25 
/* Capacity of the region array (and of the fd array) of one message. */
#define VHOST_MEMORY_MAX_NREGIONS        8
/* Bit number in the feature word that signals protocol-feature support. */
#define VHOST_USER_F_PROTOCOL_FEATURES   30
/* Protocol feature bits kept after querying the backend; none for now. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x0ULL
29 
/*
 * Request codes placed in VhostUserMsg.request.
 *
 * The numeric values are written out explicitly because they are sent to
 * the backend as-is; VHOST_USER_MAX is one past the last valid request and
 * also sizes the ioctl translation table.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,

    /* device-wide requests */
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_DEVICE          = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,

    /* per-vring requests */
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,

    /* protocol feature negotiation */
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,

    VHOST_USER_MAX
} VhostUserRequest;
50 
/* One memory region entry inside a VHOST_USER_SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical address of the region */
    uint64_t memory_size;       /* size of the region */
    uint64_t userspace_addr;    /* address of the region in this process */
    uint64_t mmap_offset;       /* offset of the region within its RAM block */
} VhostUserMemoryRegion;
57 
58 typedef struct VhostUserMemory {
59     uint32_t nregions;
60     uint32_t padding;
61     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
62 } VhostUserMemory;
63 
64 typedef struct VhostUserMsg {
65     VhostUserRequest request;
66 
67 #define VHOST_USER_VERSION_MASK     (0x3)
68 #define VHOST_USER_REPLY_MASK       (0x1<<2)
69     uint32_t flags;
70     uint32_t size; /* the following payload size */
71     union {
72 #define VHOST_USER_VRING_IDX_MASK   (0xff)
73 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
74         uint64_t u64;
75         struct vhost_vring_state state;
76         struct vhost_vring_addr addr;
77         VhostUserMemory memory;
78     };
79 } QEMU_PACKED VhostUserMsg;
80 
81 static VhostUserMsg m __attribute__ ((unused));
82 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
83                             + sizeof(m.flags) \
84                             + sizeof(m.size))
85 
86 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
87 
88 /* The version of the protocol we support */
89 #define VHOST_USER_VERSION    (0x1)
90 
/* True only when KVM is in use and it reports eventfd support. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
95 
96 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
97     -1,                     /* VHOST_USER_NONE */
98     VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
99     VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
100     VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
101     VHOST_RESET_DEVICE,      /* VHOST_USER_RESET_DEVICE */
102     VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
103     VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
104     VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
105     VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
106     VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
107     VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
108     VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
109     VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
110     VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
111     VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
112 };
113 
114 static VhostUserRequest vhost_user_request_translate(unsigned long int request)
115 {
116     VhostUserRequest idx;
117 
118     for (idx = 0; idx < VHOST_USER_MAX; idx++) {
119         if (ioctl_to_vhost_user_request[idx] == request) {
120             break;
121         }
122     }
123 
124     return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
125 }
126 
127 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
128 {
129     CharDriverState *chr = dev->opaque;
130     uint8_t *p = (uint8_t *) msg;
131     int r, size = VHOST_USER_HDR_SIZE;
132 
133     r = qemu_chr_fe_read_all(chr, p, size);
134     if (r != size) {
135         error_report("Failed to read msg header. Read %d instead of %d.", r,
136                 size);
137         goto fail;
138     }
139 
140     /* validate received flags */
141     if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
142         error_report("Failed to read msg header."
143                 " Flags 0x%x instead of 0x%x.", msg->flags,
144                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
145         goto fail;
146     }
147 
148     /* validate message size is sane */
149     if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
150         error_report("Failed to read msg header."
151                 " Size %d exceeds the maximum %zu.", msg->size,
152                 VHOST_USER_PAYLOAD_SIZE);
153         goto fail;
154     }
155 
156     if (msg->size) {
157         p += VHOST_USER_HDR_SIZE;
158         size = msg->size;
159         r = qemu_chr_fe_read_all(chr, p, size);
160         if (r != size) {
161             error_report("Failed to read msg payload."
162                          " Read %d instead of %d.", r, msg->size);
163             goto fail;
164         }
165     }
166 
167     return 0;
168 
169 fail:
170     return -1;
171 }
172 
173 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
174                             int *fds, int fd_num)
175 {
176     CharDriverState *chr = dev->opaque;
177     int size = VHOST_USER_HDR_SIZE + msg->size;
178 
179     if (fd_num) {
180         qemu_chr_fe_set_msgfds(chr, fds, fd_num);
181     }
182 
183     return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
184             0 : -1;
185 }
186 
187 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
188         void *arg)
189 {
190     VhostUserMsg msg;
191     VhostUserRequest msg_request;
192     struct vhost_vring_file *file = 0;
193     int need_reply = 0;
194     int fds[VHOST_MEMORY_MAX_NREGIONS];
195     int i, fd;
196     size_t fd_num = 0;
197 
198     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
199 
200     /* only translate vhost ioctl requests */
201     if (request > VHOST_USER_MAX) {
202         msg_request = vhost_user_request_translate(request);
203     } else {
204         msg_request = request;
205     }
206 
207     msg.request = msg_request;
208     msg.flags = VHOST_USER_VERSION;
209     msg.size = 0;
210 
211     switch (msg_request) {
212     case VHOST_USER_GET_FEATURES:
213     case VHOST_USER_GET_PROTOCOL_FEATURES:
214         need_reply = 1;
215         break;
216 
217     case VHOST_USER_SET_FEATURES:
218     case VHOST_USER_SET_LOG_BASE:
219     case VHOST_USER_SET_PROTOCOL_FEATURES:
220         msg.u64 = *((__u64 *) arg);
221         msg.size = sizeof(m.u64);
222         break;
223 
224     case VHOST_USER_SET_OWNER:
225     case VHOST_USER_RESET_DEVICE:
226         break;
227 
228     case VHOST_USER_SET_MEM_TABLE:
229         for (i = 0; i < dev->mem->nregions; ++i) {
230             struct vhost_memory_region *reg = dev->mem->regions + i;
231             ram_addr_t ram_addr;
232 
233             assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
234             qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
235             fd = qemu_get_ram_fd(ram_addr);
236             if (fd > 0) {
237                 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
238                 msg.memory.regions[fd_num].memory_size  = reg->memory_size;
239                 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
240                 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
241                     (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
242                 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
243                 fds[fd_num++] = fd;
244             }
245         }
246 
247         msg.memory.nregions = fd_num;
248 
249         if (!fd_num) {
250             error_report("Failed initializing vhost-user memory map, "
251                     "consider using -object memory-backend-file share=on");
252             return -1;
253         }
254 
255         msg.size = sizeof(m.memory.nregions);
256         msg.size += sizeof(m.memory.padding);
257         msg.size += fd_num * sizeof(VhostUserMemoryRegion);
258 
259         break;
260 
261     case VHOST_USER_SET_LOG_FD:
262         fds[fd_num++] = *((int *) arg);
263         break;
264 
265     case VHOST_USER_SET_VRING_NUM:
266     case VHOST_USER_SET_VRING_BASE:
267         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
268         msg.size = sizeof(m.state);
269         break;
270 
271     case VHOST_USER_GET_VRING_BASE:
272         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
273         msg.size = sizeof(m.state);
274         need_reply = 1;
275         break;
276 
277     case VHOST_USER_SET_VRING_ADDR:
278         memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
279         msg.size = sizeof(m.addr);
280         break;
281 
282     case VHOST_USER_SET_VRING_KICK:
283     case VHOST_USER_SET_VRING_CALL:
284     case VHOST_USER_SET_VRING_ERR:
285         file = arg;
286         msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
287         msg.size = sizeof(m.u64);
288         if (ioeventfd_enabled() && file->fd > 0) {
289             fds[fd_num++] = file->fd;
290         } else {
291             msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
292         }
293         break;
294     default:
295         error_report("vhost-user trying to send unhandled ioctl");
296         return -1;
297         break;
298     }
299 
300     if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
301         return 0;
302     }
303 
304     if (need_reply) {
305         if (vhost_user_read(dev, &msg) < 0) {
306             return 0;
307         }
308 
309         if (msg_request != msg.request) {
310             error_report("Received unexpected msg type."
311                     " Expected %d received %d", msg_request, msg.request);
312             return -1;
313         }
314 
315         switch (msg_request) {
316         case VHOST_USER_GET_FEATURES:
317         case VHOST_USER_GET_PROTOCOL_FEATURES:
318             if (msg.size != sizeof(m.u64)) {
319                 error_report("Received bad msg size.");
320                 return -1;
321             }
322             *((__u64 *) arg) = msg.u64;
323             break;
324         case VHOST_USER_GET_VRING_BASE:
325             if (msg.size != sizeof(m.state)) {
326                 error_report("Received bad msg size.");
327                 return -1;
328             }
329             memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
330             break;
331         default:
332             error_report("Received unexpected msg type.");
333             return -1;
334             break;
335         }
336     }
337 
338     return 0;
339 }
340 
341 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
342 {
343     unsigned long long features;
344     int err;
345 
346     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
347 
348     dev->opaque = opaque;
349 
350     err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features);
351     if (err < 0) {
352         return err;
353     }
354 
355     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
356         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
357 
358         err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features);
359         if (err < 0) {
360             return err;
361         }
362 
363         dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
364         err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
365                               &dev->protocol_features);
366         if (err < 0) {
367             return err;
368         }
369     }
370 
371     return 0;
372 }
373 
374 static int vhost_user_cleanup(struct vhost_dev *dev)
375 {
376     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
377 
378     dev->opaque = 0;
379 
380     return 0;
381 }
382 
383 const VhostOps user_ops = {
384         .backend_type = VHOST_BACKEND_TYPE_USER,
385         .vhost_call = vhost_user_call,
386         .vhost_backend_init = vhost_user_init,
387         .vhost_backend_cleanup = vhost_user_cleanup
388         };
389