/*
 * vhost-user.c
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "net/vhost_net.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "standard-headers/linux/virtio_net.h"
#include "chardev/char-fe.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-net.h"
#include "qapi/qapi-events-net.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "trace.h"

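/*
 * Feature bits that this net client allows to be negotiated with the
 * vhost-user backend; the list is terminated by VHOST_INVALID_FEATURE_BIT.
 */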
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_RSC_EXT,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HOST_USO,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};

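/*
 * State of one vhost-user net client (one per queue pair).  The chardev
 * and the HUP watch are only used on queue index 0; acked_features caches
 * the features last acked by the backend so that they survive a backend
 * disconnect.
 */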
typedef struct NetVhostUserState {
    NetClientState nc;
    CharBackend chr; /* only queue index 0 */
    VhostUserState *vhost_user;
    VHostNetState *vhost_net;
    guint watch;
    uint64_t acked_features;
    bool started;
} NetVhostUserState;

static struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
    return s->vhost_net;
}

static uint64_t vhost_user_get_acked_features(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
    return s->acked_features;
}

static void vhost_user_save_acked_features(NetClientState *nc)
{
    NetVhostUserState *s;

    s = DO_UPCAST(NetVhostUserState, nc, nc);
    if (s->vhost_net) {
        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
        if (features) {
            s->acked_features = features;
        }
    }
}

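/*
 * Tear down the vhost_net instances of the first @queues clients, saving
 * the acked features first so a later restart can restore them.
 */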
static void vhost_user_stop(int queues, NetClientState *ncs[])
{
    int i;
    NetVhostUserState *s;

    for (i = 0; i < queues; i++) {
        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);

        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);

        if (s->vhost_net) {
            vhost_user_save_acked_features(ncs[i]);
            vhost_net_cleanup(s->vhost_net);
        }
    }
}

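/*
 * Create a vhost_net instance for each queue pair over the shared
 * vhost-user connection.  The backend's advertised maximum queue count is
 * checked against the requested number of queues; on any failure,
 * everything set up so far is rolled back.
 */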
static int vhost_user_start(int queues, NetClientState *ncs[],
                            VhostUserState *be)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    NetVhostUserState *s;
    int max_queues;
    int i;

    options.backend_type = VHOST_BACKEND_TYPE_USER;

    for (i = 0; i < queues; i++) {
        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);

        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);

        options.net_backend = ncs[i];
        options.opaque      = be;
        options.busyloop_timeout = 0;
        options.nvqs = 2;
        options.feature_bits = user_feature_bits;
        options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE;
        options.get_acked_features = vhost_user_get_acked_features;
        options.save_acked_features = vhost_user_save_acked_features;
        options.is_vhost_user = true;

        net = vhost_net_init(&options);
        if (!net) {
            error_report("failed to init vhost_net for queue %d", i);
            goto err;
        }

        if (i == 0) {
            max_queues = vhost_net_get_max_queues(net);
            if (queues > max_queues) {
                error_report("you are asking for more queues than supported: %d",
                             max_queues);
                goto err;
            }
        }

        if (s->vhost_net) {
            vhost_net_cleanup(s->vhost_net);
            g_free(s->vhost_net);
        }
        s->vhost_net = net;
    }

    return 0;

err:
    if (net) {
        vhost_net_cleanup(net);
        g_free(net);
    }
    vhost_user_stop(i, ncs);
    return -1;
}

static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    /* In case of RARP (message size is 60) notify the backend to send a
     * fake RARP.  This fake RARP will be sent by the backend only for
     * guests without the GUEST_ANNOUNCE capability.
     */
    if (size == 60) {
        NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
        int r;
        static int display_rarp_failure = 1;
        char mac_addr[6];

        /* extract the guest MAC address from the RARP message */
        memcpy(mac_addr, &buf[6], 6);

        r = vhost_net_notify_migration_done(s->vhost_net, mac_addr);

        if ((r != 0) && (display_rarp_failure)) {
            fprintf(stderr,
                    "Vhost-user backend failed to broadcast fake RARP\n");
            fflush(stderr);
            display_rarp_failure = 0;
        }
    }

    return size;
}

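/*
 * NetClientInfo.cleanup callback: free this queue's vhost_net and, on
 * queue index 0, also release the HUP watch, the chardev and the shared
 * VhostUserState.
 */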
static void net_vhost_user_cleanup(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);

    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (nc->queue_index == 0) {
        if (s->watch) {
            g_source_remove(s->watch);
            s->watch = 0;
        }
        qemu_chr_fe_deinit(&s->chr, true);
        if (s->vhost_user) {
            vhost_user_cleanup(s->vhost_user);
            g_free(s->vhost_user);
            s->vhost_user = NULL;
        }
    }

    qemu_purge_queued_packets(nc);
}

static int vhost_user_set_vnet_endianness(NetClientState *nc,
                                          bool enable)
{
    /* Nothing to do.  If the server supports
     * VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, it will get the
     * vnet header endianness from there.  If it doesn't, negotiation
     * fails.
     */
    return 0;
}

static bool vhost_user_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);

    return true;
}

static bool vhost_user_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);

    return true;
}

static bool vhost_user_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

static NetClientInfo net_vhost_user_info = {
        .type = NET_CLIENT_DRIVER_VHOST_USER,
        .size = sizeof(NetVhostUserState),
        .receive = vhost_user_receive,
        .cleanup = net_vhost_user_cleanup,
        .has_vnet_hdr = vhost_user_has_vnet_hdr,
        .has_ufo = vhost_user_has_ufo,
        .set_vnet_be = vhost_user_set_vnet_endianness,
        .set_vnet_le = vhost_user_set_vnet_endianness,
        .check_peer_type = vhost_user_check_peer_type,
        .get_vhost_net = vhost_user_get_vhost_net,
};

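/*
 * G_IO_HUP watch on the chardev: the backend hung up, so force a
 * disconnect and let the CHR_EVENT_CLOSED path perform the teardown.
 */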
static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond,
                                     void *opaque)
{
    NetVhostUserState *s = opaque;

    qemu_chr_fe_disconnect(&s->chr);

    return G_SOURCE_CONTINUE;
}

static void net_vhost_user_event(void *opaque, QEMUChrEvent event);

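/*
 * Bottom half scheduled from CHR_EVENT_CLOSED: save the acked features,
 * bring the link down, re-install net_vhost_user_event as the chardev
 * event handler so a reconnect can restart the device, and emit the
 * corresponding QAPI disconnect event.
 */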
static void chr_closed_bh(void *opaque)
{
    const char *name = opaque;
    NetClientState *ncs[MAX_QUEUE_NUM];
    NetVhostUserState *s;
    Error *err = NULL;
    int queues, i;

    queues = qemu_find_net_clients_except(name, ncs,
                                          NET_CLIENT_DRIVER_NIC,
                                          MAX_QUEUE_NUM);
    assert(queues < MAX_QUEUE_NUM);

    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);

    for (i = queues - 1; i >= 0; i--) {
        vhost_user_save_acked_features(ncs[i]);
    }

    net_client_set_link(ncs, queues, false);

    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
                             NULL, opaque, NULL, true);

    if (err) {
        error_report_err(err);
    }
    qapi_event_send_netdev_vhost_user_disconnected(name);
}

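/*
 * Chardev event handler.  CHR_EVENT_OPENED (re)starts vhost-user setup and
 * brings the link up; CHR_EVENT_CLOSED defers the teardown to
 * chr_closed_bh because vhost code may still be using the connection from
 * within a read/write.
 */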
static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
{
    const char *name = opaque;
    NetClientState *ncs[MAX_QUEUE_NUM];
    NetVhostUserState *s;
    Chardev *chr;
    Error *err = NULL;
    int queues;

    queues = qemu_find_net_clients_except(name, ncs,
                                          NET_CLIENT_DRIVER_NIC,
                                          MAX_QUEUE_NUM);
    assert(queues < MAX_QUEUE_NUM);

    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
    chr = qemu_chr_fe_get_driver(&s->chr);
    trace_vhost_user_event(chr->label, event);
    switch (event) {
    case CHR_EVENT_OPENED:
        if (vhost_user_start(queues, ncs, s->vhost_user) < 0) {
            qemu_chr_fe_disconnect(&s->chr);
            return;
        }
        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
                                         net_vhost_user_watch, s);
        net_client_set_link(ncs, queues, true);
        s->started = true;
        qapi_event_send_netdev_vhost_user_connected(name, chr->label);
        break;
    case CHR_EVENT_CLOSED:
        /* A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains set up, so delay the
         * stop & clear to idle.
         * FIXME: better handle failure in vhost code, remove bh
         */
        if (s->watch) {
            AioContext *ctx = qemu_get_current_aio_context();

            g_source_remove(s->watch);
            s->watch = 0;
            qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL,
                                     NULL, NULL, false);

            aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
        }
        break;
    case CHR_EVENT_BREAK:
    case CHR_EVENT_MUX_IN:
    case CHR_EVENT_MUX_OUT:
        /* Ignore */
        break;
    }

    if (err) {
        error_report_err(err);
    }
}

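/*
 * Create one net client per queue pair, bind the chardev to queue 0 and
 * loop on qemu_chr_fe_wait_connected() until the first successful
 * vhost-user setup (s->started) has completed.
 */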
static int net_vhost_user_init(NetClientState *peer, const char *device,
                               const char *name, Chardev *chr,
                               int queues)
{
    Error *err = NULL;
    NetClientState *nc, *nc0 = NULL;
    NetVhostUserState *s = NULL;
    VhostUserState *user;
    int i;

    assert(name);
    assert(queues > 0);

    user = g_new0(struct VhostUserState, 1);
    for (i = 0; i < queues; i++) {
        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
        qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label);
        nc->queue_index = i;
        if (!nc0) {
            nc0 = nc;
            s = DO_UPCAST(NetVhostUserState, nc, nc);
            if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
                !vhost_user_init(user, &s->chr, &err)) {
                error_report_err(err);
                goto err;
            }
        }
        s = DO_UPCAST(NetVhostUserState, nc, nc);
        s->vhost_user = user;
    }

    s = DO_UPCAST(NetVhostUserState, nc, nc0);
    do {
        if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
            error_report_err(err);
            goto err;
        }
        qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                                 net_vhost_user_event, NULL, nc0->name, NULL,
                                 true);
    } while (!s->started);

    assert(s->vhost_net);

    return 0;

err:
    if (user) {
        vhost_user_cleanup(user);
        g_free(user);
        if (s) {
            s->vhost_user = NULL;
        }
    }
    if (nc0) {
        qemu_del_net_client(nc0);
    }

    return -1;
}

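/*
 * Look up the chardev named in the netdev options and make sure it is
 * usable for vhost-user: it must support reconnection and file descriptor
 * passing (typically a UNIX domain socket chardev).
 */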
static Chardev *net_vhost_claim_chardev(
    const NetdevVhostUserOptions *opts, Error **errp)
{
    Chardev *chr = qemu_chr_find(opts->chardev);

    if (chr == NULL) {
        error_setg(errp, "chardev \"%s\" not found", opts->chardev);
        return NULL;
    }

    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
        error_setg(errp, "chardev \"%s\" is not reconnectable",
                   opts->chardev);
        return NULL;
    }
    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS)) {
        error_setg(errp, "chardev \"%s\" does not support FD passing",
                   opts->chardev);
        return NULL;
    }

    return chr;
}

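/*
 * Entry point for "-netdev vhost-user,...": validate the chardev and the
 * queue count, then create the backend.  A typical invocation (ids, paths
 * and queue counts below are only illustrative) looks like:
 *
 *   qemu-system-x86_64 \
 *     -chardev socket,id=char0,path=/tmp/vhost-user.sock \
 *     -netdev vhost-user,id=net0,chardev=char0,queues=2 \
 *     -device virtio-net-pci,netdev=net0,mq=on
 *
 * with a vhost-user backend (e.g. DPDK's vhost library or a userspace
 * switch) serving the other end of the socket.
 */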
int net_init_vhost_user(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    int queues;
    const NetdevVhostUserOptions *vhost_user_opts;
    Chardev *chr;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER);
    vhost_user_opts = &netdev->u.vhost_user;

    chr = net_vhost_claim_chardev(vhost_user_opts, errp);
    if (!chr) {
        return -1;
    }

    queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
    if (queues < 1 || queues > MAX_QUEUE_NUM) {
        error_setg(errp,
                   "vhost-user number of queues must be in range [1, %d]",
                   MAX_QUEUE_NUM);
        return -1;
    }

    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
}