xref: /openbmc/qemu/net/vhost-user.c (revision bd38794a1119ec8e3f0a7473458ce4cdd229bc42)
1 /*
2  * vhost-user.c
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "clients.h"
13 #include "net/vhost_net.h"
14 #include "net/vhost-user.h"
15 #include "hw/virtio/vhost.h"
16 #include "hw/virtio/vhost-user.h"
17 #include "standard-headers/linux/virtio_net.h"
18 #include "chardev/char-fe.h"
19 #include "qapi/error.h"
20 #include "qapi/qapi-commands-net.h"
21 #include "qapi/qapi-events-net.h"
22 #include "qemu/config-file.h"
23 #include "qemu/error-report.h"
24 #include "qemu/option.h"
25 #include "trace.h"
26 
/*
 * Feature bits handed to vhost_net_init() through
 * VhostNetOptions.feature_bits in vhost_user_start().  The list is
 * terminated by VHOST_INVALID_FEATURE_BIT.
 */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_RSC_EXT,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HOST_USO,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    /* End-of-list marker; must stay last. */
    VHOST_INVALID_FEATURE_BIT
};
66 
/*
 * Per-queue state of a vhost-user net client.  One instance is created
 * for every queue by net_vhost_user_init(); the chardev frontend and the
 * HUP watch are only used on the instance with queue index 0.
 */
typedef struct NetVhostUserState {
    /* Embedded generic client; DO_UPCAST() converts &nc back to this struct */
    NetClientState nc;
    CharBackend chr; /* only queue index 0 */
    /* Set to the same VhostUserState for every queue; released by queue 0 */
    VhostUserState *vhost_user;
    /* Installed by vhost_user_start(); NULL until the first start */
    VHostNetState *vhost_net;
    /* Source id of the G_IO_HUP watch, 0 when no watch is installed */
    guint watch;
    /* Last non-zero feature set read back from vhost_net */
    uint64_t acked_features;
    /* Set once a CHR_EVENT_OPENED has been handled successfully */
    bool started;
} NetVhostUserState;
76 
77 static struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc)
78 {
79     NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
80     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
81     return s->vhost_net;
82 }
83 
84 static uint64_t vhost_user_get_acked_features(NetClientState *nc)
85 {
86     NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
87     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
88     return s->acked_features;
89 }
90 
91 void vhost_user_save_acked_features(NetClientState *nc)
92 {
93     NetVhostUserState *s;
94 
95     s = DO_UPCAST(NetVhostUserState, nc, nc);
96     if (s->vhost_net) {
97         uint64_t features = vhost_net_get_acked_features(s->vhost_net);
98         if (features) {
99             s->acked_features = features;
100         }
101     }
102 }
103 
104 static void vhost_user_stop(int queues, NetClientState *ncs[])
105 {
106     int i;
107     NetVhostUserState *s;
108 
109     for (i = 0; i < queues; i++) {
110         assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
111 
112         s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
113 
114         if (s->vhost_net) {
115             vhost_user_save_acked_features(ncs[i]);
116             vhost_net_cleanup(s->vhost_net);
117         }
118     }
119 }
120 
121 static int vhost_user_start(int queues, NetClientState *ncs[],
122                             VhostUserState *be)
123 {
124     VhostNetOptions options;
125     struct vhost_net *net = NULL;
126     NetVhostUserState *s;
127     int max_queues;
128     int i;
129 
130     options.backend_type = VHOST_BACKEND_TYPE_USER;
131 
132     for (i = 0; i < queues; i++) {
133         assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
134 
135         s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
136 
137         options.net_backend = ncs[i];
138         options.opaque      = be;
139         options.busyloop_timeout = 0;
140         options.nvqs = 2;
141         options.feature_bits = user_feature_bits;
142         options.get_acked_features = vhost_user_get_acked_features;
143 
144         net = vhost_net_init(&options);
145         if (!net) {
146             error_report("failed to init vhost_net for queue %d", i);
147             goto err;
148         }
149 
150         if (i == 0) {
151             max_queues = vhost_net_get_max_queues(net);
152             if (queues > max_queues) {
153                 error_report("you are asking more queues than supported: %d",
154                              max_queues);
155                 goto err;
156             }
157         }
158 
159         if (s->vhost_net) {
160             vhost_net_cleanup(s->vhost_net);
161             g_free(s->vhost_net);
162         }
163         s->vhost_net = net;
164     }
165 
166     return 0;
167 
168 err:
169     if (net) {
170         vhost_net_cleanup(net);
171         g_free(net);
172     }
173     vhost_user_stop(i, ncs);
174     return -1;
175 }
176 
177 static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf,
178                                   size_t size)
179 {
180     /* In case of RARP (message size is 60) notify backup to send a fake RARP.
181        This fake RARP will be sent by backend only for guest
182        without GUEST_ANNOUNCE capability.
183      */
184     if (size == 60) {
185         NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
186         int r;
187         static int display_rarp_failure = 1;
188         char mac_addr[6];
189 
190         /* extract guest mac address from the RARP message */
191         memcpy(mac_addr, &buf[6], 6);
192 
193         r = vhost_net_notify_migration_done(s->vhost_net, mac_addr);
194 
195         if ((r != 0) && (display_rarp_failure)) {
196             fprintf(stderr,
197                     "Vhost user backend fails to broadcast fake RARP\n");
198             fflush(stderr);
199             display_rarp_failure = 0;
200         }
201     }
202 
203     return size;
204 }
205 
206 static void net_vhost_user_cleanup(NetClientState *nc)
207 {
208     NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
209 
210     if (s->vhost_net) {
211         vhost_net_cleanup(s->vhost_net);
212         g_free(s->vhost_net);
213         s->vhost_net = NULL;
214     }
215     if (nc->queue_index == 0) {
216         if (s->watch) {
217             g_source_remove(s->watch);
218             s->watch = 0;
219         }
220         qemu_chr_fe_deinit(&s->chr, true);
221         if (s->vhost_user) {
222             vhost_user_cleanup(s->vhost_user);
223             g_free(s->vhost_user);
224             s->vhost_user = NULL;
225         }
226     }
227 
228     qemu_purge_queued_packets(nc);
229 }
230 
231 static int vhost_user_set_vnet_endianness(NetClientState *nc,
232                                           bool enable)
233 {
234     /* Nothing to do.  If the server supports
235      * VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, it will get the
236      * vnet header endianness from there.  If it doesn't, negotiation
237      * fails.
238      */
239     return 0;
240 }
241 
242 static bool vhost_user_has_vnet_hdr(NetClientState *nc)
243 {
244     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
245 
246     return true;
247 }
248 
249 static bool vhost_user_has_ufo(NetClientState *nc)
250 {
251     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
252 
253     return true;
254 }
255 
256 static bool vhost_user_check_peer_type(NetClientState *nc, ObjectClass *oc,
257                                        Error **errp)
258 {
259     const char *driver = object_class_get_name(oc);
260 
261     if (!g_str_has_prefix(driver, "virtio-net-")) {
262         error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
263         return false;
264     }
265 
266     return true;
267 }
268 
269 static NetClientInfo net_vhost_user_info = {
270         .type = NET_CLIENT_DRIVER_VHOST_USER,
271         .size = sizeof(NetVhostUserState),
272         .receive = vhost_user_receive,
273         .cleanup = net_vhost_user_cleanup,
274         .has_vnet_hdr = vhost_user_has_vnet_hdr,
275         .has_ufo = vhost_user_has_ufo,
276         .set_vnet_be = vhost_user_set_vnet_endianness,
277         .set_vnet_le = vhost_user_set_vnet_endianness,
278         .check_peer_type = vhost_user_check_peer_type,
279         .get_vhost_net = vhost_user_get_vhost_net,
280 };
281 
282 static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond,
283                                      void *opaque)
284 {
285     NetVhostUserState *s = opaque;
286 
287     qemu_chr_fe_disconnect(&s->chr);
288 
289     return G_SOURCE_CONTINUE;
290 }
291 
292 static void net_vhost_user_event(void *opaque, QEMUChrEvent event);
293 
294 static void chr_closed_bh(void *opaque)
295 {
296     const char *name = opaque;
297     NetClientState *ncs[MAX_QUEUE_NUM];
298     NetVhostUserState *s;
299     Error *err = NULL;
300     int queues, i;
301 
302     queues = qemu_find_net_clients_except(name, ncs,
303                                           NET_CLIENT_DRIVER_NIC,
304                                           MAX_QUEUE_NUM);
305     assert(queues < MAX_QUEUE_NUM);
306 
307     s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
308 
309     for (i = queues -1; i >= 0; i--) {
310         vhost_user_save_acked_features(ncs[i]);
311     }
312 
313     net_client_set_link(ncs, queues, false);
314 
315     qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
316                              NULL, opaque, NULL, true);
317 
318     if (err) {
319         error_report_err(err);
320     }
321     qapi_event_send_netdev_vhost_user_disconnected(name);
322 }
323 
324 static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
325 {
326     const char *name = opaque;
327     NetClientState *ncs[MAX_QUEUE_NUM];
328     NetVhostUserState *s;
329     Chardev *chr;
330     Error *err = NULL;
331     int queues;
332 
333     queues = qemu_find_net_clients_except(name, ncs,
334                                           NET_CLIENT_DRIVER_NIC,
335                                           MAX_QUEUE_NUM);
336     assert(queues < MAX_QUEUE_NUM);
337 
338     s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
339     chr = qemu_chr_fe_get_driver(&s->chr);
340     trace_vhost_user_event(chr->label, event);
341     switch (event) {
342     case CHR_EVENT_OPENED:
343         if (vhost_user_start(queues, ncs, s->vhost_user) < 0) {
344             qemu_chr_fe_disconnect(&s->chr);
345             return;
346         }
347         s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
348                                          net_vhost_user_watch, s);
349         net_client_set_link(ncs, queues, true);
350         s->started = true;
351         qapi_event_send_netdev_vhost_user_connected(name, chr->label);
352         break;
353     case CHR_EVENT_CLOSED:
354         /* a close event may happen during a read/write, but vhost
355          * code assumes the vhost_dev remains setup, so delay the
356          * stop & clear to idle.
357          * FIXME: better handle failure in vhost code, remove bh
358          */
359         if (s->watch) {
360             AioContext *ctx = qemu_get_current_aio_context();
361 
362             g_source_remove(s->watch);
363             s->watch = 0;
364             qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL,
365                                      NULL, NULL, false);
366 
367             aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
368         }
369         break;
370     case CHR_EVENT_BREAK:
371     case CHR_EVENT_MUX_IN:
372     case CHR_EVENT_MUX_OUT:
373         /* Ignore */
374         break;
375     }
376 
377     if (err) {
378         error_report_err(err);
379     }
380 }
381 
382 static int net_vhost_user_init(NetClientState *peer, const char *device,
383                                const char *name, Chardev *chr,
384                                int queues)
385 {
386     Error *err = NULL;
387     NetClientState *nc, *nc0 = NULL;
388     NetVhostUserState *s = NULL;
389     VhostUserState *user;
390     int i;
391 
392     assert(name);
393     assert(queues > 0);
394 
395     user = g_new0(struct VhostUserState, 1);
396     for (i = 0; i < queues; i++) {
397         nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
398         qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label);
399         nc->queue_index = i;
400         if (!nc0) {
401             nc0 = nc;
402             s = DO_UPCAST(NetVhostUserState, nc, nc);
403             if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
404                 !vhost_user_init(user, &s->chr, &err)) {
405                 error_report_err(err);
406                 goto err;
407             }
408         }
409         s = DO_UPCAST(NetVhostUserState, nc, nc);
410         s->vhost_user = user;
411     }
412 
413     s = DO_UPCAST(NetVhostUserState, nc, nc0);
414     do {
415         if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
416             error_report_err(err);
417             goto err;
418         }
419         qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
420                                  net_vhost_user_event, NULL, nc0->name, NULL,
421                                  true);
422     } while (!s->started);
423 
424     assert(s->vhost_net);
425 
426     return 0;
427 
428 err:
429     if (user) {
430         vhost_user_cleanup(user);
431         g_free(user);
432         if (s) {
433             s->vhost_user = NULL;
434         }
435     }
436     if (nc0) {
437         qemu_del_net_client(nc0);
438     }
439 
440     return -1;
441 }
442 
443 static Chardev *net_vhost_claim_chardev(
444     const NetdevVhostUserOptions *opts, Error **errp)
445 {
446     Chardev *chr = qemu_chr_find(opts->chardev);
447 
448     if (chr == NULL) {
449         error_setg(errp, "chardev \"%s\" not found", opts->chardev);
450         return NULL;
451     }
452 
453     if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
454         error_setg(errp, "chardev \"%s\" is not reconnectable",
455                    opts->chardev);
456         return NULL;
457     }
458     if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS)) {
459         error_setg(errp, "chardev \"%s\" does not support FD passing",
460                    opts->chardev);
461         return NULL;
462     }
463 
464     return chr;
465 }
466 
467 int net_init_vhost_user(const Netdev *netdev, const char *name,
468                         NetClientState *peer, Error **errp)
469 {
470     int queues;
471     const NetdevVhostUserOptions *vhost_user_opts;
472     Chardev *chr;
473 
474     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER);
475     vhost_user_opts = &netdev->u.vhost_user;
476 
477     chr = net_vhost_claim_chardev(vhost_user_opts, errp);
478     if (!chr) {
479         return -1;
480     }
481 
482     queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
483     if (queues < 1 || queues > MAX_QUEUE_NUM) {
484         error_setg(errp,
485                    "vhost-user number of queues must be in range [1, %d]",
486                    MAX_QUEUE_NUM);
487         return -1;
488     }
489 
490     return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
491 }
492