xref: /openbmc/qemu/hw/net/vhost_net.c (revision 57941c9c)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 
40 /* Features supported by host kernel. */
41 static const int kernel_feature_bits[] = {
42     VIRTIO_F_NOTIFY_ON_EMPTY,
43     VIRTIO_RING_F_INDIRECT_DESC,
44     VIRTIO_RING_F_EVENT_IDX,
45     VIRTIO_NET_F_MRG_RXBUF,
46     VIRTIO_F_VERSION_1,
47     VIRTIO_NET_F_MTU,
48     VIRTIO_F_IOMMU_PLATFORM,
49     VIRTIO_F_RING_PACKED,
50     VIRTIO_F_RING_RESET,
51     VIRTIO_F_IN_ORDER,
52     VIRTIO_F_NOTIFICATION_DATA,
53     VIRTIO_NET_F_HASH_REPORT,
54     VHOST_INVALID_FEATURE_BIT
55 };
56 
57 /* Features supported by others. */
58 static const int user_feature_bits[] = {
59     VIRTIO_F_NOTIFY_ON_EMPTY,
60     VIRTIO_F_NOTIFICATION_DATA,
61     VIRTIO_RING_F_INDIRECT_DESC,
62     VIRTIO_RING_F_EVENT_IDX,
63 
64     VIRTIO_F_ANY_LAYOUT,
65     VIRTIO_F_VERSION_1,
66     VIRTIO_NET_F_CSUM,
67     VIRTIO_NET_F_GUEST_CSUM,
68     VIRTIO_NET_F_GSO,
69     VIRTIO_NET_F_GUEST_TSO4,
70     VIRTIO_NET_F_GUEST_TSO6,
71     VIRTIO_NET_F_GUEST_ECN,
72     VIRTIO_NET_F_GUEST_UFO,
73     VIRTIO_NET_F_HOST_TSO4,
74     VIRTIO_NET_F_HOST_TSO6,
75     VIRTIO_NET_F_HOST_ECN,
76     VIRTIO_NET_F_HOST_UFO,
77     VIRTIO_NET_F_MRG_RXBUF,
78     VIRTIO_NET_F_MTU,
79     VIRTIO_F_IOMMU_PLATFORM,
80     VIRTIO_F_RING_PACKED,
81     VIRTIO_F_RING_RESET,
82     VIRTIO_F_IN_ORDER,
83     VIRTIO_NET_F_RSS,
84     VIRTIO_NET_F_HASH_REPORT,
85     VIRTIO_NET_F_GUEST_USO4,
86     VIRTIO_NET_F_GUEST_USO6,
87     VIRTIO_NET_F_HOST_USO,
88 
89     /* This bit implies RARP isn't sent by QEMU out of band */
90     VIRTIO_NET_F_GUEST_ANNOUNCE,
91 
92     VIRTIO_NET_F_MQ,
93 
94     VHOST_INVALID_FEATURE_BIT
95 };
96 
97 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
98 {
99     const int *feature_bits = 0;
100 
101     switch (net->nc->info->type) {
102     case NET_CLIENT_DRIVER_TAP:
103         feature_bits = kernel_feature_bits;
104         break;
105     case NET_CLIENT_DRIVER_VHOST_USER:
106         feature_bits = user_feature_bits;
107         break;
108 #ifdef CONFIG_VHOST_NET_VDPA
109     case NET_CLIENT_DRIVER_VHOST_VDPA:
110         feature_bits = vdpa_feature_bits;
111         break;
112 #endif
113     default:
114         error_report("Feature bits not defined for this type: %d",
115                 net->nc->info->type);
116         break;
117     }
118 
119     return feature_bits;
120 }
121 
122 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
123 {
124     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
125             features);
126 }
127 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
128                          uint32_t config_len)
129 {
130     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
131 }
132 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
133                          uint32_t offset, uint32_t size, uint32_t flags)
134 {
135     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
136 }
137 
138 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
139 {
140     net->dev.acked_features = net->dev.backend_features;
141     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
142 }
143 
144 uint64_t vhost_net_get_max_queues(VHostNetState *net)
145 {
146     return net->dev.max_queues;
147 }
148 
149 uint64_t vhost_net_get_acked_features(VHostNetState *net)
150 {
151     return net->dev.acked_features;
152 }
153 
154 void vhost_net_save_acked_features(NetClientState *nc)
155 {
156 #ifdef CONFIG_VHOST_NET_USER
157     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
158         vhost_user_save_acked_features(nc);
159     }
160 #endif
161 }
162 
163 static int vhost_net_get_fd(NetClientState *backend)
164 {
165     switch (backend->info->type) {
166     case NET_CLIENT_DRIVER_TAP:
167         return tap_get_fd(backend);
168     default:
169         fprintf(stderr, "vhost-net requires tap backend\n");
170         return -ENOSYS;
171     }
172 }
173 
174 struct vhost_net *vhost_net_init(VhostNetOptions *options)
175 {
176     int r;
177     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
178     struct vhost_net *net = g_new0(struct vhost_net, 1);
179     uint64_t features = 0;
180     Error *local_err = NULL;
181 
182     if (!options->net_backend) {
183         fprintf(stderr, "vhost-net requires net backend to be setup\n");
184         goto fail;
185     }
186     net->nc = options->net_backend;
187     net->dev.nvqs = options->nvqs;
188 
189     net->dev.max_queues = 1;
190     net->dev.vqs = net->vqs;
191 
192     if (backend_kernel) {
193         r = vhost_net_get_fd(options->net_backend);
194         if (r < 0) {
195             goto fail;
196         }
197         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
198             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
199         net->backend = r;
200         net->dev.protocol_features = 0;
201     } else {
202         net->dev.backend_features = 0;
203         net->dev.protocol_features = 0;
204         net->backend = -1;
205 
206         /* vhost-user needs vq_index to initiate a specific queue pair */
207         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
208     }
209 
210     r = vhost_dev_init(&net->dev, options->opaque,
211                        options->backend_type, options->busyloop_timeout,
212                        &local_err);
213     if (r < 0) {
214         error_report_err(local_err);
215         goto fail;
216     }
217     if (backend_kernel) {
218         if (!qemu_has_vnet_hdr_len(options->net_backend,
219                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
220             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
221         }
222         if (~net->dev.features & net->dev.backend_features) {
223             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
224                    " for backend\n",
225                    (uint64_t)(~net->dev.features & net->dev.backend_features));
226             goto fail;
227         }
228     }
229 
230     /* Set sane init value. Override when guest acks. */
231 #ifdef CONFIG_VHOST_NET_USER
232     if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
233         features = vhost_user_get_acked_features(net->nc);
234         if (~net->dev.features & features) {
235             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
236                     " for backend\n",
237                     (uint64_t)(~net->dev.features & features));
238             goto fail;
239         }
240     }
241 #endif
242 
243     vhost_net_ack_features(net, features);
244 
245     return net;
246 
247 fail:
248     vhost_dev_cleanup(&net->dev);
249     g_free(net);
250     return NULL;
251 }
252 
253 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
254                                    int vq_index_end)
255 {
256     net->dev.vq_index = vq_index;
257     net->dev.vq_index_end = vq_index_end;
258 }
259 
260 static int vhost_net_start_one(struct vhost_net *net,
261                                VirtIODevice *dev)
262 {
263     struct vhost_vring_file file = { };
264     int r;
265 
266     if (net->nc->info->start) {
267         r = net->nc->info->start(net->nc);
268         if (r < 0) {
269             return r;
270         }
271     }
272 
273     r = vhost_dev_enable_notifiers(&net->dev, dev);
274     if (r < 0) {
275         goto fail_notifiers;
276     }
277 
278     r = vhost_dev_start(&net->dev, dev, false);
279     if (r < 0) {
280         goto fail_start;
281     }
282 
283     if (net->nc->info->poll) {
284         net->nc->info->poll(net->nc, false);
285     }
286 
287     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
288         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
289         file.fd = net->backend;
290         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
291             if (!virtio_queue_enabled(dev, net->dev.vq_index +
292                                       file.index)) {
293                 /* Queue might not be ready for start */
294                 continue;
295             }
296             r = vhost_net_set_backend(&net->dev, &file);
297             if (r < 0) {
298                 r = -errno;
299                 goto fail;
300             }
301         }
302     }
303 
304     if (net->nc->info->load) {
305         r = net->nc->info->load(net->nc);
306         if (r < 0) {
307             goto fail;
308         }
309     }
310     return 0;
311 fail:
312     file.fd = -1;
313     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
314         while (file.index-- > 0) {
315             if (!virtio_queue_enabled(dev, net->dev.vq_index +
316                                       file.index)) {
317                 /* Queue might not be ready for start */
318                 continue;
319             }
320             int ret = vhost_net_set_backend(&net->dev, &file);
321             assert(ret >= 0);
322         }
323     }
324     if (net->nc->info->poll) {
325         net->nc->info->poll(net->nc, true);
326     }
327     vhost_dev_stop(&net->dev, dev, false);
328 fail_start:
329     vhost_dev_disable_notifiers(&net->dev, dev);
330 fail_notifiers:
331     return r;
332 }
333 
334 static void vhost_net_stop_one(struct vhost_net *net,
335                                VirtIODevice *dev)
336 {
337     struct vhost_vring_file file = { .fd = -1 };
338 
339     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
340         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
341             int r = vhost_net_set_backend(&net->dev, &file);
342             assert(r >= 0);
343         }
344     }
345     if (net->nc->info->poll) {
346         net->nc->info->poll(net->nc, true);
347     }
348     vhost_dev_stop(&net->dev, dev, false);
349     if (net->nc->info->stop) {
350         net->nc->info->stop(net->nc);
351     }
352     vhost_dev_disable_notifiers(&net->dev, dev);
353 }
354 
355 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
356                     int data_queue_pairs, int cvq)
357 {
358     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
359     VirtioBusState *vbus = VIRTIO_BUS(qbus);
360     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
361     int total_notifiers = data_queue_pairs * 2 + cvq;
362     VirtIONet *n = VIRTIO_NET(dev);
363     int nvhosts = data_queue_pairs + cvq;
364     struct vhost_net *net;
365     int r, e, i, index_end = data_queue_pairs * 2;
366     NetClientState *peer;
367 
368     if (cvq) {
369         index_end += 1;
370     }
371 
372     if (!k->set_guest_notifiers) {
373         error_report("binding does not support guest notifiers");
374         return -ENOSYS;
375     }
376 
377     for (i = 0; i < nvhosts; i++) {
378 
379         if (i < data_queue_pairs) {
380             peer = qemu_get_peer(ncs, i);
381         } else { /* Control Virtqueue */
382             peer = qemu_get_peer(ncs, n->max_queue_pairs);
383         }
384 
385         net = get_vhost_net(peer);
386         vhost_net_set_vq_index(net, i * 2, index_end);
387 
388         /* Suppress the masking guest notifiers on vhost user
389          * because vhost user doesn't interrupt masking/unmasking
390          * properly.
391          */
392         if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
393             dev->use_guest_notifier_mask = false;
394         }
395      }
396 
397     r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
398     if (r < 0) {
399         error_report("Error binding guest notifier: %d", -r);
400         goto err;
401     }
402 
403     for (i = 0; i < nvhosts; i++) {
404         if (i < data_queue_pairs) {
405             peer = qemu_get_peer(ncs, i);
406         } else {
407             peer = qemu_get_peer(ncs, n->max_queue_pairs);
408         }
409 
410         if (peer->vring_enable) {
411             /* restore vring enable state */
412             r = vhost_set_vring_enable(peer, peer->vring_enable);
413 
414             if (r < 0) {
415                 goto err_start;
416             }
417         }
418 
419         r = vhost_net_start_one(get_vhost_net(peer), dev);
420         if (r < 0) {
421             goto err_start;
422         }
423     }
424 
425     return 0;
426 
427 err_start:
428     while (--i >= 0) {
429         peer = qemu_get_peer(ncs, i < data_queue_pairs ?
430                                   i : n->max_queue_pairs);
431         vhost_net_stop_one(get_vhost_net(peer), dev);
432     }
433     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
434     if (e < 0) {
435         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
436         fflush(stderr);
437     }
438 err:
439     return r;
440 }
441 
442 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
443                     int data_queue_pairs, int cvq)
444 {
445     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
446     VirtioBusState *vbus = VIRTIO_BUS(qbus);
447     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
448     VirtIONet *n = VIRTIO_NET(dev);
449     NetClientState *peer;
450     int total_notifiers = data_queue_pairs * 2 + cvq;
451     int nvhosts = data_queue_pairs + cvq;
452     int i, r;
453 
454     for (i = 0; i < nvhosts; i++) {
455         if (i < data_queue_pairs) {
456             peer = qemu_get_peer(ncs, i);
457         } else {
458             peer = qemu_get_peer(ncs, n->max_queue_pairs);
459         }
460         vhost_net_stop_one(get_vhost_net(peer), dev);
461     }
462 
463     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
464     if (r < 0) {
465         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
466         fflush(stderr);
467     }
468     assert(r >= 0);
469 }
470 
471 void vhost_net_cleanup(struct vhost_net *net)
472 {
473     vhost_dev_cleanup(&net->dev);
474 }
475 
476 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
477 {
478     const VhostOps *vhost_ops = net->dev.vhost_ops;
479 
480     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
481     assert(vhost_ops->vhost_migration_done);
482 
483     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
484 }
485 
486 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
487 {
488     return vhost_virtqueue_pending(&net->dev, idx);
489 }
490 
491 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
492                               int idx, bool mask)
493 {
494     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
495 }
496 
497 bool vhost_net_config_pending(VHostNetState *net)
498 {
499     return vhost_config_pending(&net->dev);
500 }
501 
502 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
503 {
504     vhost_config_mask(&net->dev, dev, mask);
505 }
506 VHostNetState *get_vhost_net(NetClientState *nc)
507 {
508     VHostNetState *vhost_net = 0;
509 
510     if (!nc) {
511         return 0;
512     }
513 
514     switch (nc->info->type) {
515     case NET_CLIENT_DRIVER_TAP:
516         vhost_net = tap_get_vhost_net(nc);
517         /*
518          * tap_get_vhost_net() can return NULL if a tap net-device backend is
519          * created with 'vhost=off' option, 'vhostforce=off' or no vhost or
520          * vhostforce or vhostfd options at all. Please see net_init_tap_one().
521          * Hence, we omit the assertion here.
522          */
523         break;
524 #ifdef CONFIG_VHOST_NET_USER
525     case NET_CLIENT_DRIVER_VHOST_USER:
526         vhost_net = vhost_user_get_vhost_net(nc);
527         assert(vhost_net);
528         break;
529 #endif
530 #ifdef CONFIG_VHOST_NET_VDPA
531     case NET_CLIENT_DRIVER_VHOST_VDPA:
532         vhost_net = vhost_vdpa_get_vhost_net(nc);
533         assert(vhost_net);
534         break;
535 #endif
536     default:
537         break;
538     }
539 
540     return vhost_net;
541 }
542 
543 int vhost_set_vring_enable(NetClientState *nc, int enable)
544 {
545     VHostNetState *net = get_vhost_net(nc);
546     const VhostOps *vhost_ops = net->dev.vhost_ops;
547 
548     /*
549      * vhost-vdpa network devices need to enable dataplane virtqueues after
550      * DRIVER_OK, so they can recover device state before starting dataplane.
551      * Because of that, we don't enable virtqueues here and leave it to
552      * net/vhost-vdpa.c.
553      */
554     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
555         return 0;
556     }
557 
558     nc->vring_enable = enable;
559 
560     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
561         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
562     }
563 
564     return 0;
565 }
566 
567 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
568 {
569     const VhostOps *vhost_ops = net->dev.vhost_ops;
570 
571     if (!vhost_ops->vhost_net_set_mtu) {
572         return 0;
573     }
574 
575     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
576 }
577 
578 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
579                                int vq_index)
580 {
581     VHostNetState *net = get_vhost_net(nc->peer);
582     const VhostOps *vhost_ops = net->dev.vhost_ops;
583     struct vhost_vring_file file = { .fd = -1 };
584     int idx;
585 
586     /* should only be called after backend is connected */
587     assert(vhost_ops);
588 
589     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
590 
591     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
592         file.index = idx;
593         int r = vhost_net_set_backend(&net->dev, &file);
594         assert(r >= 0);
595     }
596 
597     vhost_virtqueue_stop(&net->dev,
598                          vdev,
599                          net->dev.vqs + idx,
600                          net->dev.vq_index + idx);
601 }
602 
603 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
604                                 int vq_index)
605 {
606     VHostNetState *net = get_vhost_net(nc->peer);
607     const VhostOps *vhost_ops = net->dev.vhost_ops;
608     struct vhost_vring_file file = { };
609     int idx, r;
610 
611     if (!net->dev.started) {
612         return -EBUSY;
613     }
614 
615     /* should only be called after backend is connected */
616     assert(vhost_ops);
617 
618     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
619 
620     r = vhost_virtqueue_start(&net->dev,
621                               vdev,
622                               net->dev.vqs + idx,
623                               net->dev.vq_index + idx);
624     if (r < 0) {
625         goto err_start;
626     }
627 
628     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
629         file.index = idx;
630         file.fd = net->backend;
631         r = vhost_net_set_backend(&net->dev, &file);
632         if (r < 0) {
633             r = -errno;
634             goto err_start;
635         }
636     }
637 
638     return 0;
639 
640 err_start:
641     error_report("Error when restarting the queue.");
642 
643     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
644         file.fd = VHOST_FILE_UNBIND;
645         file.index = idx;
646         int ret = vhost_net_set_backend(&net->dev, &file);
647         assert(ret >= 0);
648     }
649 
650     vhost_dev_stop(&net->dev, vdev, false);
651 
652     return r;
653 }
654