xref: /openbmc/qemu/hw/net/vhost_net.c (revision 2089a2e5)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 
/*
 * Feature bits handled for the host kernel vhost backend.
 *
 * vhost_net_get_feature_bits() selects this table for
 * NET_CLIENT_DRIVER_TAP peers.  The list ends with
 * VHOST_INVALID_FEATURE_BIT.
 */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT
};
55 
/*
 * Feature bits handled for the vhost-user backend.
 *
 * vhost_net_get_feature_bits() selects this table for
 * NET_CLIENT_DRIVER_VHOST_USER peers.  The list ends with
 * VHOST_INVALID_FEATURE_BIT.
 */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HOST_USO,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};
94 
95 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
96 {
97     const int *feature_bits = 0;
98 
99     switch (net->nc->info->type) {
100     case NET_CLIENT_DRIVER_TAP:
101         feature_bits = kernel_feature_bits;
102         break;
103     case NET_CLIENT_DRIVER_VHOST_USER:
104         feature_bits = user_feature_bits;
105         break;
106 #ifdef CONFIG_VHOST_NET_VDPA
107     case NET_CLIENT_DRIVER_VHOST_VDPA:
108         feature_bits = vdpa_feature_bits;
109         break;
110 #endif
111     default:
112         error_report("Feature bits not defined for this type: %d",
113                 net->nc->info->type);
114         break;
115     }
116 
117     return feature_bits;
118 }
119 
120 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
121 {
122     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
123             features);
124 }
125 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
126                          uint32_t config_len)
127 {
128     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
129 }
130 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
131                          uint32_t offset, uint32_t size, uint32_t flags)
132 {
133     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
134 }
135 
136 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
137 {
138     net->dev.acked_features = net->dev.backend_features;
139     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
140 }
141 
142 uint64_t vhost_net_get_max_queues(VHostNetState *net)
143 {
144     return net->dev.max_queues;
145 }
146 
147 uint64_t vhost_net_get_acked_features(VHostNetState *net)
148 {
149     return net->dev.acked_features;
150 }
151 
152 void vhost_net_save_acked_features(NetClientState *nc)
153 {
154 #ifdef CONFIG_VHOST_NET_USER
155     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
156         vhost_user_save_acked_features(nc);
157     }
158 #endif
159 }
160 
161 static int vhost_net_get_fd(NetClientState *backend)
162 {
163     switch (backend->info->type) {
164     case NET_CLIENT_DRIVER_TAP:
165         return tap_get_fd(backend);
166     default:
167         fprintf(stderr, "vhost-net requires tap backend\n");
168         return -ENOSYS;
169     }
170 }
171 
/*
 * Allocate a vhost_net instance and initialise it from @options.
 *
 * For the kernel backend type the net backend's fd is fetched through
 * vhost_net_get_fd() and stored as the vhost backend; for every other
 * backend type the fd is set to -1 and vq_index is derived from the net
 * client's queue index.
 *
 * Returns the new instance on success.  On any failure a message is
 * printed, the instance is cleaned up and freed, and NULL is returned.
 */
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    /* Zero-filled allocation: the fail path may run before any setup. */
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        /*
         * When the net backend has no vnet header, the
         * VHOST_NET_F_VIRTIO_NET_HDR bit becomes a required backend
         * feature; it is checked against the device features below.
         */
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        /*
         * Drop mergeable rx buffers from the device features when the
         * net backend does not report support for a vnet header of
         * sizeof(struct virtio_net_hdr_mrg_rxbuf).
         */
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        /* Every required backend feature must be in the device features. */
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                   " for backend\n",
                   (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set a sane initial value; it is overridden when the guest acks. */
#ifdef CONFIG_VHOST_NET_USER
    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        /*
         * Start from the acked features reported by the vhost-user
         * client and refuse to continue if the device lacks any of them.
         */
        features = vhost_user_get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }
#endif

    vhost_net_ack_features(net, features);

    return net;

fail:
    /*
     * Reached both before and after vhost_dev_init() ran.
     * NOTE(review): this relies on vhost_dev_cleanup() tolerating a
     * device that was never initialised -- confirm against vhost.c.
     */
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}
250 
251 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
252                                    int vq_index_end)
253 {
254     net->dev.vq_index = vq_index;
255     net->dev.vq_index_end = vq_index_end;
256 }
257 
/*
 * Start vhost processing for a single vhost_net instance on @dev.
 *
 * Steps, in order: run the net client's optional start() hook, enable
 * the vhost notifiers, start the vhost device, call the optional poll()
 * hook with false, attach the tap fd to every enabled virtqueue (tap
 * clients only), and run the optional load() hook.
 *
 * Returns 0 on success or a negative value on failure.  On failure the
 * steps from notifier enabling onwards are undone in reverse order.
 */
static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    /*
     * NOTE(review): none of the failure paths below calls info->stop()
     * although info->start() may already have run -- confirm that this
     * is intended.
     */
    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
        goto fail_notifiers;
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    /* Presumably turns off the net client's own polling -- TODO confirm. */
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /* Register no handlers for the tap fd with the main loop. */
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        /* file.index doubles as the loop counter and the unwind marker. */
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                /* Assumes the failing call left its reason in errno. */
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    /* fd = -1 detaches the backend from a virtqueue. */
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /*
         * Walk back from the index that was reached: only queues below
         * file.index had a backend attached (all of them when load()
         * was the step that failed).
         */
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int ret = vhost_net_set_backend(&net->dev, &file);
            assert(ret >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
    return r;
}
331 
332 static void vhost_net_stop_one(struct vhost_net *net,
333                                VirtIODevice *dev)
334 {
335     struct vhost_vring_file file = { .fd = -1 };
336 
337     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
338         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
339             int r = vhost_net_set_backend(&net->dev, &file);
340             assert(r >= 0);
341         }
342     }
343     if (net->nc->info->poll) {
344         net->nc->info->poll(net->nc, true);
345     }
346     vhost_dev_stop(&net->dev, dev, false);
347     if (net->nc->info->stop) {
348         net->nc->info->stop(net->nc);
349     }
350     vhost_dev_disable_notifiers(&net->dev, dev);
351 }
352 
/*
 * Start vhost for @data_queue_pairs data queue pairs of @dev and, when
 * @cvq is non-zero, for the control virtqueue as well.
 *
 * @ncs is the net client array used to look up each queue's peer; the
 * control virtqueue peer is looked up at index n->max_queue_pairs.
 *
 * Returns 0 on success.  Returns -ENOSYS when the bus binding has no
 * set_guest_notifiers callback, or the negative value of the step that
 * failed; vhost instances that were already started are stopped again
 * before returning.
 */
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    /* Two virtqueues per data queue pair ... */
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    /* ... plus one for the control virtqueue, if present. */
    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    /* First pass: give every vhost instance its virtqueue index range. */
    for (i = 0; i < nvhosts; i++) {

        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /*
         * Suppress guest notifier masking for vhost-user, because
         * vhost-user does not handle interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
     }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err;
    }

    /* Second pass: start each vhost instance, in the same peer order. */
    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_start;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_start;
        }
    }

    return 0;

err_start:
    /* Instance i failed to start; stop the ones before it, newest first. */
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    /* Keep the original error in r; a cleanup failure is only logged. */
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err:
    return r;
}
439 
440 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
441                     int data_queue_pairs, int cvq)
442 {
443     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
444     VirtioBusState *vbus = VIRTIO_BUS(qbus);
445     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
446     VirtIONet *n = VIRTIO_NET(dev);
447     NetClientState *peer;
448     int total_notifiers = data_queue_pairs * 2 + cvq;
449     int nvhosts = data_queue_pairs + cvq;
450     int i, r;
451 
452     for (i = 0; i < nvhosts; i++) {
453         if (i < data_queue_pairs) {
454             peer = qemu_get_peer(ncs, i);
455         } else {
456             peer = qemu_get_peer(ncs, n->max_queue_pairs);
457         }
458         vhost_net_stop_one(get_vhost_net(peer), dev);
459     }
460 
461     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
462     if (r < 0) {
463         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
464         fflush(stderr);
465     }
466     assert(r >= 0);
467 }
468 
469 void vhost_net_cleanup(struct vhost_net *net)
470 {
471     vhost_dev_cleanup(&net->dev);
472 }
473 
474 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
475 {
476     const VhostOps *vhost_ops = net->dev.vhost_ops;
477 
478     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
479     assert(vhost_ops->vhost_migration_done);
480 
481     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
482 }
483 
484 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
485 {
486     return vhost_virtqueue_pending(&net->dev, idx);
487 }
488 
489 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
490                               int idx, bool mask)
491 {
492     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
493 }
494 
495 bool vhost_net_config_pending(VHostNetState *net)
496 {
497     return vhost_config_pending(&net->dev);
498 }
499 
500 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
501 {
502     vhost_config_mask(&net->dev, dev, mask);
503 }
504 VHostNetState *get_vhost_net(NetClientState *nc)
505 {
506     VHostNetState *vhost_net = 0;
507 
508     if (!nc) {
509         return 0;
510     }
511 
512     switch (nc->info->type) {
513     case NET_CLIENT_DRIVER_TAP:
514         vhost_net = tap_get_vhost_net(nc);
515         /*
516          * tap_get_vhost_net() can return NULL if a tap net-device backend is
517          * created with 'vhost=off' option, 'vhostforce=off' or no vhost or
518          * vhostforce or vhostfd options at all. Please see net_init_tap_one().
519          * Hence, we omit the assertion here.
520          */
521         break;
522 #ifdef CONFIG_VHOST_NET_USER
523     case NET_CLIENT_DRIVER_VHOST_USER:
524         vhost_net = vhost_user_get_vhost_net(nc);
525         assert(vhost_net);
526         break;
527 #endif
528 #ifdef CONFIG_VHOST_NET_VDPA
529     case NET_CLIENT_DRIVER_VHOST_VDPA:
530         vhost_net = vhost_vdpa_get_vhost_net(nc);
531         assert(vhost_net);
532         break;
533 #endif
534     default:
535         break;
536     }
537 
538     return vhost_net;
539 }
540 
541 int vhost_set_vring_enable(NetClientState *nc, int enable)
542 {
543     VHostNetState *net = get_vhost_net(nc);
544     const VhostOps *vhost_ops = net->dev.vhost_ops;
545 
546     /*
547      * vhost-vdpa network devices need to enable dataplane virtqueues after
548      * DRIVER_OK, so they can recover device state before starting dataplane.
549      * Because of that, we don't enable virtqueues here and leave it to
550      * net/vhost-vdpa.c.
551      */
552     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
553         return 0;
554     }
555 
556     nc->vring_enable = enable;
557 
558     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
559         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
560     }
561 
562     return 0;
563 }
564 
565 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
566 {
567     const VhostOps *vhost_ops = net->dev.vhost_ops;
568 
569     if (!vhost_ops->vhost_net_set_mtu) {
570         return 0;
571     }
572 
573     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
574 }
575 
576 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
577                                int vq_index)
578 {
579     VHostNetState *net = get_vhost_net(nc->peer);
580     const VhostOps *vhost_ops = net->dev.vhost_ops;
581     struct vhost_vring_file file = { .fd = -1 };
582     int idx;
583 
584     /* should only be called after backend is connected */
585     assert(vhost_ops);
586 
587     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
588 
589     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
590         file.index = idx;
591         int r = vhost_net_set_backend(&net->dev, &file);
592         assert(r >= 0);
593     }
594 
595     vhost_virtqueue_stop(&net->dev,
596                          vdev,
597                          net->dev.vqs + idx,
598                          net->dev.vq_index + idx);
599 }
600 
/*
 * Restart a single virtqueue of the vhost instance behind @nc's peer.
 *
 * @vq_index is translated with the backend's vhost_get_vq_index
 * callback.  The vhost virtqueue is started and, for tap clients, the
 * tap fd is attached to it again.
 *
 * Returns 0 on success, -EBUSY when the vhost device is not started, or
 * a negative value when starting the queue or attaching the backend
 * fails.  In the failure case the whole vhost device is stopped.
 */
int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            /* Assumes the failing call left its reason in errno. */
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /*
         * Detach the backend from this queue.  Other functions in this
         * file use a literal -1 for the same purpose; presumably
         * VHOST_FILE_UNBIND has that value -- TODO confirm.
         */
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int ret = vhost_net_set_backend(&net->dev, &file);
        assert(ret >= 0);
    }

    /* Note: this stops the whole vhost device, not only queue idx. */
    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}
652