/* xref: /openbmc/qemu/hw/net/vhost_net.c (revision 05caa062) */
/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>


#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"


/* Features supported by host kernel. */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_NET_F_RSC_EXT,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT
};

/* Features supported by the vhost-user backend. */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_RSC_EXT,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HOST_USO,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};

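/*
 * Return the feature-bit table that matches the backend type of this net
 * client: kernel tap, vhost-user, or (when built in) vhost-vdpa.
 */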
static const int *vhost_net_get_feature_bits(struct vhost_net *net)
{
    const int *feature_bits = 0;

    switch (net->nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        feature_bits = kernel_feature_bits;
        break;
    case NET_CLIENT_DRIVER_VHOST_USER:
        feature_bits = user_feature_bits;
        break;
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        feature_bits = vdpa_feature_bits;
        break;
#endif
    default:
        error_report("Feature bits not defined for this type: %d",
                net->nc->info->type);
        break;
    }

    return feature_bits;
}

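/*
 * Filter @features against the backend: each bit listed in the backend's
 * feature-bit table is kept only if the vhost backend also offers it;
 * bits not in the table are passed through unchanged.
 */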
uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
            features);
}

int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}

int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

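/*
 * Record the feature set acked by the guest: start from the backend's own
 * features and fold in the guest-negotiated bits the backend understands.
 */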
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
}

uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

void vhost_net_save_acked_features(NetClientState *nc)
{
#ifdef CONFIG_VHOST_NET_USER
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_user_save_acked_features(nc);
    }
#endif
}

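/* Only the tap backend exposes a file descriptor usable by vhost-net. */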
static int vhost_net_get_fd(NetClientState *backend)
{
    switch (backend->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        return tap_get_fd(backend);
    default:
        fprintf(stderr, "vhost-net requires tap backend\n");
        return -ENOSYS;
    }
}

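/*
 * Create and initialise a vhost_net instance for the given net backend.
 * Returns NULL on failure.
 *
 * A minimal sketch of a caller (illustrative only; the real callers live in
 * the net client backends such as net/tap.c, and the exact field values
 * shown here are assumptions, not taken from this file):
 *
 *     VhostNetOptions options = {
 *         .backend_type = VHOST_BACKEND_TYPE_KERNEL,
 *         .net_backend = nc,          // peer NetClientState of the device
 *         .busyloop_timeout = 0,
 *         .nvqs = 2,                  // rx + tx for one queue pair
 *         .opaque = (void *)(uintptr_t)vhostfd,
 *     };
 *     struct vhost_net *net = vhost_net_init(&options);
 */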
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to identify a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                   " for backend\n",
                   (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
#ifdef CONFIG_VHOST_NET_USER
    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        features = vhost_user_get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }
#endif

    vhost_net_ack_features(net, features);

    return net;

fail:
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}

static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}

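/*
 * Start vhost for a single queue pair: enable host notifiers, start the
 * vhost device and, for a tap backend, attach the tap fd to every enabled
 * virtqueue. On failure everything done so far is rolled back.
 */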
static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
        goto fail_notifiers;
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int ret = vhost_net_set_backend(&net->dev, &file);
            assert(ret >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
    return r;
}

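/*
 * Stop vhost for a single queue pair: detach the tap backend from every
 * virtqueue, re-enable QEMU's own polling, stop the vhost device and
 * release the host notifiers.
 */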
static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
    vhost_dev_disable_notifiers(&net->dev, dev);
}

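/*
 * Start vhost for all data queue pairs and, if present, the control
 * virtqueue. Each data queue pair uses two virtqueues (rx + tx), so
 * data_queue_pairs * 2 + cvq guest notifiers are bound before the
 * individual vhost devices are started.
 */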
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {

        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /*
         * Suppress guest notifier masking on vhost-user, because vhost-user
         * does not handle interrupt masking/unmasking properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_start;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_start;
        }
    }

    return 0;

err_start:
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err:
    return r;
}

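/*
 * Stop vhost for all data queue pairs and the control virtqueue, then
 * unbind the guest notifiers that vhost_net_start() set up.
 */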
void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}

int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}

bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}

void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}

bool vhost_net_config_pending(VHostNetState *net)
{
    return vhost_config_pending(&net->dev);
}

void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
    vhost_config_mask(&net->dev, dev, mask);
}

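/*
 * Map a NetClientState to its VHostNetState, or NULL when the peer has no
 * vhost instance (e.g. a tap backend configured without vhost).
 */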
VHostNetState *get_vhost_net(NetClientState *nc)
{
    VHostNetState *vhost_net = 0;

    if (!nc) {
        return 0;
    }

    switch (nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        vhost_net = tap_get_vhost_net(nc);
        /*
         * tap_get_vhost_net() can return NULL if the tap netdev backend was
         * created with 'vhost=off', 'vhostforce=off', or without any vhost,
         * vhostforce, or vhostfd option at all (see net_init_tap_one()), so
         * the assertion is omitted here.
         */
        break;
#ifdef CONFIG_VHOST_NET_USER
    case NET_CLIENT_DRIVER_VHOST_USER:
        vhost_net = vhost_user_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        vhost_net = vhost_vdpa_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
    default:
        break;
    }

    return vhost_net;
}

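/*
 * Record the vring enable state for a peer and, where the backend provides
 * the hook, apply it. vhost-vdpa is deliberately skipped; see the comment
 * inside the function.
 */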
int vhost_set_vring_enable(NetClientState *nc, int enable)
{
    VHostNetState *net = get_vhost_net(nc);
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    /*
     * vhost-vdpa network devices need to enable dataplane virtqueues after
     * DRIVER_OK, so they can recover device state before starting dataplane.
     * Because of that, we don't enable virtqueues here and leave it to
     * net/vhost-vdpa.c.
     */
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return 0;
    }

    nc->vring_enable = enable;

    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

    return 0;
}

int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}

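/*
 * Stop a single virtqueue as part of a per-queue reset: detach the tap
 * backend from it (if any) and stop the corresponding vhost virtqueue.
 */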
void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}

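/*
 * Restart a single virtqueue after a per-queue reset: start the vhost
 * virtqueue again and re-attach the tap backend to it. Fails with -EBUSY
 * if the vhost device as a whole is not running.
 */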
int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int ret = vhost_net_set_backend(&net->dev, &file);
        assert(ret >= 0);
    }

    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}