xref: /openbmc/qemu/hw/net/vhost_net.c (revision 9884abee)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 
21 #include "hw/virtio/virtio-net.h"
22 #include "net/vhost_net.h"
23 #include "qemu/error-report.h"
24 
25 
26 #ifdef CONFIG_VHOST_NET
27 #include <linux/vhost.h>
28 #include <sys/socket.h>
29 #include <linux/kvm.h>
30 #include <netpacket/packet.h>
31 #include <net/ethernet.h>
32 #include <net/if.h>
33 #include <netinet/in.h>
34 
35 
36 #include "standard-headers/linux/virtio_ring.h"
37 #include "hw/virtio/vhost.h"
38 #include "hw/virtio/virtio-bus.h"
39 #include "hw/virtio/virtio-access.h"
40 
41 struct vhost_net {
42     struct vhost_dev dev;
43     struct vhost_virtqueue vqs[2];
44     int backend;
45     NetClientState *nc;
46 };
47 
48 /* Features supported by host kernel. */
49 static const int kernel_feature_bits[] = {
50     VIRTIO_F_NOTIFY_ON_EMPTY,
51     VIRTIO_RING_F_INDIRECT_DESC,
52     VIRTIO_RING_F_EVENT_IDX,
53     VIRTIO_NET_F_MRG_RXBUF,
54     VIRTIO_F_VERSION_1,
55     VHOST_INVALID_FEATURE_BIT
56 };
57 
58 /* Features supported by others. */
59 static const int user_feature_bits[] = {
60     VIRTIO_F_NOTIFY_ON_EMPTY,
61     VIRTIO_RING_F_INDIRECT_DESC,
62     VIRTIO_RING_F_EVENT_IDX,
63 
64     VIRTIO_F_ANY_LAYOUT,
65     VIRTIO_F_VERSION_1,
66     VIRTIO_NET_F_CSUM,
67     VIRTIO_NET_F_GUEST_CSUM,
68     VIRTIO_NET_F_GSO,
69     VIRTIO_NET_F_GUEST_TSO4,
70     VIRTIO_NET_F_GUEST_TSO6,
71     VIRTIO_NET_F_GUEST_ECN,
72     VIRTIO_NET_F_GUEST_UFO,
73     VIRTIO_NET_F_HOST_TSO4,
74     VIRTIO_NET_F_HOST_TSO6,
75     VIRTIO_NET_F_HOST_ECN,
76     VIRTIO_NET_F_HOST_UFO,
77     VIRTIO_NET_F_MRG_RXBUF,
78 
79     /* This bit implies RARP isn't sent by QEMU out of band */
80     VIRTIO_NET_F_GUEST_ANNOUNCE,
81 
82     VIRTIO_NET_F_MQ,
83 
84     VHOST_INVALID_FEATURE_BIT
85 };
86 
87 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
88 {
89     const int *feature_bits = 0;
90 
91     switch (net->nc->info->type) {
92     case NET_CLIENT_OPTIONS_KIND_TAP:
93         feature_bits = kernel_feature_bits;
94         break;
95     case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
96         feature_bits = user_feature_bits;
97         break;
98     default:
99         error_report("Feature bits not defined for this type: %d",
100                 net->nc->info->type);
101         break;
102     }
103 
104     return feature_bits;
105 }
106 
107 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
108 {
109     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
110             features);
111 }
112 
113 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
114 {
115     net->dev.acked_features = net->dev.backend_features;
116     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
117 }
118 
119 uint64_t vhost_net_get_max_queues(VHostNetState *net)
120 {
121     return net->dev.max_queues;
122 }
123 
124 static int vhost_net_get_fd(NetClientState *backend)
125 {
126     switch (backend->info->type) {
127     case NET_CLIENT_OPTIONS_KIND_TAP:
128         return tap_get_fd(backend);
129     default:
130         fprintf(stderr, "vhost-net requires tap backend\n");
131         return -EBADFD;
132     }
133 }
134 
135 struct vhost_net *vhost_net_init(VhostNetOptions *options)
136 {
137     int r;
138     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
139     struct vhost_net *net = g_malloc(sizeof *net);
140 
141     if (!options->net_backend) {
142         fprintf(stderr, "vhost-net requires net backend to be setup\n");
143         goto fail;
144     }
145     net->nc = options->net_backend;
146 
147     net->dev.max_queues = 1;
148     net->dev.nvqs = 2;
149     net->dev.vqs = net->vqs;
150 
151     if (backend_kernel) {
152         r = vhost_net_get_fd(options->net_backend);
153         if (r < 0) {
154             goto fail;
155         }
156         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
157             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
158         net->backend = r;
159         net->dev.protocol_features = 0;
160     } else {
161         net->dev.backend_features = 0;
162         net->dev.protocol_features = 0;
163         net->backend = -1;
164 
165         /* vhost-user needs vq_index to initiate a specific queue pair */
166         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
167     }
168 
169     r = vhost_dev_init(&net->dev, options->opaque,
170                        options->backend_type);
171     if (r < 0) {
172         goto fail;
173     }
174     if (backend_kernel) {
175         if (!qemu_has_vnet_hdr_len(options->net_backend,
176                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
177             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
178         }
179         if (~net->dev.features & net->dev.backend_features) {
180             fprintf(stderr, "vhost lacks feature mask %" PRIu64
181                    " for backend\n",
182                    (uint64_t)(~net->dev.features & net->dev.backend_features));
183             vhost_dev_cleanup(&net->dev);
184             goto fail;
185         }
186     }
187     /* Set sane init value. Override when guest acks. */
188     vhost_net_ack_features(net, 0);
189     return net;
190 fail:
191     g_free(net);
192     return NULL;
193 }
194 
195 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
196 {
197     net->dev.vq_index = vq_index;
198 }
199 
200 static int vhost_net_set_vnet_endian(VirtIODevice *dev, NetClientState *peer,
201                                      bool set)
202 {
203     int r = 0;
204 
205     if (virtio_vdev_has_feature(dev, VIRTIO_F_VERSION_1) ||
206         (virtio_legacy_is_cross_endian(dev) && !virtio_is_big_endian(dev))) {
207         r = qemu_set_vnet_le(peer, set);
208         if (r) {
209             error_report("backend does not support LE vnet headers");
210         }
211     } else if (virtio_legacy_is_cross_endian(dev)) {
212         r = qemu_set_vnet_be(peer, set);
213         if (r) {
214             error_report("backend does not support BE vnet headers");
215         }
216     }
217 
218     return r;
219 }
220 
221 static int vhost_net_start_one(struct vhost_net *net,
222                                VirtIODevice *dev)
223 {
224     struct vhost_vring_file file = { };
225     int r;
226 
227     net->dev.nvqs = 2;
228     net->dev.vqs = net->vqs;
229 
230     r = vhost_dev_enable_notifiers(&net->dev, dev);
231     if (r < 0) {
232         goto fail_notifiers;
233     }
234 
235     r = vhost_dev_start(&net->dev, dev);
236     if (r < 0) {
237         goto fail_start;
238     }
239 
240     if (net->nc->info->poll) {
241         net->nc->info->poll(net->nc, false);
242     }
243 
244     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
245         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
246         file.fd = net->backend;
247         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
248             const VhostOps *vhost_ops = net->dev.vhost_ops;
249             r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
250             if (r < 0) {
251                 r = -errno;
252                 goto fail;
253             }
254         }
255     }
256     return 0;
257 fail:
258     file.fd = -1;
259     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
260         while (file.index-- > 0) {
261             const VhostOps *vhost_ops = net->dev.vhost_ops;
262             int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
263             assert(r >= 0);
264         }
265     }
266     if (net->nc->info->poll) {
267         net->nc->info->poll(net->nc, true);
268     }
269     vhost_dev_stop(&net->dev, dev);
270 fail_start:
271     vhost_dev_disable_notifiers(&net->dev, dev);
272 fail_notifiers:
273     return r;
274 }
275 
276 static void vhost_net_stop_one(struct vhost_net *net,
277                                VirtIODevice *dev)
278 {
279     struct vhost_vring_file file = { .fd = -1 };
280 
281     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
282         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
283             const VhostOps *vhost_ops = net->dev.vhost_ops;
284             int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
285             assert(r >= 0);
286         }
287     }
288     if (net->nc->info->poll) {
289         net->nc->info->poll(net->nc, true);
290     }
291     vhost_dev_stop(&net->dev, dev);
292     vhost_dev_disable_notifiers(&net->dev, dev);
293 }
294 
295 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
296                     int total_queues)
297 {
298     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
299     VirtioBusState *vbus = VIRTIO_BUS(qbus);
300     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
301     int r, e, i;
302 
303     if (!k->set_guest_notifiers) {
304         error_report("binding does not support guest notifiers");
305         r = -ENOSYS;
306         goto err;
307     }
308 
309     r = vhost_net_set_vnet_endian(dev, ncs[0].peer, true);
310     if (r < 0) {
311         goto err;
312     }
313 
314     for (i = 0; i < total_queues; i++) {
315         vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2);
316     }
317 
318     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
319     if (r < 0) {
320         error_report("Error binding guest notifier: %d", -r);
321         goto err_endian;
322     }
323 
324     for (i = 0; i < total_queues; i++) {
325         r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
326 
327         if (r < 0) {
328             goto err_start;
329         }
330     }
331 
332     return 0;
333 
334 err_start:
335     while (--i >= 0) {
336         vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
337     }
338     e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
339     if (e < 0) {
340         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
341         fflush(stderr);
342     }
343 err_endian:
344     vhost_net_set_vnet_endian(dev, ncs[0].peer, false);
345 err:
346     return r;
347 }
348 
349 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
350                     int total_queues)
351 {
352     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
353     VirtioBusState *vbus = VIRTIO_BUS(qbus);
354     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
355     int i, r;
356 
357     for (i = 0; i < total_queues; i++) {
358         vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
359     }
360 
361     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
362     if (r < 0) {
363         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
364         fflush(stderr);
365     }
366     assert(r >= 0);
367 
368     assert(vhost_net_set_vnet_endian(dev, ncs[0].peer, false) >= 0);
369 }
370 
371 void vhost_net_cleanup(struct vhost_net *net)
372 {
373     vhost_dev_cleanup(&net->dev);
374     g_free(net);
375 }
376 
377 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
378 {
379     const VhostOps *vhost_ops = net->dev.vhost_ops;
380     int r = -1;
381 
382     if (vhost_ops->vhost_migration_done) {
383         r = vhost_ops->vhost_migration_done(&net->dev, mac_addr);
384     }
385 
386     return r;
387 }
388 
389 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
390 {
391     return vhost_virtqueue_pending(&net->dev, idx);
392 }
393 
394 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
395                               int idx, bool mask)
396 {
397     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
398 }
399 
400 VHostNetState *get_vhost_net(NetClientState *nc)
401 {
402     VHostNetState *vhost_net = 0;
403 
404     if (!nc) {
405         return 0;
406     }
407 
408     switch (nc->info->type) {
409     case NET_CLIENT_OPTIONS_KIND_TAP:
410         vhost_net = tap_get_vhost_net(nc);
411         break;
412     case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
413         vhost_net = vhost_user_get_vhost_net(nc);
414         break;
415     default:
416         break;
417     }
418 
419     return vhost_net;
420 }
421 
422 int vhost_set_vring_enable(NetClientState *nc, int enable)
423 {
424     VHostNetState *net = get_vhost_net(nc);
425     const VhostOps *vhost_ops = net->dev.vhost_ops;
426 
427     if (vhost_ops->vhost_set_vring_enable) {
428         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
429     }
430 
431     return 0;
432 }
433 
434 #else
435 uint64_t vhost_net_get_max_queues(VHostNetState *net)
436 {
437     return 1;
438 }
439 
440 struct vhost_net *vhost_net_init(VhostNetOptions *options)
441 {
442     error_report("vhost-net support is not compiled in");
443     return NULL;
444 }
445 
446 int vhost_net_start(VirtIODevice *dev,
447                     NetClientState *ncs,
448                     int total_queues)
449 {
450     return -ENOSYS;
451 }
452 void vhost_net_stop(VirtIODevice *dev,
453                     NetClientState *ncs,
454                     int total_queues)
455 {
456 }
457 
/* Stub for builds without CONFIG_VHOST_NET: nothing to release. */
void vhost_net_cleanup(struct vhost_net *net)
{
}
461 
/*
 * Stub for builds without CONFIG_VHOST_NET: returns @features unchanged.
 */
uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    const uint64_t unchanged = features;

    return unchanged;
}
/* Stub for builds without CONFIG_VHOST_NET: acknowledging is a no-op. */
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
}
469 
470 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
471 {
472     return false;
473 }
474 
475 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
476                               int idx, bool mask)
477 {
478 }
479 
/* Stub for builds without CONFIG_VHOST_NET: always returns -1. */
int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
    const int rc = -1;

    return rc;
}
484 
485 VHostNetState *get_vhost_net(NetClientState *nc)
486 {
487     return 0;
488 }
489 
490 int vhost_set_vring_enable(NetClientState *nc, int enable)
491 {
492     return 0;
493 }
494 #endif
495