xref: /openbmc/qemu/hw/net/vhost_net.c (revision bd38794a1119ec8e3f0a7473458ce4cdd229bc42)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
40 {
41     return vhost_get_features(&net->dev, net->feature_bits,
42             features);
43 }
44 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
45                          uint32_t config_len)
46 {
47     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
48 }
49 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
50                          uint32_t offset, uint32_t size, uint32_t flags)
51 {
52     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
53 }
54 
/*
 * Record the guest-acknowledged feature set.  Start from the backend's
 * own features, then fold in the acked bits listed in feature_bits.
 */
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, net->feature_bits, features);
}
60 
/* Maximum number of queues reported by the vhost device. */
uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}
65 
/* Feature bits previously recorded by vhost_net_ack_features(). */
uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}
70 
/*
 * Persist the acked features for @nc across a backend disconnect.
 * Only vhost-user keeps such state; for every other driver (or when
 * vhost-user support is compiled out) this is a no-op.
 */
void vhost_net_save_acked_features(NetClientState *nc)
{
#ifdef CONFIG_VHOST_NET_USER
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_user_save_acked_features(nc);
    }
#endif
}
79 
/*
 * Unwire host notifiers for the first @nvhosts vhost devices behind @dev.
 * In @ncs the data queue-pair peers come first; the peer at index
 * n->max_queue_pairs backs the control virtqueue.
 *
 * Done in two passes: first unset every notifier inside one memory
 * transaction, then (after commit) clean them up and drop the ioeventfd
 * references taken when the notifiers were enabled.
 */
static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int nvhosts)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j;
    NetClientState *peer;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             false);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier cleanup failed: %d",
                              i, j, -r);
            }
            /* Deliberate: log the failure above, then abort here. */
            assert(r >= 0);
        }
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j);
        }
        /* Balance the grab done per vhost in vhost_net_enable_notifiers(). */
        virtio_device_release_ioeventfd(dev);
    }
}
138 
/*
 * Route guest IO notifications to the vhost backends: take one
 * ioeventfd reference per vhost device, then wire up a host notifier
 * for every virtqueue of every vhost device (data queue pairs plus an
 * optional control virtqueue).
 *
 * Returns 0 on success or a negative error; on failure all partially
 * completed set-up (notifiers and ioeventfd references) is rolled back.
 */
static int vhost_net_enable_notifiers(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int cvq)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j, k;
    NetClientState *peer;

    /*
     * We will pass the notifiers to the kernel, make sure that QEMU
     * doesn't interfere.
     */
    for (i = 0; i < nvhosts; i++) {
        r = virtio_device_grab_ioeventfd(dev);
        if (r < 0) {
            error_report("vhost %d binding does not support host notifiers", i);
            /* Release only the references grabbed so far. */
            for (k = 0; k < i; k++) {
                virtio_device_release_ioeventfd(dev);
            }
            return r;
        }
    }

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;

        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             true);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier binding failed: %d",
                              i, j, -r);
                /* Commit first: the pending transaction must not leak. */
                memory_region_transaction_commit();
                /* Undo the j notifiers already set on this vhost dev. */
                vhost_dev_disable_notifiers_nvqs(hdev, dev, j);
                goto fail_nvhosts;
            }
        }
    }

    memory_region_transaction_commit();

    return 0;
fail_nvhosts:
    /* Tear down the i fully-wired vhost devices (also drops their fds). */
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, i);
    /*
     * This for loop starts from i+1, not i, because the i-th ioeventfd
     * has already been released in vhost_dev_disable_notifiers_nvqs().
     */
    for (k = i + 1; k < nvhosts; k++) {
        virtio_device_release_ioeventfd(dev);
    }

    return r;
}
210 
211 /*
212  * Stop processing guest IO notifications in qemu.
213  * Start processing them in vhost in kernel.
214  */
215 static void vhost_net_disable_notifiers(VirtIODevice *dev,
216                 NetClientState *ncs, int data_queue_pairs, int cvq)
217 {
218     vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs,
219                                         data_queue_pairs + cvq);
220 }
221 
222 static int vhost_net_get_fd(NetClientState *backend)
223 {
224     switch (backend->info->type) {
225     case NET_CLIENT_DRIVER_TAP:
226         return tap_get_fd(backend);
227     default:
228         fprintf(stderr, "vhost-net requires tap backend\n");
229         return -ENOSYS;
230     }
231 }
232 
/*
 * Allocate and initialise a vhost_net instance for the backend described
 * by @options.
 *
 * Returns the new vhost_net on success, NULL on failure.  On failure any
 * partially constructed state is cleaned up and freed here.
 */
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;
    net->feature_bits = options->feature_bits;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        /* Kernel vhost-net reads/writes the tap fd directly. */
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        /*
         * If the tap backend can't supply a virtio-net header itself,
         * request that the kernel prepend one.
         */
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        /* Mergeable rx buffers need a larger vnet header on the tap. */
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                   " for backend\n",
                   (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
    if (options->get_acked_features) {
        features = options->get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }

    vhost_net_ack_features(net, features);

    return net;

fail:
    /*
     * NOTE(review): on the early failure paths vhost_dev_init() has not
     * run; this relies on vhost_dev_cleanup() being safe on a
     * zero-initialised vhost_dev — confirm against hw/virtio/vhost.c.
     */
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}
310 
/*
 * Assign this vhost device its slice of the virtio queue index space:
 * [vq_index, vq_index_end) across all vhost devices of the NIC.
 */
static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}
317 
/*
 * Start a single vhost device: notify the net client, start the vhost
 * dev, stop QEMU-side polling, and (for tap) attach the backend fd to
 * each enabled virtqueue.  Returns 0 or a negative error; on failure
 * everything done so far is unwound in reverse order.
 */
static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    /* vhost handles the traffic now; stop QEMU's own polling. */
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /* Detach QEMU's fd handler before handing the fd to the kernel. */
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    /* Detach the backend (fd = -1) from the queues attached above. */
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int ret = vhost_net_set_backend(&net->dev, &file);
            assert(ret >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    return r;
}
384 
/*
 * Stop a single vhost device: detach the backend from every queue
 * (tap only), resume QEMU-side polling, stop the vhost dev and notify
 * the net client.  Mirrors vhost_net_start_one() in reverse.
 */
static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    /* fd = -1 detaches the backend from the queue. */
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
}
404 
/*
 * Start vhost for all queue pairs of a virtio-net device.
 *
 * @ncs holds data_queue_pairs data peers first; the control virtqueue
 * peer (if @cvq) sits at index n->max_queue_pairs.  Each data queue
 * pair occupies two virtio queue indexes, the cvq one more.
 *
 * Phases: assign vq index ranges, enable host notifiers, bind guest
 * notifiers, then start each vhost device.  Returns 0 or a negative
 * error; on failure completed phases are rolled back in reverse.
 */
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {

        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        /* Each data queue pair owns two queue indexes (rx, tx). */
        vhost_net_set_vq_index(net, i * 2, index_end);

        /* Suppress the masking guest notifiers on vhost user
         * because vhost user doesn't interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
     }

    r = vhost_net_enable_notifiers(dev, ncs, data_queue_pairs, cvq);
    if (r < 0) {
        error_report("Error enabling host notifiers: %d", -r);
        goto err;
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err_host_notifiers;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_net_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_guest_notifiers;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_guest_notifiers;
        }
    }

    return 0;

err_guest_notifiers:
    /* Stop only the vhost devices that were successfully started. */
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err_host_notifiers:
    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
err:
    return r;
}
499 
/*
 * Stop vhost for all queue pairs of a virtio-net device: stop each
 * vhost device, unbind guest notifiers, then disable host notifiers.
 * Reverse of vhost_net_start().
 */
void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    /* Deliberate: report the failure above, then abort here. */
    assert(r >= 0);

    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
}
530 
/*
 * Release vhost device state.  Does not free @net itself; the owner
 * must g_free() it afterwards.
 */
void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}
535 
/*
 * Tell a vhost-user backend that migration finished, passing the
 * device's MAC address.  Only valid for vhost-user backends that
 * implement vhost_migration_done; asserts otherwise.
 */
int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}
545 
/* True if virtqueue @idx has a pending (unmasked-deferred) interrupt. */
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}
550 
/* Mask or unmask guest notifications for virtqueue @idx. */
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
556 
/* True if a config-change interrupt is pending for this vhost device. */
bool vhost_net_config_pending(VHostNetState *net)
{
    return vhost_config_pending(&net->dev);
}
561 
/* Mask or unmask the config-change interrupt for this vhost device. */
void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
    vhost_config_mask(&net->dev, dev, mask);
}
566 
567 VHostNetState *get_vhost_net(NetClientState *nc)
568 {
569     if (!nc) {
570         return 0;
571     }
572 
573     if (nc->info->get_vhost_net) {
574         return nc->info->get_vhost_net(nc);
575     }
576 
577     return NULL;
578 }
579 
580 int vhost_net_set_vring_enable(NetClientState *nc, int enable)
581 {
582     VHostNetState *net = get_vhost_net(nc);
583     const VhostOps *vhost_ops = net->dev.vhost_ops;
584 
585     /*
586      * vhost-vdpa network devices need to enable dataplane virtqueues after
587      * DRIVER_OK, so they can recover device state before starting dataplane.
588      * Because of that, we don't enable virtqueues here and leave it to
589      * net/vhost-vdpa.c.
590      */
591     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
592         return 0;
593     }
594 
595     nc->vring_enable = enable;
596 
597     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
598         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
599     }
600 
601     return 0;
602 }
603 
604 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
605 {
606     const VhostOps *vhost_ops = net->dev.vhost_ops;
607 
608     if (!vhost_ops->vhost_net_set_mtu) {
609         return 0;
610     }
611 
612     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
613 }
614 
/*
 * Reset one virtqueue of @nc's peer vhost device: detach the tap
 * backend from it (fd = -1) and stop the vhost virtqueue.
 * Must only be called while the backend is connected.
 */
void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    /* Translate the device-global queue index to this dev's local index. */
    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}
639 
/*
 * Restart one previously reset virtqueue of @nc's peer vhost device:
 * start the vhost virtqueue and (for tap) reattach the backend fd.
 * Returns 0 on success or a negative error; on failure the queue is
 * unbound and the whole vhost device is stopped.
 * Must only be called while the backend is connected.
 */
int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    /* Translate the device-global queue index to this dev's local index. */
    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /* Unbind the backend from the queue before stopping the device. */
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int ret = vhost_net_set_backend(&net->dev, &file);
        assert(ret >= 0);
    }

    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}
691