xref: /openbmc/qemu/hw/net/vhost_net.c (revision effdacbf28d76ca0eec9086539649e547e510bbc)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
40 {
41     return vhost_get_features(&net->dev, net->feature_bits,
42             features);
43 }
44 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
45                          uint32_t config_len)
46 {
47     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
48 }
49 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
50                          uint32_t offset, uint32_t size, uint32_t flags)
51 {
52     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
53 }
54 
55 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
56 {
57     net->dev.acked_features = net->dev.backend_features;
58     vhost_ack_features(&net->dev, net->feature_bits, features);
59 }
60 
61 uint64_t vhost_net_get_max_queues(VHostNetState *net)
62 {
63     return net->dev.max_queues;
64 }
65 
66 uint64_t vhost_net_get_acked_features(VHostNetState *net)
67 {
68     return net->dev.acked_features;
69 }
70 
71 void vhost_net_save_acked_features(NetClientState *nc)
72 {
73 #ifdef CONFIG_VHOST_NET_USER
74     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
75         vhost_user_save_acked_features(nc);
76     }
77 #endif
78 }
79 
80 static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
81                 NetClientState *ncs, int data_queue_pairs, int nvhosts)
82 {
83     VirtIONet *n = VIRTIO_NET(dev);
84     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
85     struct vhost_net *net;
86     struct vhost_dev *hdev;
87     int r, i, j;
88     NetClientState *peer;
89 
90     /*
91      * Batch all the host notifiers in a single transaction to avoid
92      * quadratic time complexity in address_space_update_ioeventfds().
93      */
94     memory_region_transaction_begin();
95 
96     for (i = 0; i < nvhosts; i++) {
97         if (i < data_queue_pairs) {
98             peer = qemu_get_peer(ncs, i);
99         } else {
100             peer = qemu_get_peer(ncs, n->max_queue_pairs);
101         }
102 
103         net = get_vhost_net(peer);
104         hdev = &net->dev;
105         for (j = 0; j < hdev->nvqs; j++) {
106             r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
107                                              hdev->vq_index + j,
108                                              false);
109             if (r < 0) {
110                 error_report("vhost %d VQ %d notifier cleanup failed: %d",
111                               i, j, -r);
112             }
113             assert(r >= 0);
114         }
115     }
116     /*
117      * The transaction expects the ioeventfds to be open when it
118      * commits. Do it now, before the cleanup loop.
119      */
120     memory_region_transaction_commit();
121 
122     for (i = 0; i < nvhosts; i++) {
123         if (i < data_queue_pairs) {
124             peer = qemu_get_peer(ncs, i);
125         } else {
126             peer = qemu_get_peer(ncs, n->max_queue_pairs);
127         }
128 
129         net = get_vhost_net(peer);
130         hdev = &net->dev;
131         for (j = 0; j < hdev->nvqs; j++) {
132             virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
133                                              hdev->vq_index + j);
134         }
135         virtio_device_release_ioeventfd(dev);
136     }
137 }
138 
139 static int vhost_net_enable_notifiers(VirtIODevice *dev,
140                 NetClientState *ncs, int data_queue_pairs, int cvq)
141 {
142     VirtIONet *n = VIRTIO_NET(dev);
143     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
144     int nvhosts = data_queue_pairs + cvq;
145     struct vhost_net *net;
146     struct vhost_dev *hdev;
147     int r, i, j, k;
148     NetClientState *peer;
149 
150     /*
151      * We will pass the notifiers to the kernel, make sure that QEMU
152      * doesn't interfere.
153      */
154     for (i = 0; i < nvhosts; i++) {
155         r = virtio_device_grab_ioeventfd(dev);
156         if (r < 0) {
157             error_report("vhost %d binding does not support host notifiers", i);
158             for (k = 0; k < i; k++) {
159                 virtio_device_release_ioeventfd(dev);
160             }
161             return r;
162         }
163     }
164 
165     /*
166      * Batch all the host notifiers in a single transaction to avoid
167      * quadratic time complexity in address_space_update_ioeventfds().
168      */
169     memory_region_transaction_begin();
170 
171     for (i = 0; i < nvhosts; i++) {
172         if (i < data_queue_pairs) {
173             peer = qemu_get_peer(ncs, i);
174         } else {
175             peer = qemu_get_peer(ncs, n->max_queue_pairs);
176         }
177 
178         net = get_vhost_net(peer);
179         hdev = &net->dev;
180 
181         for (j = 0; j < hdev->nvqs; j++) {
182             r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
183                                              hdev->vq_index + j,
184                                              true);
185             if (r < 0) {
186                 error_report("vhost %d VQ %d notifier binding failed: %d",
187                               i, j, -r);
188                 memory_region_transaction_commit();
189                 vhost_dev_disable_notifiers_nvqs(hdev, dev, j);
190                 goto fail_nvhosts;
191             }
192         }
193     }
194 
195     memory_region_transaction_commit();
196 
197     return 0;
198 fail_nvhosts:
199     vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, i);
200     /*
201      * This for loop starts from i+1, not i, because the i-th ioeventfd
202      * has already been released in vhost_dev_disable_notifiers_nvqs().
203      */
204     for (k = i + 1; k < nvhosts; k++) {
205         virtio_device_release_ioeventfd(dev);
206     }
207 
208     return r;
209 }
210 
211 /*
212  * Stop processing guest IO notifications in qemu.
213  * Start processing them in vhost in kernel.
214  */
215 static void vhost_net_disable_notifiers(VirtIODevice *dev,
216                 NetClientState *ncs, int data_queue_pairs, int cvq)
217 {
218     vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs,
219                                         data_queue_pairs + cvq);
220 }
221 
222 static int vhost_net_get_fd(NetClientState *backend)
223 {
224     switch (backend->info->type) {
225     case NET_CLIENT_DRIVER_TAP:
226         return tap_get_fd(backend);
227     default:
228         fprintf(stderr, "vhost-net requires tap backend\n");
229         return -ENOSYS;
230     }
231 }
232 
233 struct vhost_net *vhost_net_init(VhostNetOptions *options)
234 {
235     int r;
236     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
237     struct vhost_net *net = g_new0(struct vhost_net, 1);
238     uint64_t features = 0;
239     Error *local_err = NULL;
240 
241     if (!options->net_backend) {
242         fprintf(stderr, "vhost-net requires net backend to be setup\n");
243         goto fail;
244     }
245     net->nc = options->net_backend;
246     net->dev.nvqs = options->nvqs;
247     net->feature_bits = options->feature_bits;
248 
249     net->dev.max_queues = 1;
250     net->dev.vqs = net->vqs;
251 
252     if (backend_kernel) {
253         r = vhost_net_get_fd(options->net_backend);
254         if (r < 0) {
255             goto fail;
256         }
257         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
258             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
259         net->backend = r;
260         net->dev.protocol_features = 0;
261     } else {
262         net->dev.backend_features = 0;
263         net->dev.protocol_features = 0;
264         net->backend = -1;
265 
266         /* vhost-user needs vq_index to initiate a specific queue pair */
267         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
268     }
269 
270     r = vhost_dev_init(&net->dev, options->opaque,
271                        options->backend_type, options->busyloop_timeout,
272                        &local_err);
273     if (r < 0) {
274         error_report_err(local_err);
275         goto fail;
276     }
277     if (backend_kernel) {
278         if (!qemu_has_vnet_hdr_len(options->net_backend,
279                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
280             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
281         }
282         if (~net->dev.features & net->dev.backend_features) {
283             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
284                    " for backend\n",
285                    (uint64_t)(~net->dev.features & net->dev.backend_features));
286             goto fail;
287         }
288     }
289 
290     /* Set sane init value. Override when guest acks. */
291 #ifdef CONFIG_VHOST_NET_USER
292     if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
293         features = vhost_user_get_acked_features(net->nc);
294         if (~net->dev.features & features) {
295             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
296                     " for backend\n",
297                     (uint64_t)(~net->dev.features & features));
298             goto fail;
299         }
300     }
301 #endif
302 
303     vhost_net_ack_features(net, features);
304 
305     return net;
306 
307 fail:
308     vhost_dev_cleanup(&net->dev);
309     g_free(net);
310     return NULL;
311 }
312 
313 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
314                                    int vq_index_end)
315 {
316     net->dev.vq_index = vq_index;
317     net->dev.vq_index_end = vq_index_end;
318 }
319 
320 static int vhost_net_start_one(struct vhost_net *net,
321                                VirtIODevice *dev)
322 {
323     struct vhost_vring_file file = { };
324     int r;
325 
326     if (net->nc->info->start) {
327         r = net->nc->info->start(net->nc);
328         if (r < 0) {
329             return r;
330         }
331     }
332 
333     r = vhost_dev_start(&net->dev, dev, false);
334     if (r < 0) {
335         goto fail_start;
336     }
337 
338     if (net->nc->info->poll) {
339         net->nc->info->poll(net->nc, false);
340     }
341 
342     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
343         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
344         file.fd = net->backend;
345         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
346             if (!virtio_queue_enabled(dev, net->dev.vq_index +
347                                       file.index)) {
348                 /* Queue might not be ready for start */
349                 continue;
350             }
351             r = vhost_net_set_backend(&net->dev, &file);
352             if (r < 0) {
353                 r = -errno;
354                 goto fail;
355             }
356         }
357     }
358 
359     if (net->nc->info->load) {
360         r = net->nc->info->load(net->nc);
361         if (r < 0) {
362             goto fail;
363         }
364     }
365     return 0;
366 fail:
367     file.fd = -1;
368     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
369         while (file.index-- > 0) {
370             if (!virtio_queue_enabled(dev, net->dev.vq_index +
371                                       file.index)) {
372                 /* Queue might not be ready for start */
373                 continue;
374             }
375             int ret = vhost_net_set_backend(&net->dev, &file);
376             assert(ret >= 0);
377         }
378     }
379     if (net->nc->info->poll) {
380         net->nc->info->poll(net->nc, true);
381     }
382     vhost_dev_stop(&net->dev, dev, false);
383 fail_start:
384     return r;
385 }
386 
387 static void vhost_net_stop_one(struct vhost_net *net,
388                                VirtIODevice *dev)
389 {
390     struct vhost_vring_file file = { .fd = -1 };
391 
392     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
393         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
394             int r = vhost_net_set_backend(&net->dev, &file);
395             assert(r >= 0);
396         }
397     }
398     if (net->nc->info->poll) {
399         net->nc->info->poll(net->nc, true);
400     }
401     vhost_dev_stop(&net->dev, dev, false);
402     if (net->nc->info->stop) {
403         net->nc->info->stop(net->nc);
404     }
405 }
406 
407 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
408                     int data_queue_pairs, int cvq)
409 {
410     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
411     VirtioBusState *vbus = VIRTIO_BUS(qbus);
412     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
413     int total_notifiers = data_queue_pairs * 2 + cvq;
414     VirtIONet *n = VIRTIO_NET(dev);
415     int nvhosts = data_queue_pairs + cvq;
416     struct vhost_net *net;
417     int r, e, i, index_end = data_queue_pairs * 2;
418     NetClientState *peer;
419 
420     if (cvq) {
421         index_end += 1;
422     }
423 
424     if (!k->set_guest_notifiers) {
425         error_report("binding does not support guest notifiers");
426         return -ENOSYS;
427     }
428 
429     for (i = 0; i < nvhosts; i++) {
430 
431         if (i < data_queue_pairs) {
432             peer = qemu_get_peer(ncs, i);
433         } else { /* Control Virtqueue */
434             peer = qemu_get_peer(ncs, n->max_queue_pairs);
435         }
436 
437         net = get_vhost_net(peer);
438         vhost_net_set_vq_index(net, i * 2, index_end);
439 
440         /* Suppress the masking guest notifiers on vhost user
441          * because vhost user doesn't interrupt masking/unmasking
442          * properly.
443          */
444         if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
445             dev->use_guest_notifier_mask = false;
446         }
447      }
448 
449     r = vhost_net_enable_notifiers(dev, ncs, data_queue_pairs, cvq);
450     if (r < 0) {
451         error_report("Error enabling host notifiers: %d", -r);
452         goto err;
453     }
454 
455     r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
456     if (r < 0) {
457         error_report("Error binding guest notifier: %d", -r);
458         goto err_host_notifiers;
459     }
460 
461     for (i = 0; i < nvhosts; i++) {
462         if (i < data_queue_pairs) {
463             peer = qemu_get_peer(ncs, i);
464         } else {
465             peer = qemu_get_peer(ncs, n->max_queue_pairs);
466         }
467 
468         if (peer->vring_enable) {
469             /* restore vring enable state */
470             r = vhost_net_set_vring_enable(peer, peer->vring_enable);
471 
472             if (r < 0) {
473                 goto err_guest_notifiers;
474             }
475         }
476 
477         r = vhost_net_start_one(get_vhost_net(peer), dev);
478         if (r < 0) {
479             goto err_guest_notifiers;
480         }
481     }
482 
483     return 0;
484 
485 err_guest_notifiers:
486     while (--i >= 0) {
487         peer = qemu_get_peer(ncs, i < data_queue_pairs ?
488                                   i : n->max_queue_pairs);
489         vhost_net_stop_one(get_vhost_net(peer), dev);
490     }
491     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
492     if (e < 0) {
493         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
494         fflush(stderr);
495     }
496 err_host_notifiers:
497     vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
498 err:
499     return r;
500 }
501 
502 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
503                     int data_queue_pairs, int cvq)
504 {
505     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
506     VirtioBusState *vbus = VIRTIO_BUS(qbus);
507     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
508     VirtIONet *n = VIRTIO_NET(dev);
509     NetClientState *peer;
510     int total_notifiers = data_queue_pairs * 2 + cvq;
511     int nvhosts = data_queue_pairs + cvq;
512     int i, r;
513 
514     for (i = 0; i < nvhosts; i++) {
515         if (i < data_queue_pairs) {
516             peer = qemu_get_peer(ncs, i);
517         } else {
518             peer = qemu_get_peer(ncs, n->max_queue_pairs);
519         }
520         vhost_net_stop_one(get_vhost_net(peer), dev);
521     }
522 
523     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
524     if (r < 0) {
525         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
526         fflush(stderr);
527     }
528     assert(r >= 0);
529 
530     vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
531 }
532 
533 void vhost_net_cleanup(struct vhost_net *net)
534 {
535     vhost_dev_cleanup(&net->dev);
536 }
537 
538 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
539 {
540     const VhostOps *vhost_ops = net->dev.vhost_ops;
541 
542     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
543     assert(vhost_ops->vhost_migration_done);
544 
545     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
546 }
547 
548 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
549 {
550     return vhost_virtqueue_pending(&net->dev, idx);
551 }
552 
553 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
554                               int idx, bool mask)
555 {
556     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
557 }
558 
559 bool vhost_net_config_pending(VHostNetState *net)
560 {
561     return vhost_config_pending(&net->dev);
562 }
563 
564 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
565 {
566     vhost_config_mask(&net->dev, dev, mask);
567 }
568 
569 VHostNetState *get_vhost_net(NetClientState *nc)
570 {
571     if (!nc) {
572         return 0;
573     }
574 
575     if (nc->info->get_vhost_net) {
576         return nc->info->get_vhost_net(nc);
577     }
578 
579     return NULL;
580 }
581 
582 int vhost_net_set_vring_enable(NetClientState *nc, int enable)
583 {
584     VHostNetState *net = get_vhost_net(nc);
585     const VhostOps *vhost_ops = net->dev.vhost_ops;
586 
587     /*
588      * vhost-vdpa network devices need to enable dataplane virtqueues after
589      * DRIVER_OK, so they can recover device state before starting dataplane.
590      * Because of that, we don't enable virtqueues here and leave it to
591      * net/vhost-vdpa.c.
592      */
593     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
594         return 0;
595     }
596 
597     nc->vring_enable = enable;
598 
599     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
600         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
601     }
602 
603     return 0;
604 }
605 
606 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
607 {
608     const VhostOps *vhost_ops = net->dev.vhost_ops;
609 
610     if (!vhost_ops->vhost_net_set_mtu) {
611         return 0;
612     }
613 
614     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
615 }
616 
617 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
618                                int vq_index)
619 {
620     VHostNetState *net = get_vhost_net(nc->peer);
621     const VhostOps *vhost_ops = net->dev.vhost_ops;
622     struct vhost_vring_file file = { .fd = -1 };
623     int idx;
624 
625     /* should only be called after backend is connected */
626     assert(vhost_ops);
627 
628     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
629 
630     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
631         file.index = idx;
632         int r = vhost_net_set_backend(&net->dev, &file);
633         assert(r >= 0);
634     }
635 
636     vhost_virtqueue_stop(&net->dev,
637                          vdev,
638                          net->dev.vqs + idx,
639                          net->dev.vq_index + idx);
640 }
641 
642 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
643                                 int vq_index)
644 {
645     VHostNetState *net = get_vhost_net(nc->peer);
646     const VhostOps *vhost_ops = net->dev.vhost_ops;
647     struct vhost_vring_file file = { };
648     int idx, r;
649 
650     if (!net->dev.started) {
651         return -EBUSY;
652     }
653 
654     /* should only be called after backend is connected */
655     assert(vhost_ops);
656 
657     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
658 
659     r = vhost_virtqueue_start(&net->dev,
660                               vdev,
661                               net->dev.vqs + idx,
662                               net->dev.vq_index + idx);
663     if (r < 0) {
664         goto err_start;
665     }
666 
667     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
668         file.index = idx;
669         file.fd = net->backend;
670         r = vhost_net_set_backend(&net->dev, &file);
671         if (r < 0) {
672             r = -errno;
673             goto err_start;
674         }
675     }
676 
677     return 0;
678 
679 err_start:
680     error_report("Error when restarting the queue.");
681 
682     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
683         file.fd = VHOST_FILE_UNBIND;
684         file.index = idx;
685         int ret = vhost_net_set_backend(&net->dev, &file);
686         assert(ret >= 0);
687     }
688 
689     vhost_dev_stop(&net->dev, vdev, false);
690 
691     return r;
692 }
693