xref: /openbmc/qemu/util/vhost-user-server.c (revision 411132c9)
/*
 * Sharing QEMU devices via vhost-user protocol
 *
 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
1070eb2c07SCoiby Xu #include "qemu/osdep.h"
115feed38cSThomas Huth #include "qemu/error-report.h"
1270eb2c07SCoiby Xu #include "qemu/main-loop.h"
1380a06cc5SStefan Hajnoczi #include "qemu/vhost-user-server.h"
147185c857SStefan Hajnoczi #include "block/aio-wait.h"
1570eb2c07SCoiby Xu 
/*
 * Theory of operation:
 *
 * VuServer is started and stopped by vhost_user_server_start() and
 * vhost_user_server_stop() from the main loop thread. Starting the server
 * opens a vhost-user UNIX domain socket and listens for incoming connections.
 * Only one connection is allowed at a time.
 *
 * The connection is handled by the vu_client_trip() coroutine in the
 * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
 * where libvhost-user calls vu_message_read() to receive the next vhost-user
 * protocol message over the UNIX domain socket.
 *
 * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
 * fds. These fds are also handled in the VuServer->ctx AioContext.
 *
 * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
 * the socket connection. Shutting down the socket connection causes
 * vu_message_read() to fail since no more data can be received from the socket.
 * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
 * libvhost-user before terminating the coroutine. vu_deinit() calls
 * remove_watch() to stop monitoring kick fds and this stops virtqueue
 * processing.
 *
 * When vu_client_trip() has finished cleaning up it schedules a BH in the main
 * loop thread to accept the next client connection.
 *
 * When libvhost-user detects an error it calls panic_cb() and sets the
 * dev->broken flag. Both vu_client_trip() and kick fd processing stop when
 * the dev->broken flag is set.
 *
 * It is possible to switch AioContexts using
 * vhost_user_server_detach_aio_context() and
 * vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
 * AioContext and resume monitoring in the new AioContext. The vu_client_trip()
 * coroutine remains in a yielded state during the switch. This is made
 * possible by QIOChannel's support for spurious coroutine re-entry in
 * qio_channel_yield(). The coroutine will restart I/O when re-entered from the
 * new AioContext.
 */
567185c857SStefan Hajnoczi 
/*
 * Close every file descriptor that arrived together with @vmsg.
 *
 * vmsg->fd_num and the vmsg->fds[] entries are left as they are, so the
 * descriptors must not be used (or closed again) by the caller afterwards.
 */
static void vmsg_close_fds(VhostUserMsg *vmsg)
{
    for (int idx = 0; idx < vmsg->fd_num; idx++) {
        close(vmsg->fds[idx]);
    }
}
6470eb2c07SCoiby Xu 
/*
 * Put every file descriptor that arrived together with @vmsg into
 * non-blocking mode.
 */
static void vmsg_unblock_fds(VhostUserMsg *vmsg)
{
    for (int idx = 0; idx < vmsg->fd_num; idx++) {
        qemu_socket_set_nonblock(vmsg->fds[idx]);
    }
}
7270eb2c07SCoiby Xu 
/*
 * Panic callback registered with vu_init(): log the message in @buf through
 * error_report(). @vu_dev is not used.
 */
static void panic_cb(VuDev *vu_dev, const char *buf)
{
    error_report("vu_panic: %s", buf);
}
7770eb2c07SCoiby Xu 
/*
 * Account for one more in-flight request on @server.
 *
 * Must not be called while vu_client_trip() is waiting for the server to go
 * idle (server->wait_idle is set); the assertion enforces this.
 */
void vhost_user_server_inc_in_flight(VuServer *server)
{
    assert(!server->wait_idle);

    qatomic_inc(&server->in_flight);
}
83520d8b40SKevin Wolf 
/*
 * Account for the completion of one in-flight request on @server.
 *
 * If this was the last outstanding request and vu_client_trip() is waiting
 * for the server to go idle, wake that coroutine up.
 */
void vhost_user_server_dec_in_flight(VuServer *server)
{
    /* fetch_dec returns the value before the decrement: 1 means "now zero" */
    bool was_last = qatomic_fetch_dec(&server->in_flight) == 1;

    if (was_last && server->wait_idle) {
        aio_co_wake(server->co_trip);
    }
}
928f5e9a8eSStefan Hajnoczi 
/*
 * Returns: true while at least one request accounted for with
 * vhost_user_server_inc_in_flight() has not yet been completed.
 */
bool vhost_user_server_has_in_flight(VuServer *server)
{
    if (qatomic_load_acquire(&server->in_flight) > 0) {
        return true;
    }
    return false;
}
97520d8b40SKevin Wolf 
9870eb2c07SCoiby Xu static bool coroutine_fn
vu_message_read(VuDev * vu_dev,int conn_fd,VhostUserMsg * vmsg)9970eb2c07SCoiby Xu vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
10070eb2c07SCoiby Xu {
10170eb2c07SCoiby Xu     struct iovec iov = {
10270eb2c07SCoiby Xu         .iov_base = (char *)vmsg,
10370eb2c07SCoiby Xu         .iov_len = VHOST_USER_HDR_SIZE,
10470eb2c07SCoiby Xu     };
10570eb2c07SCoiby Xu     int rc, read_bytes = 0;
10670eb2c07SCoiby Xu     Error *local_err = NULL;
10770eb2c07SCoiby Xu     const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
10870eb2c07SCoiby Xu     VuServer *server = container_of(vu_dev, VuServer, vu_dev);
10970eb2c07SCoiby Xu     QIOChannel *ioc = server->ioc;
11070eb2c07SCoiby Xu 
1118c7f7cbcSStefan Hajnoczi     vmsg->fd_num = 0;
11270eb2c07SCoiby Xu     if (!ioc) {
11370eb2c07SCoiby Xu         error_report_err(local_err);
11470eb2c07SCoiby Xu         goto fail;
11570eb2c07SCoiby Xu     }
11670eb2c07SCoiby Xu 
11770eb2c07SCoiby Xu     assert(qemu_in_coroutine());
11870eb2c07SCoiby Xu     do {
1198c7f7cbcSStefan Hajnoczi         size_t nfds = 0;
1208c7f7cbcSStefan Hajnoczi         int *fds = NULL;
1218c7f7cbcSStefan Hajnoczi 
12270eb2c07SCoiby Xu         /*
12370eb2c07SCoiby Xu          * qio_channel_readv_full may have short reads, keeping calling it
12470eb2c07SCoiby Xu          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
12570eb2c07SCoiby Xu          */
12684615a19Smanish.mishra         rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
12770eb2c07SCoiby Xu         if (rc < 0) {
12870eb2c07SCoiby Xu             if (rc == QIO_CHANNEL_ERR_BLOCK) {
1298c7f7cbcSStefan Hajnoczi                 assert(local_err == NULL);
13006e0f098SStefan Hajnoczi                 if (server->ctx) {
13106e0f098SStefan Hajnoczi                     server->in_qio_channel_yield = true;
13270eb2c07SCoiby Xu                     qio_channel_yield(ioc, G_IO_IN);
13306e0f098SStefan Hajnoczi                     server->in_qio_channel_yield = false;
13406e0f098SStefan Hajnoczi                 } else {
135*411132c9SKevin Wolf                     return false;
13606e0f098SStefan Hajnoczi                 }
13770eb2c07SCoiby Xu                 continue;
13870eb2c07SCoiby Xu             } else {
13970eb2c07SCoiby Xu                 error_report_err(local_err);
14070eb2c07SCoiby Xu                 goto fail;
14170eb2c07SCoiby Xu             }
14270eb2c07SCoiby Xu         }
14370eb2c07SCoiby Xu 
1448c7f7cbcSStefan Hajnoczi         if (nfds > 0) {
1458c7f7cbcSStefan Hajnoczi             if (vmsg->fd_num + nfds > max_fds) {
1468c7f7cbcSStefan Hajnoczi                 error_report("A maximum of %zu fds are allowed, "
1478c7f7cbcSStefan Hajnoczi                              "however got %zu fds now",
1488c7f7cbcSStefan Hajnoczi                              max_fds, vmsg->fd_num + nfds);
1498c7f7cbcSStefan Hajnoczi                 g_free(fds);
1508c7f7cbcSStefan Hajnoczi                 goto fail;
1518c7f7cbcSStefan Hajnoczi             }
1528c7f7cbcSStefan Hajnoczi             memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
1538c7f7cbcSStefan Hajnoczi             vmsg->fd_num += nfds;
1548c7f7cbcSStefan Hajnoczi             g_free(fds);
1558c7f7cbcSStefan Hajnoczi         }
1568c7f7cbcSStefan Hajnoczi 
1578c7f7cbcSStefan Hajnoczi         if (rc == 0) { /* socket closed */
1588c7f7cbcSStefan Hajnoczi             goto fail;
1598c7f7cbcSStefan Hajnoczi         }
1608c7f7cbcSStefan Hajnoczi 
1618c7f7cbcSStefan Hajnoczi         iov.iov_base += rc;
1628c7f7cbcSStefan Hajnoczi         iov.iov_len -= rc;
1638c7f7cbcSStefan Hajnoczi         read_bytes += rc;
1648c7f7cbcSStefan Hajnoczi     } while (read_bytes != VHOST_USER_HDR_SIZE);
1658c7f7cbcSStefan Hajnoczi 
16670eb2c07SCoiby Xu     /* qio_channel_readv_full will make socket fds blocking, unblock them */
16770eb2c07SCoiby Xu     vmsg_unblock_fds(vmsg);
16870eb2c07SCoiby Xu     if (vmsg->size > sizeof(vmsg->payload)) {
16970eb2c07SCoiby Xu         error_report("Error: too big message request: %d, "
17070eb2c07SCoiby Xu                      "size: vmsg->size: %u, "
17170eb2c07SCoiby Xu                      "while sizeof(vmsg->payload) = %zu",
17270eb2c07SCoiby Xu                      vmsg->request, vmsg->size, sizeof(vmsg->payload));
17370eb2c07SCoiby Xu         goto fail;
17470eb2c07SCoiby Xu     }
17570eb2c07SCoiby Xu 
17670eb2c07SCoiby Xu     struct iovec iov_payload = {
17770eb2c07SCoiby Xu         .iov_base = (char *)&vmsg->payload,
17870eb2c07SCoiby Xu         .iov_len = vmsg->size,
17970eb2c07SCoiby Xu     };
18070eb2c07SCoiby Xu     if (vmsg->size) {
18170eb2c07SCoiby Xu         rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
182edaf6205SStefan Hajnoczi         if (rc != 1) {
183edaf6205SStefan Hajnoczi             if (local_err) {
18470eb2c07SCoiby Xu                 error_report_err(local_err);
185edaf6205SStefan Hajnoczi             }
18670eb2c07SCoiby Xu             goto fail;
18770eb2c07SCoiby Xu         }
18870eb2c07SCoiby Xu     }
18970eb2c07SCoiby Xu 
19070eb2c07SCoiby Xu     return true;
19170eb2c07SCoiby Xu 
19270eb2c07SCoiby Xu fail:
19370eb2c07SCoiby Xu     vmsg_close_fds(vmsg);
19470eb2c07SCoiby Xu 
19570eb2c07SCoiby Xu     return false;
19670eb2c07SCoiby Xu }
19770eb2c07SCoiby Xu 
vu_client_trip(void * opaque)19870eb2c07SCoiby Xu static coroutine_fn void vu_client_trip(void *opaque)
19970eb2c07SCoiby Xu {
20070eb2c07SCoiby Xu     VuServer *server = opaque;
2017185c857SStefan Hajnoczi     VuDev *vu_dev = &server->vu_dev;
20270eb2c07SCoiby Xu 
203*411132c9SKevin Wolf     while (!vu_dev->broken) {
204*411132c9SKevin Wolf         if (server->quiescing) {
205*411132c9SKevin Wolf             server->co_trip = NULL;
206*411132c9SKevin Wolf             aio_wait_kick();
207*411132c9SKevin Wolf             return;
208*411132c9SKevin Wolf         }
209*411132c9SKevin Wolf         /* vu_dispatch() returns false if server->ctx went away */
210*411132c9SKevin Wolf         if (!vu_dispatch(vu_dev) && server->ctx) {
211*411132c9SKevin Wolf             break;
212*411132c9SKevin Wolf         }
21370eb2c07SCoiby Xu     }
21470eb2c07SCoiby Xu 
2158f5e9a8eSStefan Hajnoczi     if (vhost_user_server_has_in_flight(server)) {
216520d8b40SKevin Wolf         /* Wait for requests to complete before we can unmap the memory */
217520d8b40SKevin Wolf         server->wait_idle = true;
218520d8b40SKevin Wolf         qemu_coroutine_yield();
219520d8b40SKevin Wolf         server->wait_idle = false;
220520d8b40SKevin Wolf     }
2218f5e9a8eSStefan Hajnoczi     assert(!vhost_user_server_has_in_flight(server));
222520d8b40SKevin Wolf 
2237185c857SStefan Hajnoczi     vu_deinit(vu_dev);
22470eb2c07SCoiby Xu 
2257185c857SStefan Hajnoczi     /* vu_deinit() should have called remove_watch() */
2267185c857SStefan Hajnoczi     assert(QTAILQ_EMPTY(&server->vu_fd_watches));
2277185c857SStefan Hajnoczi 
2287185c857SStefan Hajnoczi     object_unref(OBJECT(server->sioc));
2297185c857SStefan Hajnoczi     server->sioc = NULL;
2307185c857SStefan Hajnoczi 
2317185c857SStefan Hajnoczi     object_unref(OBJECT(server->ioc));
2327185c857SStefan Hajnoczi     server->ioc = NULL;
2337185c857SStefan Hajnoczi 
2347185c857SStefan Hajnoczi     server->co_trip = NULL;
2357185c857SStefan Hajnoczi     if (server->restart_listener_bh) {
2367185c857SStefan Hajnoczi         qemu_bh_schedule(server->restart_listener_bh);
2377185c857SStefan Hajnoczi     }
2387185c857SStefan Hajnoczi     aio_wait_kick();
23970eb2c07SCoiby Xu }
24070eb2c07SCoiby Xu 
24170eb2c07SCoiby Xu /*
24270eb2c07SCoiby Xu  * a wrapper for vu_kick_cb
24370eb2c07SCoiby Xu  *
24470eb2c07SCoiby Xu  * since aio_dispatch can only pass one user data pointer to the
24570eb2c07SCoiby Xu  * callback function, pack VuDev and pvt into a struct. Then unpack it
24670eb2c07SCoiby Xu  * and pass them to vu_kick_cb
24770eb2c07SCoiby Xu  */
kick_handler(void * opaque)24870eb2c07SCoiby Xu static void kick_handler(void *opaque)
24970eb2c07SCoiby Xu {
25070eb2c07SCoiby Xu     VuFdWatch *vu_fd_watch = opaque;
2517185c857SStefan Hajnoczi     VuDev *vu_dev = vu_fd_watch->vu_dev;
25270eb2c07SCoiby Xu 
2537185c857SStefan Hajnoczi     vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
2547185c857SStefan Hajnoczi 
2557185c857SStefan Hajnoczi     /* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
2567185c857SStefan Hajnoczi     if (vu_dev->broken) {
2577185c857SStefan Hajnoczi         VuServer *server = container_of(vu_dev, VuServer, vu_dev);
2587185c857SStefan Hajnoczi 
2597185c857SStefan Hajnoczi         qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
2607185c857SStefan Hajnoczi     }
2617185c857SStefan Hajnoczi }
26270eb2c07SCoiby Xu 
find_vu_fd_watch(VuServer * server,int fd)26370eb2c07SCoiby Xu static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
26470eb2c07SCoiby Xu {
26570eb2c07SCoiby Xu 
26670eb2c07SCoiby Xu     VuFdWatch *vu_fd_watch, *next;
26770eb2c07SCoiby Xu     QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
26870eb2c07SCoiby Xu         if (vu_fd_watch->fd == fd) {
26970eb2c07SCoiby Xu             return vu_fd_watch;
27070eb2c07SCoiby Xu         }
27170eb2c07SCoiby Xu     }
27270eb2c07SCoiby Xu     return NULL;
27370eb2c07SCoiby Xu }
27470eb2c07SCoiby Xu 
27570eb2c07SCoiby Xu static void
set_watch(VuDev * vu_dev,int fd,int vu_evt,vu_watch_cb cb,void * pvt)27670eb2c07SCoiby Xu set_watch(VuDev *vu_dev, int fd, int vu_evt,
27770eb2c07SCoiby Xu           vu_watch_cb cb, void *pvt)
27870eb2c07SCoiby Xu {
27970eb2c07SCoiby Xu 
28070eb2c07SCoiby Xu     VuServer *server = container_of(vu_dev, VuServer, vu_dev);
28170eb2c07SCoiby Xu     g_assert(vu_dev);
28270eb2c07SCoiby Xu     g_assert(fd >= 0);
28370eb2c07SCoiby Xu     g_assert(cb);
28470eb2c07SCoiby Xu 
28570eb2c07SCoiby Xu     VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
28670eb2c07SCoiby Xu 
28770eb2c07SCoiby Xu     if (!vu_fd_watch) {
288fbf58f21SPhilippe Mathieu-Daudé         vu_fd_watch = g_new0(VuFdWatch, 1);
28970eb2c07SCoiby Xu 
29070eb2c07SCoiby Xu         QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
29170eb2c07SCoiby Xu 
29270eb2c07SCoiby Xu         vu_fd_watch->fd = fd;
29370eb2c07SCoiby Xu         vu_fd_watch->cb = cb;
294ff5927baSMarc-André Lureau         qemu_socket_set_nonblock(fd);
29506e0f098SStefan Hajnoczi         aio_set_fd_handler(server->ctx, fd, kick_handler,
296826cc324SStefan Hajnoczi                            NULL, NULL, NULL, vu_fd_watch);
29770eb2c07SCoiby Xu         vu_fd_watch->vu_dev = vu_dev;
29870eb2c07SCoiby Xu         vu_fd_watch->pvt = pvt;
29970eb2c07SCoiby Xu     }
30070eb2c07SCoiby Xu }
30170eb2c07SCoiby Xu 
30270eb2c07SCoiby Xu 
/*
 * Watch-removal callback registered with vu_init(): stop monitoring @fd and
 * free its VuFdWatch. Does nothing if @fd is not being watched.
 */
static void remove_watch(VuDev *vu_dev, int fd)
{
    VuServer *server;
    VuFdWatch *watch;

    g_assert(vu_dev);
    g_assert(fd >= 0);

    server = container_of(vu_dev, VuServer, vu_dev);
    watch = find_vu_fd_watch(server, fd);

    if (watch) {
        /* Unregister the handler before the watch it points at is freed */
        aio_set_fd_handler(server->ctx, fd, NULL, NULL, NULL, NULL, NULL);

        QTAILQ_REMOVE(&server->vu_fd_watches, watch, next);
        g_free(watch);
    }
}
32170eb2c07SCoiby Xu 
32270eb2c07SCoiby Xu 
/*
 * Listener callback invoked for a new client connection @sioc.
 *
 * Only one client is served at a time: further connections are rejected
 * with a warning while server->sioc is set. On success libvhost-user is
 * initialized on the socket, the listener callback is removed until the
 * client disconnects, and the vu_client_trip() coroutine is started by
 * attaching the AioContext.
 */
static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
                      gpointer opaque)
{
    VuServer *server = opaque;
    bool initialized;

    if (server->sioc) {
        warn_report("Only one vhost-user client is allowed to "
                    "connect the server one time");
        return;
    }

    initialized = vu_init(&server->vu_dev, server->max_queues, sioc->fd,
                          panic_cb, vu_message_read, set_watch, remove_watch,
                          server->vu_iface);
    if (!initialized) {
        error_report("Failed to initialize libvhost-user");
        return;
    }

    /*
     * With no callback installed on the listener, any other vhost-user
     * client is kept waiting until this one disconnects.
     */
    qio_net_listener_set_client_func(server->listener, NULL, NULL, NULL);

    /*
     * Take our own reference on sioc: qio_net_listener_channel_func will
     * call object_unref(OBJECT(sioc)) and must not free it under us.
     */
    server->sioc = sioc;
    object_ref(OBJECT(server->sioc));
    qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");

    /* server->ioc is the same object and holds a reference of its own */
    server->ioc = QIO_CHANNEL(sioc);
    object_ref(OBJECT(server->ioc));

    /* TODO vu_message_write() spins if non-blocking! */
    qio_channel_set_blocking(server->ioc, false, NULL);

    qio_channel_set_follow_coroutine_ctx(server->ioc, true);

    /* Attaching the AioContext starts the vu_client_trip coroutine */
    aio_context_acquire(server->ctx);
    vhost_user_server_attach_aio_context(server, server->ctx);
    aio_context_release(server->ctx);
}
36870eb2c07SCoiby Xu 
3692957dc40SStefan Hajnoczi /* server->ctx acquired by caller */
vhost_user_server_stop(VuServer * server)37070eb2c07SCoiby Xu void vhost_user_server_stop(VuServer *server)
37170eb2c07SCoiby Xu {
3727185c857SStefan Hajnoczi     qemu_bh_delete(server->restart_listener_bh);
3737185c857SStefan Hajnoczi     server->restart_listener_bh = NULL;
3747185c857SStefan Hajnoczi 
37570eb2c07SCoiby Xu     if (server->sioc) {
3767185c857SStefan Hajnoczi         VuFdWatch *vu_fd_watch;
3777185c857SStefan Hajnoczi 
3787185c857SStefan Hajnoczi         QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
37960f782b6SStefan Hajnoczi             aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
380826cc324SStefan Hajnoczi                                NULL, NULL, NULL, NULL, vu_fd_watch);
38170eb2c07SCoiby Xu         }
38270eb2c07SCoiby Xu 
3837185c857SStefan Hajnoczi         qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
3847185c857SStefan Hajnoczi 
3857185c857SStefan Hajnoczi         AIO_WAIT_WHILE(server->ctx, server->co_trip);
3867185c857SStefan Hajnoczi     }
3877185c857SStefan Hajnoczi 
38870eb2c07SCoiby Xu     if (server->listener) {
38970eb2c07SCoiby Xu         qio_net_listener_disconnect(server->listener);
39070eb2c07SCoiby Xu         object_unref(OBJECT(server->listener));
39170eb2c07SCoiby Xu     }
39270eb2c07SCoiby Xu }
39370eb2c07SCoiby Xu 
3947185c857SStefan Hajnoczi /*
3957185c857SStefan Hajnoczi  * Allow the next client to connect to the server. Called from a BH in the main
3967185c857SStefan Hajnoczi  * loop.
3977185c857SStefan Hajnoczi  */
restart_listener_bh(void * opaque)3987185c857SStefan Hajnoczi static void restart_listener_bh(void *opaque)
39970eb2c07SCoiby Xu {
4007185c857SStefan Hajnoczi     VuServer *server = opaque;
40170eb2c07SCoiby Xu 
4027185c857SStefan Hajnoczi     qio_net_listener_set_client_func(server->listener, vu_accept, server,
4037185c857SStefan Hajnoczi                                      NULL);
4047185c857SStefan Hajnoczi }
4057185c857SStefan Hajnoczi 
4067185c857SStefan Hajnoczi /* Called with ctx acquired */
vhost_user_server_attach_aio_context(VuServer * server,AioContext * ctx)4077185c857SStefan Hajnoczi void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
4087185c857SStefan Hajnoczi {
4097185c857SStefan Hajnoczi     VuFdWatch *vu_fd_watch;
4107185c857SStefan Hajnoczi 
4117185c857SStefan Hajnoczi     server->ctx = ctx;
41270eb2c07SCoiby Xu 
41370eb2c07SCoiby Xu     if (!server->sioc) {
41470eb2c07SCoiby Xu         return;
41570eb2c07SCoiby Xu     }
41670eb2c07SCoiby Xu 
4177185c857SStefan Hajnoczi     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
41860f782b6SStefan Hajnoczi         aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
419826cc324SStefan Hajnoczi                            NULL, NULL, vu_fd_watch);
4207185c857SStefan Hajnoczi     }
4217185c857SStefan Hajnoczi 
422*411132c9SKevin Wolf     if (server->co_trip) {
423*411132c9SKevin Wolf         /*
424*411132c9SKevin Wolf          * The caller didn't fully shut down co_trip (this can happen on
425*411132c9SKevin Wolf          * non-polling drains like in bdrv_graph_wrlock()). This is okay as long
426*411132c9SKevin Wolf          * as it no longer tries to shut it down and we're guaranteed to still
427*411132c9SKevin Wolf          * be in the same AioContext as before.
428*411132c9SKevin Wolf          *
429*411132c9SKevin Wolf          * co_ctx can still be NULL if we get multiple calls and only just
430*411132c9SKevin Wolf          * scheduled a new coroutine in the else branch.
431*411132c9SKevin Wolf          */
432*411132c9SKevin Wolf         AioContext *co_ctx = qemu_coroutine_get_aio_context(server->co_trip);
433*411132c9SKevin Wolf 
434*411132c9SKevin Wolf         assert(!server->quiescing);
435*411132c9SKevin Wolf         assert(!co_ctx || co_ctx == ctx);
436*411132c9SKevin Wolf     } else {
437*411132c9SKevin Wolf         server->co_trip = qemu_coroutine_create(vu_client_trip, server);
43806e0f098SStefan Hajnoczi         assert(!server->in_qio_channel_yield);
4397185c857SStefan Hajnoczi         aio_co_schedule(ctx, server->co_trip);
4407185c857SStefan Hajnoczi     }
441*411132c9SKevin Wolf }
4427185c857SStefan Hajnoczi 
4437185c857SStefan Hajnoczi /* Called with server->ctx acquired */
vhost_user_server_detach_aio_context(VuServer * server)4447185c857SStefan Hajnoczi void vhost_user_server_detach_aio_context(VuServer *server)
4457185c857SStefan Hajnoczi {
4467185c857SStefan Hajnoczi     if (server->sioc) {
4477185c857SStefan Hajnoczi         VuFdWatch *vu_fd_watch;
4487185c857SStefan Hajnoczi 
4497185c857SStefan Hajnoczi         QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
45060f782b6SStefan Hajnoczi             aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
451826cc324SStefan Hajnoczi                                NULL, NULL, NULL, NULL, vu_fd_watch);
4527185c857SStefan Hajnoczi         }
45370eb2c07SCoiby Xu     }
45470eb2c07SCoiby Xu 
4557185c857SStefan Hajnoczi     server->ctx = NULL;
45606e0f098SStefan Hajnoczi 
45706e0f098SStefan Hajnoczi     if (server->ioc) {
45806e0f098SStefan Hajnoczi         if (server->in_qio_channel_yield) {
45906e0f098SStefan Hajnoczi             /* Stop receiving the next vhost-user message */
46006e0f098SStefan Hajnoczi             qio_channel_wake_read(server->ioc);
46106e0f098SStefan Hajnoczi         }
46206e0f098SStefan Hajnoczi     }
46370eb2c07SCoiby Xu }
46470eb2c07SCoiby Xu 
vhost_user_server_start(VuServer * server,SocketAddress * socket_addr,AioContext * ctx,uint16_t max_queues,const VuDevIface * vu_iface,Error ** errp)46570eb2c07SCoiby Xu bool vhost_user_server_start(VuServer *server,
46670eb2c07SCoiby Xu                              SocketAddress *socket_addr,
46770eb2c07SCoiby Xu                              AioContext *ctx,
46870eb2c07SCoiby Xu                              uint16_t max_queues,
46970eb2c07SCoiby Xu                              const VuDevIface *vu_iface,
47070eb2c07SCoiby Xu                              Error **errp)
47170eb2c07SCoiby Xu {
4727185c857SStefan Hajnoczi     QEMUBH *bh;
47390fc91d5SStefan Hajnoczi     QIONetListener *listener;
47490fc91d5SStefan Hajnoczi 
47590fc91d5SStefan Hajnoczi     if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
47690fc91d5SStefan Hajnoczi         socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
47790fc91d5SStefan Hajnoczi         error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
47890fc91d5SStefan Hajnoczi         return false;
47990fc91d5SStefan Hajnoczi     }
48090fc91d5SStefan Hajnoczi 
48190fc91d5SStefan Hajnoczi     listener = qio_net_listener_new();
48270eb2c07SCoiby Xu     if (qio_net_listener_open_sync(listener, socket_addr, 1,
48370eb2c07SCoiby Xu                                    errp) < 0) {
48470eb2c07SCoiby Xu         object_unref(OBJECT(listener));
48570eb2c07SCoiby Xu         return false;
48670eb2c07SCoiby Xu     }
48770eb2c07SCoiby Xu 
4887185c857SStefan Hajnoczi     bh = qemu_bh_new(restart_listener_bh, server);
4897185c857SStefan Hajnoczi 
4901d787456SStefan Hajnoczi     /* zero out unspecified fields */
49170eb2c07SCoiby Xu     *server = (VuServer) {
49270eb2c07SCoiby Xu         .listener              = listener,
4937185c857SStefan Hajnoczi         .restart_listener_bh   = bh,
49470eb2c07SCoiby Xu         .vu_iface              = vu_iface,
49570eb2c07SCoiby Xu         .max_queues            = max_queues,
49670eb2c07SCoiby Xu         .ctx                   = ctx,
49770eb2c07SCoiby Xu     };
49870eb2c07SCoiby Xu 
49970eb2c07SCoiby Xu     qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
50070eb2c07SCoiby Xu 
50170eb2c07SCoiby Xu     qio_net_listener_set_client_func(server->listener,
50270eb2c07SCoiby Xu                                      vu_accept,
50370eb2c07SCoiby Xu                                      server,
50470eb2c07SCoiby Xu                                      NULL);
50570eb2c07SCoiby Xu 
50670eb2c07SCoiby Xu     QTAILQ_INIT(&server->vu_fd_watches);
50770eb2c07SCoiby Xu     return true;
50870eb2c07SCoiby Xu }
509