xref: /openbmc/qemu/io/channel-socket.c (revision 43f59bf76535b8842a762f8625b96091611aac11)
/*
 * QEMU I/O channels sockets driver
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19559607eaSDaniel P. Berrange 
20cae9fc56SPeter Maydell #include "qemu/osdep.h"
21da34e65cSMarkus Armbruster #include "qapi/error.h"
229af23989SMarkus Armbruster #include "qapi/qapi-visit-sockets.h"
230b8fa32fSMarkus Armbruster #include "qemu/module.h"
24559607eaSDaniel P. Berrange #include "io/channel-socket.h"
2506e0f098SStefan Hajnoczi #include "io/channel-util.h"
26559607eaSDaniel P. Berrange #include "io/channel-watch.h"
27559607eaSDaniel P. Berrange #include "trace.h"
2837f9e0a2SEric Blake #include "qapi/clone-visitor.h"
292bc58ffcSLeonardo Bras #ifdef CONFIG_LINUX
302bc58ffcSLeonardo Bras #include <linux/errqueue.h>
312bc58ffcSLeonardo Bras #include <sys/socket.h>
322bc58ffcSLeonardo Bras 
332bc58ffcSLeonardo Bras #if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
342bc58ffcSLeonardo Bras #define QEMU_MSG_ZEROCOPY
352bc58ffcSLeonardo Bras #endif
362bc58ffcSLeonardo Bras #endif
37559607eaSDaniel P. Berrange 
38559607eaSDaniel P. Berrange #define SOCKET_MAX_FDS 16
39559607eaSDaniel P. Berrange 
40559607eaSDaniel P. Berrange SocketAddress *
qio_channel_socket_get_local_address(QIOChannelSocket * ioc,Error ** errp)41559607eaSDaniel P. Berrange qio_channel_socket_get_local_address(QIOChannelSocket *ioc,
42559607eaSDaniel P. Berrange                                      Error **errp)
43559607eaSDaniel P. Berrange {
44559607eaSDaniel P. Berrange     return socket_sockaddr_to_address(&ioc->localAddr,
45559607eaSDaniel P. Berrange                                       ioc->localAddrLen,
46559607eaSDaniel P. Berrange                                       errp);
47559607eaSDaniel P. Berrange }
48559607eaSDaniel P. Berrange 
49559607eaSDaniel P. Berrange SocketAddress *
qio_channel_socket_get_remote_address(QIOChannelSocket * ioc,Error ** errp)50559607eaSDaniel P. Berrange qio_channel_socket_get_remote_address(QIOChannelSocket *ioc,
51559607eaSDaniel P. Berrange                                       Error **errp)
52559607eaSDaniel P. Berrange {
53559607eaSDaniel P. Berrange     return socket_sockaddr_to_address(&ioc->remoteAddr,
54559607eaSDaniel P. Berrange                                       ioc->remoteAddrLen,
55559607eaSDaniel P. Berrange                                       errp);
56559607eaSDaniel P. Berrange }
57559607eaSDaniel P. Berrange 
58559607eaSDaniel P. Berrange QIOChannelSocket *
qio_channel_socket_new(void)59559607eaSDaniel P. Berrange qio_channel_socket_new(void)
60559607eaSDaniel P. Berrange {
61559607eaSDaniel P. Berrange     QIOChannelSocket *sioc;
62559607eaSDaniel P. Berrange     QIOChannel *ioc;
63559607eaSDaniel P. Berrange 
64559607eaSDaniel P. Berrange     sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
65559607eaSDaniel P. Berrange     sioc->fd = -1;
662bc58ffcSLeonardo Bras     sioc->zero_copy_queued = 0;
672bc58ffcSLeonardo Bras     sioc->zero_copy_sent = 0;
68559607eaSDaniel P. Berrange 
69559607eaSDaniel P. Berrange     ioc = QIO_CHANNEL(sioc);
70d8d3c7ccSFelipe Franciosi     qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
71559607eaSDaniel P. Berrange 
72a5897205SPaolo Bonzini #ifdef WIN32
73a5897205SPaolo Bonzini     ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL);
74a5897205SPaolo Bonzini #endif
75a5897205SPaolo Bonzini 
76559607eaSDaniel P. Berrange     trace_qio_channel_socket_new(sioc);
77559607eaSDaniel P. Berrange 
78559607eaSDaniel P. Berrange     return sioc;
79559607eaSDaniel P. Berrange }
80559607eaSDaniel P. Berrange 
81559607eaSDaniel P. Berrange 
82559607eaSDaniel P. Berrange static int
qio_channel_socket_set_fd(QIOChannelSocket * sioc,int fd,Error ** errp)83559607eaSDaniel P. Berrange qio_channel_socket_set_fd(QIOChannelSocket *sioc,
84559607eaSDaniel P. Berrange                           int fd,
85559607eaSDaniel P. Berrange                           Error **errp)
86559607eaSDaniel P. Berrange {
87559607eaSDaniel P. Berrange     if (sioc->fd != -1) {
88559607eaSDaniel P. Berrange         error_setg(errp, "Socket is already open");
89559607eaSDaniel P. Berrange         return -1;
90559607eaSDaniel P. Berrange     }
91559607eaSDaniel P. Berrange 
92559607eaSDaniel P. Berrange     sioc->fd = fd;
93559607eaSDaniel P. Berrange     sioc->remoteAddrLen = sizeof(sioc->remoteAddr);
94559607eaSDaniel P. Berrange     sioc->localAddrLen = sizeof(sioc->localAddr);
95559607eaSDaniel P. Berrange 
96559607eaSDaniel P. Berrange 
97559607eaSDaniel P. Berrange     if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr,
98559607eaSDaniel P. Berrange                     &sioc->remoteAddrLen) < 0) {
99b16a44e1SDaniel P. Berrange         if (errno == ENOTCONN) {
100559607eaSDaniel P. Berrange             memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr));
101559607eaSDaniel P. Berrange             sioc->remoteAddrLen = sizeof(sioc->remoteAddr);
102559607eaSDaniel P. Berrange         } else {
103b16a44e1SDaniel P. Berrange             error_setg_errno(errp, errno,
104559607eaSDaniel P. Berrange                              "Unable to query remote socket address");
105559607eaSDaniel P. Berrange             goto error;
106559607eaSDaniel P. Berrange         }
107559607eaSDaniel P. Berrange     }
108559607eaSDaniel P. Berrange 
109559607eaSDaniel P. Berrange     if (getsockname(fd, (struct sockaddr *)&sioc->localAddr,
110559607eaSDaniel P. Berrange                     &sioc->localAddrLen) < 0) {
111b16a44e1SDaniel P. Berrange         error_setg_errno(errp, errno,
112559607eaSDaniel P. Berrange                          "Unable to query local socket address");
113559607eaSDaniel P. Berrange         goto error;
114559607eaSDaniel P. Berrange     }
115559607eaSDaniel P. Berrange 
116559607eaSDaniel P. Berrange #ifndef WIN32
117559607eaSDaniel P. Berrange     if (sioc->localAddr.ss_family == AF_UNIX) {
118559607eaSDaniel P. Berrange         QIOChannel *ioc = QIO_CHANNEL(sioc);
119d8d3c7ccSFelipe Franciosi         qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
120559607eaSDaniel P. Berrange     }
121559607eaSDaniel P. Berrange #endif /* WIN32 */
122559607eaSDaniel P. Berrange 
123559607eaSDaniel P. Berrange     return 0;
124559607eaSDaniel P. Berrange 
125559607eaSDaniel P. Berrange  error:
126559607eaSDaniel P. Berrange     sioc->fd = -1; /* Let the caller close FD on failure */
127559607eaSDaniel P. Berrange     return -1;
128559607eaSDaniel P. Berrange }
129559607eaSDaniel P. Berrange 
130559607eaSDaniel P. Berrange QIOChannelSocket *
qio_channel_socket_new_fd(int fd,Error ** errp)131559607eaSDaniel P. Berrange qio_channel_socket_new_fd(int fd,
132559607eaSDaniel P. Berrange                           Error **errp)
133559607eaSDaniel P. Berrange {
134559607eaSDaniel P. Berrange     QIOChannelSocket *ioc;
135559607eaSDaniel P. Berrange 
136559607eaSDaniel P. Berrange     ioc = qio_channel_socket_new();
137559607eaSDaniel P. Berrange     if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
138559607eaSDaniel P. Berrange         object_unref(OBJECT(ioc));
139559607eaSDaniel P. Berrange         return NULL;
140559607eaSDaniel P. Berrange     }
141559607eaSDaniel P. Berrange 
142559607eaSDaniel P. Berrange     trace_qio_channel_socket_new_fd(ioc, fd);
143559607eaSDaniel P. Berrange 
144559607eaSDaniel P. Berrange     return ioc;
145559607eaSDaniel P. Berrange }
146559607eaSDaniel P. Berrange 
147559607eaSDaniel P. Berrange 
qio_channel_socket_connect_sync(QIOChannelSocket * ioc,SocketAddress * addr,Error ** errp)148559607eaSDaniel P. Berrange int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
149559607eaSDaniel P. Berrange                                     SocketAddress *addr,
150559607eaSDaniel P. Berrange                                     Error **errp)
151559607eaSDaniel P. Berrange {
152559607eaSDaniel P. Berrange     int fd;
153559607eaSDaniel P. Berrange 
154559607eaSDaniel P. Berrange     trace_qio_channel_socket_connect_sync(ioc, addr);
155b2587932SCao jin     fd = socket_connect(addr, errp);
156559607eaSDaniel P. Berrange     if (fd < 0) {
157559607eaSDaniel P. Berrange         trace_qio_channel_socket_connect_fail(ioc);
158559607eaSDaniel P. Berrange         return -1;
159559607eaSDaniel P. Berrange     }
160559607eaSDaniel P. Berrange 
161559607eaSDaniel P. Berrange     trace_qio_channel_socket_connect_complete(ioc, fd);
162559607eaSDaniel P. Berrange     if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
16325657fc6SMarc-André Lureau         close(fd);
164559607eaSDaniel P. Berrange         return -1;
165559607eaSDaniel P. Berrange     }
166559607eaSDaniel P. Berrange 
1672bc58ffcSLeonardo Bras #ifdef QEMU_MSG_ZEROCOPY
1682bc58ffcSLeonardo Bras     int ret, v = 1;
1692bc58ffcSLeonardo Bras     ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
1702bc58ffcSLeonardo Bras     if (ret == 0) {
1712bc58ffcSLeonardo Bras         /* Zero copy available on host */
1722bc58ffcSLeonardo Bras         qio_channel_set_feature(QIO_CHANNEL(ioc),
1732bc58ffcSLeonardo Bras                                 QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
1742bc58ffcSLeonardo Bras     }
1752bc58ffcSLeonardo Bras #endif
1762bc58ffcSLeonardo Bras 
17784615a19Smanish.mishra     qio_channel_set_feature(QIO_CHANNEL(ioc),
17884615a19Smanish.mishra                             QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
17984615a19Smanish.mishra 
180559607eaSDaniel P. Berrange     return 0;
181559607eaSDaniel P. Berrange }
182559607eaSDaniel P. Berrange 
183559607eaSDaniel P. Berrange 
/*
 * Task worker: perform the blocking connect for the channel that is
 * the source object of @task.  @opaque is the SocketAddress to
 * connect to.  Whatever error the connect produced (NULL if none) is
 * recorded on the task.
 */
static void qio_channel_socket_connect_worker(QIOTask *task,
                                              gpointer opaque)
{
    Error *local_err = NULL;
    SocketAddress *target = opaque;
    QIOChannelSocket *sioc;

    sioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));
    qio_channel_socket_connect_sync(sioc, target, &local_err);

    qio_task_set_error(task, local_err);
}
195559607eaSDaniel P. Berrange 
196559607eaSDaniel P. Berrange 
/*
 * Start connecting @ioc to @addr from a worker thread.
 *
 * A task is created around @callback/@opaque/@destroy and run via
 * qio_task_run_in_thread() with @context.  The worker receives its
 * own clone of @addr, with qapi_free_SocketAddress registered as the
 * clone's destroy notifier.
 */
void qio_channel_socket_connect_async(QIOChannelSocket *ioc,
                                      SocketAddress *addr,
                                      QIOTaskFunc callback,
                                      gpointer opaque,
                                      GDestroyNotify destroy,
                                      GMainContext *context)
{
    QIOTask *task;
    SocketAddress *cloned;

    task = qio_task_new(OBJECT(ioc), callback, opaque, destroy);
    cloned = QAPI_CLONE(SocketAddress, addr);

    /*
     * socket_connect() does a non-blocking connect(), but it
     * still blocks in DNS lookups, so we must use a thread
     */
    trace_qio_channel_socket_connect_async(ioc, addr);
    qio_task_run_in_thread(task,
                           qio_channel_socket_connect_worker,
                           cloned,
                           (GDestroyNotify)qapi_free_SocketAddress,
                           context);
}
219559607eaSDaniel P. Berrange 
220559607eaSDaniel P. Berrange 
qio_channel_socket_listen_sync(QIOChannelSocket * ioc,SocketAddress * addr,int num,Error ** errp)221559607eaSDaniel P. Berrange int qio_channel_socket_listen_sync(QIOChannelSocket *ioc,
222559607eaSDaniel P. Berrange                                    SocketAddress *addr,
2234e2d8bf6SJuan Quintela                                    int num,
224559607eaSDaniel P. Berrange                                    Error **errp)
225559607eaSDaniel P. Berrange {
226559607eaSDaniel P. Berrange     int fd;
227559607eaSDaniel P. Berrange 
2284e2d8bf6SJuan Quintela     trace_qio_channel_socket_listen_sync(ioc, addr, num);
2294e2d8bf6SJuan Quintela     fd = socket_listen(addr, num, errp);
230559607eaSDaniel P. Berrange     if (fd < 0) {
231559607eaSDaniel P. Berrange         trace_qio_channel_socket_listen_fail(ioc);
232559607eaSDaniel P. Berrange         return -1;
233559607eaSDaniel P. Berrange     }
234559607eaSDaniel P. Berrange 
235559607eaSDaniel P. Berrange     trace_qio_channel_socket_listen_complete(ioc, fd);
236559607eaSDaniel P. Berrange     if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
23725657fc6SMarc-André Lureau         close(fd);
238559607eaSDaniel P. Berrange         return -1;
239559607eaSDaniel P. Berrange     }
240bf535208SDaniel P. Berrange     qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_LISTEN);
241559607eaSDaniel P. Berrange 
242559607eaSDaniel P. Berrange     return 0;
243559607eaSDaniel P. Berrange }
244559607eaSDaniel P. Berrange 
245559607eaSDaniel P. Berrange 
2467959e29eSJuan Quintela struct QIOChannelListenWorkerData {
2477959e29eSJuan Quintela     SocketAddress *addr;
2487959e29eSJuan Quintela     int num; /* amount of expected connections */
2497959e29eSJuan Quintela };
2507959e29eSJuan Quintela 
/*
 * Destroy notifier for a struct QIOChannelListenWorkerData passed as
 * @opaque: frees the contained address, then the structure itself.
 */
static void qio_channel_listen_worker_free(gpointer opaque)
{
    struct QIOChannelListenWorkerData *args = opaque;
    SocketAddress *listen_addr = args->addr;

    qapi_free_SocketAddress(listen_addr);
    g_free(args);
}
2587959e29eSJuan Quintela 
/*
 * Task worker: perform the blocking listen for the channel that is
 * the source object of @task.  @opaque is a
 * struct QIOChannelListenWorkerData holding the address and @num.
 * Whatever error the listen produced (NULL if none) is recorded on
 * the task.
 */
static void qio_channel_socket_listen_worker(QIOTask *task,
                                             gpointer opaque)
{
    Error *local_err = NULL;
    struct QIOChannelListenWorkerData *args = opaque;
    QIOChannelSocket *sioc;

    sioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));
    qio_channel_socket_listen_sync(sioc, args->addr, args->num, &local_err);

    qio_task_set_error(task, local_err);
}
270559607eaSDaniel P. Berrange 
271559607eaSDaniel P. Berrange 
/*
 * Start listening on @addr from a worker thread.
 *
 * A task is created around @callback/@opaque/@destroy and run via
 * qio_task_run_in_thread() with @context.  The worker receives a
 * heap-allocated argument block containing a clone of @addr plus
 * @num, with qio_channel_listen_worker_free() registered as its
 * destroy notifier.
 */
void qio_channel_socket_listen_async(QIOChannelSocket *ioc,
                                     SocketAddress *addr,
                                     int num,
                                     QIOTaskFunc callback,
                                     gpointer opaque,
                                     GDestroyNotify destroy,
                                     GMainContext *context)
{
    QIOTask *task;
    struct QIOChannelListenWorkerData *args;

    task = qio_task_new(OBJECT(ioc), callback, opaque, destroy);

    args = g_new0(struct QIOChannelListenWorkerData, 1);
    args->addr = QAPI_CLONE(SocketAddress, addr);
    args->num = num;

    /* socket_listen() blocks in DNS lookups, so we must use a thread */
    trace_qio_channel_socket_listen_async(ioc, addr, num);
    qio_task_run_in_thread(task,
                           qio_channel_socket_listen_worker,
                           args,
                           qio_channel_listen_worker_free,
                           context);
}
296559607eaSDaniel P. Berrange 
297559607eaSDaniel P. Berrange 
qio_channel_socket_dgram_sync(QIOChannelSocket * ioc,SocketAddress * localAddr,SocketAddress * remoteAddr,Error ** errp)298559607eaSDaniel P. Berrange int qio_channel_socket_dgram_sync(QIOChannelSocket *ioc,
299559607eaSDaniel P. Berrange                                   SocketAddress *localAddr,
300559607eaSDaniel P. Berrange                                   SocketAddress *remoteAddr,
301559607eaSDaniel P. Berrange                                   Error **errp)
302559607eaSDaniel P. Berrange {
303559607eaSDaniel P. Berrange     int fd;
304559607eaSDaniel P. Berrange 
305559607eaSDaniel P. Berrange     trace_qio_channel_socket_dgram_sync(ioc, localAddr, remoteAddr);
306150dcd1aSPaolo Bonzini     fd = socket_dgram(remoteAddr, localAddr, errp);
307559607eaSDaniel P. Berrange     if (fd < 0) {
308559607eaSDaniel P. Berrange         trace_qio_channel_socket_dgram_fail(ioc);
309559607eaSDaniel P. Berrange         return -1;
310559607eaSDaniel P. Berrange     }
311559607eaSDaniel P. Berrange 
312559607eaSDaniel P. Berrange     trace_qio_channel_socket_dgram_complete(ioc, fd);
313559607eaSDaniel P. Berrange     if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
31425657fc6SMarc-André Lureau         close(fd);
315559607eaSDaniel P. Berrange         return -1;
316559607eaSDaniel P. Berrange     }
317559607eaSDaniel P. Berrange 
318559607eaSDaniel P. Berrange     return 0;
319559607eaSDaniel P. Berrange }
320559607eaSDaniel P. Berrange 
321559607eaSDaniel P. Berrange 
322559607eaSDaniel P. Berrange struct QIOChannelSocketDGramWorkerData {
323559607eaSDaniel P. Berrange     SocketAddress *localAddr;
324559607eaSDaniel P. Berrange     SocketAddress *remoteAddr;
325559607eaSDaniel P. Berrange };
326559607eaSDaniel P. Berrange 
327559607eaSDaniel P. Berrange 
/*
 * Destroy notifier for a struct QIOChannelSocketDGramWorkerData
 * passed as @opaque: frees both contained addresses, then the
 * structure itself.
 */
static void qio_channel_socket_dgram_worker_free(gpointer opaque)
{
    struct QIOChannelSocketDGramWorkerData *args = opaque;
    SocketAddress *local = args->localAddr;
    SocketAddress *remote = args->remoteAddr;

    qapi_free_SocketAddress(local);
    qapi_free_SocketAddress(remote);
    g_free(args);
}
335559607eaSDaniel P. Berrange 
/*
 * Task worker: perform the blocking datagram setup for the channel
 * that is the source object of @task.  @opaque is a
 * struct QIOChannelSocketDGramWorkerData holding both addresses.
 * Whatever error the setup produced (NULL if none) is recorded on
 * the task.
 */
static void qio_channel_socket_dgram_worker(QIOTask *task,
                                            gpointer opaque)
{
    Error *local_err = NULL;
    struct QIOChannelSocketDGramWorkerData *args = opaque;
    QIOChannelSocket *sioc;

    sioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));

    /* socket_dgram() blocks in DNS lookups, so we must use a thread */
    qio_channel_socket_dgram_sync(sioc, args->localAddr,
                                  args->remoteAddr, &local_err);

    qio_task_set_error(task, local_err);
}
349559607eaSDaniel P. Berrange 
350559607eaSDaniel P. Berrange 
/*
 * Start datagram socket setup from a worker thread.
 *
 * A task is created around @callback/@opaque/@destroy and run via
 * qio_task_run_in_thread() with @context.  The worker receives a
 * heap-allocated argument block containing clones of @localAddr and
 * @remoteAddr, with qio_channel_socket_dgram_worker_free() registered
 * as its destroy notifier.
 */
void qio_channel_socket_dgram_async(QIOChannelSocket *ioc,
                                    SocketAddress *localAddr,
                                    SocketAddress *remoteAddr,
                                    QIOTaskFunc callback,
                                    gpointer opaque,
                                    GDestroyNotify destroy,
                                    GMainContext *context)
{
    QIOTask *task;
    struct QIOChannelSocketDGramWorkerData *args;

    task = qio_task_new(OBJECT(ioc), callback, opaque, destroy);
    args = g_new0(struct QIOChannelSocketDGramWorkerData, 1);

    args->localAddr = QAPI_CLONE(SocketAddress, localAddr);
    args->remoteAddr = QAPI_CLONE(SocketAddress, remoteAddr);

    trace_qio_channel_socket_dgram_async(ioc, localAddr, remoteAddr);
    qio_task_run_in_thread(task,
                           qio_channel_socket_dgram_worker,
                           args,
                           qio_channel_socket_dgram_worker_free,
                           context);
}
374559607eaSDaniel P. Berrange 
375559607eaSDaniel P. Berrange 
376559607eaSDaniel P. Berrange QIOChannelSocket *
qio_channel_socket_accept(QIOChannelSocket * ioc,Error ** errp)377559607eaSDaniel P. Berrange qio_channel_socket_accept(QIOChannelSocket *ioc,
378559607eaSDaniel P. Berrange                           Error **errp)
379559607eaSDaniel P. Berrange {
380559607eaSDaniel P. Berrange     QIOChannelSocket *cioc;
381559607eaSDaniel P. Berrange 
3820e5d6327SWang guang     cioc = qio_channel_socket_new();
383559607eaSDaniel P. Berrange     cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
384559607eaSDaniel P. Berrange     cioc->localAddrLen = sizeof(ioc->localAddr);
385559607eaSDaniel P. Berrange 
386559607eaSDaniel P. Berrange  retry:
387559607eaSDaniel P. Berrange     trace_qio_channel_socket_accept(ioc);
388de7971ffSDaniel P. Berrange     cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
389559607eaSDaniel P. Berrange                            &cioc->remoteAddrLen);
390559607eaSDaniel P. Berrange     if (cioc->fd < 0) {
391b16a44e1SDaniel P. Berrange         if (errno == EINTR) {
392559607eaSDaniel P. Berrange             goto retry;
393559607eaSDaniel P. Berrange         }
3948bd9c4e6SPeter Xu         error_setg_errno(errp, errno, "Unable to accept connection");
3958bd9c4e6SPeter Xu         trace_qio_channel_socket_accept_fail(ioc);
396559607eaSDaniel P. Berrange         goto error;
397559607eaSDaniel P. Berrange     }
398559607eaSDaniel P. Berrange 
399bead5994SDaniel P. Berrange     if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr,
400bead5994SDaniel P. Berrange                     &cioc->localAddrLen) < 0) {
401b16a44e1SDaniel P. Berrange         error_setg_errno(errp, errno,
402559607eaSDaniel P. Berrange                          "Unable to query local socket address");
403559607eaSDaniel P. Berrange         goto error;
404559607eaSDaniel P. Berrange     }
405559607eaSDaniel P. Berrange 
406bead5994SDaniel P. Berrange #ifndef WIN32
407bead5994SDaniel P. Berrange     if (cioc->localAddr.ss_family == AF_UNIX) {
408d8d3c7ccSFelipe Franciosi         QIOChannel *ioc_local = QIO_CHANNEL(cioc);
409d8d3c7ccSFelipe Franciosi         qio_channel_set_feature(ioc_local, QIO_CHANNEL_FEATURE_FD_PASS);
410bead5994SDaniel P. Berrange     }
411bead5994SDaniel P. Berrange #endif /* WIN32 */
412bead5994SDaniel P. Berrange 
41384615a19Smanish.mishra     qio_channel_set_feature(QIO_CHANNEL(cioc),
41484615a19Smanish.mishra                             QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
41584615a19Smanish.mishra 
416559607eaSDaniel P. Berrange     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
417559607eaSDaniel P. Berrange     return cioc;
418559607eaSDaniel P. Berrange 
419559607eaSDaniel P. Berrange  error:
420559607eaSDaniel P. Berrange     object_unref(OBJECT(cioc));
421559607eaSDaniel P. Berrange     return NULL;
422559607eaSDaniel P. Berrange }
423559607eaSDaniel P. Berrange 
/*
 * Instance initialiser: mark the channel as having no descriptor.
 */
static void qio_channel_socket_init(Object *obj)
{
    QIO_CHANNEL_SOCKET(obj)->fd = -1;
}
429559607eaSDaniel P. Berrange 
/*
 * Instance finaliser: release the descriptor held by the channel.
 *
 * Nothing happens when no descriptor is attached.  Otherwise, a
 * channel flagged with QIO_CHANNEL_FEATURE_LISTEN first gets
 * socket_listen_cleanup() called on its descriptor (a failure there
 * is reported, not propagated), then the descriptor is closed and the
 * fd field reset to -1.
 */
static void qio_channel_socket_finalize(Object *obj)
{
    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(obj);

    if (sioc->fd == -1) {
        return;
    }

    if (qio_channel_has_feature(QIO_CHANNEL(sioc),
                                QIO_CHANNEL_FEATURE_LISTEN)) {
        Error *cleanup_err = NULL;

        socket_listen_cleanup(sioc->fd, &cleanup_err);
        if (cleanup_err) {
            error_report_err(cleanup_err);
        }
    }

#ifdef WIN32
    qemu_socket_unselect(sioc->fd, NULL);
#endif
    close(sioc->fd);
    sioc->fd = -1;
}
452559607eaSDaniel P. Berrange 
453559607eaSDaniel P. Berrange 
454559607eaSDaniel P. Berrange #ifndef WIN32
qio_channel_socket_copy_fds(struct msghdr * msg,int ** fds,size_t * nfds)455559607eaSDaniel P. Berrange static void qio_channel_socket_copy_fds(struct msghdr *msg,
456559607eaSDaniel P. Berrange                                         int **fds, size_t *nfds)
457559607eaSDaniel P. Berrange {
458559607eaSDaniel P. Berrange     struct cmsghdr *cmsg;
459559607eaSDaniel P. Berrange 
460559607eaSDaniel P. Berrange     *nfds = 0;
461559607eaSDaniel P. Berrange     *fds = NULL;
462559607eaSDaniel P. Berrange 
463559607eaSDaniel P. Berrange     for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
464559607eaSDaniel P. Berrange         int fd_size, i;
465559607eaSDaniel P. Berrange         int gotfds;
466559607eaSDaniel P. Berrange 
467559607eaSDaniel P. Berrange         if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
468559607eaSDaniel P. Berrange             cmsg->cmsg_level != SOL_SOCKET ||
469559607eaSDaniel P. Berrange             cmsg->cmsg_type != SCM_RIGHTS) {
470559607eaSDaniel P. Berrange             continue;
471559607eaSDaniel P. Berrange         }
472559607eaSDaniel P. Berrange 
473559607eaSDaniel P. Berrange         fd_size = cmsg->cmsg_len - CMSG_LEN(0);
474559607eaSDaniel P. Berrange 
475559607eaSDaniel P. Berrange         if (!fd_size) {
476559607eaSDaniel P. Berrange             continue;
477559607eaSDaniel P. Berrange         }
478559607eaSDaniel P. Berrange 
479559607eaSDaniel P. Berrange         gotfds = fd_size / sizeof(int);
480559607eaSDaniel P. Berrange         *fds = g_renew(int, *fds, *nfds + gotfds);
481559607eaSDaniel P. Berrange         memcpy(*fds + *nfds, CMSG_DATA(cmsg), fd_size);
482559607eaSDaniel P. Berrange 
483559607eaSDaniel P. Berrange         for (i = 0; i < gotfds; i++) {
484559607eaSDaniel P. Berrange             int fd = (*fds)[*nfds + i];
485559607eaSDaniel P. Berrange             if (fd < 0) {
486559607eaSDaniel P. Berrange                 continue;
487559607eaSDaniel P. Berrange             }
488559607eaSDaniel P. Berrange 
489559607eaSDaniel P. Berrange             /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
490ff5927baSMarc-André Lureau             qemu_socket_set_block(fd);
491559607eaSDaniel P. Berrange 
492559607eaSDaniel P. Berrange #ifndef MSG_CMSG_CLOEXEC
493559607eaSDaniel P. Berrange             qemu_set_cloexec(fd);
494559607eaSDaniel P. Berrange #endif
495559607eaSDaniel P. Berrange         }
496559607eaSDaniel P. Berrange         *nfds += gotfds;
497559607eaSDaniel P. Berrange     }
498559607eaSDaniel P. Berrange }
499559607eaSDaniel P. Berrange 
500559607eaSDaniel P. Berrange 
qio_channel_socket_readv(QIOChannel * ioc,const struct iovec * iov,size_t niov,int ** fds,size_t * nfds,int flags,Error ** errp)501559607eaSDaniel P. Berrange static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
502559607eaSDaniel P. Berrange                                         const struct iovec *iov,
503559607eaSDaniel P. Berrange                                         size_t niov,
504559607eaSDaniel P. Berrange                                         int **fds,
505559607eaSDaniel P. Berrange                                         size_t *nfds,
50684615a19Smanish.mishra                                         int flags,
507559607eaSDaniel P. Berrange                                         Error **errp)
508559607eaSDaniel P. Berrange {
509559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
510559607eaSDaniel P. Berrange     ssize_t ret;
511559607eaSDaniel P. Berrange     struct msghdr msg = { NULL, };
512559607eaSDaniel P. Berrange     char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
513559607eaSDaniel P. Berrange     int sflags = 0;
514559607eaSDaniel P. Berrange 
515ccf1e2dcSDaniel P. Berrange     memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
516ccf1e2dcSDaniel P. Berrange 
517559607eaSDaniel P. Berrange     msg.msg_iov = (struct iovec *)iov;
518559607eaSDaniel P. Berrange     msg.msg_iovlen = niov;
519559607eaSDaniel P. Berrange     if (fds && nfds) {
520559607eaSDaniel P. Berrange         msg.msg_control = control;
521559607eaSDaniel P. Berrange         msg.msg_controllen = sizeof(control);
522d80f54ceSDr. David Alan Gilbert #ifdef MSG_CMSG_CLOEXEC
523d80f54ceSDr. David Alan Gilbert         sflags |= MSG_CMSG_CLOEXEC;
524d80f54ceSDr. David Alan Gilbert #endif
525d80f54ceSDr. David Alan Gilbert 
526559607eaSDaniel P. Berrange     }
527559607eaSDaniel P. Berrange 
52884615a19Smanish.mishra     if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
52984615a19Smanish.mishra         sflags |= MSG_PEEK;
53084615a19Smanish.mishra     }
53184615a19Smanish.mishra 
532559607eaSDaniel P. Berrange  retry:
533559607eaSDaniel P. Berrange     ret = recvmsg(sioc->fd, &msg, sflags);
534559607eaSDaniel P. Berrange     if (ret < 0) {
535b16a44e1SDaniel P. Berrange         if (errno == EAGAIN) {
536559607eaSDaniel P. Berrange             return QIO_CHANNEL_ERR_BLOCK;
537559607eaSDaniel P. Berrange         }
538b16a44e1SDaniel P. Berrange         if (errno == EINTR) {
539559607eaSDaniel P. Berrange             goto retry;
540559607eaSDaniel P. Berrange         }
541559607eaSDaniel P. Berrange 
542b16a44e1SDaniel P. Berrange         error_setg_errno(errp, errno,
543559607eaSDaniel P. Berrange                          "Unable to read from socket");
544559607eaSDaniel P. Berrange         return -1;
545559607eaSDaniel P. Berrange     }
546559607eaSDaniel P. Berrange 
547559607eaSDaniel P. Berrange     if (fds && nfds) {
548559607eaSDaniel P. Berrange         qio_channel_socket_copy_fds(&msg, fds, nfds);
549559607eaSDaniel P. Berrange     }
550559607eaSDaniel P. Berrange 
551559607eaSDaniel P. Berrange     return ret;
552559607eaSDaniel P. Berrange }
553559607eaSDaniel P. Berrange 
qio_channel_socket_writev(QIOChannel * ioc,const struct iovec * iov,size_t niov,int * fds,size_t nfds,int flags,Error ** errp)554559607eaSDaniel P. Berrange static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
555559607eaSDaniel P. Berrange                                          const struct iovec *iov,
556559607eaSDaniel P. Berrange                                          size_t niov,
557559607eaSDaniel P. Berrange                                          int *fds,
558559607eaSDaniel P. Berrange                                          size_t nfds,
559b88651cbSLeonardo Bras                                          int flags,
560559607eaSDaniel P. Berrange                                          Error **errp)
561559607eaSDaniel P. Berrange {
562559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
563559607eaSDaniel P. Berrange     ssize_t ret;
564559607eaSDaniel P. Berrange     struct msghdr msg = { NULL, };
565ccf1e2dcSDaniel P. Berrange     char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
5667b3c618aSDaniel P. Berrange     size_t fdsize = sizeof(int) * nfds;
5677b3c618aSDaniel P. Berrange     struct cmsghdr *cmsg;
5682bc58ffcSLeonardo Bras     int sflags = 0;
569559607eaSDaniel P. Berrange 
570ccf1e2dcSDaniel P. Berrange     memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
571ccf1e2dcSDaniel P. Berrange 
572559607eaSDaniel P. Berrange     msg.msg_iov = (struct iovec *)iov;
573559607eaSDaniel P. Berrange     msg.msg_iovlen = niov;
574559607eaSDaniel P. Berrange 
575559607eaSDaniel P. Berrange     if (nfds) {
576559607eaSDaniel P. Berrange         if (nfds > SOCKET_MAX_FDS) {
577cc75a50cSDaniel P. Berrange             error_setg_errno(errp, EINVAL,
578559607eaSDaniel P. Berrange                              "Only %d FDs can be sent, got %zu",
579559607eaSDaniel P. Berrange                              SOCKET_MAX_FDS, nfds);
580559607eaSDaniel P. Berrange             return -1;
581559607eaSDaniel P. Berrange         }
582559607eaSDaniel P. Berrange 
583559607eaSDaniel P. Berrange         msg.msg_control = control;
584559607eaSDaniel P. Berrange         msg.msg_controllen = CMSG_SPACE(sizeof(int) * nfds);
585559607eaSDaniel P. Berrange 
586559607eaSDaniel P. Berrange         cmsg = CMSG_FIRSTHDR(&msg);
587559607eaSDaniel P. Berrange         cmsg->cmsg_len = CMSG_LEN(fdsize);
588559607eaSDaniel P. Berrange         cmsg->cmsg_level = SOL_SOCKET;
589559607eaSDaniel P. Berrange         cmsg->cmsg_type = SCM_RIGHTS;
590559607eaSDaniel P. Berrange         memcpy(CMSG_DATA(cmsg), fds, fdsize);
591559607eaSDaniel P. Berrange     }
592559607eaSDaniel P. Berrange 
5932bc58ffcSLeonardo Bras     if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
594803ca43eSLeonardo Bras #ifdef QEMU_MSG_ZEROCOPY
5952bc58ffcSLeonardo Bras         sflags = MSG_ZEROCOPY;
596803ca43eSLeonardo Bras #else
597803ca43eSLeonardo Bras         /*
598803ca43eSLeonardo Bras          * We expect QIOChannel class entry point to have
599803ca43eSLeonardo Bras          * blocked this code path already
600803ca43eSLeonardo Bras          */
601803ca43eSLeonardo Bras         g_assert_not_reached();
6022bc58ffcSLeonardo Bras #endif
603803ca43eSLeonardo Bras     }
6042bc58ffcSLeonardo Bras 
605559607eaSDaniel P. Berrange  retry:
6062bc58ffcSLeonardo Bras     ret = sendmsg(sioc->fd, &msg, sflags);
607559607eaSDaniel P. Berrange     if (ret <= 0) {
6082bc58ffcSLeonardo Bras         switch (errno) {
6092bc58ffcSLeonardo Bras         case EAGAIN:
610559607eaSDaniel P. Berrange             return QIO_CHANNEL_ERR_BLOCK;
6112bc58ffcSLeonardo Bras         case EINTR:
612559607eaSDaniel P. Berrange             goto retry;
6132bc58ffcSLeonardo Bras         case ENOBUFS:
614803ca43eSLeonardo Bras             if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
6152bc58ffcSLeonardo Bras                 error_setg_errno(errp, errno,
6162bc58ffcSLeonardo Bras                                  "Process can't lock enough memory for using MSG_ZEROCOPY");
6172bc58ffcSLeonardo Bras                 return -1;
618559607eaSDaniel P. Berrange             }
6192bc58ffcSLeonardo Bras             break;
6202bc58ffcSLeonardo Bras         }
6212bc58ffcSLeonardo Bras 
622b16a44e1SDaniel P. Berrange         error_setg_errno(errp, errno,
623559607eaSDaniel P. Berrange                          "Unable to write to socket");
624559607eaSDaniel P. Berrange         return -1;
625559607eaSDaniel P. Berrange     }
6264f5a0971SLeonardo Bras 
6274f5a0971SLeonardo Bras     if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
6284f5a0971SLeonardo Bras         sioc->zero_copy_queued++;
6294f5a0971SLeonardo Bras     }
6304f5a0971SLeonardo Bras 
631559607eaSDaniel P. Berrange     return ret;
632559607eaSDaniel P. Berrange }
633559607eaSDaniel P. Berrange #else /* WIN32 */
qio_channel_socket_readv(QIOChannel * ioc,const struct iovec * iov,size_t niov,int ** fds,size_t * nfds,int flags,Error ** errp)634559607eaSDaniel P. Berrange static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
635559607eaSDaniel P. Berrange                                         const struct iovec *iov,
636559607eaSDaniel P. Berrange                                         size_t niov,
637559607eaSDaniel P. Berrange                                         int **fds,
638559607eaSDaniel P. Berrange                                         size_t *nfds,
63984615a19Smanish.mishra                                         int flags,
640559607eaSDaniel P. Berrange                                         Error **errp)
641559607eaSDaniel P. Berrange {
642559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
643559607eaSDaniel P. Berrange     ssize_t done = 0;
644559607eaSDaniel P. Berrange     ssize_t i;
64584615a19Smanish.mishra     int sflags = 0;
64684615a19Smanish.mishra 
64784615a19Smanish.mishra     if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
64884615a19Smanish.mishra         sflags |= MSG_PEEK;
64984615a19Smanish.mishra     }
650559607eaSDaniel P. Berrange 
651559607eaSDaniel P. Berrange     for (i = 0; i < niov; i++) {
652559607eaSDaniel P. Berrange         ssize_t ret;
653559607eaSDaniel P. Berrange     retry:
654559607eaSDaniel P. Berrange         ret = recv(sioc->fd,
655559607eaSDaniel P. Berrange                    iov[i].iov_base,
656559607eaSDaniel P. Berrange                    iov[i].iov_len,
65784615a19Smanish.mishra                    sflags);
658559607eaSDaniel P. Berrange         if (ret < 0) {
659b16a44e1SDaniel P. Berrange             if (errno == EAGAIN) {
660559607eaSDaniel P. Berrange                 if (done) {
661559607eaSDaniel P. Berrange                     return done;
662559607eaSDaniel P. Berrange                 } else {
663559607eaSDaniel P. Berrange                     return QIO_CHANNEL_ERR_BLOCK;
664559607eaSDaniel P. Berrange                 }
665b16a44e1SDaniel P. Berrange             } else if (errno == EINTR) {
666559607eaSDaniel P. Berrange                 goto retry;
667559607eaSDaniel P. Berrange             } else {
668b16a44e1SDaniel P. Berrange                 error_setg_errno(errp, errno,
6695151d23eSDaniel P. Berrange                                  "Unable to read from socket");
670559607eaSDaniel P. Berrange                 return -1;
671559607eaSDaniel P. Berrange             }
672559607eaSDaniel P. Berrange         }
673559607eaSDaniel P. Berrange         done += ret;
674559607eaSDaniel P. Berrange         if (ret < iov[i].iov_len) {
675559607eaSDaniel P. Berrange             return done;
676559607eaSDaniel P. Berrange         }
677559607eaSDaniel P. Berrange     }
678559607eaSDaniel P. Berrange 
679559607eaSDaniel P. Berrange     return done;
680559607eaSDaniel P. Berrange }
681559607eaSDaniel P. Berrange 
qio_channel_socket_writev(QIOChannel * ioc,const struct iovec * iov,size_t niov,int * fds,size_t nfds,int flags,Error ** errp)682559607eaSDaniel P. Berrange static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
683559607eaSDaniel P. Berrange                                          const struct iovec *iov,
684559607eaSDaniel P. Berrange                                          size_t niov,
685559607eaSDaniel P. Berrange                                          int *fds,
686559607eaSDaniel P. Berrange                                          size_t nfds,
687b88651cbSLeonardo Bras                                          int flags,
688559607eaSDaniel P. Berrange                                          Error **errp)
689559607eaSDaniel P. Berrange {
690559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
691559607eaSDaniel P. Berrange     ssize_t done = 0;
692559607eaSDaniel P. Berrange     ssize_t i;
693559607eaSDaniel P. Berrange 
694559607eaSDaniel P. Berrange     for (i = 0; i < niov; i++) {
695559607eaSDaniel P. Berrange         ssize_t ret;
696559607eaSDaniel P. Berrange     retry:
697559607eaSDaniel P. Berrange         ret = send(sioc->fd,
698559607eaSDaniel P. Berrange                    iov[i].iov_base,
699559607eaSDaniel P. Berrange                    iov[i].iov_len,
700559607eaSDaniel P. Berrange                    0);
701559607eaSDaniel P. Berrange         if (ret < 0) {
702b16a44e1SDaniel P. Berrange             if (errno == EAGAIN) {
703559607eaSDaniel P. Berrange                 if (done) {
704559607eaSDaniel P. Berrange                     return done;
705559607eaSDaniel P. Berrange                 } else {
706559607eaSDaniel P. Berrange                     return QIO_CHANNEL_ERR_BLOCK;
707559607eaSDaniel P. Berrange                 }
708b16a44e1SDaniel P. Berrange             } else if (errno == EINTR) {
709559607eaSDaniel P. Berrange                 goto retry;
710559607eaSDaniel P. Berrange             } else {
711b16a44e1SDaniel P. Berrange                 error_setg_errno(errp, errno,
712559607eaSDaniel P. Berrange                                  "Unable to write to socket");
713559607eaSDaniel P. Berrange                 return -1;
714559607eaSDaniel P. Berrange             }
715559607eaSDaniel P. Berrange         }
716559607eaSDaniel P. Berrange         done += ret;
717559607eaSDaniel P. Berrange         if (ret < iov[i].iov_len) {
718559607eaSDaniel P. Berrange             return done;
719559607eaSDaniel P. Berrange         }
720559607eaSDaniel P. Berrange     }
721559607eaSDaniel P. Berrange 
722559607eaSDaniel P. Berrange     return done;
723559607eaSDaniel P. Berrange }
724559607eaSDaniel P. Berrange #endif /* WIN32 */
725559607eaSDaniel P. Berrange 
7262bc58ffcSLeonardo Bras 
7272bc58ffcSLeonardo Bras #ifdef QEMU_MSG_ZEROCOPY
qio_channel_socket_flush(QIOChannel * ioc,Error ** errp)7282bc58ffcSLeonardo Bras static int qio_channel_socket_flush(QIOChannel *ioc,
7292bc58ffcSLeonardo Bras                                     Error **errp)
7302bc58ffcSLeonardo Bras {
7312bc58ffcSLeonardo Bras     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
7322bc58ffcSLeonardo Bras     struct msghdr msg = {};
7332bc58ffcSLeonardo Bras     struct sock_extended_err *serr;
7342bc58ffcSLeonardo Bras     struct cmsghdr *cm;
7352bc58ffcSLeonardo Bras     char control[CMSG_SPACE(sizeof(*serr))];
7362bc58ffcSLeonardo Bras     int received;
737927f93e0SLeonardo Bras     int ret;
738927f93e0SLeonardo Bras 
739927f93e0SLeonardo Bras     if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
740927f93e0SLeonardo Bras         return 0;
741927f93e0SLeonardo Bras     }
7422bc58ffcSLeonardo Bras 
7432bc58ffcSLeonardo Bras     msg.msg_control = control;
7442bc58ffcSLeonardo Bras     msg.msg_controllen = sizeof(control);
7452bc58ffcSLeonardo Bras     memset(control, 0, sizeof(control));
7462bc58ffcSLeonardo Bras 
747927f93e0SLeonardo Bras     ret = 1;
748927f93e0SLeonardo Bras 
7492bc58ffcSLeonardo Bras     while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
7502bc58ffcSLeonardo Bras         received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
7512bc58ffcSLeonardo Bras         if (received < 0) {
7522bc58ffcSLeonardo Bras             switch (errno) {
7532bc58ffcSLeonardo Bras             case EAGAIN:
7542bc58ffcSLeonardo Bras                 /* Nothing on errqueue, wait until something is available */
7552bc58ffcSLeonardo Bras                 qio_channel_wait(ioc, G_IO_ERR);
7562bc58ffcSLeonardo Bras                 continue;
7572bc58ffcSLeonardo Bras             case EINTR:
7582bc58ffcSLeonardo Bras                 continue;
7592bc58ffcSLeonardo Bras             default:
7602bc58ffcSLeonardo Bras                 error_setg_errno(errp, errno,
7612bc58ffcSLeonardo Bras                                  "Unable to read errqueue");
7622bc58ffcSLeonardo Bras                 return -1;
7632bc58ffcSLeonardo Bras             }
7642bc58ffcSLeonardo Bras         }
7652bc58ffcSLeonardo Bras 
7662bc58ffcSLeonardo Bras         cm = CMSG_FIRSTHDR(&msg);
7675258a7e2SLeonardo Bras         if (cm->cmsg_level != SOL_IP   && cm->cmsg_type != IP_RECVERR &&
7685258a7e2SLeonardo Bras             cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
7692bc58ffcSLeonardo Bras             error_setg_errno(errp, EPROTOTYPE,
7702bc58ffcSLeonardo Bras                              "Wrong cmsg in errqueue");
7712bc58ffcSLeonardo Bras             return -1;
7722bc58ffcSLeonardo Bras         }
7732bc58ffcSLeonardo Bras 
7742bc58ffcSLeonardo Bras         serr = (void *) CMSG_DATA(cm);
7752bc58ffcSLeonardo Bras         if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
7762bc58ffcSLeonardo Bras             error_setg_errno(errp, serr->ee_errno,
7772bc58ffcSLeonardo Bras                              "Error on socket");
7782bc58ffcSLeonardo Bras             return -1;
7792bc58ffcSLeonardo Bras         }
7802bc58ffcSLeonardo Bras         if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
7812bc58ffcSLeonardo Bras             error_setg_errno(errp, serr->ee_origin,
7822bc58ffcSLeonardo Bras                              "Error not from zero copy");
7832bc58ffcSLeonardo Bras             return -1;
7842bc58ffcSLeonardo Bras         }
78535bafa95SVladimir Sementsov-Ogievskiy         if (serr->ee_data < serr->ee_info) {
78635bafa95SVladimir Sementsov-Ogievskiy             error_setg_errno(errp, serr->ee_origin,
78735bafa95SVladimir Sementsov-Ogievskiy                              "Wrong notification bounds");
78835bafa95SVladimir Sementsov-Ogievskiy             return -1;
78935bafa95SVladimir Sementsov-Ogievskiy         }
7902bc58ffcSLeonardo Bras 
7912bc58ffcSLeonardo Bras         /* No errors, count successfully finished sendmsg()*/
7922bc58ffcSLeonardo Bras         sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
7932bc58ffcSLeonardo Bras 
7942bc58ffcSLeonardo Bras         /* If any sendmsg() succeeded using zero copy, return 0 at the end */
7952bc58ffcSLeonardo Bras         if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
7962bc58ffcSLeonardo Bras             ret = 0;
7972bc58ffcSLeonardo Bras         }
7982bc58ffcSLeonardo Bras     }
7992bc58ffcSLeonardo Bras 
8002bc58ffcSLeonardo Bras     return ret;
8012bc58ffcSLeonardo Bras }
8022bc58ffcSLeonardo Bras 
8032bc58ffcSLeonardo Bras #endif /* QEMU_MSG_ZEROCOPY */
8042bc58ffcSLeonardo Bras 
805559607eaSDaniel P. Berrange static int
qio_channel_socket_set_blocking(QIOChannel * ioc,bool enabled,Error ** errp)806559607eaSDaniel P. Berrange qio_channel_socket_set_blocking(QIOChannel *ioc,
807559607eaSDaniel P. Berrange                                 bool enabled,
808559607eaSDaniel P. Berrange                                 Error **errp)
809559607eaSDaniel P. Berrange {
810559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
811559607eaSDaniel P. Berrange 
812559607eaSDaniel P. Berrange     if (enabled) {
813ff5927baSMarc-André Lureau         qemu_socket_set_block(sioc->fd);
814559607eaSDaniel P. Berrange     } else {
815ff5927baSMarc-André Lureau         qemu_socket_set_nonblock(sioc->fd);
816559607eaSDaniel P. Berrange     }
817559607eaSDaniel P. Berrange     return 0;
818559607eaSDaniel P. Berrange }
819559607eaSDaniel P. Berrange 
820559607eaSDaniel P. Berrange 
821559607eaSDaniel P. Berrange static void
qio_channel_socket_set_delay(QIOChannel * ioc,bool enabled)822559607eaSDaniel P. Berrange qio_channel_socket_set_delay(QIOChannel *ioc,
823559607eaSDaniel P. Berrange                              bool enabled)
824559607eaSDaniel P. Berrange {
825559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
826559607eaSDaniel P. Berrange     int v = enabled ? 0 : 1;
827559607eaSDaniel P. Berrange 
828e7b79428SMarc-André Lureau     setsockopt(sioc->fd,
829559607eaSDaniel P. Berrange                IPPROTO_TCP, TCP_NODELAY,
830559607eaSDaniel P. Berrange                &v, sizeof(v));
831559607eaSDaniel P. Berrange }
832559607eaSDaniel P. Berrange 
833559607eaSDaniel P. Berrange 
834559607eaSDaniel P. Berrange static void
qio_channel_socket_set_cork(QIOChannel * ioc,bool enabled)835559607eaSDaniel P. Berrange qio_channel_socket_set_cork(QIOChannel *ioc,
836559607eaSDaniel P. Berrange                             bool enabled)
837559607eaSDaniel P. Berrange {
838559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
839559607eaSDaniel P. Berrange     int v = enabled ? 1 : 0;
840559607eaSDaniel P. Berrange 
841559607eaSDaniel P. Berrange     socket_set_cork(sioc->fd, v);
842559607eaSDaniel P. Berrange }
843559607eaSDaniel P. Berrange 
844*95fa0c79SAnthony Harivel static int
qio_channel_socket_get_peerpid(QIOChannel * ioc,unsigned int * pid,Error ** errp)845*95fa0c79SAnthony Harivel qio_channel_socket_get_peerpid(QIOChannel *ioc,
846*95fa0c79SAnthony Harivel                                unsigned int *pid,
847*95fa0c79SAnthony Harivel                                Error **errp)
848*95fa0c79SAnthony Harivel {
849*95fa0c79SAnthony Harivel #ifdef CONFIG_LINUX
850*95fa0c79SAnthony Harivel     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
851*95fa0c79SAnthony Harivel     Error *err = NULL;
852*95fa0c79SAnthony Harivel     socklen_t len = sizeof(struct ucred);
853*95fa0c79SAnthony Harivel 
854*95fa0c79SAnthony Harivel     struct ucred cred;
855*95fa0c79SAnthony Harivel     if (getsockopt(sioc->fd,
856*95fa0c79SAnthony Harivel                SOL_SOCKET, SO_PEERCRED,
857*95fa0c79SAnthony Harivel                &cred, &len) == -1) {
858*95fa0c79SAnthony Harivel         error_setg_errno(&err, errno, "Unable to get peer credentials");
859*95fa0c79SAnthony Harivel         error_propagate(errp, err);
860*95fa0c79SAnthony Harivel         *pid = -1;
861*95fa0c79SAnthony Harivel         return -1;
862*95fa0c79SAnthony Harivel     }
863*95fa0c79SAnthony Harivel     *pid = (unsigned int)cred.pid;
864*95fa0c79SAnthony Harivel     return 0;
865*95fa0c79SAnthony Harivel #else
866*95fa0c79SAnthony Harivel     error_setg(errp, "Unsupported feature");
867*95fa0c79SAnthony Harivel     *pid = -1;
868*95fa0c79SAnthony Harivel     return -1;
869*95fa0c79SAnthony Harivel #endif
870*95fa0c79SAnthony Harivel }
871559607eaSDaniel P. Berrange 
872559607eaSDaniel P. Berrange static int
qio_channel_socket_close(QIOChannel * ioc,Error ** errp)873559607eaSDaniel P. Berrange qio_channel_socket_close(QIOChannel *ioc,
874559607eaSDaniel P. Berrange                          Error **errp)
875559607eaSDaniel P. Berrange {
876559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
877d66f78e1SPavel Balaev     int rc = 0;
878fdceb4abSMarkus Armbruster     Error *err = NULL;
879559607eaSDaniel P. Berrange 
880a5897205SPaolo Bonzini     if (sioc->fd != -1) {
881a5897205SPaolo Bonzini #ifdef WIN32
882a4aafea2SMarc-André Lureau         qemu_socket_unselect(sioc->fd, NULL);
883a5897205SPaolo Bonzini #endif
88473564c40SDaniel P. Berrangé         if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) {
88573564c40SDaniel P. Berrangé             socket_listen_cleanup(sioc->fd, errp);
88673564c40SDaniel P. Berrangé         }
88773564c40SDaniel P. Berrangé 
88825657fc6SMarc-André Lureau         if (close(sioc->fd) < 0) {
889559607eaSDaniel P. Berrange             sioc->fd = -1;
890fdceb4abSMarkus Armbruster             error_setg_errno(&err, errno, "Unable to close socket");
891fdceb4abSMarkus Armbruster             error_propagate(errp, err);
892559607eaSDaniel P. Berrange             return -1;
893559607eaSDaniel P. Berrange         }
894559607eaSDaniel P. Berrange         sioc->fd = -1;
895d66f78e1SPavel Balaev     }
896d66f78e1SPavel Balaev     return rc;
897559607eaSDaniel P. Berrange }
898559607eaSDaniel P. Berrange 
899559607eaSDaniel P. Berrange static int
qio_channel_socket_shutdown(QIOChannel * ioc,QIOChannelShutdown how,Error ** errp)900559607eaSDaniel P. Berrange qio_channel_socket_shutdown(QIOChannel *ioc,
901559607eaSDaniel P. Berrange                             QIOChannelShutdown how,
902559607eaSDaniel P. Berrange                             Error **errp)
903559607eaSDaniel P. Berrange {
904559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
905559607eaSDaniel P. Berrange     int sockhow;
906559607eaSDaniel P. Berrange 
907559607eaSDaniel P. Berrange     switch (how) {
908559607eaSDaniel P. Berrange     case QIO_CHANNEL_SHUTDOWN_READ:
909559607eaSDaniel P. Berrange         sockhow = SHUT_RD;
910559607eaSDaniel P. Berrange         break;
911559607eaSDaniel P. Berrange     case QIO_CHANNEL_SHUTDOWN_WRITE:
912559607eaSDaniel P. Berrange         sockhow = SHUT_WR;
913559607eaSDaniel P. Berrange         break;
914559607eaSDaniel P. Berrange     case QIO_CHANNEL_SHUTDOWN_BOTH:
915559607eaSDaniel P. Berrange     default:
916559607eaSDaniel P. Berrange         sockhow = SHUT_RDWR;
917559607eaSDaniel P. Berrange         break;
918559607eaSDaniel P. Berrange     }
919559607eaSDaniel P. Berrange 
920559607eaSDaniel P. Berrange     if (shutdown(sioc->fd, sockhow) < 0) {
921b16a44e1SDaniel P. Berrange         error_setg_errno(errp, errno,
922559607eaSDaniel P. Berrange                          "Unable to shutdown socket");
923559607eaSDaniel P. Berrange         return -1;
924559607eaSDaniel P. Berrange     }
925559607eaSDaniel P. Berrange     return 0;
926559607eaSDaniel P. Berrange }
927559607eaSDaniel P. Berrange 
/*
 * io_set_aio_fd_handler callback: hand the read and write handlers to
 * qio_channel_util_set_aio_fd_handler(), using the channel's single
 * socket descriptor for both directions.
 */
static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
                                                  AioContext *read_ctx,
                                                  IOHandler *io_read,
                                                  AioContext *write_ctx,
                                                  IOHandler *io_write,
                                                  void *opaque)
{
    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
    int fd = sioc->fd;

    qio_channel_util_set_aio_fd_handler(fd, read_ctx, io_read,
                                        fd, write_ctx, io_write,
                                        opaque);
}
941bf88c124SPaolo Bonzini 
qio_channel_socket_create_watch(QIOChannel * ioc,GIOCondition condition)942559607eaSDaniel P. Berrange static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
943559607eaSDaniel P. Berrange                                                 GIOCondition condition)
944559607eaSDaniel P. Berrange {
945559607eaSDaniel P. Berrange     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
946b83b68a0SPaolo Bonzini     return qio_channel_create_socket_watch(ioc,
947559607eaSDaniel P. Berrange                                            sioc->fd,
948559607eaSDaniel P. Berrange                                            condition);
949559607eaSDaniel P. Berrange }
950559607eaSDaniel P. Berrange 
/*
 * Class initialiser: install the socket implementations of the
 * QIOChannelClass callbacks.  io_flush is only provided when
 * QEMU_MSG_ZEROCOPY is defined.
 */
static void qio_channel_socket_class_init(ObjectClass *klass,
                                          void *class_data G_GNUC_UNUSED)
{
    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);

    /* Data transfer */
    ioc_klass->io_readv = qio_channel_socket_readv;
    ioc_klass->io_writev = qio_channel_socket_writev;
#ifdef QEMU_MSG_ZEROCOPY
    ioc_klass->io_flush = qio_channel_socket_flush;
#endif

    /* Socket options */
    ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
    ioc_klass->io_set_delay = qio_channel_socket_set_delay;
    ioc_klass->io_set_cork = qio_channel_socket_set_cork;
    ioc_klass->io_peerpid = qio_channel_socket_get_peerpid;

    /* Event notification */
    ioc_klass->io_create_watch = qio_channel_socket_create_watch;
    ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;

    /* Teardown */
    ioc_klass->io_shutdown = qio_channel_socket_shutdown;
    ioc_klass->io_close = qio_channel_socket_close;
}
970559607eaSDaniel P. Berrange 
971559607eaSDaniel P. Berrange static const TypeInfo qio_channel_socket_info = {
972559607eaSDaniel P. Berrange     .parent = TYPE_QIO_CHANNEL,
973559607eaSDaniel P. Berrange     .name = TYPE_QIO_CHANNEL_SOCKET,
974559607eaSDaniel P. Berrange     .instance_size = sizeof(QIOChannelSocket),
975559607eaSDaniel P. Berrange     .instance_init = qio_channel_socket_init,
976559607eaSDaniel P. Berrange     .instance_finalize = qio_channel_socket_finalize,
977559607eaSDaniel P. Berrange     .class_init = qio_channel_socket_class_init,
978559607eaSDaniel P. Berrange };
979559607eaSDaniel P. Berrange 
qio_channel_socket_register_types(void)980559607eaSDaniel P. Berrange static void qio_channel_socket_register_types(void)
981559607eaSDaniel P. Berrange {
982559607eaSDaniel P. Berrange     type_register_static(&qio_channel_socket_info);
983559607eaSDaniel P. Berrange }
984559607eaSDaniel P. Berrange 
985559607eaSDaniel P. Berrange type_init(qio_channel_socket_register_types);
986