xref: /openbmc/linux/io_uring/net.c (revision 17add5ce)
1f9ead18cSJens Axboe // SPDX-License-Identifier: GPL-2.0
2f9ead18cSJens Axboe #include <linux/kernel.h>
3f9ead18cSJens Axboe #include <linux/errno.h>
4f9ead18cSJens Axboe #include <linux/file.h>
5f9ead18cSJens Axboe #include <linux/slab.h>
6f9ead18cSJens Axboe #include <linux/net.h>
7f9ead18cSJens Axboe #include <linux/compat.h>
8f9ead18cSJens Axboe #include <net/compat.h>
9f9ead18cSJens Axboe #include <linux/io_uring.h>
10f9ead18cSJens Axboe 
11f9ead18cSJens Axboe #include <uapi/linux/io_uring.h>
12f9ead18cSJens Axboe 
13f9ead18cSJens Axboe #include "io_uring.h"
143b77495aSJens Axboe #include "kbuf.h"
1543e0bbbdSJens Axboe #include "alloc_cache.h"
16f9ead18cSJens Axboe #include "net.h"
1706a5464bSPavel Begunkov #include "notif.h"
1810c7d33eSPavel Begunkov #include "rsrc.h"
19f9ead18cSJens Axboe 
20f9ead18cSJens Axboe #if defined(CONFIG_NET)
/* Command state for IORING_OP_SHUTDOWN, overlaid on the request's cmd area. */
struct io_shutdown {
	struct file			*file;
	int				how;	/* SHUT_RD/WR/RDWR, taken from sqe->len in prep */
};
25f9ead18cSJens Axboe 
/*
 * Command state for IORING_OP_ACCEPT. The prep/issue handlers are not in
 * this chunk; field meanings below follow the accept4(2) argument names.
 */
struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;		/* peer address out-pointer */
	int __user			*addr_len;	/* in/out length for @addr */
	int				flags;
	u32				file_slot;	/* fixed-file slot, presumably 0 == none — confirm in prep */
	unsigned long			nofile;		/* RLIMIT_NOFILE snapshot — TODO confirm against issue path */
};
34f9ead18cSJens Axboe 
/*
 * Command state for IORING_OP_SOCKET. Handlers are outside this chunk;
 * domain/type/protocol mirror the socket(2) arguments.
 */
struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;	/* fixed-file slot — see io_accept note */
	unsigned long			nofile;
};
44f9ead18cSJens Axboe 
/* Command state for IORING_OP_CONNECT (handlers outside this chunk). */
struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	/* NOTE(review): presumably tracks an async connect already in flight
	 * so retries don't re-issue connect(2) — confirm in io_connect() */
	bool				in_progress;
};
51f9ead18cSJens Axboe 
/*
 * Shared command state for the send/recv family (SEND, SENDMSG, RECV,
 * RECVMSG and the zerocopy variants).
 */
struct io_sr_msg {
	struct file			*file;
	union {
		/* user msghdr for the *MSG opcodes (native or compat ABI) */
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		/* plain data buffer for SEND/RECV */
		void __user			*buf;
	};
	unsigned			len;		/* buffer length for the non-msg variants */
	unsigned			done_io;	/* bytes already transferred across retries */
	unsigned			msg_flags;	/* MSG_* flags from sqe->msg_flags (| MSG_NOSIGNAL) */
	u16				flags;		/* IORING_RECVSEND_* flags from sqe->ioprio */
	/* initialised and used only by !msg send variants */
	u16				addr_len;
	void __user			*addr;
	/* used only for send zerocopy */
	struct io_kiocb 		*notif;
};
6906a5464bSPavel Begunkov 
70*17add5ceSPavel Begunkov static inline bool io_check_multishot(struct io_kiocb *req,
71*17add5ceSPavel Begunkov 				      unsigned int issue_flags)
72*17add5ceSPavel Begunkov {
73*17add5ceSPavel Begunkov 	/*
74*17add5ceSPavel Begunkov 	 * When ->locked_cq is set we only allow to post CQEs from the original
75*17add5ceSPavel Begunkov 	 * task context. Usual request completions will be handled in other
76*17add5ceSPavel Begunkov 	 * generic paths but multipoll may decide to post extra cqes.
77*17add5ceSPavel Begunkov 	 */
78*17add5ceSPavel Begunkov 	return !(issue_flags & IO_URING_F_IOWQ) ||
79*17add5ceSPavel Begunkov 		!(issue_flags & IO_URING_F_MULTISHOT) ||
80*17add5ceSPavel Begunkov 		!req->ctx->task_complete;
81*17add5ceSPavel Begunkov }
82*17add5ceSPavel Begunkov 
83f9ead18cSJens Axboe int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
84f9ead18cSJens Axboe {
85f2ccb5aeSStefan Metzmacher 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
86f9ead18cSJens Axboe 
87f9ead18cSJens Axboe 	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
88f9ead18cSJens Axboe 		     sqe->buf_index || sqe->splice_fd_in))
89f9ead18cSJens Axboe 		return -EINVAL;
90f9ead18cSJens Axboe 
91f9ead18cSJens Axboe 	shutdown->how = READ_ONCE(sqe->len);
92f9ead18cSJens Axboe 	return 0;
93f9ead18cSJens Axboe }
94f9ead18cSJens Axboe 
95f9ead18cSJens Axboe int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
96f9ead18cSJens Axboe {
97f2ccb5aeSStefan Metzmacher 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
98f9ead18cSJens Axboe 	struct socket *sock;
99f9ead18cSJens Axboe 	int ret;
100f9ead18cSJens Axboe 
101f9ead18cSJens Axboe 	if (issue_flags & IO_URING_F_NONBLOCK)
102f9ead18cSJens Axboe 		return -EAGAIN;
103f9ead18cSJens Axboe 
104f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
105f9ead18cSJens Axboe 	if (unlikely(!sock))
106f9ead18cSJens Axboe 		return -ENOTSOCK;
107f9ead18cSJens Axboe 
108f9ead18cSJens Axboe 	ret = __sys_shutdown_sock(sock, shutdown->how);
109f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
110f9ead18cSJens Axboe 	return IOU_OK;
111f9ead18cSJens Axboe }
112f9ead18cSJens Axboe 
113f9ead18cSJens Axboe static bool io_net_retry(struct socket *sock, int flags)
114f9ead18cSJens Axboe {
115f9ead18cSJens Axboe 	if (!(flags & MSG_WAITALL))
116f9ead18cSJens Axboe 		return false;
117f9ead18cSJens Axboe 	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
118f9ead18cSJens Axboe }
119f9ead18cSJens Axboe 
/*
 * Return the request's async msghdr to the per-ring cache instead of
 * freeing it, so the next send/recv can reuse it.
 */
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/*
	 * Nothing to recycle if async data was never allocated; and the cache
	 * may only be touched with the ring lock held (IO_URING_F_UNLOCKED
	 * clear) — same condition the alloc path uses.
	 */
	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}
13343e0bbbdSJens Axboe 
/*
 * Allocate (or pull from the ring-local cache) an io_async_msghdr and
 * attach it to the request. Returns NULL on allocation failure.
 */
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	/* cache is only safe to use with the ring lock held */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->netmsg_cache);
		if (entry) {
			hdr = container_of(entry, struct io_async_msghdr, cache);
			/* recycled entry may hold a stale iovec pointer */
			hdr->free_iov = NULL;
			req->flags |= REQ_F_ASYNC_DATA;
			req->async_data = hdr;
			return hdr;
		}
	}

	/* cache miss or unlocked context: fresh allocation */
	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}
15943e0bbbdSJens Axboe 
static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	/* (passing 0 flags keeps IO_URING_F_UNLOCKED clear, enabling the cache) */
	return io_msg_alloc_async(req, 0);
}
165858c293eSPavel Begunkov 
/*
 * Stash the on-stack msghdr state into request-owned async data so the
 * operation can be retried later. Always returns -EAGAIN on success so
 * the caller bubbles the retry up; -ENOMEM if the copy can't be made.
 */
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	/* state already stashed from a previous round — just retry */
	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	/* after the memcpy, a set msg_name must point into the copy itself */
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;
	/* if were using fast_iov, set it to the new one */
	if (!kmsg->free_iov) {
		size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
		async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}
191f9ead18cSJens Axboe 
/*
 * Copy the user-supplied msghdr (and its iovec) for SENDMSG into @iomsg,
 * pointing msg_name at iomsg's embedded sockaddr storage.
 */
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	/* sendmsg_copy_msghdr() updates free_iov if it allocates an iovec */
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
					&iomsg->free_iov);
}
202f9ead18cSJens Axboe 
/*
 * Async-prep for plain SEND (and zerocopy send): if a destination address
 * was supplied, copy it into kernel-owned async storage up front.
 */
int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	/* no address, or it was already copied on a previous prep */
	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}
217581711c4SPavel Begunkov 
/*
 * Stash the already-kernelised destination address into async data so a
 * retried send doesn't re-read userspace. Returns -EAGAIN to request the
 * retry, or -ENOMEM if the async state can't be allocated.
 */
static int io_setup_async_addr(struct io_kiocb *req,
			      struct sockaddr_storage *addr_storage,
			      unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	/* no address in play, or it's already stashed: plain retry */
	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}
233581711c4SPavel Begunkov 
234f9ead18cSJens Axboe int io_sendmsg_prep_async(struct io_kiocb *req)
235f9ead18cSJens Axboe {
236f9ead18cSJens Axboe 	int ret;
237f9ead18cSJens Axboe 
238858c293eSPavel Begunkov 	if (!io_msg_alloc_async_prep(req))
239858c293eSPavel Begunkov 		return -ENOMEM;
240f9ead18cSJens Axboe 	ret = io_sendmsg_copy_hdr(req, req->async_data);
241f9ead18cSJens Axboe 	if (!ret)
242f9ead18cSJens Axboe 		req->flags |= REQ_F_NEED_CLEANUP;
243f9ead18cSJens Axboe 	return ret;
244f9ead18cSJens Axboe }
245f9ead18cSJens Axboe 
246f9ead18cSJens Axboe void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
247f9ead18cSJens Axboe {
248f9ead18cSJens Axboe 	struct io_async_msghdr *io = req->async_data;
249f9ead18cSJens Axboe 
250f9ead18cSJens Axboe 	kfree(io->free_iov);
251f9ead18cSJens Axboe }
252f9ead18cSJens Axboe 
/*
 * Prepare SEND/SENDMSG from the SQE. SEND may carry a destination address
 * (addr2/addr_len); SENDMSG must leave those SQE fields unused.
 */
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		/* pad must be zero for forward compat */
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	/* only POLL_FIRST is valid for the plain send variants */
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	/* no bytes transferred yet */
	sr->done_io = 0;
	return 0;
}
282f9ead18cSJens Axboe 
/*
 * Issue IORING_OP_SENDMSG. Handles nonblocking retry (state stashed via
 * io_setup_async_msg) and partial MSG_WAITALL transfers (accumulated in
 * sr->done_io and folded into the final result).
 */
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/* reuse stashed state from a prior attempt, else parse the msghdr now */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	/* POLL_FIRST: don't attempt the send until poll says it's writable */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	/* MSG_WAITALL: anything short of the full iter counts as "retry" */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		/* partial progress on a retryable socket: remember and go again */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	/* fold previously-sent bytes into the final result */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
341f9ead18cSJens Axboe 
/*
 * Issue IORING_OP_SEND. Like io_sendmsg() but with a single flat buffer
 * and an optional destination address; on retry only the sockaddr needs
 * stashing (io_setup_async_addr), the buffer position lives in sr itself.
 */
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		/* use the already-kernelised copy if a retry stashed one */
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	/* POLL_FIRST: wait for poll before attempting the send */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	/* MSG_WAITALL: a short send triggers the retry path below */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		/* partial progress: advance the buffer and retry */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fold previously-sent bytes into the final result */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
415f9ead18cSJens Axboe 
4169bb66906SDylan Yudaken static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
4179bb66906SDylan Yudaken {
4189b0fc3c0SDylan Yudaken 	int hdr;
4199bb66906SDylan Yudaken 
4209b0fc3c0SDylan Yudaken 	if (iomsg->namelen < 0)
4219bb66906SDylan Yudaken 		return true;
4229b0fc3c0SDylan Yudaken 	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
4239b0fc3c0SDylan Yudaken 			       iomsg->namelen, &hdr))
4249bb66906SDylan Yudaken 		return true;
4259b0fc3c0SDylan Yudaken 	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
4269bb66906SDylan Yudaken 		return true;
4279bb66906SDylan Yudaken 
4289bb66906SDylan Yudaken 	return false;
4299bb66906SDylan Yudaken }
4309bb66906SDylan Yudaken 
/*
 * Parse the native-ABI user msghdr for RECVMSG into @iomsg, importing or
 * capturing the iovec depending on whether provided buffers are in use.
 */
static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Provided buffers: at most one iovec, and it only supplies a
		 * length hint — the actual buffer is picked at issue time.
		 */
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		/* multishot packs name/control into the buffer: validate sizes */
		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		/* no buffer selection: import the full user iovec now */
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}
476f9ead18cSJens Axboe 
#ifdef CONFIG_COMPAT
/*
 * Compat-ABI twin of __io_recvmsg_copy_hdr(): parse a compat_msghdr and
 * its compat iovec for RECVMSG.
 */
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		/* provided buffers: at most one iovec, used only as a length hint */
		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		/* multishot packs name/control into the buffer: validate sizes */
		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		/* no buffer selection: import the compat iovec now */
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				   UIO_FASTIOV, &iomsg->free_iov,
				   &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif
529f9ead18cSJens Axboe 
/* Dispatch msghdr parsing to the native or compat variant. */
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	/* received source address, if any, lands in iomsg's embedded storage */
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}
542f9ead18cSJens Axboe 
543f9ead18cSJens Axboe int io_recvmsg_prep_async(struct io_kiocb *req)
544f9ead18cSJens Axboe {
545f9ead18cSJens Axboe 	int ret;
546f9ead18cSJens Axboe 
547858c293eSPavel Begunkov 	if (!io_msg_alloc_async_prep(req))
548858c293eSPavel Begunkov 		return -ENOMEM;
549f9ead18cSJens Axboe 	ret = io_recvmsg_copy_hdr(req, req->async_data);
550f9ead18cSJens Axboe 	if (!ret)
551f9ead18cSJens Axboe 		req->flags |= REQ_F_NEED_CLEANUP;
552f9ead18cSJens Axboe 	return ret;
553f9ead18cSJens Axboe }
554f9ead18cSJens Axboe 
/* sqe->ioprio flags accepted by RECV/RECVMSG */
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

/*
 * Prepare RECV/RECVMSG from the SQE. Multishot mode additionally requires
 * provided buffers, forbids MSG_WAITALL, and (for plain RECV) a zero len.
 */
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	/* error-queue reads shouldn't be gated on POLLIN */
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	/* no bytes received yet */
	sr->done_io = 0;
	return 0;
}
591f9ead18cSJens Axboe 
592b3fdea6eSDylan Yudaken static inline void io_recv_prep_retry(struct io_kiocb *req)
593b3fdea6eSDylan Yudaken {
594f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
595b3fdea6eSDylan Yudaken 
596b3fdea6eSDylan Yudaken 	sr->done_io = 0;
597b3fdea6eSDylan Yudaken 	sr->len = 0; /* get from the provided buffer */
598b3fdea6eSDylan Yudaken }
599b3fdea6eSDylan Yudaken 
/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished,
				  unsigned issue_flags)
{
	/* oneshot: just record the result and complete */
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		/* post an intermediate CQE with F_MORE and re-arm the recv */
		if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
			       req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
			io_recv_prep_retry(req);
			return false;
		}
		/* Otherwise stop multishot but use the current result. */
	}

	io_req_set_res(req, *ret, cflags);

	/* from a multishot poll context, tell poll to stop re-triggering */
	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}
633b3fdea6eSDylan Yudaken 
/*
 * Carve the multishot recvmsg header out of the provided buffer. Layout:
 *   io_uring_recvmsg_out | name (namelen bytes) | control | payload
 * On success, *buf/*len are advanced past the header so the caller can
 * receive the payload directly after it.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	/* buffer must at least hold the header */
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		/* control data goes at the tail end of the header region */
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}
6589bb66906SDylan Yudaken 
/*
 * On-stack image of what multishot recvmsg copies to the user buffer:
 * the fixed header immediately followed by the source address.
 * io_recvmsg_multishot() asserts (BUILD_BUG_ON) that there is no
 * padding between the two members.
 */
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};
6639bb66906SDylan Yudaken 
/*
 * Perform one receive for a multishot recvmsg and serialise the result
 * into the user buffer as struct io_uring_recvmsg_out, followed by the
 * (possibly truncated) source address; control data and payload were
 * already routed there by io_recvmsg_prep_multishot().  Returns the
 * total number of bytes written to the user buffer, or a negative
 * error.  *finished tells the caller whether multishot should stop.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	/* receive the name into on-stack storage, not straight to userspace */
	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	/* any error or EOF (0) terminates the multishot sequence */
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		/* bytes of control data actually produced */
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	/* report the full payload length, but never count past our buffer */
	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	/* copy only as much of the address as fits the reserved name area */
	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *      "fromlen shall refer to the value before truncation.."
	 *                      1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	/* total bytes consumed in the user buffer: header region + payload */
	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}
7179bb66906SDylan Yudaken 
/*
 * Issue handler for IORING_OP_RECVMSG, including the multishot variant.
 * Copies/reuses the async msghdr, optionally selects a provided buffer,
 * performs the receive and posts the completion via io_recv_finish(),
 * looping back for another receive while multishot remains active.
 */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/* reuse the copied header from a previous -EAGAIN round, if any */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	/* POLL_FIRST: don't attempt the receive until poll has triggered */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	if (!io_check_multishot(req, issue_flags))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			/* reserve header space at the front of the buffer */
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	/* ask the protocol for remaining queued bytes (SOCK_NONEMPTY hint) */
	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		/* partial receive: remember progress and retry asynchronously */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* WAITALL couldn't be satisfied in full: flag as failed */
		req_set_fail(req);
	}

	/* fold in bytes transferred by earlier partial attempts */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	/* false means the multishot CQE was posted and we should rearm */
	if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}
831f9ead18cSJens Axboe 
/*
 * Issue handler for IORING_OP_RECV (plain single-buffer receive),
 * including the multishot variant.  Builds an on-stack msghdr around
 * the (possibly provided) buffer, receives, and completes through
 * io_recv_finish(), looping while multishot remains active.
 */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	/* POLL_FIRST: don't attempt the receive until poll has triggered */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	/* ask the protocol for remaining queued bytes (SOCK_NONEMPTY hint) */
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		/* partial receive: advance buffer state and retry */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		/* import failure jumps here too; both paths just fail the req */
		req_set_fail(req);
	}

	/* fold in bytes transferred by earlier partial attempts */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	/* false means the multishot CQE was posted and we should rearm */
	if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}
925f9ead18cSJens Axboe 
926b0e9b551SPavel Begunkov void io_send_zc_cleanup(struct io_kiocb *req)
927b48c312bSPavel Begunkov {
928ac9e5784SPavel Begunkov 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
929493108d9SPavel Begunkov 	struct io_async_msghdr *io;
930b48c312bSPavel Begunkov 
931493108d9SPavel Begunkov 	if (req_has_async_data(req)) {
932493108d9SPavel Begunkov 		io = req->async_data;
9334c17a496SPavel Begunkov 		/* might be ->fast_iov if *msg_copy_hdr failed */
9344c17a496SPavel Begunkov 		if (io->free_iov != io->fast_iov)
935493108d9SPavel Begunkov 			kfree(io->free_iov);
936493108d9SPavel Begunkov 	}
937a75155faSPavel Begunkov 	if (zc->notif) {
938b48c312bSPavel Begunkov 		io_notif_flush(zc->notif);
939b48c312bSPavel Begunkov 		zc->notif = NULL;
940b48c312bSPavel Begunkov 	}
941a75155faSPavel Begunkov }
942b48c312bSPavel Begunkov 
94340725d1bSPavel Begunkov #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
94440725d1bSPavel Begunkov #define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
94540725d1bSPavel Begunkov 
/*
 * Prep handler for IORING_OP_SEND_ZC / IORING_OP_SENDMSG_ZC.  Validates
 * the SQE, allocates the notification request that will carry the
 * IORING_CQE_F_NOTIF completion, and captures buffer/address/flags.
 * Note the notif is allocated before the remaining checks so that the
 * REQ_F_NEED_CLEANUP path can release it on a later -EINVAL.
 */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	/* the notif CQE mirrors the request's user_data */
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			io_notif_set_extended(notif);
			io_notif_to_data(notif)->zc_report = true;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		/* clamp under speculation before the array access */
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		/* optional destination address for connectionless sockets */
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		/* SENDMSG_ZC: address lives in the msghdr, no fixed bufs */
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}
101206a5464bSPavel Begunkov 
/*
 * sg_from_iter callback for the plain (non-fixed-buffer) zerocopy path:
 * managed frag references can't be used for an iovec-backed iterator,
 * so downgrade the skb and use the generic zerocopy fill.
 */
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}
1019cd9021e8SPavel Begunkov 
/*
 * sg_from_iter callback for fixed-buffer zerocopy sends: map the bvec
 * iterator's pages straight into skb frags without taking page refs
 * (SKBFL_MANAGED_FRAG_REFS), then account the copied bytes against the
 * socket.  Falls back to the generic path if the skb already carries
 * unmanaged frags.
 */
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	/* first frags on this skb: mark its refs as externally managed */
	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		/* _noacc: fill the frag slot without touching page refcounts */
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	/* ran out of frag slots before consuming the requested length */
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	/* advance the caller's iterator past what we consumed */
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	/* charge the socket's send buffer accounting for the added pages */
	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}
10703ff1a0d3SPavel Begunkov 
/*
 * Issue handler for IORING_OP_SEND_ZC.  Sends with MSG_ZEROCOPY through
 * either the fixed-buffer or the plain iovec path, posts the main CQE
 * with IORING_CQE_F_MORE (the notif CQE follows once pages are free),
 * and stashes the destination address for async retries.
 */
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	/* only protocols that opted into zerocopy are allowed */
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			/* address was already copied in on a prior attempt */
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	/* POLL_FIRST: don't attempt the send until poll has triggered */
	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		io_notif_set_extended(zc->notif);
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		/* user pages stay pinned for the duration; account them */
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		/* partial send: advance buffer state and retry asynchronously */
		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	/* fold in bytes transferred by earlier partial attempts */
	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
117006a5464bSPavel Begunkov 
/*
 * Issue handler for IORING_OP_SENDMSG_ZC: zerocopy send driven by a
 * user-supplied msghdr.  Mirrors io_send_zc() but goes through
 * __sys_sendmsg_sock() with a copied header, and always uses the
 * iovec-backed sg_from_iter path.
 */
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	io_notif_set_extended(sr->notif);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	/* only protocols that opted into zerocopy are allowed */
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	/* reuse the copied header from a previous -EAGAIN round, if any */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	/* POLL_FIRST: don't attempt the send until poll has triggered */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		/* partial send: remember progress and retry asynchronously */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	/* fold in bytes transferred by earlier partial attempts */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
1246493108d9SPavel Begunkov 
12477e6b638eSPavel Begunkov void io_sendrecv_fail(struct io_kiocb *req)
12487e6b638eSPavel Begunkov {
12497e6b638eSPavel Begunkov 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
12507e6b638eSPavel Begunkov 
12517e6b638eSPavel Begunkov 	if (req->flags & REQ_F_PARTIAL_IO)
12526ae91ac9SPavel Begunkov 		req->cqe.res = sr->done_io;
12536ae91ac9SPavel Begunkov 
1254c4c0009eSPavel Begunkov 	if ((req->flags & REQ_F_NEED_CLEANUP) &&
12556ae91ac9SPavel Begunkov 	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
12566ae91ac9SPavel Begunkov 		req->cqe.flags |= IORING_CQE_F_MORE;
12575693bcceSPavel Begunkov }
12585693bcceSPavel Begunkov 
1259f9ead18cSJens Axboe int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1260f9ead18cSJens Axboe {
1261f2ccb5aeSStefan Metzmacher 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1262f9ead18cSJens Axboe 	unsigned flags;
1263f9ead18cSJens Axboe 
1264f9ead18cSJens Axboe 	if (sqe->len || sqe->buf_index)
1265f9ead18cSJens Axboe 		return -EINVAL;
1266f9ead18cSJens Axboe 
1267f9ead18cSJens Axboe 	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1268f9ead18cSJens Axboe 	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1269f9ead18cSJens Axboe 	accept->flags = READ_ONCE(sqe->accept_flags);
1270f9ead18cSJens Axboe 	accept->nofile = rlimit(RLIMIT_NOFILE);
1271f9ead18cSJens Axboe 	flags = READ_ONCE(sqe->ioprio);
1272f9ead18cSJens Axboe 	if (flags & ~IORING_ACCEPT_MULTISHOT)
1273f9ead18cSJens Axboe 		return -EINVAL;
1274f9ead18cSJens Axboe 
1275f9ead18cSJens Axboe 	accept->file_slot = READ_ONCE(sqe->file_index);
1276f9ead18cSJens Axboe 	if (accept->file_slot) {
1277f9ead18cSJens Axboe 		if (accept->flags & SOCK_CLOEXEC)
1278f9ead18cSJens Axboe 			return -EINVAL;
1279f9ead18cSJens Axboe 		if (flags & IORING_ACCEPT_MULTISHOT &&
1280f9ead18cSJens Axboe 		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1281f9ead18cSJens Axboe 			return -EINVAL;
1282f9ead18cSJens Axboe 	}
1283f9ead18cSJens Axboe 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1284f9ead18cSJens Axboe 		return -EINVAL;
1285f9ead18cSJens Axboe 	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1286f9ead18cSJens Axboe 		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1287f9ead18cSJens Axboe 	if (flags & IORING_ACCEPT_MULTISHOT)
1288f9ead18cSJens Axboe 		req->flags |= REQ_F_APOLL_MULTISHOT;
1289f9ead18cSJens Axboe 	return 0;
1290f9ead18cSJens Axboe }
1291f9ead18cSJens Axboe 
1292f9ead18cSJens Axboe int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1293f9ead18cSJens Axboe {
1294f9ead18cSJens Axboe 	struct io_ring_ctx *ctx = req->ctx;
1295f2ccb5aeSStefan Metzmacher 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1296f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1297f9ead18cSJens Axboe 	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
1298f9ead18cSJens Axboe 	bool fixed = !!accept->file_slot;
1299f9ead18cSJens Axboe 	struct file *file;
1300f9ead18cSJens Axboe 	int ret, fd;
1301f9ead18cSJens Axboe 
1302*17add5ceSPavel Begunkov 	if (!io_check_multishot(req, issue_flags))
1303*17add5ceSPavel Begunkov 		return -EAGAIN;
1304f9ead18cSJens Axboe retry:
1305f9ead18cSJens Axboe 	if (!fixed) {
1306f9ead18cSJens Axboe 		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1307f9ead18cSJens Axboe 		if (unlikely(fd < 0))
1308f9ead18cSJens Axboe 			return fd;
1309f9ead18cSJens Axboe 	}
1310f9ead18cSJens Axboe 	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
1311f9ead18cSJens Axboe 			 accept->flags);
1312f9ead18cSJens Axboe 	if (IS_ERR(file)) {
1313f9ead18cSJens Axboe 		if (!fixed)
1314f9ead18cSJens Axboe 			put_unused_fd(fd);
1315f9ead18cSJens Axboe 		ret = PTR_ERR(file);
1316f9ead18cSJens Axboe 		if (ret == -EAGAIN && force_nonblock) {
1317f9ead18cSJens Axboe 			/*
1318f9ead18cSJens Axboe 			 * if it's multishot and polled, we don't need to
1319f9ead18cSJens Axboe 			 * return EAGAIN to arm the poll infra since it
1320f9ead18cSJens Axboe 			 * has already been done
1321f9ead18cSJens Axboe 			 */
132291482864SPavel Begunkov 			if (issue_flags & IO_URING_F_MULTISHOT)
1323f9ead18cSJens Axboe 				ret = IOU_ISSUE_SKIP_COMPLETE;
1324f9ead18cSJens Axboe 			return ret;
1325f9ead18cSJens Axboe 		}
1326f9ead18cSJens Axboe 		if (ret == -ERESTARTSYS)
1327f9ead18cSJens Axboe 			ret = -EINTR;
1328f9ead18cSJens Axboe 		req_set_fail(req);
1329f9ead18cSJens Axboe 	} else if (!fixed) {
1330f9ead18cSJens Axboe 		fd_install(fd, file);
1331f9ead18cSJens Axboe 		ret = fd;
1332f9ead18cSJens Axboe 	} else {
1333f9ead18cSJens Axboe 		ret = io_fixed_fd_install(req, issue_flags, file,
1334f9ead18cSJens Axboe 						accept->file_slot);
1335f9ead18cSJens Axboe 	}
1336f9ead18cSJens Axboe 
1337f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1338f9ead18cSJens Axboe 		io_req_set_res(req, ret, 0);
1339f9ead18cSJens Axboe 		return IOU_OK;
1340f9ead18cSJens Axboe 	}
1341f9ead18cSJens Axboe 
1342515e2696SDylan Yudaken 	if (ret < 0)
1343515e2696SDylan Yudaken 		return ret;
13449b8c5475SDylan Yudaken 	if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
13459b8c5475SDylan Yudaken 		       req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
1346d245bca6SPavel Begunkov 		goto retry;
1347cbd25748SDylan Yudaken 
1348515e2696SDylan Yudaken 	return -ECANCELED;
1349f9ead18cSJens Axboe }
1350f9ead18cSJens Axboe 
1351f9ead18cSJens Axboe int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1352f9ead18cSJens Axboe {
1353f2ccb5aeSStefan Metzmacher 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1354f9ead18cSJens Axboe 
1355f9ead18cSJens Axboe 	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1356f9ead18cSJens Axboe 		return -EINVAL;
1357f9ead18cSJens Axboe 
1358f9ead18cSJens Axboe 	sock->domain = READ_ONCE(sqe->fd);
1359f9ead18cSJens Axboe 	sock->type = READ_ONCE(sqe->off);
1360f9ead18cSJens Axboe 	sock->protocol = READ_ONCE(sqe->len);
1361f9ead18cSJens Axboe 	sock->file_slot = READ_ONCE(sqe->file_index);
1362f9ead18cSJens Axboe 	sock->nofile = rlimit(RLIMIT_NOFILE);
1363f9ead18cSJens Axboe 
1364f9ead18cSJens Axboe 	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1365f9ead18cSJens Axboe 	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1366f9ead18cSJens Axboe 		return -EINVAL;
1367f9ead18cSJens Axboe 	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1368f9ead18cSJens Axboe 		return -EINVAL;
1369f9ead18cSJens Axboe 	return 0;
1370f9ead18cSJens Axboe }
1371f9ead18cSJens Axboe 
1372f9ead18cSJens Axboe int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1373f9ead18cSJens Axboe {
1374f2ccb5aeSStefan Metzmacher 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1375f9ead18cSJens Axboe 	bool fixed = !!sock->file_slot;
1376f9ead18cSJens Axboe 	struct file *file;
1377f9ead18cSJens Axboe 	int ret, fd;
1378f9ead18cSJens Axboe 
1379f9ead18cSJens Axboe 	if (!fixed) {
1380f9ead18cSJens Axboe 		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1381f9ead18cSJens Axboe 		if (unlikely(fd < 0))
1382f9ead18cSJens Axboe 			return fd;
1383f9ead18cSJens Axboe 	}
1384f9ead18cSJens Axboe 	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1385f9ead18cSJens Axboe 	if (IS_ERR(file)) {
1386f9ead18cSJens Axboe 		if (!fixed)
1387f9ead18cSJens Axboe 			put_unused_fd(fd);
1388f9ead18cSJens Axboe 		ret = PTR_ERR(file);
1389f9ead18cSJens Axboe 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1390f9ead18cSJens Axboe 			return -EAGAIN;
1391f9ead18cSJens Axboe 		if (ret == -ERESTARTSYS)
1392f9ead18cSJens Axboe 			ret = -EINTR;
1393f9ead18cSJens Axboe 		req_set_fail(req);
1394f9ead18cSJens Axboe 	} else if (!fixed) {
1395f9ead18cSJens Axboe 		fd_install(fd, file);
1396f9ead18cSJens Axboe 		ret = fd;
1397f9ead18cSJens Axboe 	} else {
1398f9ead18cSJens Axboe 		ret = io_fixed_fd_install(req, issue_flags, file,
1399f9ead18cSJens Axboe 					    sock->file_slot);
1400f9ead18cSJens Axboe 	}
1401f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
1402f9ead18cSJens Axboe 	return IOU_OK;
1403f9ead18cSJens Axboe }
1404f9ead18cSJens Axboe 
1405f9ead18cSJens Axboe int io_connect_prep_async(struct io_kiocb *req)
1406f9ead18cSJens Axboe {
1407f9ead18cSJens Axboe 	struct io_async_connect *io = req->async_data;
1408f2ccb5aeSStefan Metzmacher 	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1409f9ead18cSJens Axboe 
1410f9ead18cSJens Axboe 	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
1411f9ead18cSJens Axboe }
1412f9ead18cSJens Axboe 
1413f9ead18cSJens Axboe int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1414f9ead18cSJens Axboe {
1415f2ccb5aeSStefan Metzmacher 	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1416f9ead18cSJens Axboe 
1417f9ead18cSJens Axboe 	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1418f9ead18cSJens Axboe 		return -EINVAL;
1419f9ead18cSJens Axboe 
1420f9ead18cSJens Axboe 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1421f9ead18cSJens Axboe 	conn->addr_len =  READ_ONCE(sqe->addr2);
14223fb1bd68SJens Axboe 	conn->in_progress = false;
1423f9ead18cSJens Axboe 	return 0;
1424f9ead18cSJens Axboe }
1425f9ead18cSJens Axboe 
1426f9ead18cSJens Axboe int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1427f9ead18cSJens Axboe {
1428f2ccb5aeSStefan Metzmacher 	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1429f9ead18cSJens Axboe 	struct io_async_connect __io, *io;
1430f9ead18cSJens Axboe 	unsigned file_flags;
1431f9ead18cSJens Axboe 	int ret;
1432f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1433f9ead18cSJens Axboe 
14343fb1bd68SJens Axboe 	if (connect->in_progress) {
14353fb1bd68SJens Axboe 		struct socket *socket;
14363fb1bd68SJens Axboe 
14373fb1bd68SJens Axboe 		ret = -ENOTSOCK;
14383fb1bd68SJens Axboe 		socket = sock_from_file(req->file);
14393fb1bd68SJens Axboe 		if (socket)
14403fb1bd68SJens Axboe 			ret = sock_error(socket->sk);
14413fb1bd68SJens Axboe 		goto out;
14423fb1bd68SJens Axboe 	}
14433fb1bd68SJens Axboe 
1444f9ead18cSJens Axboe 	if (req_has_async_data(req)) {
1445f9ead18cSJens Axboe 		io = req->async_data;
1446f9ead18cSJens Axboe 	} else {
1447f9ead18cSJens Axboe 		ret = move_addr_to_kernel(connect->addr,
1448f9ead18cSJens Axboe 						connect->addr_len,
1449f9ead18cSJens Axboe 						&__io.address);
1450f9ead18cSJens Axboe 		if (ret)
1451f9ead18cSJens Axboe 			goto out;
1452f9ead18cSJens Axboe 		io = &__io;
1453f9ead18cSJens Axboe 	}
1454f9ead18cSJens Axboe 
1455f9ead18cSJens Axboe 	file_flags = force_nonblock ? O_NONBLOCK : 0;
1456f9ead18cSJens Axboe 
1457f9ead18cSJens Axboe 	ret = __sys_connect_file(req->file, &io->address,
1458f9ead18cSJens Axboe 					connect->addr_len, file_flags);
1459f9ead18cSJens Axboe 	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
14603fb1bd68SJens Axboe 		if (ret == -EINPROGRESS) {
14613fb1bd68SJens Axboe 			connect->in_progress = true;
14623fb1bd68SJens Axboe 		} else {
1463f9ead18cSJens Axboe 			if (req_has_async_data(req))
1464f9ead18cSJens Axboe 				return -EAGAIN;
1465f9ead18cSJens Axboe 			if (io_alloc_async_data(req)) {
1466f9ead18cSJens Axboe 				ret = -ENOMEM;
1467f9ead18cSJens Axboe 				goto out;
1468f9ead18cSJens Axboe 			}
1469f9ead18cSJens Axboe 			memcpy(req->async_data, &__io, sizeof(__io));
14703fb1bd68SJens Axboe 		}
1471f9ead18cSJens Axboe 		return -EAGAIN;
1472f9ead18cSJens Axboe 	}
1473f9ead18cSJens Axboe 	if (ret == -ERESTARTSYS)
1474f9ead18cSJens Axboe 		ret = -EINTR;
1475f9ead18cSJens Axboe out:
1476f9ead18cSJens Axboe 	if (ret < 0)
1477f9ead18cSJens Axboe 		req_set_fail(req);
1478f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
1479f9ead18cSJens Axboe 	return IOU_OK;
1480f9ead18cSJens Axboe }
148143e0bbbdSJens Axboe 
148243e0bbbdSJens Axboe void io_netmsg_cache_free(struct io_cache_entry *entry)
148343e0bbbdSJens Axboe {
148443e0bbbdSJens Axboe 	kfree(container_of(entry, struct io_async_msghdr, cache));
148543e0bbbdSJens Axboe }
1486f9ead18cSJens Axboe #endif
1487