xref: /openbmc/linux/io_uring/net.c (revision 96b7b093)
1f9ead18cSJens Axboe // SPDX-License-Identifier: GPL-2.0
2f9ead18cSJens Axboe #include <linux/kernel.h>
3f9ead18cSJens Axboe #include <linux/errno.h>
4f9ead18cSJens Axboe #include <linux/file.h>
5f9ead18cSJens Axboe #include <linux/slab.h>
6f9ead18cSJens Axboe #include <linux/net.h>
7f9ead18cSJens Axboe #include <linux/compat.h>
8f9ead18cSJens Axboe #include <net/compat.h>
9f9ead18cSJens Axboe #include <linux/io_uring.h>
10f9ead18cSJens Axboe 
11f9ead18cSJens Axboe #include <uapi/linux/io_uring.h>
12f9ead18cSJens Axboe 
13f9ead18cSJens Axboe #include "io_uring.h"
143b77495aSJens Axboe #include "kbuf.h"
1543e0bbbdSJens Axboe #include "alloc_cache.h"
16f9ead18cSJens Axboe #include "net.h"
1706a5464bSPavel Begunkov #include "notif.h"
1810c7d33eSPavel Begunkov #include "rsrc.h"
19f9ead18cSJens Axboe 
20f9ead18cSJens Axboe #if defined(CONFIG_NET)
/*
 * Per-request command data for a socket shutdown request.
 */
struct io_shutdown {
	struct file			*file;
	/* copied from sqe->len at prep time, passed to __sys_shutdown_sock() */
	int				how;
};
25f9ead18cSJens Axboe 
/*
 * Per-request command data for an accept request.
 *
 * NOTE(review): the prep/issue handlers that fill and consume these fields
 * are not part of this section of the file; field meanings are presumed to
 * mirror accept4(2) plus a fixed-file slot -- confirm against the handlers.
 */
struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};
34f9ead18cSJens Axboe 
/*
 * Per-request command data for a socket creation request.
 *
 * NOTE(review): the handlers using this struct are outside this section of
 * the file; domain/type/protocol presumably match socket(2) -- confirm.
 */
struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};
44f9ead18cSJens Axboe 
/*
 * Per-request command data for a connect request.
 *
 * NOTE(review): the handlers using this struct are outside this section of
 * the file; in_progress/seen_econnaborted look like retry-state flags --
 * confirm against the connect issue path.
 */
struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};
52f9ead18cSJens Axboe 
/*
 * Per-request command data shared by the send/sendmsg/recv/recvmsg handlers.
 */
struct io_sr_msg {
	struct file			*file;
	/* all three alias the user pointer taken from sqe->addr at prep time */
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	/* from sqe->len; adjusted by the issue paths (e.g. partial sends) */
	unsigned			len;
	/* bytes already transferred by earlier attempts of this request */
	unsigned			done_io;
	/* from sqe->msg_flags, plus flags OR'ed in at prep time */
	unsigned			msg_flags;
	/* reset to 0 in recvmsg prep; presumably bounded by MULTISHOT_MAX_RETRY */
	unsigned			nr_multishot_loops;
	/* from sqe->ioprio, validated against the IORING_RECVSEND_* / IORING_RECV_* bits */
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				addr_len;
	/* copy of req->buf_index kept for multishot receive retries */
	u16				buf_group;
	/* destination address from sqe->addr2 (IORING_OP_SEND only) */
	void __user			*addr;
	/* saved msg_control_user, restored before sendmsg is reissued */
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb 		*notif;
};
7306a5464bSPavel Begunkov 
7422ccf61cSJens Axboe /*
7522ccf61cSJens Axboe  * Number of times we'll try and do receives if there's more data. If we
7622ccf61cSJens Axboe  * exceed this limit, then add us to the back of the queue and retry from
7722ccf61cSJens Axboe  * there. This helps fairness between flooding clients.
7822ccf61cSJens Axboe  */
7922ccf61cSJens Axboe #define MULTISHOT_MAX_RETRY	32
8022ccf61cSJens Axboe 
io_check_multishot(struct io_kiocb * req,unsigned int issue_flags)8117add5ceSPavel Begunkov static inline bool io_check_multishot(struct io_kiocb *req,
8217add5ceSPavel Begunkov 				      unsigned int issue_flags)
8317add5ceSPavel Begunkov {
8417add5ceSPavel Begunkov 	/*
8517add5ceSPavel Begunkov 	 * When ->locked_cq is set we only allow to post CQEs from the original
8617add5ceSPavel Begunkov 	 * task context. Usual request completions will be handled in other
8717add5ceSPavel Begunkov 	 * generic paths but multipoll may decide to post extra cqes.
8817add5ceSPavel Begunkov 	 */
8917add5ceSPavel Begunkov 	return !(issue_flags & IO_URING_F_IOWQ) ||
900f314c3bSPavel Begunkov 		!(req->flags & REQ_F_APOLL_MULTISHOT) ||
9117add5ceSPavel Begunkov 		!req->ctx->task_complete;
9217add5ceSPavel Begunkov }
9317add5ceSPavel Begunkov 
io_shutdown_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)94f9ead18cSJens Axboe int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
95f9ead18cSJens Axboe {
96f2ccb5aeSStefan Metzmacher 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
97f9ead18cSJens Axboe 
98f9ead18cSJens Axboe 	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
99f9ead18cSJens Axboe 		     sqe->buf_index || sqe->splice_fd_in))
100f9ead18cSJens Axboe 		return -EINVAL;
101f9ead18cSJens Axboe 
102f9ead18cSJens Axboe 	shutdown->how = READ_ONCE(sqe->len);
103aebb224fSDylan Yudaken 	req->flags |= REQ_F_FORCE_ASYNC;
104f9ead18cSJens Axboe 	return 0;
105f9ead18cSJens Axboe }
106f9ead18cSJens Axboe 
io_shutdown(struct io_kiocb * req,unsigned int issue_flags)107f9ead18cSJens Axboe int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
108f9ead18cSJens Axboe {
109f2ccb5aeSStefan Metzmacher 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
110f9ead18cSJens Axboe 	struct socket *sock;
111f9ead18cSJens Axboe 	int ret;
112f9ead18cSJens Axboe 
113aebb224fSDylan Yudaken 	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
114f9ead18cSJens Axboe 
115f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
116f9ead18cSJens Axboe 	if (unlikely(!sock))
117f9ead18cSJens Axboe 		return -ENOTSOCK;
118f9ead18cSJens Axboe 
119f9ead18cSJens Axboe 	ret = __sys_shutdown_sock(sock, shutdown->how);
120f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
121f9ead18cSJens Axboe 	return IOU_OK;
122f9ead18cSJens Axboe }
123f9ead18cSJens Axboe 
io_net_retry(struct socket * sock,int flags)124f9ead18cSJens Axboe static bool io_net_retry(struct socket *sock, int flags)
125f9ead18cSJens Axboe {
126f9ead18cSJens Axboe 	if (!(flags & MSG_WAITALL))
127f9ead18cSJens Axboe 		return false;
128f9ead18cSJens Axboe 	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
129f9ead18cSJens Axboe }
130f9ead18cSJens Axboe 
io_netmsg_recycle(struct io_kiocb * req,unsigned int issue_flags)13143e0bbbdSJens Axboe static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
13243e0bbbdSJens Axboe {
13343e0bbbdSJens Axboe 	struct io_async_msghdr *hdr = req->async_data;
13443e0bbbdSJens Axboe 
13506360426SPavel Begunkov 	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
13643e0bbbdSJens Axboe 		return;
13743e0bbbdSJens Axboe 
13843e0bbbdSJens Axboe 	/* Let normal cleanup path reap it if we fail adding to the cache */
13943e0bbbdSJens Axboe 	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
14043e0bbbdSJens Axboe 		req->async_data = NULL;
14143e0bbbdSJens Axboe 		req->flags &= ~REQ_F_ASYNC_DATA;
14243e0bbbdSJens Axboe 	}
14343e0bbbdSJens Axboe }
14443e0bbbdSJens Axboe 
io_msg_alloc_async(struct io_kiocb * req,unsigned int issue_flags)145858c293eSPavel Begunkov static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
14643e0bbbdSJens Axboe 						  unsigned int issue_flags)
14743e0bbbdSJens Axboe {
14843e0bbbdSJens Axboe 	struct io_ring_ctx *ctx = req->ctx;
14943e0bbbdSJens Axboe 	struct io_cache_entry *entry;
1504c17a496SPavel Begunkov 	struct io_async_msghdr *hdr;
15143e0bbbdSJens Axboe 
152df730ec2SXinghui Li 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
153df730ec2SXinghui Li 		entry = io_alloc_cache_get(&ctx->netmsg_cache);
154df730ec2SXinghui Li 		if (entry) {
15543e0bbbdSJens Axboe 			hdr = container_of(entry, struct io_async_msghdr, cache);
1564c17a496SPavel Begunkov 			hdr->free_iov = NULL;
15743e0bbbdSJens Axboe 			req->flags |= REQ_F_ASYNC_DATA;
15843e0bbbdSJens Axboe 			req->async_data = hdr;
15943e0bbbdSJens Axboe 			return hdr;
16043e0bbbdSJens Axboe 		}
161df730ec2SXinghui Li 	}
16243e0bbbdSJens Axboe 
1634c17a496SPavel Begunkov 	if (!io_alloc_async_data(req)) {
1644c17a496SPavel Begunkov 		hdr = req->async_data;
1654c17a496SPavel Begunkov 		hdr->free_iov = NULL;
1664c17a496SPavel Begunkov 		return hdr;
1674c17a496SPavel Begunkov 	}
16843e0bbbdSJens Axboe 	return NULL;
16943e0bbbdSJens Axboe }
17043e0bbbdSJens Axboe 
/*
 * Allocation helper for the ->prep_async handlers. Those always run from the
 * submission context, so no issue flags (in particular not
 * IO_URING_F_UNLOCKED) are passed down.
 */
static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	const unsigned int submit_ctx_flags = 0;

	return io_msg_alloc_async(req, submit_ctx_flags);
}
176858c293eSPavel Begunkov 
io_setup_async_msg(struct io_kiocb * req,struct io_async_msghdr * kmsg,unsigned int issue_flags)177f9ead18cSJens Axboe static int io_setup_async_msg(struct io_kiocb *req,
17843e0bbbdSJens Axboe 			      struct io_async_msghdr *kmsg,
17943e0bbbdSJens Axboe 			      unsigned int issue_flags)
180f9ead18cSJens Axboe {
1813f743e9bSPavel Begunkov 	struct io_async_msghdr *async_msg;
182f9ead18cSJens Axboe 
1833f743e9bSPavel Begunkov 	if (req_has_async_data(req))
184f9ead18cSJens Axboe 		return -EAGAIN;
185858c293eSPavel Begunkov 	async_msg = io_msg_alloc_async(req, issue_flags);
18643e0bbbdSJens Axboe 	if (!async_msg) {
187f9ead18cSJens Axboe 		kfree(kmsg->free_iov);
188f9ead18cSJens Axboe 		return -ENOMEM;
189f9ead18cSJens Axboe 	}
190f9ead18cSJens Axboe 	req->flags |= REQ_F_NEED_CLEANUP;
191f9ead18cSJens Axboe 	memcpy(async_msg, kmsg, sizeof(*kmsg));
1926f10ae8aSPavel Begunkov 	if (async_msg->msg.msg_name)
193f9ead18cSJens Axboe 		async_msg->msg.msg_name = &async_msg->addr;
194c21a8027SPavel Begunkov 
195c21a8027SPavel Begunkov 	if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
196c21a8027SPavel Begunkov 		return -EAGAIN;
197c21a8027SPavel Begunkov 
198f9ead18cSJens Axboe 	/* if were using fast_iov, set it to the new one */
1994b61152eSJens Axboe 	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
200de4f5fedSJens Axboe 		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
201de4f5fedSJens Axboe 		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
2023e4cb6ebSStefan Metzmacher 	}
203f9ead18cSJens Axboe 
204f9ead18cSJens Axboe 	return -EAGAIN;
205f9ead18cSJens Axboe }
206f9ead18cSJens Axboe 
2071a033109SJens Axboe #ifdef CONFIG_COMPAT
io_compat_msg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg,struct compat_msghdr * msg,int ddir)20851d28472SJens Axboe static int io_compat_msg_copy_hdr(struct io_kiocb *req,
2091a033109SJens Axboe 				  struct io_async_msghdr *iomsg,
21051d28472SJens Axboe 				  struct compat_msghdr *msg, int ddir)
2111a033109SJens Axboe {
2121a033109SJens Axboe 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
2131a033109SJens Axboe 	struct compat_iovec __user *uiov;
2141a033109SJens Axboe 	int ret;
2151a033109SJens Axboe 
21651d28472SJens Axboe 	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
2171a033109SJens Axboe 		return -EFAULT;
2181a033109SJens Axboe 
21951d28472SJens Axboe 	uiov = compat_ptr(msg->msg_iov);
2201a033109SJens Axboe 	if (req->flags & REQ_F_BUFFER_SELECT) {
2211a033109SJens Axboe 		compat_ssize_t clen;
2221a033109SJens Axboe 
2231a033109SJens Axboe 		iomsg->free_iov = NULL;
22451d28472SJens Axboe 		if (msg->msg_iovlen == 0) {
2251a033109SJens Axboe 			sr->len = 0;
22651d28472SJens Axboe 		} else if (msg->msg_iovlen > 1) {
2271a033109SJens Axboe 			return -EINVAL;
2281a033109SJens Axboe 		} else {
2291a033109SJens Axboe 			if (!access_ok(uiov, sizeof(*uiov)))
2301a033109SJens Axboe 				return -EFAULT;
2311a033109SJens Axboe 			if (__get_user(clen, &uiov->iov_len))
2321a033109SJens Axboe 				return -EFAULT;
2331a033109SJens Axboe 			if (clen < 0)
2341a033109SJens Axboe 				return -EINVAL;
2351a033109SJens Axboe 			sr->len = clen;
2361a033109SJens Axboe 		}
2371a033109SJens Axboe 
2381a033109SJens Axboe 		return 0;
2391a033109SJens Axboe 	}
2401a033109SJens Axboe 
2411a033109SJens Axboe 	iomsg->free_iov = iomsg->fast_iov;
24251d28472SJens Axboe 	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
2431a033109SJens Axboe 				UIO_FASTIOV, &iomsg->free_iov,
2441a033109SJens Axboe 				&iomsg->msg.msg_iter, true);
2451a033109SJens Axboe 	if (unlikely(ret < 0))
2461a033109SJens Axboe 		return ret;
2471a033109SJens Axboe 
2481a033109SJens Axboe 	return 0;
2491a033109SJens Axboe }
2501a033109SJens Axboe #endif
2511a033109SJens Axboe 
io_msg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg,struct user_msghdr * msg,int ddir)25251d28472SJens Axboe static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
25351d28472SJens Axboe 			   struct user_msghdr *msg, int ddir)
2541a033109SJens Axboe {
2551a033109SJens Axboe 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
2561a033109SJens Axboe 	int ret;
2571a033109SJens Axboe 
25851d28472SJens Axboe 	if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg)))
2591a033109SJens Axboe 		return -EFAULT;
2601a033109SJens Axboe 
2611a033109SJens Axboe 	if (req->flags & REQ_F_BUFFER_SELECT) {
26251d28472SJens Axboe 		if (msg->msg_iovlen == 0) {
2631a033109SJens Axboe 			sr->len = iomsg->fast_iov[0].iov_len = 0;
2641a033109SJens Axboe 			iomsg->fast_iov[0].iov_base = NULL;
2651a033109SJens Axboe 			iomsg->free_iov = NULL;
26651d28472SJens Axboe 		} else if (msg->msg_iovlen > 1) {
2671a033109SJens Axboe 			return -EINVAL;
2681a033109SJens Axboe 		} else {
26951d28472SJens Axboe 			if (copy_from_user(iomsg->fast_iov, msg->msg_iov,
27051d28472SJens Axboe 					   sizeof(*msg->msg_iov)))
2711a033109SJens Axboe 				return -EFAULT;
2721a033109SJens Axboe 			sr->len = iomsg->fast_iov[0].iov_len;
2731a033109SJens Axboe 			iomsg->free_iov = NULL;
2741a033109SJens Axboe 		}
2751a033109SJens Axboe 
2761a033109SJens Axboe 		return 0;
2771a033109SJens Axboe 	}
2781a033109SJens Axboe 
2791a033109SJens Axboe 	iomsg->free_iov = iomsg->fast_iov;
28051d28472SJens Axboe 	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
2811a033109SJens Axboe 				&iomsg->free_iov, &iomsg->msg.msg_iter, false);
2821a033109SJens Axboe 	if (unlikely(ret < 0))
2831a033109SJens Axboe 		return ret;
2841a033109SJens Axboe 
2851a033109SJens Axboe 	return 0;
2861a033109SJens Axboe }
2871a033109SJens Axboe 
io_sendmsg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg)288f9ead18cSJens Axboe static int io_sendmsg_copy_hdr(struct io_kiocb *req,
289f9ead18cSJens Axboe 			       struct io_async_msghdr *iomsg)
290f9ead18cSJens Axboe {
291f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
29251d28472SJens Axboe 	struct user_msghdr msg;
293cac9e441SJens Axboe 	int ret;
294f9ead18cSJens Axboe 
29551d28472SJens Axboe 	iomsg->msg.msg_name = &iomsg->addr;
29651d28472SJens Axboe 	iomsg->msg.msg_iter.nr_segs = 0;
29751d28472SJens Axboe 
29851d28472SJens Axboe #ifdef CONFIG_COMPAT
29951d28472SJens Axboe 	if (unlikely(req->ctx->compat)) {
30051d28472SJens Axboe 		struct compat_msghdr cmsg;
30151d28472SJens Axboe 
30251d28472SJens Axboe 		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
30351d28472SJens Axboe 		if (unlikely(ret))
3041a033109SJens Axboe 			return ret;
3051a033109SJens Axboe 
30651d28472SJens Axboe 		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
30751d28472SJens Axboe 	}
30851d28472SJens Axboe #endif
30951d28472SJens Axboe 
31051d28472SJens Axboe 	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
31151d28472SJens Axboe 	if (unlikely(ret))
31251d28472SJens Axboe 		return ret;
31351d28472SJens Axboe 
31451d28472SJens Axboe 	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
31551d28472SJens Axboe 
316cac9e441SJens Axboe 	/* save msg_control as sys_sendmsg() overwrites it */
31726fed836SJens Axboe 	sr->msg_control = iomsg->msg.msg_control_user;
318cac9e441SJens Axboe 	return ret;
319f9ead18cSJens Axboe }
320f9ead18cSJens Axboe 
io_send_prep_async(struct io_kiocb * req)321516e82f0SPavel Begunkov int io_send_prep_async(struct io_kiocb *req)
322581711c4SPavel Begunkov {
323ac9e5784SPavel Begunkov 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
324581711c4SPavel Begunkov 	struct io_async_msghdr *io;
325581711c4SPavel Begunkov 	int ret;
326581711c4SPavel Begunkov 
327581711c4SPavel Begunkov 	if (!zc->addr || req_has_async_data(req))
328581711c4SPavel Begunkov 		return 0;
3296bf8ad25SPavel Begunkov 	io = io_msg_alloc_async_prep(req);
3306bf8ad25SPavel Begunkov 	if (!io)
331581711c4SPavel Begunkov 		return -ENOMEM;
332581711c4SPavel Begunkov 	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
333581711c4SPavel Begunkov 	return ret;
334581711c4SPavel Begunkov }
335581711c4SPavel Begunkov 
io_setup_async_addr(struct io_kiocb * req,struct sockaddr_storage * addr_storage,unsigned int issue_flags)336581711c4SPavel Begunkov static int io_setup_async_addr(struct io_kiocb *req,
3376ae61b7aSPavel Begunkov 			      struct sockaddr_storage *addr_storage,
338581711c4SPavel Begunkov 			      unsigned int issue_flags)
339581711c4SPavel Begunkov {
3406ae61b7aSPavel Begunkov 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
341581711c4SPavel Begunkov 	struct io_async_msghdr *io;
342581711c4SPavel Begunkov 
3436ae61b7aSPavel Begunkov 	if (!sr->addr || req_has_async_data(req))
344581711c4SPavel Begunkov 		return -EAGAIN;
3456bf8ad25SPavel Begunkov 	io = io_msg_alloc_async(req, issue_flags);
3466bf8ad25SPavel Begunkov 	if (!io)
347581711c4SPavel Begunkov 		return -ENOMEM;
3486ae61b7aSPavel Begunkov 	memcpy(&io->addr, addr_storage, sizeof(io->addr));
349581711c4SPavel Begunkov 	return -EAGAIN;
350581711c4SPavel Begunkov }
351581711c4SPavel Begunkov 
io_sendmsg_prep_async(struct io_kiocb * req)352f9ead18cSJens Axboe int io_sendmsg_prep_async(struct io_kiocb *req)
353f9ead18cSJens Axboe {
354f9ead18cSJens Axboe 	int ret;
355f9ead18cSJens Axboe 
356858c293eSPavel Begunkov 	if (!io_msg_alloc_async_prep(req))
357858c293eSPavel Begunkov 		return -ENOMEM;
358f9ead18cSJens Axboe 	ret = io_sendmsg_copy_hdr(req, req->async_data);
359f9ead18cSJens Axboe 	if (!ret)
360f9ead18cSJens Axboe 		req->flags |= REQ_F_NEED_CLEANUP;
361f9ead18cSJens Axboe 	return ret;
362f9ead18cSJens Axboe }
363f9ead18cSJens Axboe 
io_sendmsg_recvmsg_cleanup(struct io_kiocb * req)364f9ead18cSJens Axboe void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
365f9ead18cSJens Axboe {
366f9ead18cSJens Axboe 	struct io_async_msghdr *io = req->async_data;
367f9ead18cSJens Axboe 
368f9ead18cSJens Axboe 	kfree(io->free_iov);
369f9ead18cSJens Axboe }
370f9ead18cSJens Axboe 
io_sendmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)371f9ead18cSJens Axboe int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
372f9ead18cSJens Axboe {
373f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
374f9ead18cSJens Axboe 
375516e82f0SPavel Begunkov 	if (req->opcode == IORING_OP_SEND) {
376516e82f0SPavel Begunkov 		if (READ_ONCE(sqe->__pad3[0]))
377f9ead18cSJens Axboe 			return -EINVAL;
378516e82f0SPavel Begunkov 		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
379516e82f0SPavel Begunkov 		sr->addr_len = READ_ONCE(sqe->addr_len);
380516e82f0SPavel Begunkov 	} else if (sqe->addr2 || sqe->file_index) {
381516e82f0SPavel Begunkov 		return -EINVAL;
382516e82f0SPavel Begunkov 	}
383f9ead18cSJens Axboe 
384f9ead18cSJens Axboe 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
385f9ead18cSJens Axboe 	sr->len = READ_ONCE(sqe->len);
386f9ead18cSJens Axboe 	sr->flags = READ_ONCE(sqe->ioprio);
387f9ead18cSJens Axboe 	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
388f9ead18cSJens Axboe 		return -EINVAL;
389f9ead18cSJens Axboe 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
390f9ead18cSJens Axboe 	if (sr->msg_flags & MSG_DONTWAIT)
391f9ead18cSJens Axboe 		req->flags |= REQ_F_NOWAIT;
392f9ead18cSJens Axboe 
393f9ead18cSJens Axboe #ifdef CONFIG_COMPAT
394f9ead18cSJens Axboe 	if (req->ctx->compat)
395f9ead18cSJens Axboe 		sr->msg_flags |= MSG_CMSG_COMPAT;
396f9ead18cSJens Axboe #endif
397f9ead18cSJens Axboe 	sr->done_io = 0;
398f9ead18cSJens Axboe 	return 0;
399f9ead18cSJens Axboe }
400f9ead18cSJens Axboe 
io_sendmsg(struct io_kiocb * req,unsigned int issue_flags)401f9ead18cSJens Axboe int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
402f9ead18cSJens Axboe {
403f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
404f9ead18cSJens Axboe 	struct io_async_msghdr iomsg, *kmsg;
405f9ead18cSJens Axboe 	struct socket *sock;
406f9ead18cSJens Axboe 	unsigned flags;
407f9ead18cSJens Axboe 	int min_ret = 0;
408f9ead18cSJens Axboe 	int ret;
409f9ead18cSJens Axboe 
410f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
411f9ead18cSJens Axboe 	if (unlikely(!sock))
412f9ead18cSJens Axboe 		return -ENOTSOCK;
413f9ead18cSJens Axboe 
414f9ead18cSJens Axboe 	if (req_has_async_data(req)) {
415f9ead18cSJens Axboe 		kmsg = req->async_data;
41626fed836SJens Axboe 		kmsg->msg.msg_control_user = sr->msg_control;
417f9ead18cSJens Axboe 	} else {
418f9ead18cSJens Axboe 		ret = io_sendmsg_copy_hdr(req, &iomsg);
419f9ead18cSJens Axboe 		if (ret)
420f9ead18cSJens Axboe 			return ret;
421f9ead18cSJens Axboe 		kmsg = &iomsg;
422f9ead18cSJens Axboe 	}
423f9ead18cSJens Axboe 
424f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_POLLED) &&
425f9ead18cSJens Axboe 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
42643e0bbbdSJens Axboe 		return io_setup_async_msg(req, kmsg, issue_flags);
427f9ead18cSJens Axboe 
428f9ead18cSJens Axboe 	flags = sr->msg_flags;
429f9ead18cSJens Axboe 	if (issue_flags & IO_URING_F_NONBLOCK)
430f9ead18cSJens Axboe 		flags |= MSG_DONTWAIT;
431f9ead18cSJens Axboe 	if (flags & MSG_WAITALL)
432f9ead18cSJens Axboe 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
433f9ead18cSJens Axboe 
434f9ead18cSJens Axboe 	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
435f9ead18cSJens Axboe 
436f9ead18cSJens Axboe 	if (ret < min_ret) {
437f9ead18cSJens Axboe 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
43843e0bbbdSJens Axboe 			return io_setup_async_msg(req, kmsg, issue_flags);
439f9ead18cSJens Axboe 		if (ret > 0 && io_net_retry(sock, flags)) {
440b1dc4920SJens Axboe 			kmsg->msg.msg_controllen = 0;
441b1dc4920SJens Axboe 			kmsg->msg.msg_control = NULL;
442f9ead18cSJens Axboe 			sr->done_io += ret;
443f9ead18cSJens Axboe 			req->flags |= REQ_F_PARTIAL_IO;
44443e0bbbdSJens Axboe 			return io_setup_async_msg(req, kmsg, issue_flags);
445f9ead18cSJens Axboe 		}
44695eafc74SPavel Begunkov 		if (ret == -ERESTARTSYS)
44795eafc74SPavel Begunkov 			ret = -EINTR;
448f9ead18cSJens Axboe 		req_set_fail(req);
449f9ead18cSJens Axboe 	}
450f9ead18cSJens Axboe 	/* fast path, check for non-NULL to avoid function call */
451f9ead18cSJens Axboe 	if (kmsg->free_iov)
452f9ead18cSJens Axboe 		kfree(kmsg->free_iov);
453f9ead18cSJens Axboe 	req->flags &= ~REQ_F_NEED_CLEANUP;
45443e0bbbdSJens Axboe 	io_netmsg_recycle(req, issue_flags);
455f9ead18cSJens Axboe 	if (ret >= 0)
456f9ead18cSJens Axboe 		ret += sr->done_io;
457f9ead18cSJens Axboe 	else if (sr->done_io)
458f9ead18cSJens Axboe 		ret = sr->done_io;
459f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
460f9ead18cSJens Axboe 	return IOU_OK;
461f9ead18cSJens Axboe }
462f9ead18cSJens Axboe 
io_send(struct io_kiocb * req,unsigned int issue_flags)463f9ead18cSJens Axboe int io_send(struct io_kiocb *req, unsigned int issue_flags)
464f9ead18cSJens Axboe {
465516e82f0SPavel Begunkov 	struct sockaddr_storage __address;
466f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
467f9ead18cSJens Axboe 	struct msghdr msg;
468f9ead18cSJens Axboe 	struct socket *sock;
469f9ead18cSJens Axboe 	unsigned flags;
470f9ead18cSJens Axboe 	int min_ret = 0;
471f9ead18cSJens Axboe 	int ret;
472f9ead18cSJens Axboe 
47304360d3eSPavel Begunkov 	msg.msg_name = NULL;
47404360d3eSPavel Begunkov 	msg.msg_control = NULL;
47504360d3eSPavel Begunkov 	msg.msg_controllen = 0;
47604360d3eSPavel Begunkov 	msg.msg_namelen = 0;
47704360d3eSPavel Begunkov 	msg.msg_ubuf = NULL;
47804360d3eSPavel Begunkov 
479516e82f0SPavel Begunkov 	if (sr->addr) {
480516e82f0SPavel Begunkov 		if (req_has_async_data(req)) {
481516e82f0SPavel Begunkov 			struct io_async_msghdr *io = req->async_data;
482516e82f0SPavel Begunkov 
483516e82f0SPavel Begunkov 			msg.msg_name = &io->addr;
484516e82f0SPavel Begunkov 		} else {
485516e82f0SPavel Begunkov 			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
486516e82f0SPavel Begunkov 			if (unlikely(ret < 0))
487516e82f0SPavel Begunkov 				return ret;
488516e82f0SPavel Begunkov 			msg.msg_name = (struct sockaddr *)&__address;
489516e82f0SPavel Begunkov 		}
490516e82f0SPavel Begunkov 		msg.msg_namelen = sr->addr_len;
491516e82f0SPavel Begunkov 	}
492516e82f0SPavel Begunkov 
493f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_POLLED) &&
494f9ead18cSJens Axboe 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
495516e82f0SPavel Begunkov 		return io_setup_async_addr(req, &__address, issue_flags);
496f9ead18cSJens Axboe 
497f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
498f9ead18cSJens Axboe 	if (unlikely(!sock))
499f9ead18cSJens Axboe 		return -ENOTSOCK;
500f9ead18cSJens Axboe 
5014b61152eSJens Axboe 	ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
502f9ead18cSJens Axboe 	if (unlikely(ret))
503f9ead18cSJens Axboe 		return ret;
504f9ead18cSJens Axboe 
505f9ead18cSJens Axboe 	flags = sr->msg_flags;
506f9ead18cSJens Axboe 	if (issue_flags & IO_URING_F_NONBLOCK)
507f9ead18cSJens Axboe 		flags |= MSG_DONTWAIT;
508f9ead18cSJens Axboe 	if (flags & MSG_WAITALL)
509f9ead18cSJens Axboe 		min_ret = iov_iter_count(&msg.msg_iter);
510f9ead18cSJens Axboe 
511b841b901SDavid Howells 	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
512f9ead18cSJens Axboe 	msg.msg_flags = flags;
513f9ead18cSJens Axboe 	ret = sock_sendmsg(sock, &msg);
514f9ead18cSJens Axboe 	if (ret < min_ret) {
515f9ead18cSJens Axboe 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
516516e82f0SPavel Begunkov 			return io_setup_async_addr(req, &__address, issue_flags);
517516e82f0SPavel Begunkov 
518f9ead18cSJens Axboe 		if (ret > 0 && io_net_retry(sock, flags)) {
519f9ead18cSJens Axboe 			sr->len -= ret;
520f9ead18cSJens Axboe 			sr->buf += ret;
521f9ead18cSJens Axboe 			sr->done_io += ret;
522f9ead18cSJens Axboe 			req->flags |= REQ_F_PARTIAL_IO;
523516e82f0SPavel Begunkov 			return io_setup_async_addr(req, &__address, issue_flags);
524f9ead18cSJens Axboe 		}
52595eafc74SPavel Begunkov 		if (ret == -ERESTARTSYS)
52695eafc74SPavel Begunkov 			ret = -EINTR;
527f9ead18cSJens Axboe 		req_set_fail(req);
528f9ead18cSJens Axboe 	}
529f9ead18cSJens Axboe 	if (ret >= 0)
530f9ead18cSJens Axboe 		ret += sr->done_io;
531f9ead18cSJens Axboe 	else if (sr->done_io)
532f9ead18cSJens Axboe 		ret = sr->done_io;
533f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
534f9ead18cSJens Axboe 	return IOU_OK;
535f9ead18cSJens Axboe }
536f9ead18cSJens Axboe 
/*
 * Record the name and control lengths for a multishot recvmsg.
 *
 * Does nothing unless the request has both REQ_F_APOLL_MULTISHOT and
 * REQ_F_BUFFER_SELECT set. Otherwise @namelen must be non-negative, and
 * sizeof(struct io_uring_recvmsg_out) + @namelen + @controllen must fit in
 * an int; the two lengths are then stored in @iomsg.
 *
 * Returns 0 on success or -EOVERFLOW.
 */
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	int hdr;

	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) !=
			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT))
		return 0;

	if (unlikely(namelen < 0))
		return -EOVERFLOW;
	if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
			       namelen, &hdr) ||
	    check_add_overflow(hdr, controllen, &hdr))
		return -EOVERFLOW;

	iomsg->controllen = controllen;
	iomsg->namelen = namelen;
	return 0;
}
56051d28472SJens Axboe 
io_recvmsg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg)561f9ead18cSJens Axboe static int io_recvmsg_copy_hdr(struct io_kiocb *req,
562f9ead18cSJens Axboe 			       struct io_async_msghdr *iomsg)
563f9ead18cSJens Axboe {
56451d28472SJens Axboe 	struct user_msghdr msg;
56551d28472SJens Axboe 	int ret;
56651d28472SJens Axboe 
56751d28472SJens Axboe 	iomsg->msg.msg_name = &iomsg->addr;
56851d28472SJens Axboe 	iomsg->msg.msg_iter.nr_segs = 0;
56951d28472SJens Axboe 
57051d28472SJens Axboe #ifdef CONFIG_COMPAT
57151d28472SJens Axboe 	if (unlikely(req->ctx->compat)) {
57251d28472SJens Axboe 		struct compat_msghdr cmsg;
57351d28472SJens Axboe 
57451d28472SJens Axboe 		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
57551d28472SJens Axboe 		if (unlikely(ret))
57651d28472SJens Axboe 			return ret;
57751d28472SJens Axboe 
57851d28472SJens Axboe 		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
57951d28472SJens Axboe 		if (unlikely(ret))
58051d28472SJens Axboe 			return ret;
58151d28472SJens Axboe 
58251d28472SJens Axboe 		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
58351d28472SJens Axboe 						cmsg.msg_controllen);
58451d28472SJens Axboe 	}
58551d28472SJens Axboe #endif
58651d28472SJens Axboe 
58751d28472SJens Axboe 	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
58851d28472SJens Axboe 	if (unlikely(ret))
58951d28472SJens Axboe 		return ret;
59051d28472SJens Axboe 
59151d28472SJens Axboe 	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
59251d28472SJens Axboe 	if (unlikely(ret))
59351d28472SJens Axboe 		return ret;
59451d28472SJens Axboe 
59551d28472SJens Axboe 	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
59651d28472SJens Axboe 					msg.msg_controllen);
597f9ead18cSJens Axboe }
598f9ead18cSJens Axboe 
io_recvmsg_prep_async(struct io_kiocb * req)599f9ead18cSJens Axboe int io_recvmsg_prep_async(struct io_kiocb *req)
600f9ead18cSJens Axboe {
6011a033109SJens Axboe 	struct io_async_msghdr *iomsg;
602f9ead18cSJens Axboe 	int ret;
603f9ead18cSJens Axboe 
604858c293eSPavel Begunkov 	if (!io_msg_alloc_async_prep(req))
605858c293eSPavel Begunkov 		return -ENOMEM;
6061a033109SJens Axboe 	iomsg = req->async_data;
6071a033109SJens Axboe 	ret = io_recvmsg_copy_hdr(req, iomsg);
608f9ead18cSJens Axboe 	if (!ret)
609f9ead18cSJens Axboe 		req->flags |= REQ_F_NEED_CLEANUP;
610f9ead18cSJens Axboe 	return ret;
611f9ead18cSJens Axboe }
612f9ead18cSJens Axboe 
613b3fdea6eSDylan Yudaken #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)
614b3fdea6eSDylan Yudaken 
io_recvmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)615f9ead18cSJens Axboe int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
616f9ead18cSJens Axboe {
617f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
618f9ead18cSJens Axboe 
619f9ead18cSJens Axboe 	if (unlikely(sqe->file_index || sqe->addr2))
620f9ead18cSJens Axboe 		return -EINVAL;
621f9ead18cSJens Axboe 
622f9ead18cSJens Axboe 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
623f9ead18cSJens Axboe 	sr->len = READ_ONCE(sqe->len);
624f9ead18cSJens Axboe 	sr->flags = READ_ONCE(sqe->ioprio);
625b3fdea6eSDylan Yudaken 	if (sr->flags & ~(RECVMSG_FLAGS))
626f9ead18cSJens Axboe 		return -EINVAL;
6277605c43dSDavid Lamparter 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
628f9ead18cSJens Axboe 	if (sr->msg_flags & MSG_DONTWAIT)
629f9ead18cSJens Axboe 		req->flags |= REQ_F_NOWAIT;
630f9ead18cSJens Axboe 	if (sr->msg_flags & MSG_ERRQUEUE)
631f9ead18cSJens Axboe 		req->flags |= REQ_F_CLEAR_POLLIN;
632b3fdea6eSDylan Yudaken 	if (sr->flags & IORING_RECV_MULTISHOT) {
633b3fdea6eSDylan Yudaken 		if (!(req->flags & REQ_F_BUFFER_SELECT))
634b3fdea6eSDylan Yudaken 			return -EINVAL;
635b3fdea6eSDylan Yudaken 		if (sr->msg_flags & MSG_WAITALL)
636b3fdea6eSDylan Yudaken 			return -EINVAL;
637b3fdea6eSDylan Yudaken 		if (req->opcode == IORING_OP_RECV && sr->len)
638b3fdea6eSDylan Yudaken 			return -EINVAL;
639b3fdea6eSDylan Yudaken 		req->flags |= REQ_F_APOLL_MULTISHOT;
640b00c51efSJens Axboe 		/*
641b00c51efSJens Axboe 		 * Store the buffer group for this multishot receive separately,
642b00c51efSJens Axboe 		 * as if we end up doing an io-wq based issue that selects a
643b00c51efSJens Axboe 		 * buffer, it has to be committed immediately and that will
644b00c51efSJens Axboe 		 * clear ->buf_list. This means we lose the link to the buffer
645b00c51efSJens Axboe 		 * list, and the eventual buffer put on completion then cannot
646b00c51efSJens Axboe 		 * restore it.
647b00c51efSJens Axboe 		 */
648b00c51efSJens Axboe 		sr->buf_group = req->buf_index;
649b3fdea6eSDylan Yudaken 	}
650f9ead18cSJens Axboe 
651f9ead18cSJens Axboe #ifdef CONFIG_COMPAT
652f9ead18cSJens Axboe 	if (req->ctx->compat)
653f9ead18cSJens Axboe 		sr->msg_flags |= MSG_CMSG_COMPAT;
654f9ead18cSJens Axboe #endif
655f9ead18cSJens Axboe 	sr->done_io = 0;
65622ccf61cSJens Axboe 	sr->nr_multishot_loops = 0;
657f9ead18cSJens Axboe 	return 0;
658f9ead18cSJens Axboe }
659f9ead18cSJens Axboe 
io_recv_prep_retry(struct io_kiocb * req)660b3fdea6eSDylan Yudaken static inline void io_recv_prep_retry(struct io_kiocb *req)
661b3fdea6eSDylan Yudaken {
662f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
663b3fdea6eSDylan Yudaken 
664b3fdea6eSDylan Yudaken 	sr->done_io = 0;
665b3fdea6eSDylan Yudaken 	sr->len = 0; /* get from the provided buffer */
666b00c51efSJens Axboe 	req->buf_index = sr->buf_group;
667b3fdea6eSDylan Yudaken }
668b3fdea6eSDylan Yudaken 
669b3fdea6eSDylan Yudaken /*
6709bb66906SDylan Yudaken  * Finishes io_recv and io_recvmsg.
671b3fdea6eSDylan Yudaken  *
672b3fdea6eSDylan Yudaken  * Returns true if it is actually finished, or false if it should run
673b3fdea6eSDylan Yudaken  * again (for multishot).
674b3fdea6eSDylan Yudaken  */
io_recv_finish(struct io_kiocb * req,int * ret,struct msghdr * msg,bool mshot_finished,unsigned issue_flags)6759bb66906SDylan Yudaken static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
6767d41bcb7SJens Axboe 				  struct msghdr *msg, bool mshot_finished,
677100d6b17SPavel Begunkov 				  unsigned issue_flags)
678b3fdea6eSDylan Yudaken {
6797d41bcb7SJens Axboe 	unsigned int cflags;
6807d41bcb7SJens Axboe 
6817d41bcb7SJens Axboe 	cflags = io_put_kbuf(req, issue_flags);
682b65db921SJens Axboe 	if (msg->msg_inq && msg->msg_inq != -1)
6837d41bcb7SJens Axboe 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
6847d41bcb7SJens Axboe 
685b3fdea6eSDylan Yudaken 	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
686b3fdea6eSDylan Yudaken 		io_req_set_res(req, *ret, cflags);
687b3fdea6eSDylan Yudaken 		*ret = IOU_OK;
688b3fdea6eSDylan Yudaken 		return true;
689b3fdea6eSDylan Yudaken 	}
690b3fdea6eSDylan Yudaken 
691eac52bfeSJens Axboe 	if (mshot_finished)
692eac52bfeSJens Axboe 		goto finish;
693eac52bfeSJens Axboe 
694eac52bfeSJens Axboe 	/*
695eac52bfeSJens Axboe 	 * Fill CQE for this receive and see if we should keep trying to
696eac52bfeSJens Axboe 	 * receive from this socket.
697eac52bfeSJens Axboe 	 */
698b6b2bb58SPavel Begunkov 	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
699b6b2bb58SPavel Begunkov 				*ret, cflags | IORING_CQE_F_MORE)) {
70022ccf61cSJens Axboe 		struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
70122ccf61cSJens Axboe 		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
70222ccf61cSJens Axboe 
703b3fdea6eSDylan Yudaken 		io_recv_prep_retry(req);
704a2741c58SJens Axboe 		/* Known not-empty or unknown state, retry */
70522ccf61cSJens Axboe 		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
70622ccf61cSJens Axboe 			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
707b3fdea6eSDylan Yudaken 				return false;
70822ccf61cSJens Axboe 			/* mshot retries exceeded, force a requeue */
70922ccf61cSJens Axboe 			sr->nr_multishot_loops = 0;
71022ccf61cSJens Axboe 			mshot_retry_ret = IOU_REQUEUE;
71122ccf61cSJens Axboe 		}
712a2741c58SJens Axboe 		if (issue_flags & IO_URING_F_MULTISHOT)
71322ccf61cSJens Axboe 			*ret = mshot_retry_ret;
714a2741c58SJens Axboe 		else
715a2741c58SJens Axboe 			*ret = -EAGAIN;
716a2741c58SJens Axboe 		return true;
717b3fdea6eSDylan Yudaken 	}
718e2ad599dSDylan Yudaken 	/* Otherwise stop multishot but use the current result. */
719eac52bfeSJens Axboe finish:
720b3fdea6eSDylan Yudaken 	io_req_set_res(req, *ret, cflags);
721b3fdea6eSDylan Yudaken 
722100d6b17SPavel Begunkov 	if (issue_flags & IO_URING_F_MULTISHOT)
723b3fdea6eSDylan Yudaken 		*ret = IOU_STOP_MULTISHOT;
724e2df2ccbSDylan Yudaken 	else
725e2df2ccbSDylan Yudaken 		*ret = IOU_OK;
726b3fdea6eSDylan Yudaken 	return true;
727b3fdea6eSDylan Yudaken }
728b3fdea6eSDylan Yudaken 
/*
 * Carve the multishot recvmsg header area out of the front of a selected
 * buffer.
 *
 * The header area is sizeof(struct io_uring_recvmsg_out) + namelen +
 * controllen bytes; the control data region is the last controllen bytes of
 * it. On success the original buffer start is stashed in sr->buf, and *buf
 * and *len are advanced past the header so that they describe the payload
 * area, whose size is also stored in kmsg->payloadlen.
 *
 * Returns 0 on success, -EFAULT if the buffer is too small for the header.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long base = (unsigned long) *buf;
	unsigned long hdr_len;
	unsigned long payload;

	hdr_len = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr_len)
		return -EFAULT;

	payload = base + hdr_len;

	if (kmsg->controllen) {
		/* control data sits directly in front of the payload */
		unsigned long cmsg_start = payload - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) cmsg_start;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	/* remember the buffer start, the header is copied there later */
	sr->buf = *buf;
	*buf = (void __user *) payload;
	*len -= hdr_len;
	kmsg->payloadlen = *len;
	return 0;
}
7539bb66906SDylan Yudaken 
7549bb66906SDylan Yudaken struct io_recvmsg_multishot_hdr {
7559bb66906SDylan Yudaken 	struct io_uring_recvmsg_out msg;
7569bb66906SDylan Yudaken 	struct sockaddr_storage addr;
7579bb66906SDylan Yudaken };
7589bb66906SDylan Yudaken 
io_recvmsg_multishot(struct socket * sock,struct io_sr_msg * io,struct io_async_msghdr * kmsg,unsigned int flags,bool * finished)7599bb66906SDylan Yudaken static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
7609bb66906SDylan Yudaken 				struct io_async_msghdr *kmsg,
7619bb66906SDylan Yudaken 				unsigned int flags, bool *finished)
7629bb66906SDylan Yudaken {
7639bb66906SDylan Yudaken 	int err;
7649bb66906SDylan Yudaken 	int copy_len;
7659bb66906SDylan Yudaken 	struct io_recvmsg_multishot_hdr hdr;
7669bb66906SDylan Yudaken 
7679bb66906SDylan Yudaken 	if (kmsg->namelen)
7689bb66906SDylan Yudaken 		kmsg->msg.msg_name = &hdr.addr;
7699bb66906SDylan Yudaken 	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
7709bb66906SDylan Yudaken 	kmsg->msg.msg_namelen = 0;
7719bb66906SDylan Yudaken 
7729bb66906SDylan Yudaken 	if (sock->file->f_flags & O_NONBLOCK)
7739bb66906SDylan Yudaken 		flags |= MSG_DONTWAIT;
7749bb66906SDylan Yudaken 
7759bb66906SDylan Yudaken 	err = sock_recvmsg(sock, &kmsg->msg, flags);
7769bb66906SDylan Yudaken 	*finished = err <= 0;
7779bb66906SDylan Yudaken 	if (err < 0)
7789bb66906SDylan Yudaken 		return err;
7799bb66906SDylan Yudaken 
7809bb66906SDylan Yudaken 	hdr.msg = (struct io_uring_recvmsg_out) {
7819bb66906SDylan Yudaken 		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
7829bb66906SDylan Yudaken 		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
7839bb66906SDylan Yudaken 	};
7849bb66906SDylan Yudaken 
7859bb66906SDylan Yudaken 	hdr.msg.payloadlen = err;
7869bb66906SDylan Yudaken 	if (err > kmsg->payloadlen)
7879bb66906SDylan Yudaken 		err = kmsg->payloadlen;
7889bb66906SDylan Yudaken 
7899bb66906SDylan Yudaken 	copy_len = sizeof(struct io_uring_recvmsg_out);
7909bb66906SDylan Yudaken 	if (kmsg->msg.msg_namelen > kmsg->namelen)
7919bb66906SDylan Yudaken 		copy_len += kmsg->namelen;
7929bb66906SDylan Yudaken 	else
7939bb66906SDylan Yudaken 		copy_len += kmsg->msg.msg_namelen;
7949bb66906SDylan Yudaken 
7959bb66906SDylan Yudaken 	/*
7969bb66906SDylan Yudaken 	 *      "fromlen shall refer to the value before truncation.."
7979bb66906SDylan Yudaken 	 *                      1003.1g
7989bb66906SDylan Yudaken 	 */
7999bb66906SDylan Yudaken 	hdr.msg.namelen = kmsg->msg.msg_namelen;
8009bb66906SDylan Yudaken 
8019bb66906SDylan Yudaken 	/* ensure that there is no gap between hdr and sockaddr_storage */
8029bb66906SDylan Yudaken 	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
8039bb66906SDylan Yudaken 		     sizeof(struct io_uring_recvmsg_out));
8049bb66906SDylan Yudaken 	if (copy_to_user(io->buf, &hdr, copy_len)) {
8059bb66906SDylan Yudaken 		*finished = true;
8069bb66906SDylan Yudaken 		return -EFAULT;
8079bb66906SDylan Yudaken 	}
8089bb66906SDylan Yudaken 
8099bb66906SDylan Yudaken 	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
8109bb66906SDylan Yudaken 			kmsg->controllen + err;
8119bb66906SDylan Yudaken }
8129bb66906SDylan Yudaken 
io_recvmsg(struct io_kiocb * req,unsigned int issue_flags)813f9ead18cSJens Axboe int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
814f9ead18cSJens Axboe {
815f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
816f9ead18cSJens Axboe 	struct io_async_msghdr iomsg, *kmsg;
817f9ead18cSJens Axboe 	struct socket *sock;
818f9ead18cSJens Axboe 	unsigned flags;
819f9ead18cSJens Axboe 	int ret, min_ret = 0;
820f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
8219bb66906SDylan Yudaken 	bool mshot_finished = true;
822f9ead18cSJens Axboe 
823f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
824f9ead18cSJens Axboe 	if (unlikely(!sock))
825f9ead18cSJens Axboe 		return -ENOTSOCK;
826f9ead18cSJens Axboe 
827f9ead18cSJens Axboe 	if (req_has_async_data(req)) {
828f9ead18cSJens Axboe 		kmsg = req->async_data;
829f9ead18cSJens Axboe 	} else {
830f9ead18cSJens Axboe 		ret = io_recvmsg_copy_hdr(req, &iomsg);
831f9ead18cSJens Axboe 		if (ret)
832f9ead18cSJens Axboe 			return ret;
833f9ead18cSJens Axboe 		kmsg = &iomsg;
834f9ead18cSJens Axboe 	}
835f9ead18cSJens Axboe 
836f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_POLLED) &&
837f9ead18cSJens Axboe 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
83843e0bbbdSJens Axboe 		return io_setup_async_msg(req, kmsg, issue_flags);
839f9ead18cSJens Axboe 
84017add5ceSPavel Begunkov 	if (!io_check_multishot(req, issue_flags))
84117add5ceSPavel Begunkov 		return io_setup_async_msg(req, kmsg, issue_flags);
84217add5ceSPavel Begunkov 
8439bb66906SDylan Yudaken retry_multishot:
844f9ead18cSJens Axboe 	if (io_do_buffer_select(req)) {
845f9ead18cSJens Axboe 		void __user *buf;
8469bb66906SDylan Yudaken 		size_t len = sr->len;
847f9ead18cSJens Axboe 
8489bb66906SDylan Yudaken 		buf = io_buffer_select(req, &len, issue_flags);
849f9ead18cSJens Axboe 		if (!buf)
850f9ead18cSJens Axboe 			return -ENOBUFS;
8519bb66906SDylan Yudaken 
8529bb66906SDylan Yudaken 		if (req->flags & REQ_F_APOLL_MULTISHOT) {
8539bb66906SDylan Yudaken 			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
8549bb66906SDylan Yudaken 			if (ret) {
8559bb66906SDylan Yudaken 				io_kbuf_recycle(req, issue_flags);
8569bb66906SDylan Yudaken 				return ret;
8579bb66906SDylan Yudaken 			}
8589bb66906SDylan Yudaken 		}
8599bb66906SDylan Yudaken 
8604b61152eSJens Axboe 		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
861f9ead18cSJens Axboe 	}
862f9ead18cSJens Axboe 
863f9ead18cSJens Axboe 	flags = sr->msg_flags;
864f9ead18cSJens Axboe 	if (force_nonblock)
865f9ead18cSJens Axboe 		flags |= MSG_DONTWAIT;
866f9ead18cSJens Axboe 
867f9ead18cSJens Axboe 	kmsg->msg.msg_get_inq = 1;
868b65db921SJens Axboe 	kmsg->msg.msg_inq = -1;
86978d0d206SJens Axboe 	if (req->flags & REQ_F_APOLL_MULTISHOT) {
8709bb66906SDylan Yudaken 		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
8719bb66906SDylan Yudaken 					   &mshot_finished);
87278d0d206SJens Axboe 	} else {
87378d0d206SJens Axboe 		/* disable partial retry for recvmsg with cmsg attached */
87478d0d206SJens Axboe 		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
87578d0d206SJens Axboe 			min_ret = iov_iter_count(&kmsg->msg.msg_iter);
87678d0d206SJens Axboe 
8779bb66906SDylan Yudaken 		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
8789bb66906SDylan Yudaken 					 kmsg->uaddr, flags);
87978d0d206SJens Axboe 	}
8809bb66906SDylan Yudaken 
881f9ead18cSJens Axboe 	if (ret < min_ret) {
8829bb66906SDylan Yudaken 		if (ret == -EAGAIN && force_nonblock) {
8839bb66906SDylan Yudaken 			ret = io_setup_async_msg(req, kmsg, issue_flags);
884100d6b17SPavel Begunkov 			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
8859bb66906SDylan Yudaken 				io_kbuf_recycle(req, issue_flags);
8869bb66906SDylan Yudaken 				return IOU_ISSUE_SKIP_COMPLETE;
8879bb66906SDylan Yudaken 			}
8889bb66906SDylan Yudaken 			return ret;
8899bb66906SDylan Yudaken 		}
890f9ead18cSJens Axboe 		if (ret > 0 && io_net_retry(sock, flags)) {
891f9ead18cSJens Axboe 			sr->done_io += ret;
892f9ead18cSJens Axboe 			req->flags |= REQ_F_PARTIAL_IO;
89343e0bbbdSJens Axboe 			return io_setup_async_msg(req, kmsg, issue_flags);
894f9ead18cSJens Axboe 		}
89595eafc74SPavel Begunkov 		if (ret == -ERESTARTSYS)
89695eafc74SPavel Begunkov 			ret = -EINTR;
897f9ead18cSJens Axboe 		req_set_fail(req);
898f9ead18cSJens Axboe 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
899f9ead18cSJens Axboe 		req_set_fail(req);
900f9ead18cSJens Axboe 	}
901f9ead18cSJens Axboe 
902d4e097daSDylan Yudaken 	if (ret > 0)
903f9ead18cSJens Axboe 		ret += sr->done_io;
904f9ead18cSJens Axboe 	else if (sr->done_io)
905f9ead18cSJens Axboe 		ret = sr->done_io;
906d4e097daSDylan Yudaken 	else
907d4e097daSDylan Yudaken 		io_kbuf_recycle(req, issue_flags);
908d4e097daSDylan Yudaken 
9097d41bcb7SJens Axboe 	if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
9109bb66906SDylan Yudaken 		goto retry_multishot;
9119bb66906SDylan Yudaken 
9129bb66906SDylan Yudaken 	if (mshot_finished) {
9139bb66906SDylan Yudaken 		/* fast path, check for non-NULL to avoid function call */
9149bb66906SDylan Yudaken 		if (kmsg->free_iov)
9159bb66906SDylan Yudaken 			kfree(kmsg->free_iov);
9166c3e8955SPavel Begunkov 		io_netmsg_recycle(req, issue_flags);
9179bb66906SDylan Yudaken 		req->flags &= ~REQ_F_NEED_CLEANUP;
918f55ecbd9SJens Axboe 	} else if (ret == -EAGAIN)
919f55ecbd9SJens Axboe 		return io_setup_async_msg(req, kmsg, issue_flags);
9209bb66906SDylan Yudaken 
9219bb66906SDylan Yudaken 	return ret;
922f9ead18cSJens Axboe }
923f9ead18cSJens Axboe 
io_recv(struct io_kiocb * req,unsigned int issue_flags)924f9ead18cSJens Axboe int io_recv(struct io_kiocb *req, unsigned int issue_flags)
925f9ead18cSJens Axboe {
926f2ccb5aeSStefan Metzmacher 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
927f9ead18cSJens Axboe 	struct msghdr msg;
928f9ead18cSJens Axboe 	struct socket *sock;
929f9ead18cSJens Axboe 	unsigned flags;
930f9ead18cSJens Axboe 	int ret, min_ret = 0;
931f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
932b3fdea6eSDylan Yudaken 	size_t len = sr->len;
933f9ead18cSJens Axboe 
934f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_POLLED) &&
935f9ead18cSJens Axboe 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
936f9ead18cSJens Axboe 		return -EAGAIN;
937f9ead18cSJens Axboe 
93817add5ceSPavel Begunkov 	if (!io_check_multishot(req, issue_flags))
93917add5ceSPavel Begunkov 		return -EAGAIN;
94017add5ceSPavel Begunkov 
941f9ead18cSJens Axboe 	sock = sock_from_file(req->file);
942f9ead18cSJens Axboe 	if (unlikely(!sock))
943f9ead18cSJens Axboe 		return -ENOTSOCK;
944f9ead18cSJens Axboe 
945bf34e697SJens Axboe 	msg.msg_name = NULL;
946bf34e697SJens Axboe 	msg.msg_namelen = 0;
947bf34e697SJens Axboe 	msg.msg_control = NULL;
948bf34e697SJens Axboe 	msg.msg_get_inq = 1;
949bf34e697SJens Axboe 	msg.msg_controllen = 0;
950bf34e697SJens Axboe 	msg.msg_iocb = NULL;
951bf34e697SJens Axboe 	msg.msg_ubuf = NULL;
952bf34e697SJens Axboe 
953b3fdea6eSDylan Yudaken retry_multishot:
954f9ead18cSJens Axboe 	if (io_do_buffer_select(req)) {
955f9ead18cSJens Axboe 		void __user *buf;
956f9ead18cSJens Axboe 
957b3fdea6eSDylan Yudaken 		buf = io_buffer_select(req, &len, issue_flags);
958f9ead18cSJens Axboe 		if (!buf)
959f9ead18cSJens Axboe 			return -ENOBUFS;
960f9ead18cSJens Axboe 		sr->buf = buf;
9610ceb7a92SJens Axboe 		sr->len = len;
962f9ead18cSJens Axboe 	}
963f9ead18cSJens Axboe 
9644b61152eSJens Axboe 	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
965f9ead18cSJens Axboe 	if (unlikely(ret))
966f9ead18cSJens Axboe 		goto out_free;
967f9ead18cSJens Axboe 
968b65db921SJens Axboe 	msg.msg_inq = -1;
969f9ead18cSJens Axboe 	msg.msg_flags = 0;
970f9ead18cSJens Axboe 
971f9ead18cSJens Axboe 	flags = sr->msg_flags;
972f9ead18cSJens Axboe 	if (force_nonblock)
973f9ead18cSJens Axboe 		flags |= MSG_DONTWAIT;
974f9ead18cSJens Axboe 	if (flags & MSG_WAITALL)
975f9ead18cSJens Axboe 		min_ret = iov_iter_count(&msg.msg_iter);
976f9ead18cSJens Axboe 
977f9ead18cSJens Axboe 	ret = sock_recvmsg(sock, &msg, flags);
978f9ead18cSJens Axboe 	if (ret < min_ret) {
979b3fdea6eSDylan Yudaken 		if (ret == -EAGAIN && force_nonblock) {
980100d6b17SPavel Begunkov 			if (issue_flags & IO_URING_F_MULTISHOT) {
981b3fdea6eSDylan Yudaken 				io_kbuf_recycle(req, issue_flags);
982b3fdea6eSDylan Yudaken 				return IOU_ISSUE_SKIP_COMPLETE;
983b3fdea6eSDylan Yudaken 			}
984b3fdea6eSDylan Yudaken 
985f9ead18cSJens Axboe 			return -EAGAIN;
986b3fdea6eSDylan Yudaken 		}
987f9ead18cSJens Axboe 		if (ret > 0 && io_net_retry(sock, flags)) {
988f9ead18cSJens Axboe 			sr->len -= ret;
989f9ead18cSJens Axboe 			sr->buf += ret;
990f9ead18cSJens Axboe 			sr->done_io += ret;
991f9ead18cSJens Axboe 			req->flags |= REQ_F_PARTIAL_IO;
992f9ead18cSJens Axboe 			return -EAGAIN;
993f9ead18cSJens Axboe 		}
99495eafc74SPavel Begunkov 		if (ret == -ERESTARTSYS)
99595eafc74SPavel Begunkov 			ret = -EINTR;
996f9ead18cSJens Axboe 		req_set_fail(req);
997f9ead18cSJens Axboe 	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
998f9ead18cSJens Axboe out_free:
999f9ead18cSJens Axboe 		req_set_fail(req);
1000f9ead18cSJens Axboe 	}
1001f9ead18cSJens Axboe 
1002d4e097daSDylan Yudaken 	if (ret > 0)
1003f9ead18cSJens Axboe 		ret += sr->done_io;
1004f9ead18cSJens Axboe 	else if (sr->done_io)
1005f9ead18cSJens Axboe 		ret = sr->done_io;
1006d4e097daSDylan Yudaken 	else
1007d4e097daSDylan Yudaken 		io_kbuf_recycle(req, issue_flags);
1008d4e097daSDylan Yudaken 
10097d41bcb7SJens Axboe 	if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
1010b3fdea6eSDylan Yudaken 		goto retry_multishot;
1011b3fdea6eSDylan Yudaken 
1012b3fdea6eSDylan Yudaken 	return ret;
1013f9ead18cSJens Axboe }
1014f9ead18cSJens Axboe 
io_send_zc_cleanup(struct io_kiocb * req)1015b0e9b551SPavel Begunkov void io_send_zc_cleanup(struct io_kiocb *req)
1016b48c312bSPavel Begunkov {
1017ac9e5784SPavel Begunkov 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1018493108d9SPavel Begunkov 	struct io_async_msghdr *io;
1019b48c312bSPavel Begunkov 
1020493108d9SPavel Begunkov 	if (req_has_async_data(req)) {
1021493108d9SPavel Begunkov 		io = req->async_data;
10224c17a496SPavel Begunkov 		/* might be ->fast_iov if *msg_copy_hdr failed */
10234c17a496SPavel Begunkov 		if (io->free_iov != io->fast_iov)
1024493108d9SPavel Begunkov 			kfree(io->free_iov);
1025493108d9SPavel Begunkov 	}
1026a75155faSPavel Begunkov 	if (zc->notif) {
1027b48c312bSPavel Begunkov 		io_notif_flush(zc->notif);
1028b48c312bSPavel Begunkov 		zc->notif = NULL;
1029b48c312bSPavel Begunkov 	}
1030a75155faSPavel Begunkov }
1031b48c312bSPavel Begunkov 
103240725d1bSPavel Begunkov #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
103340725d1bSPavel Begunkov #define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
103440725d1bSPavel Begunkov 
io_send_zc_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1035b0e9b551SPavel Begunkov int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
103606a5464bSPavel Begunkov {
1037ac9e5784SPavel Begunkov 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
103810c7d33eSPavel Begunkov 	struct io_ring_ctx *ctx = req->ctx;
1039b48c312bSPavel Begunkov 	struct io_kiocb *notif;
104006a5464bSPavel Begunkov 
1041493108d9SPavel Begunkov 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1042b48c312bSPavel Begunkov 		return -EINVAL;
1043b48c312bSPavel Begunkov 	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1044b48c312bSPavel Begunkov 	if (req->flags & REQ_F_CQE_SKIP)
104506a5464bSPavel Begunkov 		return -EINVAL;
104606a5464bSPavel Begunkov 
1047b48c312bSPavel Begunkov 	notif = zc->notif = io_alloc_notif(ctx);
1048b48c312bSPavel Begunkov 	if (!notif)
1049b48c312bSPavel Begunkov 		return -ENOMEM;
1050b48c312bSPavel Begunkov 	notif->cqe.user_data = req->cqe.user_data;
1051b48c312bSPavel Begunkov 	notif->cqe.res = 0;
1052b48c312bSPavel Begunkov 	notif->cqe.flags = IORING_CQE_F_NOTIF;
1053b48c312bSPavel Begunkov 	req->flags |= REQ_F_NEED_CLEANUP;
105440725d1bSPavel Begunkov 
105540725d1bSPavel Begunkov 	zc->flags = READ_ONCE(sqe->ioprio);
105640725d1bSPavel Begunkov 	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
105740725d1bSPavel Begunkov 		if (zc->flags & ~IO_ZC_FLAGS_VALID)
105840725d1bSPavel Begunkov 			return -EINVAL;
105940725d1bSPavel Begunkov 		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
106040725d1bSPavel Begunkov 			io_notif_set_extended(notif);
106140725d1bSPavel Begunkov 			io_notif_to_data(notif)->zc_report = true;
106240725d1bSPavel Begunkov 		}
106340725d1bSPavel Begunkov 	}
106440725d1bSPavel Begunkov 
1065e3366e02SPavel Begunkov 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1066e3366e02SPavel Begunkov 		unsigned idx = READ_ONCE(sqe->buf_index);
1067e3366e02SPavel Begunkov 
1068e3366e02SPavel Begunkov 		if (unlikely(idx >= ctx->nr_user_bufs))
1069e3366e02SPavel Begunkov 			return -EFAULT;
1070e3366e02SPavel Begunkov 		idx = array_index_nospec(idx, ctx->nr_user_bufs);
1071e3366e02SPavel Begunkov 		req->imu = READ_ONCE(ctx->user_bufs[idx]);
1072e3366e02SPavel Begunkov 		io_req_set_rsrc_node(notif, ctx, 0);
1073e3366e02SPavel Begunkov 	}
107406a5464bSPavel Begunkov 
1075493108d9SPavel Begunkov 	if (req->opcode == IORING_OP_SEND_ZC) {
1076493108d9SPavel Begunkov 		if (READ_ONCE(sqe->__pad3[0]))
1077493108d9SPavel Begunkov 			return -EINVAL;
1078493108d9SPavel Begunkov 		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1079493108d9SPavel Begunkov 		zc->addr_len = READ_ONCE(sqe->addr_len);
1080493108d9SPavel Begunkov 	} else {
1081493108d9SPavel Begunkov 		if (unlikely(sqe->addr2 || sqe->file_index))
1082493108d9SPavel Begunkov 			return -EINVAL;
1083493108d9SPavel Begunkov 		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1084493108d9SPavel Begunkov 			return -EINVAL;
1085493108d9SPavel Begunkov 	}
1086493108d9SPavel Begunkov 
108706a5464bSPavel Begunkov 	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
108806a5464bSPavel Begunkov 	zc->len = READ_ONCE(sqe->len);
108906a5464bSPavel Begunkov 	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
109006a5464bSPavel Begunkov 	if (zc->msg_flags & MSG_DONTWAIT)
109106a5464bSPavel Begunkov 		req->flags |= REQ_F_NOWAIT;
1092092aeedbSPavel Begunkov 
10934a933e62SPavel Begunkov 	zc->done_io = 0;
1094092aeedbSPavel Begunkov 
109506a5464bSPavel Begunkov #ifdef CONFIG_COMPAT
109606a5464bSPavel Begunkov 	if (req->ctx->compat)
109706a5464bSPavel Begunkov 		zc->msg_flags |= MSG_CMSG_COMPAT;
109806a5464bSPavel Begunkov #endif
109906a5464bSPavel Begunkov 	return 0;
110006a5464bSPavel Begunkov }
110106a5464bSPavel Begunkov 
/*
 * sg_from_iter callback installed by io_send_zc() for sends that do not use
 * a fixed buffer: downgrades the skb from managed frags, then defers to
 * __zerocopy_sg_from_iter() and returns its result.
 */
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);

	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}
1108cd9021e8SPavel Begunkov 
/*
 * sg_from_iter callback installed by io_send_zc() for fixed-buffer sends.
 *
 * Attaches up to @length bytes of the bvec iterator @from to @skb as page
 * frags, marking the skb as using managed frag references when it had no
 * frags yet. If the skb already has frags and is not managed, the work is
 * handed to __zerocopy_sg_from_iter() instead.
 *
 * The iterator, the skb lengths/truesize and the socket's send memory
 * accounting are advanced by what was attached. Returns 0, or -EMSGSIZE if
 * the skb ran out of frag slots before all requested bytes were attached.
 */
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned long truesize = 0;
	int nr_frags = shinfo->nr_frags;
	struct bvec_iter pos;
	ssize_t attached = 0;

	if (!nr_frags)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	pos.bi_idx = 0;
	pos.bi_bvec_done = from->iov_offset;
	pos.bi_size = min(from->count, length);

	for (; pos.bi_size && nr_frags < MAX_SKB_FRAGS; nr_frags++) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, pos);

		attached += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, nr_frags, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &pos, v.bv_len);
	}

	/* publish the new frag count and advance the caller's iterator */
	shinfo->nr_frags = nr_frags;
	from->bvec += pos.bi_idx;
	from->nr_segs -= pos.bi_idx;
	from->count -= attached;
	from->iov_offset = pos.bi_bvec_done;

	skb->data_len += attached;
	skb->len += attached;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}

	/* bytes left over means the frag slots ran out first */
	return pos.bi_size ? -EMSGSIZE : 0;
}
11593ff1a0d3SPavel Begunkov 
io_send_zc(struct io_kiocb * req,unsigned int issue_flags)1160b0e9b551SPavel Begunkov int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
116106a5464bSPavel Begunkov {
11626ae61b7aSPavel Begunkov 	struct sockaddr_storage __address;
1163ac9e5784SPavel Begunkov 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
116406a5464bSPavel Begunkov 	struct msghdr msg;
116506a5464bSPavel Begunkov 	struct socket *sock;
11666ae91ac9SPavel Begunkov 	unsigned msg_flags;
116706a5464bSPavel Begunkov 	int ret, min_ret = 0;
116806a5464bSPavel Begunkov 
116906a5464bSPavel Begunkov 	sock = sock_from_file(req->file);
117006a5464bSPavel Begunkov 	if (unlikely(!sock))
117106a5464bSPavel Begunkov 		return -ENOTSOCK;
1172edf81438SPavel Begunkov 	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1173edf81438SPavel Begunkov 		return -EOPNOTSUPP;
117406a5464bSPavel Begunkov 
117506a5464bSPavel Begunkov 	msg.msg_name = NULL;
117606a5464bSPavel Begunkov 	msg.msg_control = NULL;
117706a5464bSPavel Begunkov 	msg.msg_controllen = 0;
117806a5464bSPavel Begunkov 	msg.msg_namelen = 0;
117906a5464bSPavel Begunkov 
118086dc8f23SPavel Begunkov 	if (zc->addr) {
1181581711c4SPavel Begunkov 		if (req_has_async_data(req)) {
1182581711c4SPavel Begunkov 			struct io_async_msghdr *io = req->async_data;
1183581711c4SPavel Begunkov 
11846ae61b7aSPavel Begunkov 			msg.msg_name = &io->addr;
1185581711c4SPavel Begunkov 		} else {
1186581711c4SPavel Begunkov 			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
118786dc8f23SPavel Begunkov 			if (unlikely(ret < 0))
118886dc8f23SPavel Begunkov 				return ret;
1189581711c4SPavel Begunkov 			msg.msg_name = (struct sockaddr *)&__address;
1190581711c4SPavel Begunkov 		}
119186dc8f23SPavel Begunkov 		msg.msg_namelen = zc->addr_len;
119286dc8f23SPavel Begunkov 	}
119386dc8f23SPavel Begunkov 
11943c840053SPavel Begunkov 	if (!(req->flags & REQ_F_POLLED) &&
11953c840053SPavel Begunkov 	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
11966ae61b7aSPavel Begunkov 		return io_setup_async_addr(req, &__address, issue_flags);
11973c840053SPavel Begunkov 
119810c7d33eSPavel Begunkov 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1199de4eda9dSAl Viro 		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
120010c7d33eSPavel Begunkov 					(u64)(uintptr_t)zc->buf, zc->len);
120110c7d33eSPavel Begunkov 		if (unlikely(ret))
120210c7d33eSPavel Begunkov 			return ret;
1203cd9021e8SPavel Begunkov 		msg.sg_from_iter = io_sg_from_iter;
120410c7d33eSPavel Begunkov 	} else {
120542385b02SPavel Begunkov 		io_notif_set_extended(zc->notif);
12064b61152eSJens Axboe 		ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
120706a5464bSPavel Begunkov 		if (unlikely(ret))
120806a5464bSPavel Begunkov 			return ret;
1209b48c312bSPavel Begunkov 		ret = io_notif_account_mem(zc->notif, zc->len);
12102e32ba56SPavel Begunkov 		if (unlikely(ret))
12112e32ba56SPavel Begunkov 			return ret;
1212cd9021e8SPavel Begunkov 		msg.sg_from_iter = io_sg_from_iter_iovec;
121310c7d33eSPavel Begunkov 	}
121406a5464bSPavel Begunkov 
121506a5464bSPavel Begunkov 	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
121606a5464bSPavel Begunkov 	if (issue_flags & IO_URING_F_NONBLOCK)
121706a5464bSPavel Begunkov 		msg_flags |= MSG_DONTWAIT;
121806a5464bSPavel Begunkov 	if (msg_flags & MSG_WAITALL)
121906a5464bSPavel Begunkov 		min_ret = iov_iter_count(&msg.msg_iter);
1220b841b901SDavid Howells 	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
122106a5464bSPavel Begunkov 
122206a5464bSPavel Begunkov 	msg.msg_flags = msg_flags;
1223b48c312bSPavel Begunkov 	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
122406a5464bSPavel Begunkov 	ret = sock_sendmsg(sock, &msg);
122506a5464bSPavel Begunkov 
122606a5464bSPavel Begunkov 	if (unlikely(ret < min_ret)) {
122706a5464bSPavel Begunkov 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
12286ae61b7aSPavel Begunkov 			return io_setup_async_addr(req, &__address, issue_flags);
1229581711c4SPavel Begunkov 
12304a933e62SPavel Begunkov 		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
12314a933e62SPavel Begunkov 			zc->len -= ret;
12324a933e62SPavel Begunkov 			zc->buf += ret;
12334a933e62SPavel Begunkov 			zc->done_io += ret;
12344a933e62SPavel Begunkov 			req->flags |= REQ_F_PARTIAL_IO;
12356ae61b7aSPavel Begunkov 			return io_setup_async_addr(req, &__address, issue_flags);
12364a933e62SPavel Begunkov 		}
12374a933e62SPavel Begunkov 		if (ret == -ERESTARTSYS)
12384a933e62SPavel Begunkov 			ret = -EINTR;
12395a848b7cSPavel Begunkov 		req_set_fail(req);
124006a5464bSPavel Begunkov 	}
124106a5464bSPavel Begunkov 
12424a933e62SPavel Begunkov 	if (ret >= 0)
12434a933e62SPavel Begunkov 		ret += zc->done_io;
12444a933e62SPavel Begunkov 	else if (zc->done_io)
12454a933e62SPavel Begunkov 		ret = zc->done_io;
1246b48c312bSPavel Begunkov 
1247108893ddSPavel Begunkov 	/*
1248108893ddSPavel Begunkov 	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1249108893ddSPavel Begunkov 	 * flushing notif to io_send_zc_cleanup()
1250108893ddSPavel Begunkov 	 */
1251108893ddSPavel Begunkov 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1252b48c312bSPavel Begunkov 		io_notif_flush(zc->notif);
1253b48c312bSPavel Begunkov 		req->flags &= ~REQ_F_NEED_CLEANUP;
1254108893ddSPavel Begunkov 	}
12556ae91ac9SPavel Begunkov 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
125606a5464bSPavel Begunkov 	return IOU_OK;
125706a5464bSPavel Begunkov }
125806a5464bSPavel Begunkov 
io_sendmsg_zc(struct io_kiocb * req,unsigned int issue_flags)1259493108d9SPavel Begunkov int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1260493108d9SPavel Begunkov {
1261493108d9SPavel Begunkov 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1262493108d9SPavel Begunkov 	struct io_async_msghdr iomsg, *kmsg;
1263493108d9SPavel Begunkov 	struct socket *sock;
12646ae91ac9SPavel Begunkov 	unsigned flags;
1265493108d9SPavel Begunkov 	int ret, min_ret = 0;
1266493108d9SPavel Begunkov 
126742385b02SPavel Begunkov 	io_notif_set_extended(sr->notif);
126842385b02SPavel Begunkov 
1269493108d9SPavel Begunkov 	sock = sock_from_file(req->file);
1270493108d9SPavel Begunkov 	if (unlikely(!sock))
1271493108d9SPavel Begunkov 		return -ENOTSOCK;
1272cc767e7cSPavel Begunkov 	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1273cc767e7cSPavel Begunkov 		return -EOPNOTSUPP;
1274493108d9SPavel Begunkov 
1275493108d9SPavel Begunkov 	if (req_has_async_data(req)) {
1276493108d9SPavel Begunkov 		kmsg = req->async_data;
127796b7b093SPavel Begunkov 		kmsg->msg.msg_control_user = sr->msg_control;
1278493108d9SPavel Begunkov 	} else {
1279493108d9SPavel Begunkov 		ret = io_sendmsg_copy_hdr(req, &iomsg);
1280493108d9SPavel Begunkov 		if (ret)
1281493108d9SPavel Begunkov 			return ret;
1282493108d9SPavel Begunkov 		kmsg = &iomsg;
1283493108d9SPavel Begunkov 	}
1284493108d9SPavel Begunkov 
1285493108d9SPavel Begunkov 	if (!(req->flags & REQ_F_POLLED) &&
1286493108d9SPavel Begunkov 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1287493108d9SPavel Begunkov 		return io_setup_async_msg(req, kmsg, issue_flags);
1288493108d9SPavel Begunkov 
1289493108d9SPavel Begunkov 	flags = sr->msg_flags | MSG_ZEROCOPY;
1290493108d9SPavel Begunkov 	if (issue_flags & IO_URING_F_NONBLOCK)
1291493108d9SPavel Begunkov 		flags |= MSG_DONTWAIT;
1292493108d9SPavel Begunkov 	if (flags & MSG_WAITALL)
1293493108d9SPavel Begunkov 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1294493108d9SPavel Begunkov 
1295493108d9SPavel Begunkov 	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1296493108d9SPavel Begunkov 	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1297493108d9SPavel Begunkov 	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1298493108d9SPavel Begunkov 
1299493108d9SPavel Begunkov 	if (unlikely(ret < min_ret)) {
1300493108d9SPavel Begunkov 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1301493108d9SPavel Begunkov 			return io_setup_async_msg(req, kmsg, issue_flags);
1302493108d9SPavel Begunkov 
1303493108d9SPavel Begunkov 		if (ret > 0 && io_net_retry(sock, flags)) {
1304493108d9SPavel Begunkov 			sr->done_io += ret;
1305493108d9SPavel Begunkov 			req->flags |= REQ_F_PARTIAL_IO;
1306493108d9SPavel Begunkov 			return io_setup_async_msg(req, kmsg, issue_flags);
1307493108d9SPavel Begunkov 		}
1308493108d9SPavel Begunkov 		if (ret == -ERESTARTSYS)
1309493108d9SPavel Begunkov 			ret = -EINTR;
1310493108d9SPavel Begunkov 		req_set_fail(req);
1311493108d9SPavel Begunkov 	}
1312493108d9SPavel Begunkov 	/* fast path, check for non-NULL to avoid function call */
1313108893ddSPavel Begunkov 	if (kmsg->free_iov) {
1314493108d9SPavel Begunkov 		kfree(kmsg->free_iov);
1315108893ddSPavel Begunkov 		kmsg->free_iov = NULL;
1316108893ddSPavel Begunkov 	}
1317493108d9SPavel Begunkov 
1318493108d9SPavel Begunkov 	io_netmsg_recycle(req, issue_flags);
1319493108d9SPavel Begunkov 	if (ret >= 0)
1320493108d9SPavel Begunkov 		ret += sr->done_io;
1321493108d9SPavel Begunkov 	else if (sr->done_io)
1322493108d9SPavel Begunkov 		ret = sr->done_io;
1323493108d9SPavel Begunkov 
1324108893ddSPavel Begunkov 	/*
1325108893ddSPavel Begunkov 	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1326108893ddSPavel Begunkov 	 * flushing notif to io_send_zc_cleanup()
1327108893ddSPavel Begunkov 	 */
1328108893ddSPavel Begunkov 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1329493108d9SPavel Begunkov 		io_notif_flush(sr->notif);
1330493108d9SPavel Begunkov 		req->flags &= ~REQ_F_NEED_CLEANUP;
1331108893ddSPavel Begunkov 	}
13326ae91ac9SPavel Begunkov 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1333493108d9SPavel Begunkov 	return IOU_OK;
1334493108d9SPavel Begunkov }
1335493108d9SPavel Begunkov 
io_sendrecv_fail(struct io_kiocb * req)13367e6b638eSPavel Begunkov void io_sendrecv_fail(struct io_kiocb *req)
13377e6b638eSPavel Begunkov {
13387e6b638eSPavel Begunkov 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
13397e6b638eSPavel Begunkov 
13407e6b638eSPavel Begunkov 	if (req->flags & REQ_F_PARTIAL_IO)
13416ae91ac9SPavel Begunkov 		req->cqe.res = sr->done_io;
13426ae91ac9SPavel Begunkov 
1343c4c0009eSPavel Begunkov 	if ((req->flags & REQ_F_NEED_CLEANUP) &&
13446ae91ac9SPavel Begunkov 	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
13456ae91ac9SPavel Begunkov 		req->cqe.flags |= IORING_CQE_F_MORE;
13465693bcceSPavel Begunkov }
13475693bcceSPavel Begunkov 
io_accept_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1348f9ead18cSJens Axboe int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1349f9ead18cSJens Axboe {
1350f2ccb5aeSStefan Metzmacher 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1351f9ead18cSJens Axboe 	unsigned flags;
1352f9ead18cSJens Axboe 
1353f9ead18cSJens Axboe 	if (sqe->len || sqe->buf_index)
1354f9ead18cSJens Axboe 		return -EINVAL;
1355f9ead18cSJens Axboe 
1356f9ead18cSJens Axboe 	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1357f9ead18cSJens Axboe 	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1358f9ead18cSJens Axboe 	accept->flags = READ_ONCE(sqe->accept_flags);
1359f9ead18cSJens Axboe 	accept->nofile = rlimit(RLIMIT_NOFILE);
1360f9ead18cSJens Axboe 	flags = READ_ONCE(sqe->ioprio);
1361f9ead18cSJens Axboe 	if (flags & ~IORING_ACCEPT_MULTISHOT)
1362f9ead18cSJens Axboe 		return -EINVAL;
1363f9ead18cSJens Axboe 
1364f9ead18cSJens Axboe 	accept->file_slot = READ_ONCE(sqe->file_index);
1365f9ead18cSJens Axboe 	if (accept->file_slot) {
1366f9ead18cSJens Axboe 		if (accept->flags & SOCK_CLOEXEC)
1367f9ead18cSJens Axboe 			return -EINVAL;
1368f9ead18cSJens Axboe 		if (flags & IORING_ACCEPT_MULTISHOT &&
1369f9ead18cSJens Axboe 		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1370f9ead18cSJens Axboe 			return -EINVAL;
1371f9ead18cSJens Axboe 	}
1372f9ead18cSJens Axboe 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1373f9ead18cSJens Axboe 		return -EINVAL;
1374f9ead18cSJens Axboe 	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1375f9ead18cSJens Axboe 		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1376f9ead18cSJens Axboe 	if (flags & IORING_ACCEPT_MULTISHOT)
1377f9ead18cSJens Axboe 		req->flags |= REQ_F_APOLL_MULTISHOT;
1378f9ead18cSJens Axboe 	return 0;
1379f9ead18cSJens Axboe }
1380f9ead18cSJens Axboe 
io_accept(struct io_kiocb * req,unsigned int issue_flags)1381f9ead18cSJens Axboe int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1382f9ead18cSJens Axboe {
1383f2ccb5aeSStefan Metzmacher 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1384f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1385f9ead18cSJens Axboe 	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
1386f9ead18cSJens Axboe 	bool fixed = !!accept->file_slot;
1387f9ead18cSJens Axboe 	struct file *file;
1388f9ead18cSJens Axboe 	int ret, fd;
1389f9ead18cSJens Axboe 
139017add5ceSPavel Begunkov 	if (!io_check_multishot(req, issue_flags))
139117add5ceSPavel Begunkov 		return -EAGAIN;
1392f9ead18cSJens Axboe retry:
1393f9ead18cSJens Axboe 	if (!fixed) {
1394f9ead18cSJens Axboe 		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1395f9ead18cSJens Axboe 		if (unlikely(fd < 0))
1396f9ead18cSJens Axboe 			return fd;
1397f9ead18cSJens Axboe 	}
1398f9ead18cSJens Axboe 	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
1399f9ead18cSJens Axboe 			 accept->flags);
1400f9ead18cSJens Axboe 	if (IS_ERR(file)) {
1401f9ead18cSJens Axboe 		if (!fixed)
1402f9ead18cSJens Axboe 			put_unused_fd(fd);
1403f9ead18cSJens Axboe 		ret = PTR_ERR(file);
1404f9ead18cSJens Axboe 		if (ret == -EAGAIN && force_nonblock) {
1405f9ead18cSJens Axboe 			/*
1406f9ead18cSJens Axboe 			 * if it's multishot and polled, we don't need to
1407f9ead18cSJens Axboe 			 * return EAGAIN to arm the poll infra since it
1408f9ead18cSJens Axboe 			 * has already been done
1409f9ead18cSJens Axboe 			 */
141091482864SPavel Begunkov 			if (issue_flags & IO_URING_F_MULTISHOT)
141113b01aedSJens Axboe 				return IOU_ISSUE_SKIP_COMPLETE;
1412f9ead18cSJens Axboe 			return ret;
1413f9ead18cSJens Axboe 		}
1414f9ead18cSJens Axboe 		if (ret == -ERESTARTSYS)
1415f9ead18cSJens Axboe 			ret = -EINTR;
1416f9ead18cSJens Axboe 		req_set_fail(req);
1417f9ead18cSJens Axboe 	} else if (!fixed) {
1418f9ead18cSJens Axboe 		fd_install(fd, file);
1419f9ead18cSJens Axboe 		ret = fd;
1420f9ead18cSJens Axboe 	} else {
1421f9ead18cSJens Axboe 		ret = io_fixed_fd_install(req, issue_flags, file,
1422f9ead18cSJens Axboe 						accept->file_slot);
1423f9ead18cSJens Axboe 	}
1424f9ead18cSJens Axboe 
1425f9ead18cSJens Axboe 	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1426f9ead18cSJens Axboe 		io_req_set_res(req, ret, 0);
1427f9ead18cSJens Axboe 		return IOU_OK;
1428f9ead18cSJens Axboe 	}
1429f9ead18cSJens Axboe 
1430515e2696SDylan Yudaken 	if (ret < 0)
1431515e2696SDylan Yudaken 		return ret;
1432b6b2bb58SPavel Begunkov 	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
1433b6b2bb58SPavel Begunkov 				ret, IORING_CQE_F_MORE))
1434d245bca6SPavel Begunkov 		goto retry;
1435cbd25748SDylan Yudaken 
143613b01aedSJens Axboe 	io_req_set_res(req, ret, 0);
143713b01aedSJens Axboe 	return IOU_STOP_MULTISHOT;
1438f9ead18cSJens Axboe }
1439f9ead18cSJens Axboe 
io_socket_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1440f9ead18cSJens Axboe int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1441f9ead18cSJens Axboe {
1442f2ccb5aeSStefan Metzmacher 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1443f9ead18cSJens Axboe 
1444f9ead18cSJens Axboe 	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1445f9ead18cSJens Axboe 		return -EINVAL;
1446f9ead18cSJens Axboe 
1447f9ead18cSJens Axboe 	sock->domain = READ_ONCE(sqe->fd);
1448f9ead18cSJens Axboe 	sock->type = READ_ONCE(sqe->off);
1449f9ead18cSJens Axboe 	sock->protocol = READ_ONCE(sqe->len);
1450f9ead18cSJens Axboe 	sock->file_slot = READ_ONCE(sqe->file_index);
1451f9ead18cSJens Axboe 	sock->nofile = rlimit(RLIMIT_NOFILE);
1452f9ead18cSJens Axboe 
1453f9ead18cSJens Axboe 	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1454f9ead18cSJens Axboe 	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1455f9ead18cSJens Axboe 		return -EINVAL;
1456f9ead18cSJens Axboe 	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1457f9ead18cSJens Axboe 		return -EINVAL;
1458f9ead18cSJens Axboe 	return 0;
1459f9ead18cSJens Axboe }
1460f9ead18cSJens Axboe 
io_socket(struct io_kiocb * req,unsigned int issue_flags)1461f9ead18cSJens Axboe int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1462f9ead18cSJens Axboe {
1463f2ccb5aeSStefan Metzmacher 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1464f9ead18cSJens Axboe 	bool fixed = !!sock->file_slot;
1465f9ead18cSJens Axboe 	struct file *file;
1466f9ead18cSJens Axboe 	int ret, fd;
1467f9ead18cSJens Axboe 
1468f9ead18cSJens Axboe 	if (!fixed) {
1469f9ead18cSJens Axboe 		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1470f9ead18cSJens Axboe 		if (unlikely(fd < 0))
1471f9ead18cSJens Axboe 			return fd;
1472f9ead18cSJens Axboe 	}
1473f9ead18cSJens Axboe 	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1474f9ead18cSJens Axboe 	if (IS_ERR(file)) {
1475f9ead18cSJens Axboe 		if (!fixed)
1476f9ead18cSJens Axboe 			put_unused_fd(fd);
1477f9ead18cSJens Axboe 		ret = PTR_ERR(file);
1478f9ead18cSJens Axboe 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1479f9ead18cSJens Axboe 			return -EAGAIN;
1480f9ead18cSJens Axboe 		if (ret == -ERESTARTSYS)
1481f9ead18cSJens Axboe 			ret = -EINTR;
1482f9ead18cSJens Axboe 		req_set_fail(req);
1483f9ead18cSJens Axboe 	} else if (!fixed) {
1484f9ead18cSJens Axboe 		fd_install(fd, file);
1485f9ead18cSJens Axboe 		ret = fd;
1486f9ead18cSJens Axboe 	} else {
1487f9ead18cSJens Axboe 		ret = io_fixed_fd_install(req, issue_flags, file,
1488f9ead18cSJens Axboe 					    sock->file_slot);
1489f9ead18cSJens Axboe 	}
1490f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
1491f9ead18cSJens Axboe 	return IOU_OK;
1492f9ead18cSJens Axboe }
1493f9ead18cSJens Axboe 
io_connect_prep_async(struct io_kiocb * req)1494f9ead18cSJens Axboe int io_connect_prep_async(struct io_kiocb *req)
1495f9ead18cSJens Axboe {
1496f9ead18cSJens Axboe 	struct io_async_connect *io = req->async_data;
1497f2ccb5aeSStefan Metzmacher 	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1498f9ead18cSJens Axboe 
1499f9ead18cSJens Axboe 	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
1500f9ead18cSJens Axboe }
1501f9ead18cSJens Axboe 
io_connect_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1502f9ead18cSJens Axboe int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1503f9ead18cSJens Axboe {
1504f2ccb5aeSStefan Metzmacher 	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1505f9ead18cSJens Axboe 
1506f9ead18cSJens Axboe 	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1507f9ead18cSJens Axboe 		return -EINVAL;
1508f9ead18cSJens Axboe 
1509f9ead18cSJens Axboe 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1510f9ead18cSJens Axboe 	conn->addr_len =  READ_ONCE(sqe->addr2);
151174e2e17eSJens Axboe 	conn->in_progress = conn->seen_econnaborted = false;
1512f9ead18cSJens Axboe 	return 0;
1513f9ead18cSJens Axboe }
1514f9ead18cSJens Axboe 
io_connect(struct io_kiocb * req,unsigned int issue_flags)1515f9ead18cSJens Axboe int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1516f9ead18cSJens Axboe {
1517f2ccb5aeSStefan Metzmacher 	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1518f9ead18cSJens Axboe 	struct io_async_connect __io, *io;
1519f9ead18cSJens Axboe 	unsigned file_flags;
1520f9ead18cSJens Axboe 	int ret;
1521f9ead18cSJens Axboe 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1522f9ead18cSJens Axboe 
1523f9ead18cSJens Axboe 	if (req_has_async_data(req)) {
1524f9ead18cSJens Axboe 		io = req->async_data;
1525f9ead18cSJens Axboe 	} else {
1526f9ead18cSJens Axboe 		ret = move_addr_to_kernel(connect->addr,
1527f9ead18cSJens Axboe 						connect->addr_len,
1528f9ead18cSJens Axboe 						&__io.address);
1529f9ead18cSJens Axboe 		if (ret)
1530f9ead18cSJens Axboe 			goto out;
1531f9ead18cSJens Axboe 		io = &__io;
1532f9ead18cSJens Axboe 	}
1533f9ead18cSJens Axboe 
1534f9ead18cSJens Axboe 	file_flags = force_nonblock ? O_NONBLOCK : 0;
1535f9ead18cSJens Axboe 
1536f9ead18cSJens Axboe 	ret = __sys_connect_file(req->file, &io->address,
1537f9ead18cSJens Axboe 					connect->addr_len, file_flags);
153874e2e17eSJens Axboe 	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
153974e2e17eSJens Axboe 	    && force_nonblock) {
15403fb1bd68SJens Axboe 		if (ret == -EINPROGRESS) {
15413fb1bd68SJens Axboe 			connect->in_progress = true;
15422bb15fb6SJens Axboe 		} else if (ret == -ECONNABORTED) {
154374e2e17eSJens Axboe 			if (connect->seen_econnaborted)
154474e2e17eSJens Axboe 				goto out;
154574e2e17eSJens Axboe 			connect->seen_econnaborted = true;
154674e2e17eSJens Axboe 		}
1547f9ead18cSJens Axboe 		if (req_has_async_data(req))
1548f9ead18cSJens Axboe 			return -EAGAIN;
1549f9ead18cSJens Axboe 		if (io_alloc_async_data(req)) {
1550f9ead18cSJens Axboe 			ret = -ENOMEM;
1551f9ead18cSJens Axboe 			goto out;
1552f9ead18cSJens Axboe 		}
1553f9ead18cSJens Axboe 		memcpy(req->async_data, &__io, sizeof(__io));
1554f9ead18cSJens Axboe 		return -EAGAIN;
1555f9ead18cSJens Axboe 	}
15562bb15fb6SJens Axboe 	if (connect->in_progress) {
15572bb15fb6SJens Axboe 		/*
15582bb15fb6SJens Axboe 		 * At least bluetooth will return -EBADFD on a re-connect
15592bb15fb6SJens Axboe 		 * attempt, and it's (supposedly) also valid to get -EISCONN
15602bb15fb6SJens Axboe 		 * which means the previous result is good. For both of these,
15612bb15fb6SJens Axboe 		 * grab the sock_error() and use that for the completion.
15622bb15fb6SJens Axboe 		 */
15632bb15fb6SJens Axboe 		if (ret == -EBADFD || ret == -EISCONN)
15642bb15fb6SJens Axboe 			ret = sock_error(sock_from_file(req->file)->sk);
15652bb15fb6SJens Axboe 	}
1566f9ead18cSJens Axboe 	if (ret == -ERESTARTSYS)
1567f9ead18cSJens Axboe 		ret = -EINTR;
1568f9ead18cSJens Axboe out:
1569f9ead18cSJens Axboe 	if (ret < 0)
1570f9ead18cSJens Axboe 		req_set_fail(req);
1571f9ead18cSJens Axboe 	io_req_set_res(req, ret, 0);
1572f9ead18cSJens Axboe 	return IOU_OK;
1573f9ead18cSJens Axboe }
157443e0bbbdSJens Axboe 
io_netmsg_cache_free(struct io_cache_entry * entry)157543e0bbbdSJens Axboe void io_netmsg_cache_free(struct io_cache_entry *entry)
157643e0bbbdSJens Axboe {
157743e0bbbdSJens Axboe 	kfree(container_of(entry, struct io_async_msghdr, cache));
157843e0bbbdSJens Axboe }
1579f9ead18cSJens Axboe #endif
1580