// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	unsigned			msg_flags;
	unsigned			flags;
	size_t				len;
	size_t				done_io;
};

struct io_sendzc {
	struct file			*file;
	void __user			*buf;
	size_t				len;
	unsigned			msg_flags;
	unsigned			flags;
	unsigned			addr_len;
	void __user			*addr;
	size_t				done_io;
	struct io_kiocb			*notif;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)

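/*
 * IORING_OP_SHUTDOWN: only sqe->len (the "how" argument) is used; all
 * other SQE fields must be zero. The issue side refuses
 * IO_URING_F_NONBLOCK, so the actual shutdown always runs from a
 * blocking context.
 */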
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

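/*
 * A short transfer is only worth retrying when MSG_WAITALL was asked
 * for, and only on socket types where retrying continues the same byte
 * stream or record (SOCK_STREAM and SOCK_SEQPACKET).
 */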
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

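/*
 * Stash the async msghdr in the per-ring cache, but only when we're
 * called with the ring locked; otherwise leave it attached so the
 * normal cleanup path frees it.
 */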
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
						      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		struct io_async_msghdr *hdr;

		hdr = container_of(entry, struct io_async_msghdr, cache);
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req))
		return req->async_data;

	return NULL;
}

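/*
 * Move the (possibly on-stack) msghdr into async_data so the request
 * can be retried later. Returns -EAGAIN on success by design, so the
 * caller can propagate it and have the core arm a retry; -ENOMEM only
 * if allocation fails.
 */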
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_recvmsg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!async_msg->free_iov)
		async_msg->msg.msg_iter.iov = async_msg->fast_iov;

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
					&iomsg->free_iov);
}

int io_sendzc_prep_async(struct io_kiocb *req)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	io = req->async_data;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			      struct sockaddr_storage *addr,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *io;

	if (!addr || req_has_async_data(req))
		return -EAGAIN;
	if (io_alloc_async_data(req))
		return -ENOMEM;
	io = req->async_data;
	memcpy(&io->addr, addr, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

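/*
 * IORING_OP_SENDMSG issue path: use (or copy) the user msghdr, honor
 * IORING_RECVSEND_POLL_FIRST before the first attempt, and account
 * partial progress in sr->done_io so a later retry completes with the
 * total once the transfer is done or fails.
 */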
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

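/*
 * IORING_OP_SEND: like io_sendmsg() but builds a single-vector msghdr
 * inline, so there is no header copy and nothing to clean up; partial
 * progress just advances sr->buf/sr->len before the -EAGAIN retry.
 */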
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				   UIO_FASTIOV, &iomsg->free_iov,
				   &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

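/*
 * Prep shared by IORING_OP_RECV and IORING_OP_RECVMSG. Multishot mode
 * (IORING_RECV_MULTISHOT in sqe->ioprio) requires provided buffers,
 * forbids MSG_WAITALL, and for IORING_OP_RECV requires a zero length,
 * since each retry takes its size from the selected buffer.
 */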
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

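/*
 * Carve the selected buffer into the multishot recvmsg layout: a
 * struct io_uring_recvmsg_out header, then name and control space,
 * with the remainder used for payload. *buf and *len are advanced
 * past the header area.
 */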
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

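/*
 * One multishot receive: do the sock_recvmsg() into the payload area,
 * then copy the io_uring_recvmsg_out header (plus any received name)
 * to the start of the user buffer. The returned length covers header,
 * name and control space as well as the payload.
 */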
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *      "fromlen shall refer to the value before truncation.."
	 *                      1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

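/*
 * IORING_OP_RECVMSG issue path. For multishot, the provided-buffer
 * selection and receive are retried in place until the socket would
 * block, an error occurs, or posting the completion fails; single-shot
 * completes (or arms a retry) after one attempt.
 */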
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
					       IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}

void io_sendzc_cleanup(struct io_kiocb *req)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);

	zc->notif->flags |= REQ_F_CQE_SKIP;
	io_notif_flush(zc->notif);
	zc->notif = NULL;
}

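/*
 * Prep for IORING_OP_SEND_ZC. sqe->ioprio carries the zerocopy flags,
 * sqe->addr2/sqe->addr_len an optional destination address, and
 * IORING_RECVSEND_FIXED_BUF selects a registered buffer via
 * sqe->buf_index. A notification request is allocated up front; it
 * posts the IORING_CQE_F_NOTIF completion once the kernel is done with
 * the pages.
 *
 * Illustrative userspace usage (a sketch only, assuming liburing's
 * io_uring_prep_send_zc() helper; not part of this file):
 *
 *	io_uring_prep_send_zc(sqe, fd, buf, len, 0, 0);
 *	// first CQE: bytes sent, flagged IORING_CQE_F_MORE
 *	// second CQE: IORING_CQE_F_NOTIF, buffer may be reused
 */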
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
	    READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF))
		return -EINVAL;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(req, ctx, 0);
	}
	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	zc->addr_len = READ_ONCE(sqe->addr_len);
	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

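/*
 * Zerocopy scatter-gather: map the bvec iterator's pages straight into
 * skb frags without taking page references (SKBFL_MANAGED_FRAG_REFS),
 * falling back to the generic __zerocopy_sg_from_iter() when the skb
 * or the iterator doesn't allow managed frags.
 */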
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!shinfo->nr_frags)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;

	if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
		skb_zcopy_downgrade_managed(skb);
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
	}

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

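/*
 * IORING_OP_SEND_ZC issue path: resolve the destination address (kept
 * in async_data across retries), import either a registered buffer or
 * a plain user range, and send with MSG_ZEROCOPY semantics via
 * msg_ubuf. The notification is flushed at completion, and elided
 * (REQ_F_CQE_SKIP) if nothing was sent.
 */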
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address, *addr = NULL;
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags, cflags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = addr = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
			addr = &__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, addr, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	msg.sg_from_iter = io_sg_from_iter;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, addr, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, addr, issue_flags);
		}
		if (ret < 0 && !zc->done_io)
			zc->notif->flags |= REQ_F_CQE_SKIP;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	io_notif_flush(zc->notif);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
	io_req_set_res(req, ret, cflags);
	return IOU_OK;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

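/*
 * IORING_OP_ACCEPT issue path. Single-shot behaves like accept4();
 * multishot loops, posting one CQE with IORING_CQE_F_MORE per accepted
 * connection until posting fails or the socket would block.
 *
 * Illustrative userspace usage (a sketch only, assuming liburing's
 * io_uring_prep_multishot_accept() helper; not part of this file):
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *	// each CQE carries one accepted fd in cqe->res;
 *	// re-arm when IORING_CQE_F_MORE is clear
 */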
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

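/*
 * IORING_OP_SOCKET packs socket(2)'s arguments into otherwise unused
 * SQE fields: domain in sqe->fd, type (plus SOCK_CLOEXEC/SOCK_NONBLOCK)
 * in sqe->off, protocol in sqe->len.
 */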
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					    sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}

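/*
 * IORING_OP_CONNECT issue path: the kernel-side address lives in
 * async_data across retries. -EINPROGRESS from a nonblocking attempt
 * is treated like -EAGAIN, so the request is retried rather than
 * completing early.
 */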
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
						connect->addr_len,
						&__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
					connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif