1f9ead18cSJens Axboe // SPDX-License-Identifier: GPL-2.0 2f9ead18cSJens Axboe #include <linux/kernel.h> 3f9ead18cSJens Axboe #include <linux/errno.h> 4f9ead18cSJens Axboe #include <linux/file.h> 5f9ead18cSJens Axboe #include <linux/slab.h> 6f9ead18cSJens Axboe #include <linux/net.h> 7f9ead18cSJens Axboe #include <linux/compat.h> 8f9ead18cSJens Axboe #include <net/compat.h> 9f9ead18cSJens Axboe #include <linux/io_uring.h> 10f9ead18cSJens Axboe 11f9ead18cSJens Axboe #include <uapi/linux/io_uring.h> 12f9ead18cSJens Axboe 13f9ead18cSJens Axboe #include "io_uring.h" 143b77495aSJens Axboe #include "kbuf.h" 1543e0bbbdSJens Axboe #include "alloc_cache.h" 16f9ead18cSJens Axboe #include "net.h" 1706a5464bSPavel Begunkov #include "notif.h" 1810c7d33eSPavel Begunkov #include "rsrc.h" 19f9ead18cSJens Axboe 20f9ead18cSJens Axboe #if defined(CONFIG_NET) 21f9ead18cSJens Axboe struct io_shutdown { 22f9ead18cSJens Axboe struct file *file; 23f9ead18cSJens Axboe int how; 24f9ead18cSJens Axboe }; 25f9ead18cSJens Axboe 26f9ead18cSJens Axboe struct io_accept { 27f9ead18cSJens Axboe struct file *file; 28f9ead18cSJens Axboe struct sockaddr __user *addr; 29f9ead18cSJens Axboe int __user *addr_len; 30f9ead18cSJens Axboe int flags; 31f9ead18cSJens Axboe u32 file_slot; 32f9ead18cSJens Axboe unsigned long nofile; 33f9ead18cSJens Axboe }; 34f9ead18cSJens Axboe 35f9ead18cSJens Axboe struct io_socket { 36f9ead18cSJens Axboe struct file *file; 37f9ead18cSJens Axboe int domain; 38f9ead18cSJens Axboe int type; 39f9ead18cSJens Axboe int protocol; 40f9ead18cSJens Axboe int flags; 41f9ead18cSJens Axboe u32 file_slot; 42f9ead18cSJens Axboe unsigned long nofile; 43f9ead18cSJens Axboe }; 44f9ead18cSJens Axboe 45f9ead18cSJens Axboe struct io_connect { 46f9ead18cSJens Axboe struct file *file; 47f9ead18cSJens Axboe struct sockaddr __user *addr; 48f9ead18cSJens Axboe int addr_len; 493fb1bd68SJens Axboe bool in_progress; 5074e2e17eSJens Axboe bool seen_econnaborted; 51f9ead18cSJens Axboe }; 52f9ead18cSJens 
Axboe 53f9ead18cSJens Axboe struct io_sr_msg { 54f9ead18cSJens Axboe struct file *file; 55f9ead18cSJens Axboe union { 56f9ead18cSJens Axboe struct compat_msghdr __user *umsg_compat; 57f9ead18cSJens Axboe struct user_msghdr __user *umsg; 58f9ead18cSJens Axboe void __user *buf; 59f9ead18cSJens Axboe }; 600b048557SPavel Begunkov unsigned len; 610b048557SPavel Begunkov unsigned done_io; 62293402e5SPavel Begunkov unsigned msg_flags; 6322ccf61cSJens Axboe unsigned nr_multishot_loops; 640b048557SPavel Begunkov u16 flags; 65516e82f0SPavel Begunkov /* initialised and used only by !msg send variants */ 660b048557SPavel Begunkov u16 addr_len; 67b00c51efSJens Axboe u16 buf_group; 68092aeedbSPavel Begunkov void __user *addr; 69cac9e441SJens Axboe void __user *msg_control; 70516e82f0SPavel Begunkov /* used only for send zerocopy */ 71b48c312bSPavel Begunkov struct io_kiocb *notif; 7206a5464bSPavel Begunkov }; 7306a5464bSPavel Begunkov 7422ccf61cSJens Axboe /* 7522ccf61cSJens Axboe * Number of times we'll try and do receives if there's more data. If we 7622ccf61cSJens Axboe * exceed this limit, then add us to the back of the queue and retry from 7722ccf61cSJens Axboe * there. This helps fairness between flooding clients. 7822ccf61cSJens Axboe */ 7922ccf61cSJens Axboe #define MULTISHOT_MAX_RETRY 32 8022ccf61cSJens Axboe 8117add5ceSPavel Begunkov static inline bool io_check_multishot(struct io_kiocb *req, 8217add5ceSPavel Begunkov unsigned int issue_flags) 8317add5ceSPavel Begunkov { 8417add5ceSPavel Begunkov /* 8517add5ceSPavel Begunkov * When ->locked_cq is set we only allow to post CQEs from the original 8617add5ceSPavel Begunkov * task context. Usual request completions will be handled in other 8717add5ceSPavel Begunkov * generic paths but multipoll may decide to post extra cqes. 
8817add5ceSPavel Begunkov */ 8917add5ceSPavel Begunkov return !(issue_flags & IO_URING_F_IOWQ) || 9017add5ceSPavel Begunkov !(issue_flags & IO_URING_F_MULTISHOT) || 9117add5ceSPavel Begunkov !req->ctx->task_complete; 9217add5ceSPavel Begunkov } 9317add5ceSPavel Begunkov 94f9ead18cSJens Axboe int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 95f9ead18cSJens Axboe { 96f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 97f9ead18cSJens Axboe 98f9ead18cSJens Axboe if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 99f9ead18cSJens Axboe sqe->buf_index || sqe->splice_fd_in)) 100f9ead18cSJens Axboe return -EINVAL; 101f9ead18cSJens Axboe 102f9ead18cSJens Axboe shutdown->how = READ_ONCE(sqe->len); 103aebb224fSDylan Yudaken req->flags |= REQ_F_FORCE_ASYNC; 104f9ead18cSJens Axboe return 0; 105f9ead18cSJens Axboe } 106f9ead18cSJens Axboe 107f9ead18cSJens Axboe int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 108f9ead18cSJens Axboe { 109f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 110f9ead18cSJens Axboe struct socket *sock; 111f9ead18cSJens Axboe int ret; 112f9ead18cSJens Axboe 113aebb224fSDylan Yudaken WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); 114f9ead18cSJens Axboe 115f9ead18cSJens Axboe sock = sock_from_file(req->file); 116f9ead18cSJens Axboe if (unlikely(!sock)) 117f9ead18cSJens Axboe return -ENOTSOCK; 118f9ead18cSJens Axboe 119f9ead18cSJens Axboe ret = __sys_shutdown_sock(sock, shutdown->how); 120f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 121f9ead18cSJens Axboe return IOU_OK; 122f9ead18cSJens Axboe } 123f9ead18cSJens Axboe 124f9ead18cSJens Axboe static bool io_net_retry(struct socket *sock, int flags) 125f9ead18cSJens Axboe { 126f9ead18cSJens Axboe if (!(flags & MSG_WAITALL)) 127f9ead18cSJens Axboe return false; 128f9ead18cSJens Axboe return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 129f9ead18cSJens 
Axboe } 130f9ead18cSJens Axboe 13143e0bbbdSJens Axboe static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 13243e0bbbdSJens Axboe { 13343e0bbbdSJens Axboe struct io_async_msghdr *hdr = req->async_data; 13443e0bbbdSJens Axboe 13506360426SPavel Begunkov if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED) 13643e0bbbdSJens Axboe return; 13743e0bbbdSJens Axboe 13843e0bbbdSJens Axboe /* Let normal cleanup path reap it if we fail adding to the cache */ 13943e0bbbdSJens Axboe if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) { 14043e0bbbdSJens Axboe req->async_data = NULL; 14143e0bbbdSJens Axboe req->flags &= ~REQ_F_ASYNC_DATA; 14243e0bbbdSJens Axboe } 14343e0bbbdSJens Axboe } 14443e0bbbdSJens Axboe 145858c293eSPavel Begunkov static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req, 14643e0bbbdSJens Axboe unsigned int issue_flags) 14743e0bbbdSJens Axboe { 14843e0bbbdSJens Axboe struct io_ring_ctx *ctx = req->ctx; 14943e0bbbdSJens Axboe struct io_cache_entry *entry; 1504c17a496SPavel Begunkov struct io_async_msghdr *hdr; 15143e0bbbdSJens Axboe 152df730ec2SXinghui Li if (!(issue_flags & IO_URING_F_UNLOCKED)) { 153df730ec2SXinghui Li entry = io_alloc_cache_get(&ctx->netmsg_cache); 154df730ec2SXinghui Li if (entry) { 15543e0bbbdSJens Axboe hdr = container_of(entry, struct io_async_msghdr, cache); 1564c17a496SPavel Begunkov hdr->free_iov = NULL; 15743e0bbbdSJens Axboe req->flags |= REQ_F_ASYNC_DATA; 15843e0bbbdSJens Axboe req->async_data = hdr; 15943e0bbbdSJens Axboe return hdr; 16043e0bbbdSJens Axboe } 161df730ec2SXinghui Li } 16243e0bbbdSJens Axboe 1634c17a496SPavel Begunkov if (!io_alloc_async_data(req)) { 1644c17a496SPavel Begunkov hdr = req->async_data; 1654c17a496SPavel Begunkov hdr->free_iov = NULL; 1664c17a496SPavel Begunkov return hdr; 1674c17a496SPavel Begunkov } 16843e0bbbdSJens Axboe return NULL; 16943e0bbbdSJens Axboe } 17043e0bbbdSJens Axboe 171858c293eSPavel Begunkov static inline struct 
io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req) 172858c293eSPavel Begunkov { 173858c293eSPavel Begunkov /* ->prep_async is always called from the submission context */ 174858c293eSPavel Begunkov return io_msg_alloc_async(req, 0); 175858c293eSPavel Begunkov } 176858c293eSPavel Begunkov 177f9ead18cSJens Axboe static int io_setup_async_msg(struct io_kiocb *req, 17843e0bbbdSJens Axboe struct io_async_msghdr *kmsg, 17943e0bbbdSJens Axboe unsigned int issue_flags) 180f9ead18cSJens Axboe { 1813f743e9bSPavel Begunkov struct io_async_msghdr *async_msg; 182f9ead18cSJens Axboe 1833f743e9bSPavel Begunkov if (req_has_async_data(req)) 184f9ead18cSJens Axboe return -EAGAIN; 185858c293eSPavel Begunkov async_msg = io_msg_alloc_async(req, issue_flags); 18643e0bbbdSJens Axboe if (!async_msg) { 187f9ead18cSJens Axboe kfree(kmsg->free_iov); 188f9ead18cSJens Axboe return -ENOMEM; 189f9ead18cSJens Axboe } 190f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 191f9ead18cSJens Axboe memcpy(async_msg, kmsg, sizeof(*kmsg)); 1926f10ae8aSPavel Begunkov if (async_msg->msg.msg_name) 193f9ead18cSJens Axboe async_msg->msg.msg_name = &async_msg->addr; 194c21a8027SPavel Begunkov 195c21a8027SPavel Begunkov if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs) 196c21a8027SPavel Begunkov return -EAGAIN; 197c21a8027SPavel Begunkov 198f9ead18cSJens Axboe /* if were using fast_iov, set it to the new one */ 1994b61152eSJens Axboe if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) { 200de4f5fedSJens Axboe size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov; 201de4f5fedSJens Axboe async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx]; 2023e4cb6ebSStefan Metzmacher } 203f9ead18cSJens Axboe 204f9ead18cSJens Axboe return -EAGAIN; 205f9ead18cSJens Axboe } 206f9ead18cSJens Axboe 2071a033109SJens Axboe #ifdef CONFIG_COMPAT 20851d28472SJens Axboe static int io_compat_msg_copy_hdr(struct io_kiocb *req, 2091a033109SJens Axboe struct io_async_msghdr 
*iomsg, 21051d28472SJens Axboe struct compat_msghdr *msg, int ddir) 2111a033109SJens Axboe { 2121a033109SJens Axboe struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 2131a033109SJens Axboe struct compat_iovec __user *uiov; 2141a033109SJens Axboe int ret; 2151a033109SJens Axboe 21651d28472SJens Axboe if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) 2171a033109SJens Axboe return -EFAULT; 2181a033109SJens Axboe 21951d28472SJens Axboe uiov = compat_ptr(msg->msg_iov); 2201a033109SJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 2211a033109SJens Axboe compat_ssize_t clen; 2221a033109SJens Axboe 2231a033109SJens Axboe iomsg->free_iov = NULL; 22451d28472SJens Axboe if (msg->msg_iovlen == 0) { 2251a033109SJens Axboe sr->len = 0; 22651d28472SJens Axboe } else if (msg->msg_iovlen > 1) { 2271a033109SJens Axboe return -EINVAL; 2281a033109SJens Axboe } else { 2291a033109SJens Axboe if (!access_ok(uiov, sizeof(*uiov))) 2301a033109SJens Axboe return -EFAULT; 2311a033109SJens Axboe if (__get_user(clen, &uiov->iov_len)) 2321a033109SJens Axboe return -EFAULT; 2331a033109SJens Axboe if (clen < 0) 2341a033109SJens Axboe return -EINVAL; 2351a033109SJens Axboe sr->len = clen; 2361a033109SJens Axboe } 2371a033109SJens Axboe 2381a033109SJens Axboe return 0; 2391a033109SJens Axboe } 2401a033109SJens Axboe 2411a033109SJens Axboe iomsg->free_iov = iomsg->fast_iov; 24251d28472SJens Axboe ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, 2431a033109SJens Axboe UIO_FASTIOV, &iomsg->free_iov, 2441a033109SJens Axboe &iomsg->msg.msg_iter, true); 2451a033109SJens Axboe if (unlikely(ret < 0)) 2461a033109SJens Axboe return ret; 2471a033109SJens Axboe 2481a033109SJens Axboe return 0; 2491a033109SJens Axboe } 2501a033109SJens Axboe #endif 2511a033109SJens Axboe 25251d28472SJens Axboe static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, 25351d28472SJens Axboe struct user_msghdr *msg, int ddir) 2541a033109SJens Axboe { 
2551a033109SJens Axboe struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 2561a033109SJens Axboe int ret; 2571a033109SJens Axboe 25851d28472SJens Axboe if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg))) 2591a033109SJens Axboe return -EFAULT; 2601a033109SJens Axboe 2611a033109SJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 26251d28472SJens Axboe if (msg->msg_iovlen == 0) { 2631a033109SJens Axboe sr->len = iomsg->fast_iov[0].iov_len = 0; 2641a033109SJens Axboe iomsg->fast_iov[0].iov_base = NULL; 2651a033109SJens Axboe iomsg->free_iov = NULL; 26651d28472SJens Axboe } else if (msg->msg_iovlen > 1) { 2671a033109SJens Axboe return -EINVAL; 2681a033109SJens Axboe } else { 26951d28472SJens Axboe if (copy_from_user(iomsg->fast_iov, msg->msg_iov, 27051d28472SJens Axboe sizeof(*msg->msg_iov))) 2711a033109SJens Axboe return -EFAULT; 2721a033109SJens Axboe sr->len = iomsg->fast_iov[0].iov_len; 2731a033109SJens Axboe iomsg->free_iov = NULL; 2741a033109SJens Axboe } 2751a033109SJens Axboe 2761a033109SJens Axboe return 0; 2771a033109SJens Axboe } 2781a033109SJens Axboe 2791a033109SJens Axboe iomsg->free_iov = iomsg->fast_iov; 28051d28472SJens Axboe ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, 2811a033109SJens Axboe &iomsg->free_iov, &iomsg->msg.msg_iter, false); 2821a033109SJens Axboe if (unlikely(ret < 0)) 2831a033109SJens Axboe return ret; 2841a033109SJens Axboe 2851a033109SJens Axboe return 0; 2861a033109SJens Axboe } 2871a033109SJens Axboe 288f9ead18cSJens Axboe static int io_sendmsg_copy_hdr(struct io_kiocb *req, 289f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 290f9ead18cSJens Axboe { 291f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 29251d28472SJens Axboe struct user_msghdr msg; 293cac9e441SJens Axboe int ret; 294f9ead18cSJens Axboe 29551d28472SJens Axboe iomsg->msg.msg_name = &iomsg->addr; 29651d28472SJens Axboe iomsg->msg.msg_iter.nr_segs = 0; 29751d28472SJens Axboe 
29851d28472SJens Axboe #ifdef CONFIG_COMPAT 29951d28472SJens Axboe if (unlikely(req->ctx->compat)) { 30051d28472SJens Axboe struct compat_msghdr cmsg; 30151d28472SJens Axboe 30251d28472SJens Axboe ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); 30351d28472SJens Axboe if (unlikely(ret)) 3041a033109SJens Axboe return ret; 3051a033109SJens Axboe 30651d28472SJens Axboe return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); 30751d28472SJens Axboe } 30851d28472SJens Axboe #endif 30951d28472SJens Axboe 31051d28472SJens Axboe ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); 31151d28472SJens Axboe if (unlikely(ret)) 31251d28472SJens Axboe return ret; 31351d28472SJens Axboe 31451d28472SJens Axboe ret = __copy_msghdr(&iomsg->msg, &msg, NULL); 31551d28472SJens Axboe 316cac9e441SJens Axboe /* save msg_control as sys_sendmsg() overwrites it */ 31726fed836SJens Axboe sr->msg_control = iomsg->msg.msg_control_user; 318cac9e441SJens Axboe return ret; 319f9ead18cSJens Axboe } 320f9ead18cSJens Axboe 321516e82f0SPavel Begunkov int io_send_prep_async(struct io_kiocb *req) 322581711c4SPavel Begunkov { 323ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 324581711c4SPavel Begunkov struct io_async_msghdr *io; 325581711c4SPavel Begunkov int ret; 326581711c4SPavel Begunkov 327581711c4SPavel Begunkov if (!zc->addr || req_has_async_data(req)) 328581711c4SPavel Begunkov return 0; 3296bf8ad25SPavel Begunkov io = io_msg_alloc_async_prep(req); 3306bf8ad25SPavel Begunkov if (!io) 331581711c4SPavel Begunkov return -ENOMEM; 332581711c4SPavel Begunkov ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); 333581711c4SPavel Begunkov return ret; 334581711c4SPavel Begunkov } 335581711c4SPavel Begunkov 336581711c4SPavel Begunkov static int io_setup_async_addr(struct io_kiocb *req, 3376ae61b7aSPavel Begunkov struct sockaddr_storage *addr_storage, 338581711c4SPavel Begunkov unsigned int issue_flags) 339581711c4SPavel Begunkov { 3406ae61b7aSPavel 
Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 341581711c4SPavel Begunkov struct io_async_msghdr *io; 342581711c4SPavel Begunkov 3436ae61b7aSPavel Begunkov if (!sr->addr || req_has_async_data(req)) 344581711c4SPavel Begunkov return -EAGAIN; 3456bf8ad25SPavel Begunkov io = io_msg_alloc_async(req, issue_flags); 3466bf8ad25SPavel Begunkov if (!io) 347581711c4SPavel Begunkov return -ENOMEM; 3486ae61b7aSPavel Begunkov memcpy(&io->addr, addr_storage, sizeof(io->addr)); 349581711c4SPavel Begunkov return -EAGAIN; 350581711c4SPavel Begunkov } 351581711c4SPavel Begunkov 352f9ead18cSJens Axboe int io_sendmsg_prep_async(struct io_kiocb *req) 353f9ead18cSJens Axboe { 354f9ead18cSJens Axboe int ret; 355f9ead18cSJens Axboe 356858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 357858c293eSPavel Begunkov return -ENOMEM; 358f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, req->async_data); 359f9ead18cSJens Axboe if (!ret) 360f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 361f9ead18cSJens Axboe return ret; 362f9ead18cSJens Axboe } 363f9ead18cSJens Axboe 364f9ead18cSJens Axboe void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 365f9ead18cSJens Axboe { 366f9ead18cSJens Axboe struct io_async_msghdr *io = req->async_data; 367f9ead18cSJens Axboe 368f9ead18cSJens Axboe kfree(io->free_iov); 369f9ead18cSJens Axboe } 370f9ead18cSJens Axboe 371f9ead18cSJens Axboe int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 372f9ead18cSJens Axboe { 373f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 374f9ead18cSJens Axboe 375516e82f0SPavel Begunkov if (req->opcode == IORING_OP_SEND) { 376516e82f0SPavel Begunkov if (READ_ONCE(sqe->__pad3[0])) 377f9ead18cSJens Axboe return -EINVAL; 378516e82f0SPavel Begunkov sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 379516e82f0SPavel Begunkov sr->addr_len = READ_ONCE(sqe->addr_len); 380516e82f0SPavel Begunkov } else if (sqe->addr2 || sqe->file_index) { 
381516e82f0SPavel Begunkov return -EINVAL; 382516e82f0SPavel Begunkov } 383f9ead18cSJens Axboe 384f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 385f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 386f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 387f9ead18cSJens Axboe if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) 388f9ead18cSJens Axboe return -EINVAL; 389f9ead18cSJens Axboe sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 390f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 391f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 392f9ead18cSJens Axboe 393f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 394f9ead18cSJens Axboe if (req->ctx->compat) 395f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 396f9ead18cSJens Axboe #endif 397f9ead18cSJens Axboe sr->done_io = 0; 398f9ead18cSJens Axboe return 0; 399f9ead18cSJens Axboe } 400f9ead18cSJens Axboe 401f9ead18cSJens Axboe int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 402f9ead18cSJens Axboe { 403f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 404f9ead18cSJens Axboe struct io_async_msghdr iomsg, *kmsg; 405f9ead18cSJens Axboe struct socket *sock; 406f9ead18cSJens Axboe unsigned flags; 407f9ead18cSJens Axboe int min_ret = 0; 408f9ead18cSJens Axboe int ret; 409f9ead18cSJens Axboe 410f9ead18cSJens Axboe sock = sock_from_file(req->file); 411f9ead18cSJens Axboe if (unlikely(!sock)) 412f9ead18cSJens Axboe return -ENOTSOCK; 413f9ead18cSJens Axboe 414f9ead18cSJens Axboe if (req_has_async_data(req)) { 415f9ead18cSJens Axboe kmsg = req->async_data; 41626fed836SJens Axboe kmsg->msg.msg_control_user = sr->msg_control; 417f9ead18cSJens Axboe } else { 418f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, &iomsg); 419f9ead18cSJens Axboe if (ret) 420f9ead18cSJens Axboe return ret; 421f9ead18cSJens Axboe kmsg = &iomsg; 422f9ead18cSJens Axboe } 423f9ead18cSJens Axboe 424f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 425f9ead18cSJens 
Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 42643e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 427f9ead18cSJens Axboe 428f9ead18cSJens Axboe flags = sr->msg_flags; 429f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 430f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 431f9ead18cSJens Axboe if (flags & MSG_WAITALL) 432f9ead18cSJens Axboe min_ret = iov_iter_count(&kmsg->msg.msg_iter); 433f9ead18cSJens Axboe 434f9ead18cSJens Axboe ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 435f9ead18cSJens Axboe 436f9ead18cSJens Axboe if (ret < min_ret) { 437f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 43843e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 439f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 440b1dc4920SJens Axboe kmsg->msg.msg_controllen = 0; 441b1dc4920SJens Axboe kmsg->msg.msg_control = NULL; 442f9ead18cSJens Axboe sr->done_io += ret; 443f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 44443e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 445f9ead18cSJens Axboe } 44695eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 44795eafc74SPavel Begunkov ret = -EINTR; 448f9ead18cSJens Axboe req_set_fail(req); 449f9ead18cSJens Axboe } 450f9ead18cSJens Axboe /* fast path, check for non-NULL to avoid function call */ 451f9ead18cSJens Axboe if (kmsg->free_iov) 452f9ead18cSJens Axboe kfree(kmsg->free_iov); 453f9ead18cSJens Axboe req->flags &= ~REQ_F_NEED_CLEANUP; 45443e0bbbdSJens Axboe io_netmsg_recycle(req, issue_flags); 455f9ead18cSJens Axboe if (ret >= 0) 456f9ead18cSJens Axboe ret += sr->done_io; 457f9ead18cSJens Axboe else if (sr->done_io) 458f9ead18cSJens Axboe ret = sr->done_io; 459f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 460f9ead18cSJens Axboe return IOU_OK; 461f9ead18cSJens Axboe } 462f9ead18cSJens Axboe 463f9ead18cSJens Axboe int io_send(struct io_kiocb *req, unsigned int issue_flags) 464f9ead18cSJens Axboe { 465516e82f0SPavel Begunkov 
struct sockaddr_storage __address; 466f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 467f9ead18cSJens Axboe struct msghdr msg; 468f9ead18cSJens Axboe struct socket *sock; 469f9ead18cSJens Axboe unsigned flags; 470f9ead18cSJens Axboe int min_ret = 0; 471f9ead18cSJens Axboe int ret; 472f9ead18cSJens Axboe 47304360d3eSPavel Begunkov msg.msg_name = NULL; 47404360d3eSPavel Begunkov msg.msg_control = NULL; 47504360d3eSPavel Begunkov msg.msg_controllen = 0; 47604360d3eSPavel Begunkov msg.msg_namelen = 0; 47704360d3eSPavel Begunkov msg.msg_ubuf = NULL; 47804360d3eSPavel Begunkov 479516e82f0SPavel Begunkov if (sr->addr) { 480516e82f0SPavel Begunkov if (req_has_async_data(req)) { 481516e82f0SPavel Begunkov struct io_async_msghdr *io = req->async_data; 482516e82f0SPavel Begunkov 483516e82f0SPavel Begunkov msg.msg_name = &io->addr; 484516e82f0SPavel Begunkov } else { 485516e82f0SPavel Begunkov ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address); 486516e82f0SPavel Begunkov if (unlikely(ret < 0)) 487516e82f0SPavel Begunkov return ret; 488516e82f0SPavel Begunkov msg.msg_name = (struct sockaddr *)&__address; 489516e82f0SPavel Begunkov } 490516e82f0SPavel Begunkov msg.msg_namelen = sr->addr_len; 491516e82f0SPavel Begunkov } 492516e82f0SPavel Begunkov 493f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 494f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 495516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 496f9ead18cSJens Axboe 497f9ead18cSJens Axboe sock = sock_from_file(req->file); 498f9ead18cSJens Axboe if (unlikely(!sock)) 499f9ead18cSJens Axboe return -ENOTSOCK; 500f9ead18cSJens Axboe 5014b61152eSJens Axboe ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter); 502f9ead18cSJens Axboe if (unlikely(ret)) 503f9ead18cSJens Axboe return ret; 504f9ead18cSJens Axboe 505f9ead18cSJens Axboe flags = sr->msg_flags; 506f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 
507f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 508f9ead18cSJens Axboe if (flags & MSG_WAITALL) 509f9ead18cSJens Axboe min_ret = iov_iter_count(&msg.msg_iter); 510f9ead18cSJens Axboe 511b841b901SDavid Howells flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 512f9ead18cSJens Axboe msg.msg_flags = flags; 513f9ead18cSJens Axboe ret = sock_sendmsg(sock, &msg); 514f9ead18cSJens Axboe if (ret < min_ret) { 515f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 516516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 517516e82f0SPavel Begunkov 518f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 519f9ead18cSJens Axboe sr->len -= ret; 520f9ead18cSJens Axboe sr->buf += ret; 521f9ead18cSJens Axboe sr->done_io += ret; 522f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 523516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 524f9ead18cSJens Axboe } 52595eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 52695eafc74SPavel Begunkov ret = -EINTR; 527f9ead18cSJens Axboe req_set_fail(req); 528f9ead18cSJens Axboe } 529f9ead18cSJens Axboe if (ret >= 0) 530f9ead18cSJens Axboe ret += sr->done_io; 531f9ead18cSJens Axboe else if (sr->done_io) 532f9ead18cSJens Axboe ret = sr->done_io; 533f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 534f9ead18cSJens Axboe return IOU_OK; 535f9ead18cSJens Axboe } 536f9ead18cSJens Axboe 53751d28472SJens Axboe static int io_recvmsg_mshot_prep(struct io_kiocb *req, 53851d28472SJens Axboe struct io_async_msghdr *iomsg, 539*f263cf16SMuhammad Usama Anjum int namelen, size_t controllen) 54051d28472SJens Axboe { 54151d28472SJens Axboe if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == 54251d28472SJens Axboe (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { 54351d28472SJens Axboe int hdr; 54451d28472SJens Axboe 54551d28472SJens Axboe if (unlikely(namelen < 0)) 54651d28472SJens Axboe return -EOVERFLOW; 54759a53469SDan Carpenter if (check_add_overflow(sizeof(struct 
io_uring_recvmsg_out), 54851d28472SJens Axboe namelen, &hdr)) 54951d28472SJens Axboe return -EOVERFLOW; 55059a53469SDan Carpenter if (check_add_overflow(hdr, controllen, &hdr)) 55151d28472SJens Axboe return -EOVERFLOW; 55251d28472SJens Axboe 55351d28472SJens Axboe iomsg->namelen = namelen; 55451d28472SJens Axboe iomsg->controllen = controllen; 55551d28472SJens Axboe return 0; 55651d28472SJens Axboe } 55751d28472SJens Axboe 55851d28472SJens Axboe return 0; 55951d28472SJens Axboe } 56051d28472SJens Axboe 561f9ead18cSJens Axboe static int io_recvmsg_copy_hdr(struct io_kiocb *req, 562f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 563f9ead18cSJens Axboe { 56451d28472SJens Axboe struct user_msghdr msg; 56551d28472SJens Axboe int ret; 56651d28472SJens Axboe 56751d28472SJens Axboe iomsg->msg.msg_name = &iomsg->addr; 56851d28472SJens Axboe iomsg->msg.msg_iter.nr_segs = 0; 56951d28472SJens Axboe 57051d28472SJens Axboe #ifdef CONFIG_COMPAT 57151d28472SJens Axboe if (unlikely(req->ctx->compat)) { 57251d28472SJens Axboe struct compat_msghdr cmsg; 57351d28472SJens Axboe 57451d28472SJens Axboe ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); 57551d28472SJens Axboe if (unlikely(ret)) 57651d28472SJens Axboe return ret; 57751d28472SJens Axboe 57851d28472SJens Axboe ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); 57951d28472SJens Axboe if (unlikely(ret)) 58051d28472SJens Axboe return ret; 58151d28472SJens Axboe 58251d28472SJens Axboe return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, 58351d28472SJens Axboe cmsg.msg_controllen); 58451d28472SJens Axboe } 58551d28472SJens Axboe #endif 58651d28472SJens Axboe 58751d28472SJens Axboe ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); 58851d28472SJens Axboe if (unlikely(ret)) 58951d28472SJens Axboe return ret; 59051d28472SJens Axboe 59151d28472SJens Axboe ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 59251d28472SJens Axboe if (unlikely(ret)) 59351d28472SJens Axboe return ret; 59451d28472SJens 
Axboe 59551d28472SJens Axboe return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, 59651d28472SJens Axboe msg.msg_controllen); 597f9ead18cSJens Axboe } 598f9ead18cSJens Axboe 599f9ead18cSJens Axboe int io_recvmsg_prep_async(struct io_kiocb *req) 600f9ead18cSJens Axboe { 6011a033109SJens Axboe struct io_async_msghdr *iomsg; 602f9ead18cSJens Axboe int ret; 603f9ead18cSJens Axboe 604858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 605858c293eSPavel Begunkov return -ENOMEM; 6061a033109SJens Axboe iomsg = req->async_data; 6071a033109SJens Axboe ret = io_recvmsg_copy_hdr(req, iomsg); 608f9ead18cSJens Axboe if (!ret) 609f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 610f9ead18cSJens Axboe return ret; 611f9ead18cSJens Axboe } 612f9ead18cSJens Axboe 613b3fdea6eSDylan Yudaken #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT) 614b3fdea6eSDylan Yudaken 615f9ead18cSJens Axboe int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 616f9ead18cSJens Axboe { 617f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 618f9ead18cSJens Axboe 619f9ead18cSJens Axboe if (unlikely(sqe->file_index || sqe->addr2)) 620f9ead18cSJens Axboe return -EINVAL; 621f9ead18cSJens Axboe 622f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 623f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 624f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 625b3fdea6eSDylan Yudaken if (sr->flags & ~(RECVMSG_FLAGS)) 626f9ead18cSJens Axboe return -EINVAL; 6277605c43dSDavid Lamparter sr->msg_flags = READ_ONCE(sqe->msg_flags); 628f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 629f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 630f9ead18cSJens Axboe if (sr->msg_flags & MSG_ERRQUEUE) 631f9ead18cSJens Axboe req->flags |= REQ_F_CLEAR_POLLIN; 632b3fdea6eSDylan Yudaken if (sr->flags & IORING_RECV_MULTISHOT) { 633b3fdea6eSDylan Yudaken if (!(req->flags & REQ_F_BUFFER_SELECT)) 634b3fdea6eSDylan 
Yudaken return -EINVAL; 635b3fdea6eSDylan Yudaken if (sr->msg_flags & MSG_WAITALL) 636b3fdea6eSDylan Yudaken return -EINVAL; 637b3fdea6eSDylan Yudaken if (req->opcode == IORING_OP_RECV && sr->len) 638b3fdea6eSDylan Yudaken return -EINVAL; 639b3fdea6eSDylan Yudaken req->flags |= REQ_F_APOLL_MULTISHOT; 640b00c51efSJens Axboe /* 641b00c51efSJens Axboe * Store the buffer group for this multishot receive separately, 642b00c51efSJens Axboe * as if we end up doing an io-wq based issue that selects a 643b00c51efSJens Axboe * buffer, it has to be committed immediately and that will 644b00c51efSJens Axboe * clear ->buf_list. This means we lose the link to the buffer 645b00c51efSJens Axboe * list, and the eventual buffer put on completion then cannot 646b00c51efSJens Axboe * restore it. 647b00c51efSJens Axboe */ 648b00c51efSJens Axboe sr->buf_group = req->buf_index; 649b3fdea6eSDylan Yudaken } 650f9ead18cSJens Axboe 651f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 652f9ead18cSJens Axboe if (req->ctx->compat) 653f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 654f9ead18cSJens Axboe #endif 655f9ead18cSJens Axboe sr->done_io = 0; 65622ccf61cSJens Axboe sr->nr_multishot_loops = 0; 657f9ead18cSJens Axboe return 0; 658f9ead18cSJens Axboe } 659f9ead18cSJens Axboe 660b3fdea6eSDylan Yudaken static inline void io_recv_prep_retry(struct io_kiocb *req) 661b3fdea6eSDylan Yudaken { 662f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 663b3fdea6eSDylan Yudaken 664b3fdea6eSDylan Yudaken sr->done_io = 0; 665b3fdea6eSDylan Yudaken sr->len = 0; /* get from the provided buffer */ 666b00c51efSJens Axboe req->buf_index = sr->buf_group; 667b3fdea6eSDylan Yudaken } 668b3fdea6eSDylan Yudaken 669b3fdea6eSDylan Yudaken /* 6709bb66906SDylan Yudaken * Finishes io_recv and io_recvmsg. 671b3fdea6eSDylan Yudaken * 672b3fdea6eSDylan Yudaken * Returns true if it is actually finished, or false if it should run 673b3fdea6eSDylan Yudaken * again (for multishot). 
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct msghdr *msg, bool mshot_finished,
				  unsigned issue_flags)
{
	unsigned int cflags;

	cflags = io_put_kbuf(req, issue_flags);
	/* ->msg_inq == -1 means "unknown"; only a known non-zero residue marks the socket non-empty */
	if (msg->msg_inq && msg->msg_inq != -1)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	/* single-shot: post the final result and complete the request */
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (mshot_finished)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
				*ret, cflags | IORING_CQE_F_MORE)) {
		struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_recv_prep_retry(req);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
			/*
			 * Cap back-to-back retries at MULTISHOT_MAX_RETRY so
			 * one flooding socket can't starve others; past the
			 * cap, requeue instead of looping inline.
			 */
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}
	/* Otherwise stop multishot but use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

/*
 * For multishot recvmsg, the front of the selected buffer is reserved for a
 * struct io_uring_recvmsg_out plus room for the source name and control data.
 * Point the kmsg control destination into that region and advance *buf / *len
 * past the header so only the payload is received after it.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	/* selected buffer must at least hold the header region */
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		/* control data sits at the tail end of the header region */
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

/*
 * On-stack staging area: the out-header immediately followed by the source
 * address, copied to the user buffer as one contiguous block (a BUILD_BUG_ON
 * below asserts there is no padding gap between the two members).
 */
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

/*
 * Multishot recvmsg: receive the payload, then copy the io_uring_recvmsg_out
 * header (and any source address) to the start of the user buffer.  Returns
 * the total number of bytes placed in the buffer, or a negative error.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	/* only CLOEXEC/COMPAT are meaningful as returned msg_flags here */
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	/* zero or error means multishot should stop after this round */
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	/* report the untruncated payload length; clamp err for the copy math */
	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 * 1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	/* total bytes consumed from the buffer: header region + payload */
	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

/*
 * IORING_OP_RECVMSG issue path.  Handles both single-shot and multishot
 * variants; multishot loops via retry_multishot until io_recv_finish()
 * says to stop.
 */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/* reuse the async-prepared msghdr if one exists, else copy it now */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	/* POLL_FIRST: wait for poll readiness before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST)
		return io_setup_async_msg(req, kmsg, issue_flags);

	if (!io_check_multishot(req, issue_flags))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			/* carve the recvmsg_out header out of the buffer */
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

	/* -1 = "unknown residue"; io_recv_finish() keys off this */
	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		/* short receive: bank progress and go async to finish it */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	/* fold previously banked partial progress into the result */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		io_netmsg_recycle(req, issue_flags);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	return ret;
}

/*
 * IORING_OP_RECV issue path: plain buffer receive with no user msghdr.
 * Builds the msghdr on stack; supports provided buffers and multishot via
 * io_recv_finish().
 */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	/* POLL_FIRST: wait for poll readiness before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = len;
	}

	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
	if
	(unlikely(ret))
		goto out_free;

	/* -1 = "unknown residue"; io_recv_finish() keys off this */
	msg.msg_inq = -1;
	msg.msg_flags = 0;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		/* short receive: bank progress and retry from where we left off */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	/* fold previously banked partial progress into the result */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

/*
 * Request cleanup for zerocopy sends: free a copied iovec (unless it is
 * the inline fast_iov) and flush the notification if it is still pending.
 */
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

/* flags accepted by both SEND_ZC and SENDMSG_ZC / all valid ZC flags */
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

/*
 * Prep for IORING_OP_SEND_ZC / IORING_OP_SENDMSG_ZC: validate the SQE,
 * allocate the completion notification, and pull the send parameters out
 * of the SQE.
 */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	/* the notification CQE mirrors the request's user_data */
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			io_notif_set_extended(notif);
			io_notif_to_data(notif)->zc_report = true;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		/* bounds-check then sanitize the index against speculation */
		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		/* optional destination address for connectionless sockets */
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		/* fixed buffers are not supported for SENDMSG_ZC */
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

/*
 * sg_from_iter callback for iovec-backed zerocopy sends: the pages are not
 * pinned by io_uring, so drop managed-frag mode and use the generic copy
 * helper.
 */
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

/*
 * sg_from_iter callback for bvec-backed (fixed buffer) zerocopy sends:
 * map the iterator's pages directly into skb frags without taking page
 * references, falling back to the generic helper if the skb already has
 * non-managed frags.
 */
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if
	(unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	/* ran out of frag slots before consuming the iterator */
	if (bi.bi_size)
		ret = -EMSGSIZE;

	/* advance the caller's iterator past what we mapped */
	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	/* account the added truesize against the socket's memory */
	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}
/*
 * IORING_OP_SEND_ZC issue path: zero-copy send from a plain or fixed
 * buffer.  Completion posts IORING_CQE_F_MORE; the separate notification
 * CQE signals when the kernel is done with the pages.
 */
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			/* address was already copied in when going async */
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	/* POLL_FIRST: wait for poll readiness before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
				      (u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		io_notif_set_extended(zc->notif);
		ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);
	/* strip kernel-internal sendmsg flags that userspace can't set */
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		/* short send: bank progress and retry the remainder async */
		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	/* fold previously banked partial progress into the result */
	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	/* F_MORE: a notification CQE will follow this completion */
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

/*
 * IORING_OP_SENDMSG_ZC issue path: zero-copy sendmsg with a user msghdr.
 */
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	io_notif_set_extended(sr->notif);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if
	(!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	/* reuse the async-prepared msghdr if one exists, else copy it now */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	/* POLL_FIRST: wait for poll readiness before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		/* short send: bank progress and finish the remainder async */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	/* fold previously banked partial progress into the result */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	/* F_MORE: a notification CQE will follow this completion */
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

/*
 * Failure hook for send/recv opcodes: report banked partial progress as
 * the result, and for zerocopy sends keep F_MORE set when a notification
 * CQE is still outstanding (REQ_F_NEED_CLEANUP).
 */
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb
*req, const struct io_uring_sqe *sqe) 1347f9ead18cSJens Axboe { 1348f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1349f9ead18cSJens Axboe unsigned flags; 1350f9ead18cSJens Axboe 1351f9ead18cSJens Axboe if (sqe->len || sqe->buf_index) 1352f9ead18cSJens Axboe return -EINVAL; 1353f9ead18cSJens Axboe 1354f9ead18cSJens Axboe accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1355f9ead18cSJens Axboe accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1356f9ead18cSJens Axboe accept->flags = READ_ONCE(sqe->accept_flags); 1357f9ead18cSJens Axboe accept->nofile = rlimit(RLIMIT_NOFILE); 1358f9ead18cSJens Axboe flags = READ_ONCE(sqe->ioprio); 1359f9ead18cSJens Axboe if (flags & ~IORING_ACCEPT_MULTISHOT) 1360f9ead18cSJens Axboe return -EINVAL; 1361f9ead18cSJens Axboe 1362f9ead18cSJens Axboe accept->file_slot = READ_ONCE(sqe->file_index); 1363f9ead18cSJens Axboe if (accept->file_slot) { 1364f9ead18cSJens Axboe if (accept->flags & SOCK_CLOEXEC) 1365f9ead18cSJens Axboe return -EINVAL; 1366f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT && 1367f9ead18cSJens Axboe accept->file_slot != IORING_FILE_INDEX_ALLOC) 1368f9ead18cSJens Axboe return -EINVAL; 1369f9ead18cSJens Axboe } 1370f9ead18cSJens Axboe if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1371f9ead18cSJens Axboe return -EINVAL; 1372f9ead18cSJens Axboe if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1373f9ead18cSJens Axboe accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1374f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT) 1375f9ead18cSJens Axboe req->flags |= REQ_F_APOLL_MULTISHOT; 1376f9ead18cSJens Axboe return 0; 1377f9ead18cSJens Axboe } 1378f9ead18cSJens Axboe 1379f9ead18cSJens Axboe int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1380f9ead18cSJens Axboe { 1381f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1382f9ead18cSJens Axboe bool force_nonblock = 
issue_flags & IO_URING_F_NONBLOCK; 1383f9ead18cSJens Axboe unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; 1384f9ead18cSJens Axboe bool fixed = !!accept->file_slot; 1385f9ead18cSJens Axboe struct file *file; 1386f9ead18cSJens Axboe int ret, fd; 1387f9ead18cSJens Axboe 138817add5ceSPavel Begunkov if (!io_check_multishot(req, issue_flags)) 138917add5ceSPavel Begunkov return -EAGAIN; 1390f9ead18cSJens Axboe retry: 1391f9ead18cSJens Axboe if (!fixed) { 1392f9ead18cSJens Axboe fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1393f9ead18cSJens Axboe if (unlikely(fd < 0)) 1394f9ead18cSJens Axboe return fd; 1395f9ead18cSJens Axboe } 1396f9ead18cSJens Axboe file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, 1397f9ead18cSJens Axboe accept->flags); 1398f9ead18cSJens Axboe if (IS_ERR(file)) { 1399f9ead18cSJens Axboe if (!fixed) 1400f9ead18cSJens Axboe put_unused_fd(fd); 1401f9ead18cSJens Axboe ret = PTR_ERR(file); 1402f9ead18cSJens Axboe if (ret == -EAGAIN && force_nonblock) { 1403f9ead18cSJens Axboe /* 1404f9ead18cSJens Axboe * if it's multishot and polled, we don't need to 1405f9ead18cSJens Axboe * return EAGAIN to arm the poll infra since it 1406f9ead18cSJens Axboe * has already been done 1407f9ead18cSJens Axboe */ 140891482864SPavel Begunkov if (issue_flags & IO_URING_F_MULTISHOT) 140913b01aedSJens Axboe return IOU_ISSUE_SKIP_COMPLETE; 1410f9ead18cSJens Axboe return ret; 1411f9ead18cSJens Axboe } 1412f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1413f9ead18cSJens Axboe ret = -EINTR; 1414f9ead18cSJens Axboe req_set_fail(req); 1415f9ead18cSJens Axboe } else if (!fixed) { 1416f9ead18cSJens Axboe fd_install(fd, file); 1417f9ead18cSJens Axboe ret = fd; 1418f9ead18cSJens Axboe } else { 1419f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1420f9ead18cSJens Axboe accept->file_slot); 1421f9ead18cSJens Axboe } 1422f9ead18cSJens Axboe 1423f9ead18cSJens Axboe if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1424f9ead18cSJens 
Axboe io_req_set_res(req, ret, 0); 1425f9ead18cSJens Axboe return IOU_OK; 1426f9ead18cSJens Axboe } 1427f9ead18cSJens Axboe 1428515e2696SDylan Yudaken if (ret < 0) 1429515e2696SDylan Yudaken return ret; 1430b6b2bb58SPavel Begunkov if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, 1431b6b2bb58SPavel Begunkov ret, IORING_CQE_F_MORE)) 1432d245bca6SPavel Begunkov goto retry; 1433cbd25748SDylan Yudaken 143413b01aedSJens Axboe io_req_set_res(req, ret, 0); 143513b01aedSJens Axboe return IOU_STOP_MULTISHOT; 1436f9ead18cSJens Axboe } 1437f9ead18cSJens Axboe 1438f9ead18cSJens Axboe int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1439f9ead18cSJens Axboe { 1440f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1441f9ead18cSJens Axboe 1442f9ead18cSJens Axboe if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1443f9ead18cSJens Axboe return -EINVAL; 1444f9ead18cSJens Axboe 1445f9ead18cSJens Axboe sock->domain = READ_ONCE(sqe->fd); 1446f9ead18cSJens Axboe sock->type = READ_ONCE(sqe->off); 1447f9ead18cSJens Axboe sock->protocol = READ_ONCE(sqe->len); 1448f9ead18cSJens Axboe sock->file_slot = READ_ONCE(sqe->file_index); 1449f9ead18cSJens Axboe sock->nofile = rlimit(RLIMIT_NOFILE); 1450f9ead18cSJens Axboe 1451f9ead18cSJens Axboe sock->flags = sock->type & ~SOCK_TYPE_MASK; 1452f9ead18cSJens Axboe if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1453f9ead18cSJens Axboe return -EINVAL; 1454f9ead18cSJens Axboe if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1455f9ead18cSJens Axboe return -EINVAL; 1456f9ead18cSJens Axboe return 0; 1457f9ead18cSJens Axboe } 1458f9ead18cSJens Axboe 1459f9ead18cSJens Axboe int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1460f9ead18cSJens Axboe { 1461f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1462f9ead18cSJens Axboe bool fixed = !!sock->file_slot; 1463f9ead18cSJens Axboe struct file *file; 1464f9ead18cSJens 
Axboe int ret, fd; 1465f9ead18cSJens Axboe 1466f9ead18cSJens Axboe if (!fixed) { 1467f9ead18cSJens Axboe fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1468f9ead18cSJens Axboe if (unlikely(fd < 0)) 1469f9ead18cSJens Axboe return fd; 1470f9ead18cSJens Axboe } 1471f9ead18cSJens Axboe file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1472f9ead18cSJens Axboe if (IS_ERR(file)) { 1473f9ead18cSJens Axboe if (!fixed) 1474f9ead18cSJens Axboe put_unused_fd(fd); 1475f9ead18cSJens Axboe ret = PTR_ERR(file); 1476f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1477f9ead18cSJens Axboe return -EAGAIN; 1478f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1479f9ead18cSJens Axboe ret = -EINTR; 1480f9ead18cSJens Axboe req_set_fail(req); 1481f9ead18cSJens Axboe } else if (!fixed) { 1482f9ead18cSJens Axboe fd_install(fd, file); 1483f9ead18cSJens Axboe ret = fd; 1484f9ead18cSJens Axboe } else { 1485f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1486f9ead18cSJens Axboe sock->file_slot); 1487f9ead18cSJens Axboe } 1488f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1489f9ead18cSJens Axboe return IOU_OK; 1490f9ead18cSJens Axboe } 1491f9ead18cSJens Axboe 1492f9ead18cSJens Axboe int io_connect_prep_async(struct io_kiocb *req) 1493f9ead18cSJens Axboe { 1494f9ead18cSJens Axboe struct io_async_connect *io = req->async_data; 1495f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1496f9ead18cSJens Axboe 1497f9ead18cSJens Axboe return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); 1498f9ead18cSJens Axboe } 1499f9ead18cSJens Axboe 1500f9ead18cSJens Axboe int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1501f9ead18cSJens Axboe { 1502f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1503f9ead18cSJens Axboe 1504f9ead18cSJens Axboe if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 
1505f9ead18cSJens Axboe return -EINVAL; 1506f9ead18cSJens Axboe 1507f9ead18cSJens Axboe conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1508f9ead18cSJens Axboe conn->addr_len = READ_ONCE(sqe->addr2); 150974e2e17eSJens Axboe conn->in_progress = conn->seen_econnaborted = false; 1510f9ead18cSJens Axboe return 0; 1511f9ead18cSJens Axboe } 1512f9ead18cSJens Axboe 1513f9ead18cSJens Axboe int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1514f9ead18cSJens Axboe { 1515f2ccb5aeSStefan Metzmacher struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1516f9ead18cSJens Axboe struct io_async_connect __io, *io; 1517f9ead18cSJens Axboe unsigned file_flags; 1518f9ead18cSJens Axboe int ret; 1519f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1520f9ead18cSJens Axboe 1521f9ead18cSJens Axboe if (req_has_async_data(req)) { 1522f9ead18cSJens Axboe io = req->async_data; 1523f9ead18cSJens Axboe } else { 1524f9ead18cSJens Axboe ret = move_addr_to_kernel(connect->addr, 1525f9ead18cSJens Axboe connect->addr_len, 1526f9ead18cSJens Axboe &__io.address); 1527f9ead18cSJens Axboe if (ret) 1528f9ead18cSJens Axboe goto out; 1529f9ead18cSJens Axboe io = &__io; 1530f9ead18cSJens Axboe } 1531f9ead18cSJens Axboe 1532f9ead18cSJens Axboe file_flags = force_nonblock ? 
O_NONBLOCK : 0; 1533f9ead18cSJens Axboe 1534f9ead18cSJens Axboe ret = __sys_connect_file(req->file, &io->address, 1535f9ead18cSJens Axboe connect->addr_len, file_flags); 153674e2e17eSJens Axboe if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) 153774e2e17eSJens Axboe && force_nonblock) { 15383fb1bd68SJens Axboe if (ret == -EINPROGRESS) { 15393fb1bd68SJens Axboe connect->in_progress = true; 15402bb15fb6SJens Axboe } else if (ret == -ECONNABORTED) { 154174e2e17eSJens Axboe if (connect->seen_econnaborted) 154274e2e17eSJens Axboe goto out; 154374e2e17eSJens Axboe connect->seen_econnaborted = true; 154474e2e17eSJens Axboe } 1545f9ead18cSJens Axboe if (req_has_async_data(req)) 1546f9ead18cSJens Axboe return -EAGAIN; 1547f9ead18cSJens Axboe if (io_alloc_async_data(req)) { 1548f9ead18cSJens Axboe ret = -ENOMEM; 1549f9ead18cSJens Axboe goto out; 1550f9ead18cSJens Axboe } 1551f9ead18cSJens Axboe memcpy(req->async_data, &__io, sizeof(__io)); 1552f9ead18cSJens Axboe return -EAGAIN; 1553f9ead18cSJens Axboe } 15542bb15fb6SJens Axboe if (connect->in_progress) { 15552bb15fb6SJens Axboe /* 15562bb15fb6SJens Axboe * At least bluetooth will return -EBADFD on a re-connect 15572bb15fb6SJens Axboe * attempt, and it's (supposedly) also valid to get -EISCONN 15582bb15fb6SJens Axboe * which means the previous result is good. For both of these, 15592bb15fb6SJens Axboe * grab the sock_error() and use that for the completion. 
15602bb15fb6SJens Axboe */ 15612bb15fb6SJens Axboe if (ret == -EBADFD || ret == -EISCONN) 15622bb15fb6SJens Axboe ret = sock_error(sock_from_file(req->file)->sk); 15632bb15fb6SJens Axboe } 1564f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1565f9ead18cSJens Axboe ret = -EINTR; 1566f9ead18cSJens Axboe out: 1567f9ead18cSJens Axboe if (ret < 0) 1568f9ead18cSJens Axboe req_set_fail(req); 1569f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1570f9ead18cSJens Axboe return IOU_OK; 1571f9ead18cSJens Axboe } 157243e0bbbdSJens Axboe 157343e0bbbdSJens Axboe void io_netmsg_cache_free(struct io_cache_entry *entry) 157443e0bbbdSJens Axboe { 157543e0bbbdSJens Axboe kfree(container_of(entry, struct io_async_msghdr, cache)); 157643e0bbbdSJens Axboe } 1577f9ead18cSJens Axboe #endif 1578