1f9ead18cSJens Axboe // SPDX-License-Identifier: GPL-2.0 2f9ead18cSJens Axboe #include <linux/kernel.h> 3f9ead18cSJens Axboe #include <linux/errno.h> 4f9ead18cSJens Axboe #include <linux/file.h> 5f9ead18cSJens Axboe #include <linux/slab.h> 6f9ead18cSJens Axboe #include <linux/net.h> 7f9ead18cSJens Axboe #include <linux/compat.h> 8f9ead18cSJens Axboe #include <net/compat.h> 9f9ead18cSJens Axboe #include <linux/io_uring.h> 10f9ead18cSJens Axboe 11f9ead18cSJens Axboe #include <uapi/linux/io_uring.h> 12f9ead18cSJens Axboe 13f9ead18cSJens Axboe #include "io_uring.h" 143b77495aSJens Axboe #include "kbuf.h" 1543e0bbbdSJens Axboe #include "alloc_cache.h" 16f9ead18cSJens Axboe #include "net.h" 1706a5464bSPavel Begunkov #include "notif.h" 1810c7d33eSPavel Begunkov #include "rsrc.h" 19f9ead18cSJens Axboe 20f9ead18cSJens Axboe #if defined(CONFIG_NET) 21f9ead18cSJens Axboe struct io_shutdown { 22f9ead18cSJens Axboe struct file *file; 23f9ead18cSJens Axboe int how; 24f9ead18cSJens Axboe }; 25f9ead18cSJens Axboe 26f9ead18cSJens Axboe struct io_accept { 27f9ead18cSJens Axboe struct file *file; 28f9ead18cSJens Axboe struct sockaddr __user *addr; 29f9ead18cSJens Axboe int __user *addr_len; 30f9ead18cSJens Axboe int flags; 31f9ead18cSJens Axboe u32 file_slot; 32f9ead18cSJens Axboe unsigned long nofile; 33f9ead18cSJens Axboe }; 34f9ead18cSJens Axboe 35f9ead18cSJens Axboe struct io_socket { 36f9ead18cSJens Axboe struct file *file; 37f9ead18cSJens Axboe int domain; 38f9ead18cSJens Axboe int type; 39f9ead18cSJens Axboe int protocol; 40f9ead18cSJens Axboe int flags; 41f9ead18cSJens Axboe u32 file_slot; 42f9ead18cSJens Axboe unsigned long nofile; 43f9ead18cSJens Axboe }; 44f9ead18cSJens Axboe 45f9ead18cSJens Axboe struct io_connect { 46f9ead18cSJens Axboe struct file *file; 47f9ead18cSJens Axboe struct sockaddr __user *addr; 48f9ead18cSJens Axboe int addr_len; 49f9ead18cSJens Axboe }; 50f9ead18cSJens Axboe 51f9ead18cSJens Axboe struct io_sr_msg { 52f9ead18cSJens Axboe struct file *file; 53f9ead18cSJens Axboe union { 54f9ead18cSJens Axboe struct compat_msghdr __user *umsg_compat; 55f9ead18cSJens Axboe struct user_msghdr __user *umsg; 56f9ead18cSJens Axboe void __user *buf; 57f9ead18cSJens Axboe }; 580b048557SPavel Begunkov unsigned len; 590b048557SPavel Begunkov unsigned done_io; 60293402e5SPavel Begunkov unsigned msg_flags; 610b048557SPavel Begunkov u16 flags; 62516e82f0SPavel Begunkov /* initialised and used only by !msg send variants */ 630b048557SPavel Begunkov u16 addr_len; 64092aeedbSPavel Begunkov void __user *addr; 65516e82f0SPavel Begunkov /* used only for send zerocopy */ 66b48c312bSPavel Begunkov struct io_kiocb *notif; 6706a5464bSPavel Begunkov }; 6806a5464bSPavel Begunkov 69f9ead18cSJens Axboe #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) 70f9ead18cSJens Axboe 71f9ead18cSJens Axboe int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 72f9ead18cSJens Axboe { 73f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 74f9ead18cSJens Axboe 75f9ead18cSJens Axboe if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 76f9ead18cSJens Axboe sqe->buf_index || sqe->splice_fd_in)) 77f9ead18cSJens Axboe return -EINVAL; 78f9ead18cSJens Axboe 79f9ead18cSJens Axboe shutdown->how = READ_ONCE(sqe->len); 80f9ead18cSJens Axboe return 0; 81f9ead18cSJens Axboe } 82f9ead18cSJens Axboe 83f9ead18cSJens Axboe int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 84f9ead18cSJens Axboe { 85f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 86f9ead18cSJens Axboe struct socket *sock; 87f9ead18cSJens Axboe int ret; 88f9ead18cSJens Axboe 89f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 90f9ead18cSJens Axboe return -EAGAIN; 91f9ead18cSJens Axboe 92f9ead18cSJens Axboe sock = sock_from_file(req->file); 93f9ead18cSJens Axboe if (unlikely(!sock)) 94f9ead18cSJens Axboe return -ENOTSOCK; 95f9ead18cSJens Axboe 96f9ead18cSJens Axboe ret = __sys_shutdown_sock(sock, shutdown->how); 97f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 98f9ead18cSJens Axboe return IOU_OK; 99f9ead18cSJens Axboe } 100f9ead18cSJens Axboe 101f9ead18cSJens Axboe static bool io_net_retry(struct socket *sock, int flags) 102f9ead18cSJens Axboe { 103f9ead18cSJens Axboe if (!(flags & MSG_WAITALL)) 104f9ead18cSJens Axboe return false; 105f9ead18cSJens Axboe return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 106f9ead18cSJens Axboe } 107f9ead18cSJens Axboe 10843e0bbbdSJens Axboe static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 10943e0bbbdSJens Axboe { 11043e0bbbdSJens Axboe struct io_async_msghdr *hdr = req->async_data; 11143e0bbbdSJens Axboe 11206360426SPavel Begunkov if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED) 11343e0bbbdSJens Axboe return; 11443e0bbbdSJens Axboe 11543e0bbbdSJens Axboe /* Let normal cleanup path reap it if we fail adding to the cache */ 11643e0bbbdSJens Axboe if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) { 11743e0bbbdSJens Axboe req->async_data = NULL; 11843e0bbbdSJens Axboe req->flags &= ~REQ_F_ASYNC_DATA; 11943e0bbbdSJens Axboe } 12043e0bbbdSJens Axboe } 12143e0bbbdSJens Axboe 122858c293eSPavel Begunkov static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req, 12343e0bbbdSJens Axboe unsigned int issue_flags) 12443e0bbbdSJens Axboe { 12543e0bbbdSJens Axboe struct io_ring_ctx *ctx = req->ctx; 12643e0bbbdSJens Axboe struct io_cache_entry *entry; 1274c17a496SPavel Begunkov struct io_async_msghdr *hdr; 12843e0bbbdSJens Axboe 12943e0bbbdSJens Axboe if (!(issue_flags & IO_URING_F_UNLOCKED) && 13043e0bbbdSJens Axboe (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) { 13143e0bbbdSJens Axboe hdr = container_of(entry, struct io_async_msghdr, cache); 1324c17a496SPavel Begunkov hdr->free_iov = NULL; 13343e0bbbdSJens Axboe req->flags |= REQ_F_ASYNC_DATA; 13443e0bbbdSJens Axboe req->async_data = hdr; 13543e0bbbdSJens Axboe return hdr; 13643e0bbbdSJens Axboe } 13743e0bbbdSJens Axboe 1384c17a496SPavel Begunkov if (!io_alloc_async_data(req)) { 1394c17a496SPavel Begunkov hdr = req->async_data; 1404c17a496SPavel Begunkov hdr->free_iov = NULL; 1414c17a496SPavel Begunkov return hdr; 1424c17a496SPavel Begunkov } 14343e0bbbdSJens Axboe return NULL; 14443e0bbbdSJens Axboe } 14543e0bbbdSJens Axboe 146858c293eSPavel Begunkov static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req) 147858c293eSPavel Begunkov { 148858c293eSPavel Begunkov /* ->prep_async is always called from the submission context */ 149858c293eSPavel Begunkov return io_msg_alloc_async(req, 0); 150858c293eSPavel Begunkov } 151858c293eSPavel Begunkov 152f9ead18cSJens Axboe static int io_setup_async_msg(struct io_kiocb *req, 15343e0bbbdSJens Axboe struct io_async_msghdr *kmsg, 15443e0bbbdSJens Axboe unsigned int issue_flags) 155f9ead18cSJens Axboe { 1563f743e9bSPavel Begunkov struct io_async_msghdr *async_msg; 157f9ead18cSJens Axboe 1583f743e9bSPavel Begunkov if (req_has_async_data(req)) 159f9ead18cSJens Axboe return -EAGAIN; 160858c293eSPavel Begunkov async_msg = io_msg_alloc_async(req, issue_flags); 16143e0bbbdSJens Axboe if (!async_msg) { 162f9ead18cSJens Axboe kfree(kmsg->free_iov); 163f9ead18cSJens Axboe return -ENOMEM; 164f9ead18cSJens Axboe } 165f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 166f9ead18cSJens Axboe memcpy(async_msg, kmsg, sizeof(*kmsg)); 167f9ead18cSJens Axboe async_msg->msg.msg_name = &async_msg->addr; 168f9ead18cSJens Axboe /* if were using fast_iov, set it to the new one */ 169*3e4cb6ebSStefan Metzmacher if (!kmsg->free_iov) { 170*3e4cb6ebSStefan Metzmacher size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; 171*3e4cb6ebSStefan Metzmacher async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; 172*3e4cb6ebSStefan Metzmacher } 173f9ead18cSJens Axboe 174f9ead18cSJens Axboe return -EAGAIN; 175f9ead18cSJens Axboe } 176f9ead18cSJens Axboe 177f9ead18cSJens Axboe static int io_sendmsg_copy_hdr(struct io_kiocb *req, 178f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 179f9ead18cSJens Axboe { 180f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 181f9ead18cSJens Axboe 182f9ead18cSJens Axboe iomsg->msg.msg_name = &iomsg->addr; 183f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 184f9ead18cSJens Axboe return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, 185f9ead18cSJens Axboe &iomsg->free_iov); 186f9ead18cSJens Axboe } 187f9ead18cSJens Axboe 188516e82f0SPavel Begunkov int io_send_prep_async(struct io_kiocb *req) 189581711c4SPavel Begunkov { 190ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 191581711c4SPavel Begunkov struct io_async_msghdr *io; 192581711c4SPavel Begunkov int ret; 193581711c4SPavel Begunkov 194581711c4SPavel Begunkov if (!zc->addr || req_has_async_data(req)) 195581711c4SPavel Begunkov return 0; 1966bf8ad25SPavel Begunkov io = io_msg_alloc_async_prep(req); 1976bf8ad25SPavel Begunkov if (!io) 198581711c4SPavel Begunkov return -ENOMEM; 199581711c4SPavel Begunkov ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); 200581711c4SPavel Begunkov return ret; 201581711c4SPavel Begunkov } 202581711c4SPavel Begunkov 203581711c4SPavel Begunkov static int io_setup_async_addr(struct io_kiocb *req, 2046ae61b7aSPavel Begunkov struct sockaddr_storage *addr_storage, 205581711c4SPavel Begunkov unsigned int issue_flags) 206581711c4SPavel Begunkov { 2076ae61b7aSPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 208581711c4SPavel Begunkov struct io_async_msghdr *io; 209581711c4SPavel Begunkov 2106ae61b7aSPavel Begunkov if (!sr->addr || req_has_async_data(req)) 211581711c4SPavel Begunkov return -EAGAIN; 2126bf8ad25SPavel Begunkov io = io_msg_alloc_async(req, issue_flags); 2136bf8ad25SPavel Begunkov if (!io) 214581711c4SPavel Begunkov return -ENOMEM; 2156ae61b7aSPavel Begunkov memcpy(&io->addr, addr_storage, sizeof(io->addr)); 216581711c4SPavel Begunkov return -EAGAIN; 217581711c4SPavel Begunkov } 218581711c4SPavel Begunkov 219f9ead18cSJens Axboe int io_sendmsg_prep_async(struct io_kiocb *req) 220f9ead18cSJens Axboe { 221f9ead18cSJens Axboe int ret; 222f9ead18cSJens Axboe 223858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 224858c293eSPavel Begunkov return -ENOMEM; 225f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, req->async_data); 226f9ead18cSJens Axboe if (!ret) 227f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 228f9ead18cSJens Axboe return ret; 229f9ead18cSJens Axboe } 230f9ead18cSJens Axboe 231f9ead18cSJens Axboe void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 232f9ead18cSJens Axboe { 233f9ead18cSJens Axboe struct io_async_msghdr *io = req->async_data; 234f9ead18cSJens Axboe 235f9ead18cSJens Axboe kfree(io->free_iov); 236f9ead18cSJens Axboe } 237f9ead18cSJens Axboe 238f9ead18cSJens Axboe int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 239f9ead18cSJens Axboe { 240f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 241f9ead18cSJens Axboe 242516e82f0SPavel Begunkov if (req->opcode == IORING_OP_SEND) { 243516e82f0SPavel Begunkov if (READ_ONCE(sqe->__pad3[0])) 244f9ead18cSJens Axboe return -EINVAL; 245516e82f0SPavel Begunkov sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 246516e82f0SPavel Begunkov sr->addr_len = READ_ONCE(sqe->addr_len); 247516e82f0SPavel Begunkov } else if (sqe->addr2 || sqe->file_index) { 248516e82f0SPavel Begunkov return -EINVAL; 249516e82f0SPavel Begunkov } 250f9ead18cSJens Axboe 251f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 252f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 253f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 254f9ead18cSJens Axboe if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) 255f9ead18cSJens Axboe return -EINVAL; 256f9ead18cSJens Axboe sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 257f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 258f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 259f9ead18cSJens Axboe 260f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 261f9ead18cSJens Axboe if (req->ctx->compat) 262f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 263f9ead18cSJens Axboe #endif 264f9ead18cSJens Axboe sr->done_io = 0; 265f9ead18cSJens Axboe return 0; 266f9ead18cSJens Axboe } 267f9ead18cSJens Axboe 268f9ead18cSJens Axboe int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 269f9ead18cSJens Axboe { 270f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 271f9ead18cSJens Axboe struct io_async_msghdr iomsg, *kmsg; 272f9ead18cSJens Axboe struct socket *sock; 273f9ead18cSJens Axboe unsigned flags; 274f9ead18cSJens Axboe int min_ret = 0; 275f9ead18cSJens Axboe int ret; 276f9ead18cSJens Axboe 277f9ead18cSJens Axboe sock = sock_from_file(req->file); 278f9ead18cSJens Axboe if (unlikely(!sock)) 279f9ead18cSJens Axboe return -ENOTSOCK; 280f9ead18cSJens Axboe 281f9ead18cSJens Axboe if (req_has_async_data(req)) { 282f9ead18cSJens Axboe kmsg = req->async_data; 283f9ead18cSJens Axboe } else { 284f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, &iomsg); 285f9ead18cSJens Axboe if (ret) 286f9ead18cSJens Axboe return ret; 287f9ead18cSJens Axboe kmsg = &iomsg; 288f9ead18cSJens Axboe } 289f9ead18cSJens Axboe 290f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 291f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 29243e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 293f9ead18cSJens Axboe 294f9ead18cSJens Axboe flags = sr->msg_flags; 295f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 296f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 297f9ead18cSJens Axboe if (flags & MSG_WAITALL) 298f9ead18cSJens Axboe min_ret = iov_iter_count(&kmsg->msg.msg_iter); 299f9ead18cSJens Axboe 300f9ead18cSJens Axboe ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 301f9ead18cSJens Axboe 302f9ead18cSJens Axboe if (ret < min_ret) { 303f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 30443e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 305f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 306f9ead18cSJens Axboe sr->done_io += ret; 307f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 30843e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 309f9ead18cSJens Axboe } 31095eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 31195eafc74SPavel Begunkov ret = -EINTR; 312f9ead18cSJens Axboe req_set_fail(req); 313f9ead18cSJens Axboe } 314f9ead18cSJens Axboe /* fast path, check for non-NULL to avoid function call */ 315f9ead18cSJens Axboe if (kmsg->free_iov) 316f9ead18cSJens Axboe kfree(kmsg->free_iov); 317f9ead18cSJens Axboe req->flags &= ~REQ_F_NEED_CLEANUP; 31843e0bbbdSJens Axboe io_netmsg_recycle(req, issue_flags); 319f9ead18cSJens Axboe if (ret >= 0) 320f9ead18cSJens Axboe ret += sr->done_io; 321f9ead18cSJens Axboe else if (sr->done_io) 322f9ead18cSJens Axboe ret = sr->done_io; 323f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 324f9ead18cSJens Axboe return IOU_OK; 325f9ead18cSJens Axboe } 326f9ead18cSJens Axboe 327f9ead18cSJens Axboe int io_send(struct io_kiocb *req, unsigned int issue_flags) 328f9ead18cSJens Axboe { 329516e82f0SPavel Begunkov struct sockaddr_storage __address; 330f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 331f9ead18cSJens Axboe struct msghdr msg; 332f9ead18cSJens Axboe struct iovec iov; 333f9ead18cSJens Axboe struct socket *sock; 334f9ead18cSJens Axboe unsigned flags; 335f9ead18cSJens Axboe int min_ret = 0; 336f9ead18cSJens Axboe int ret; 337f9ead18cSJens Axboe 33804360d3eSPavel Begunkov msg.msg_name = NULL; 33904360d3eSPavel Begunkov msg.msg_control = NULL; 34004360d3eSPavel Begunkov msg.msg_controllen = 0; 34104360d3eSPavel Begunkov msg.msg_namelen = 0; 34204360d3eSPavel Begunkov msg.msg_ubuf = NULL; 34304360d3eSPavel Begunkov 344516e82f0SPavel Begunkov if (sr->addr) { 345516e82f0SPavel Begunkov if (req_has_async_data(req)) { 346516e82f0SPavel Begunkov struct io_async_msghdr *io = req->async_data; 347516e82f0SPavel Begunkov 348516e82f0SPavel Begunkov msg.msg_name = &io->addr; 349516e82f0SPavel Begunkov } else { 350516e82f0SPavel Begunkov ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address); 351516e82f0SPavel Begunkov if (unlikely(ret < 0)) 352516e82f0SPavel Begunkov return ret; 353516e82f0SPavel Begunkov msg.msg_name = (struct sockaddr *)&__address; 354516e82f0SPavel Begunkov } 355516e82f0SPavel Begunkov msg.msg_namelen = sr->addr_len; 356516e82f0SPavel Begunkov } 357516e82f0SPavel Begunkov 358f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 359f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 360516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 361f9ead18cSJens Axboe 362f9ead18cSJens Axboe sock = sock_from_file(req->file); 363f9ead18cSJens Axboe if (unlikely(!sock)) 364f9ead18cSJens Axboe return -ENOTSOCK; 365f9ead18cSJens Axboe 366f9ead18cSJens Axboe ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); 367f9ead18cSJens Axboe if (unlikely(ret)) 368f9ead18cSJens Axboe return ret; 369f9ead18cSJens Axboe 370f9ead18cSJens Axboe flags = sr->msg_flags; 371f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 372f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 373f9ead18cSJens Axboe if (flags & MSG_WAITALL) 374f9ead18cSJens Axboe min_ret = iov_iter_count(&msg.msg_iter); 375f9ead18cSJens Axboe 376f9ead18cSJens Axboe msg.msg_flags = flags; 377f9ead18cSJens Axboe ret = sock_sendmsg(sock, &msg); 378f9ead18cSJens Axboe if (ret < min_ret) { 379f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 380516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 381516e82f0SPavel Begunkov 382f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 383f9ead18cSJens Axboe sr->len -= ret; 384f9ead18cSJens Axboe sr->buf += ret; 385f9ead18cSJens Axboe sr->done_io += ret; 386f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 387516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 388f9ead18cSJens Axboe } 38995eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 39095eafc74SPavel Begunkov ret = -EINTR; 391f9ead18cSJens Axboe req_set_fail(req); 392f9ead18cSJens Axboe } 393f9ead18cSJens Axboe if (ret >= 0) 394f9ead18cSJens Axboe ret += sr->done_io; 395f9ead18cSJens Axboe else if (sr->done_io) 396f9ead18cSJens Axboe ret = sr->done_io; 397f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 398f9ead18cSJens Axboe return IOU_OK; 399f9ead18cSJens Axboe } 400f9ead18cSJens Axboe 4019bb66906SDylan Yudaken static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) 4029bb66906SDylan Yudaken { 4039b0fc3c0SDylan Yudaken int hdr; 4049bb66906SDylan Yudaken 4059b0fc3c0SDylan Yudaken if (iomsg->namelen < 0) 4069bb66906SDylan Yudaken return true; 4079b0fc3c0SDylan Yudaken if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), 4089b0fc3c0SDylan Yudaken iomsg->namelen, &hdr)) 4099bb66906SDylan Yudaken return true; 4109b0fc3c0SDylan Yudaken if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) 4119bb66906SDylan Yudaken return true; 4129bb66906SDylan Yudaken 4139bb66906SDylan Yudaken return false; 4149bb66906SDylan Yudaken } 4159bb66906SDylan Yudaken 416f9ead18cSJens Axboe static int __io_recvmsg_copy_hdr(struct io_kiocb *req, 417f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 418f9ead18cSJens Axboe { 419f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 4207fa875b8SDylan Yudaken struct user_msghdr msg; 421f9ead18cSJens Axboe int ret; 422f9ead18cSJens Axboe 4237fa875b8SDylan Yudaken if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) 4247fa875b8SDylan Yudaken return -EFAULT; 4257fa875b8SDylan Yudaken 4267fa875b8SDylan Yudaken ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 427f9ead18cSJens Axboe if (ret) 428f9ead18cSJens Axboe return ret; 429f9ead18cSJens Axboe 430f9ead18cSJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 4317fa875b8SDylan Yudaken if (msg.msg_iovlen == 0) { 4325702196eSDylan Yudaken sr->len = iomsg->fast_iov[0].iov_len = 0; 4335702196eSDylan Yudaken iomsg->fast_iov[0].iov_base = NULL; 4345702196eSDylan Yudaken iomsg->free_iov = NULL; 4357fa875b8SDylan Yudaken } else if (msg.msg_iovlen > 1) { 436f9ead18cSJens Axboe return -EINVAL; 4375702196eSDylan Yudaken } else { 4387fa875b8SDylan Yudaken if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) 439f9ead18cSJens Axboe return -EFAULT; 440f9ead18cSJens Axboe sr->len = iomsg->fast_iov[0].iov_len; 441f9ead18cSJens Axboe iomsg->free_iov = NULL; 4425702196eSDylan Yudaken } 4439bb66906SDylan Yudaken 4449bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 4459bb66906SDylan Yudaken iomsg->namelen = msg.msg_namelen; 4469bb66906SDylan Yudaken iomsg->controllen = msg.msg_controllen; 4479bb66906SDylan Yudaken if (io_recvmsg_multishot_overflow(iomsg)) 4489bb66906SDylan Yudaken return -EOVERFLOW; 4499bb66906SDylan Yudaken } 450f9ead18cSJens Axboe } else { 451f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 4527fa875b8SDylan Yudaken ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, 453f9ead18cSJens Axboe &iomsg->free_iov, &iomsg->msg.msg_iter, 454f9ead18cSJens Axboe false); 455f9ead18cSJens Axboe if (ret > 0) 456f9ead18cSJens Axboe ret = 0; 457f9ead18cSJens Axboe } 458f9ead18cSJens Axboe 459f9ead18cSJens Axboe return ret; 460f9ead18cSJens Axboe } 461f9ead18cSJens Axboe 462f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 463f9ead18cSJens Axboe static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, 464f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 465f9ead18cSJens Axboe { 466f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 46772c531f8SDylan Yudaken struct compat_msghdr msg; 468f9ead18cSJens Axboe struct compat_iovec __user *uiov; 469f9ead18cSJens Axboe int ret; 470f9ead18cSJens Axboe 47172c531f8SDylan Yudaken if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) 47272c531f8SDylan Yudaken return -EFAULT; 47372c531f8SDylan Yudaken 4744f6a94d3SJens Axboe ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 475f9ead18cSJens Axboe if (ret) 476f9ead18cSJens Axboe return ret; 477f9ead18cSJens Axboe 47872c531f8SDylan Yudaken uiov = compat_ptr(msg.msg_iov); 479f9ead18cSJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 480f9ead18cSJens Axboe compat_ssize_t clen; 481f9ead18cSJens Axboe 48272c531f8SDylan Yudaken if (msg.msg_iovlen == 0) { 4836d2f75a0SDylan Yudaken sr->len = 0; 48472c531f8SDylan Yudaken } else if (msg.msg_iovlen > 1) { 485f9ead18cSJens Axboe return -EINVAL; 4866d2f75a0SDylan Yudaken } else { 487f9ead18cSJens Axboe if (!access_ok(uiov, sizeof(*uiov))) 488f9ead18cSJens Axboe return -EFAULT; 489f9ead18cSJens Axboe if (__get_user(clen, &uiov->iov_len)) 490f9ead18cSJens Axboe return -EFAULT; 491f9ead18cSJens Axboe if (clen < 0) 492f9ead18cSJens Axboe return -EINVAL; 493f9ead18cSJens Axboe sr->len = clen; 4946d2f75a0SDylan Yudaken } 4959bb66906SDylan Yudaken 4969bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 4979bb66906SDylan Yudaken iomsg->namelen = msg.msg_namelen; 4989bb66906SDylan Yudaken iomsg->controllen = msg.msg_controllen; 4999bb66906SDylan Yudaken if (io_recvmsg_multishot_overflow(iomsg)) 5009bb66906SDylan Yudaken return -EOVERFLOW; 5019bb66906SDylan Yudaken } 502f9ead18cSJens Axboe } else { 503f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 50472c531f8SDylan Yudaken ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen, 505f9ead18cSJens Axboe UIO_FASTIOV, &iomsg->free_iov, 506f9ead18cSJens Axboe &iomsg->msg.msg_iter, true); 507f9ead18cSJens Axboe if (ret < 0) 508f9ead18cSJens Axboe return ret; 509f9ead18cSJens Axboe } 510f9ead18cSJens Axboe 511f9ead18cSJens Axboe return 0; 512f9ead18cSJens Axboe } 513f9ead18cSJens Axboe #endif 514f9ead18cSJens Axboe 515f9ead18cSJens Axboe static int io_recvmsg_copy_hdr(struct io_kiocb *req, 516f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 517f9ead18cSJens Axboe { 518f9ead18cSJens Axboe iomsg->msg.msg_name = &iomsg->addr; 519f9ead18cSJens Axboe 520f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 521f9ead18cSJens Axboe if (req->ctx->compat) 522f9ead18cSJens Axboe return __io_compat_recvmsg_copy_hdr(req, iomsg); 523f9ead18cSJens Axboe #endif 524f9ead18cSJens Axboe 525f9ead18cSJens Axboe return __io_recvmsg_copy_hdr(req, iomsg); 526f9ead18cSJens Axboe } 527f9ead18cSJens Axboe 528f9ead18cSJens Axboe int io_recvmsg_prep_async(struct io_kiocb *req) 529f9ead18cSJens Axboe { 530f9ead18cSJens Axboe int ret; 531f9ead18cSJens Axboe 532858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 533858c293eSPavel Begunkov return -ENOMEM; 534f9ead18cSJens Axboe ret = io_recvmsg_copy_hdr(req, req->async_data); 535f9ead18cSJens Axboe if (!ret) 536f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 537f9ead18cSJens Axboe return ret; 538f9ead18cSJens Axboe } 539f9ead18cSJens Axboe 540b3fdea6eSDylan Yudaken #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT) 541b3fdea6eSDylan Yudaken 542f9ead18cSJens Axboe int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 543f9ead18cSJens Axboe { 544f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 545f9ead18cSJens Axboe 546f9ead18cSJens Axboe if (unlikely(sqe->file_index || sqe->addr2)) 547f9ead18cSJens Axboe return -EINVAL; 548f9ead18cSJens Axboe 549f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 550f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 551f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 552b3fdea6eSDylan Yudaken if (sr->flags & ~(RECVMSG_FLAGS)) 553f9ead18cSJens Axboe return -EINVAL; 554f9ead18cSJens Axboe sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 555f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 556f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 557f9ead18cSJens Axboe if (sr->msg_flags & MSG_ERRQUEUE) 558f9ead18cSJens Axboe req->flags |= REQ_F_CLEAR_POLLIN; 559b3fdea6eSDylan Yudaken if (sr->flags & IORING_RECV_MULTISHOT) { 560b3fdea6eSDylan Yudaken if (!(req->flags & REQ_F_BUFFER_SELECT)) 561b3fdea6eSDylan Yudaken return -EINVAL; 562b3fdea6eSDylan Yudaken if (sr->msg_flags & MSG_WAITALL) 563b3fdea6eSDylan Yudaken return -EINVAL; 564b3fdea6eSDylan Yudaken if (req->opcode == IORING_OP_RECV && sr->len) 565b3fdea6eSDylan Yudaken return -EINVAL; 566b3fdea6eSDylan Yudaken req->flags |= REQ_F_APOLL_MULTISHOT; 567b3fdea6eSDylan Yudaken } 568f9ead18cSJens Axboe 569f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 570f9ead18cSJens Axboe if (req->ctx->compat) 571f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 572f9ead18cSJens Axboe #endif 573f9ead18cSJens Axboe sr->done_io = 0; 574f9ead18cSJens Axboe return 0; 575f9ead18cSJens Axboe } 576f9ead18cSJens Axboe 577b3fdea6eSDylan Yudaken static inline void io_recv_prep_retry(struct io_kiocb *req) 578b3fdea6eSDylan Yudaken { 579f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 580b3fdea6eSDylan Yudaken 581b3fdea6eSDylan Yudaken sr->done_io = 0; 582b3fdea6eSDylan Yudaken sr->len = 0; /* get from the provided buffer */ 583b3fdea6eSDylan Yudaken } 584b3fdea6eSDylan Yudaken 585b3fdea6eSDylan Yudaken /* 5869bb66906SDylan Yudaken * Finishes io_recv and io_recvmsg. 587b3fdea6eSDylan Yudaken * 588b3fdea6eSDylan Yudaken * Returns true if it is actually finished, or false if it should run 589b3fdea6eSDylan Yudaken * again (for multishot). 590b3fdea6eSDylan Yudaken */ 5919bb66906SDylan Yudaken static inline bool io_recv_finish(struct io_kiocb *req, int *ret, 5929bb66906SDylan Yudaken unsigned int cflags, bool mshot_finished) 593b3fdea6eSDylan Yudaken { 594b3fdea6eSDylan Yudaken if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 595b3fdea6eSDylan Yudaken io_req_set_res(req, *ret, cflags); 596b3fdea6eSDylan Yudaken *ret = IOU_OK; 597b3fdea6eSDylan Yudaken return true; 598b3fdea6eSDylan Yudaken } 599b3fdea6eSDylan Yudaken 6009bb66906SDylan Yudaken if (!mshot_finished) { 601b3fdea6eSDylan Yudaken if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret, 602b3fdea6eSDylan Yudaken cflags | IORING_CQE_F_MORE, false)) { 603b3fdea6eSDylan Yudaken io_recv_prep_retry(req); 604b3fdea6eSDylan Yudaken return false; 605b3fdea6eSDylan Yudaken } 606b3fdea6eSDylan Yudaken /* 607b3fdea6eSDylan Yudaken * Otherwise stop multishot but use the current result. 608b3fdea6eSDylan Yudaken * Probably will end up going into overflow, but this means 609b3fdea6eSDylan Yudaken * we cannot trust the ordering anymore 610b3fdea6eSDylan Yudaken */ 611b3fdea6eSDylan Yudaken } 612b3fdea6eSDylan Yudaken 613b3fdea6eSDylan Yudaken io_req_set_res(req, *ret, cflags); 614b3fdea6eSDylan Yudaken 615b3fdea6eSDylan Yudaken if (req->flags & REQ_F_POLLED) 616b3fdea6eSDylan Yudaken *ret = IOU_STOP_MULTISHOT; 617e2df2ccbSDylan Yudaken else 618e2df2ccbSDylan Yudaken *ret = IOU_OK; 619b3fdea6eSDylan Yudaken return true; 620b3fdea6eSDylan Yudaken } 621b3fdea6eSDylan Yudaken 6229bb66906SDylan Yudaken static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 6239bb66906SDylan Yudaken struct io_sr_msg *sr, void __user **buf, 6249bb66906SDylan Yudaken size_t *len) 6259bb66906SDylan Yudaken { 6269bb66906SDylan Yudaken unsigned long ubuf = (unsigned long) *buf; 6279bb66906SDylan Yudaken unsigned long hdr; 6289bb66906SDylan Yudaken 6299bb66906SDylan Yudaken hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 6309bb66906SDylan Yudaken kmsg->controllen; 6319bb66906SDylan Yudaken if (*len < hdr) 6329bb66906SDylan Yudaken return -EFAULT; 6339bb66906SDylan Yudaken 6349bb66906SDylan Yudaken if (kmsg->controllen) { 6359bb66906SDylan Yudaken unsigned long control = ubuf + hdr - kmsg->controllen; 6369bb66906SDylan Yudaken 637d1f6222cSDylan Yudaken kmsg->msg.msg_control_user = (void __user *) control; 6389bb66906SDylan Yudaken kmsg->msg.msg_controllen = kmsg->controllen; 6399bb66906SDylan Yudaken } 6409bb66906SDylan Yudaken 6419bb66906SDylan Yudaken sr->buf = *buf; /* stash for later copy */ 642d1f6222cSDylan Yudaken *buf = (void __user *) (ubuf + hdr); 6439bb66906SDylan Yudaken kmsg->payloadlen = *len = *len - hdr; 6449bb66906SDylan Yudaken return 0; 6459bb66906SDylan Yudaken } 6469bb66906SDylan Yudaken 6479bb66906SDylan Yudaken struct io_recvmsg_multishot_hdr { 6489bb66906SDylan Yudaken struct io_uring_recvmsg_out msg; 6499bb66906SDylan Yudaken struct sockaddr_storage addr; 6509bb66906SDylan Yudaken }; 6519bb66906SDylan Yudaken 6529bb66906SDylan Yudaken static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 6539bb66906SDylan Yudaken struct io_async_msghdr *kmsg, 6549bb66906SDylan Yudaken unsigned int flags, bool *finished) 6559bb66906SDylan Yudaken { 6569bb66906SDylan Yudaken int err; 6579bb66906SDylan Yudaken int copy_len; 6589bb66906SDylan Yudaken struct io_recvmsg_multishot_hdr hdr; 6599bb66906SDylan Yudaken 6609bb66906SDylan Yudaken if (kmsg->namelen) 6619bb66906SDylan Yudaken kmsg->msg.msg_name = &hdr.addr; 6629bb66906SDylan Yudaken kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 6639bb66906SDylan Yudaken kmsg->msg.msg_namelen = 0; 6649bb66906SDylan Yudaken 6659bb66906SDylan Yudaken if (sock->file->f_flags & O_NONBLOCK) 6669bb66906SDylan Yudaken flags |= MSG_DONTWAIT; 6679bb66906SDylan Yudaken 6689bb66906SDylan Yudaken err = sock_recvmsg(sock, &kmsg->msg, flags); 6699bb66906SDylan Yudaken *finished = err <= 0; 6709bb66906SDylan Yudaken if (err < 0) 6719bb66906SDylan Yudaken return err; 6729bb66906SDylan Yudaken 6739bb66906SDylan Yudaken hdr.msg = (struct io_uring_recvmsg_out) { 6749bb66906SDylan Yudaken .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 6759bb66906SDylan Yudaken .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 6769bb66906SDylan Yudaken }; 6779bb66906SDylan Yudaken 6789bb66906SDylan Yudaken hdr.msg.payloadlen = err; 6799bb66906SDylan Yudaken if (err > kmsg->payloadlen) 6809bb66906SDylan Yudaken err = kmsg->payloadlen; 6819bb66906SDylan Yudaken 6829bb66906SDylan Yudaken copy_len = sizeof(struct io_uring_recvmsg_out); 6839bb66906SDylan Yudaken if (kmsg->msg.msg_namelen > kmsg->namelen) 6849bb66906SDylan Yudaken copy_len += kmsg->namelen; 6859bb66906SDylan Yudaken else 6869bb66906SDylan Yudaken copy_len += kmsg->msg.msg_namelen; 6879bb66906SDylan Yudaken 6889bb66906SDylan Yudaken /* 6899bb66906SDylan Yudaken * "fromlen shall refer to the value before truncation.." 6909bb66906SDylan Yudaken * 1003.1g 6919bb66906SDylan Yudaken */ 6929bb66906SDylan Yudaken hdr.msg.namelen = kmsg->msg.msg_namelen; 6939bb66906SDylan Yudaken 6949bb66906SDylan Yudaken /* ensure that there is no gap between hdr and sockaddr_storage */ 6959bb66906SDylan Yudaken BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 6969bb66906SDylan Yudaken sizeof(struct io_uring_recvmsg_out)); 6979bb66906SDylan Yudaken if (copy_to_user(io->buf, &hdr, copy_len)) { 6989bb66906SDylan Yudaken *finished = true; 6999bb66906SDylan Yudaken return -EFAULT; 7009bb66906SDylan Yudaken } 7019bb66906SDylan Yudaken 7029bb66906SDylan Yudaken return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 7039bb66906SDylan Yudaken kmsg->controllen + err; 7049bb66906SDylan Yudaken } 7059bb66906SDylan Yudaken 706f9ead18cSJens Axboe int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 707f9ead18cSJens Axboe { 708f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 709f9ead18cSJens Axboe struct io_async_msghdr iomsg, *kmsg; 710f9ead18cSJens Axboe struct socket *sock; 711f9ead18cSJens Axboe unsigned int cflags; 712f9ead18cSJens Axboe unsigned flags; 713f9ead18cSJens Axboe int ret, min_ret = 0; 714f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 7159bb66906SDylan Yudaken bool mshot_finished = true; 716f9ead18cSJens Axboe 717f9ead18cSJens Axboe sock = sock_from_file(req->file); 718f9ead18cSJens Axboe if (unlikely(!sock)) 719f9ead18cSJens Axboe return -ENOTSOCK; 720f9ead18cSJens Axboe 721f9ead18cSJens Axboe if (req_has_async_data(req)) { 722f9ead18cSJens Axboe kmsg = req->async_data; 723f9ead18cSJens Axboe } else { 724f9ead18cSJens Axboe ret = io_recvmsg_copy_hdr(req, &iomsg); 725f9ead18cSJens Axboe if (ret) 726f9ead18cSJens Axboe return ret; 727f9ead18cSJens Axboe kmsg = &iomsg; 728f9ead18cSJens Axboe } 729f9ead18cSJens Axboe 730f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 731f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 73243e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 733f9ead18cSJens Axboe 7349bb66906SDylan Yudaken retry_multishot: 735f9ead18cSJens Axboe if (io_do_buffer_select(req)) { 736f9ead18cSJens Axboe void __user *buf; 7379bb66906SDylan Yudaken size_t len = sr->len; 738f9ead18cSJens Axboe 7399bb66906SDylan Yudaken buf = io_buffer_select(req, &len, issue_flags); 740f9ead18cSJens Axboe if (!buf) 741f9ead18cSJens Axboe return -ENOBUFS; 7429bb66906SDylan Yudaken 7439bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 7449bb66906SDylan Yudaken ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); 7459bb66906SDylan Yudaken if (ret) { 7469bb66906SDylan Yudaken io_kbuf_recycle(req, issue_flags); 7479bb66906SDylan Yudaken return ret; 7489bb66906SDylan Yudaken } 7499bb66906SDylan Yudaken } 7509bb66906SDylan Yudaken 751f9ead18cSJens Axboe kmsg->fast_iov[0].iov_base = buf; 7529bb66906SDylan Yudaken kmsg->fast_iov[0].iov_len = len; 753f9ead18cSJens Axboe iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1, 7549bb66906SDylan Yudaken len); 755f9ead18cSJens Axboe } 756f9ead18cSJens Axboe 757f9ead18cSJens Axboe flags = sr->msg_flags; 758f9ead18cSJens Axboe if (force_nonblock) 759f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 760f9ead18cSJens Axboe if (flags & MSG_WAITALL) 761f9ead18cSJens Axboe min_ret = iov_iter_count(&kmsg->msg.msg_iter); 762f9ead18cSJens Axboe 763f9ead18cSJens Axboe kmsg->msg.msg_get_inq = 1; 7649bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) 7659bb66906SDylan Yudaken ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 7669bb66906SDylan Yudaken &mshot_finished); 7679bb66906SDylan Yudaken else 7689bb66906SDylan Yudaken ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 7699bb66906SDylan Yudaken kmsg->uaddr, flags); 7709bb66906SDylan Yudaken 771f9ead18cSJens Axboe if (ret < min_ret) { 7729bb66906SDylan Yudaken if (ret == -EAGAIN && force_nonblock) { 7739bb66906SDylan Yudaken ret = io_setup_async_msg(req, kmsg, issue_flags); 7749bb66906SDylan Yudaken if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) == 7759bb66906SDylan Yudaken IO_APOLL_MULTI_POLLED) { 7769bb66906SDylan Yudaken io_kbuf_recycle(req, issue_flags); 7779bb66906SDylan Yudaken return IOU_ISSUE_SKIP_COMPLETE; 7789bb66906SDylan Yudaken } 7799bb66906SDylan Yudaken return ret; 7809bb66906SDylan Yudaken } 781f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 782f9ead18cSJens Axboe sr->done_io += ret; 783f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 78443e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 785f9ead18cSJens Axboe } 78695eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 78795eafc74SPavel Begunkov ret = -EINTR; 788f9ead18cSJens Axboe req_set_fail(req); 789f9ead18cSJens Axboe } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 790f9ead18cSJens Axboe req_set_fail(req); 791f9ead18cSJens Axboe } 792f9ead18cSJens Axboe 793d4e097daSDylan Yudaken if (ret > 0) 794f9ead18cSJens Axboe ret += sr->done_io; 795f9ead18cSJens Axboe else if (sr->done_io) 796f9ead18cSJens Axboe ret = sr->done_io; 797d4e097daSDylan Yudaken else 798d4e097daSDylan Yudaken io_kbuf_recycle(req, issue_flags); 799d4e097daSDylan Yudaken 800f9ead18cSJens Axboe cflags = io_put_kbuf(req, issue_flags); 801f9ead18cSJens Axboe if (kmsg->msg.msg_inq) 802f9ead18cSJens Axboe cflags |= IORING_CQE_F_SOCK_NONEMPTY; 803b3fdea6eSDylan Yudaken 8049bb66906SDylan Yudaken if (!io_recv_finish(req, &ret, cflags, mshot_finished)) 8059bb66906SDylan Yudaken goto retry_multishot; 8069bb66906SDylan Yudaken 8079bb66906SDylan Yudaken if (mshot_finished) { 8089bb66906SDylan Yudaken io_netmsg_recycle(req, issue_flags); 8099bb66906SDylan Yudaken /* fast path, check for non-NULL to avoid function call */ 8109bb66906SDylan Yudaken if (kmsg->free_iov) 8119bb66906SDylan Yudaken kfree(kmsg->free_iov); 8129bb66906SDylan Yudaken req->flags &= ~REQ_F_NEED_CLEANUP; 8139bb66906SDylan Yudaken } 8149bb66906SDylan Yudaken 8159bb66906SDylan Yudaken return ret; 816f9ead18cSJens Axboe } 817f9ead18cSJens Axboe 818f9ead18cSJens Axboe int io_recv(struct io_kiocb *req, unsigned int issue_flags) 819f9ead18cSJens Axboe { 820f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 821f9ead18cSJens Axboe struct msghdr msg; 822f9ead18cSJens Axboe struct socket *sock; 823f9ead18cSJens Axboe struct iovec iov; 824f9ead18cSJens Axboe unsigned int cflags; 825f9ead18cSJens Axboe unsigned flags; 826f9ead18cSJens Axboe int ret, min_ret = 0; 827f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 828b3fdea6eSDylan Yudaken size_t len = sr->len; 829f9ead18cSJens Axboe 830f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 831f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 832f9ead18cSJens Axboe return -EAGAIN; 833f9ead18cSJens Axboe 834f9ead18cSJens Axboe sock = sock_from_file(req->file); 835f9ead18cSJens Axboe if (unlikely(!sock)) 836f9ead18cSJens Axboe return -ENOTSOCK; 837f9ead18cSJens Axboe 838b3fdea6eSDylan Yudaken retry_multishot: 839f9ead18cSJens Axboe if (io_do_buffer_select(req)) { 840f9ead18cSJens Axboe void __user *buf; 841f9ead18cSJens Axboe 842b3fdea6eSDylan Yudaken buf = io_buffer_select(req, &len, issue_flags); 843f9ead18cSJens Axboe if (!buf) 844f9ead18cSJens Axboe return -ENOBUFS; 845f9ead18cSJens Axboe sr->buf = buf; 846f9ead18cSJens Axboe } 847f9ead18cSJens Axboe 848b3fdea6eSDylan Yudaken ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter); 849f9ead18cSJens Axboe if (unlikely(ret)) 850f9ead18cSJens Axboe goto out_free; 851f9ead18cSJens Axboe 852f9ead18cSJens Axboe msg.msg_name = NULL; 853f9ead18cSJens Axboe msg.msg_namelen = 0; 854f9ead18cSJens Axboe msg.msg_control = NULL; 855f9ead18cSJens Axboe msg.msg_get_inq = 1; 856f9ead18cSJens Axboe msg.msg_flags = 0; 857f9ead18cSJens Axboe msg.msg_controllen = 0; 858f9ead18cSJens Axboe msg.msg_iocb = NULL; 859e02b6651SPavel Begunkov msg.msg_ubuf = NULL; 860f9ead18cSJens Axboe 861f9ead18cSJens Axboe flags = sr->msg_flags; 862f9ead18cSJens Axboe if (force_nonblock) 863f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 864f9ead18cSJens Axboe if (flags & MSG_WAITALL) 865f9ead18cSJens Axboe min_ret = iov_iter_count(&msg.msg_iter); 866f9ead18cSJens Axboe 867f9ead18cSJens Axboe ret = sock_recvmsg(sock, &msg, flags); 868f9ead18cSJens Axboe if (ret < min_ret) { 869b3fdea6eSDylan Yudaken if (ret == -EAGAIN && force_nonblock) { 870b3fdea6eSDylan Yudaken if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) { 871b3fdea6eSDylan Yudaken io_kbuf_recycle(req, issue_flags); 872b3fdea6eSDylan Yudaken return IOU_ISSUE_SKIP_COMPLETE; 873b3fdea6eSDylan Yudaken } 874b3fdea6eSDylan Yudaken 875f9ead18cSJens Axboe return -EAGAIN; 876b3fdea6eSDylan Yudaken } 877f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 878f9ead18cSJens Axboe sr->len -= ret; 879f9ead18cSJens Axboe sr->buf += ret; 880f9ead18cSJens Axboe sr->done_io += ret; 881f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 882f9ead18cSJens Axboe return -EAGAIN; 883f9ead18cSJens Axboe } 88495eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 88595eafc74SPavel Begunkov ret = -EINTR; 886f9ead18cSJens Axboe req_set_fail(req); 887f9ead18cSJens Axboe } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 888f9ead18cSJens Axboe out_free: 889f9ead18cSJens Axboe req_set_fail(req); 890f9ead18cSJens Axboe } 891f9ead18cSJens Axboe 892d4e097daSDylan Yudaken if (ret > 0) 893f9ead18cSJens Axboe ret += sr->done_io; 894f9ead18cSJens Axboe else if (sr->done_io) 895f9ead18cSJens Axboe ret = sr->done_io; 896d4e097daSDylan Yudaken else 897d4e097daSDylan Yudaken io_kbuf_recycle(req, issue_flags); 898d4e097daSDylan Yudaken 899f9ead18cSJens Axboe cflags = io_put_kbuf(req, issue_flags); 900f9ead18cSJens Axboe if (msg.msg_inq) 901f9ead18cSJens Axboe cflags |= IORING_CQE_F_SOCK_NONEMPTY; 902b3fdea6eSDylan Yudaken 9039bb66906SDylan Yudaken if (!io_recv_finish(req, &ret, cflags, ret <= 0)) 904b3fdea6eSDylan Yudaken goto retry_multishot; 905b3fdea6eSDylan Yudaken 906b3fdea6eSDylan Yudaken return ret; 907f9ead18cSJens Axboe } 908f9ead18cSJens Axboe 909b0e9b551SPavel Begunkov void io_send_zc_cleanup(struct io_kiocb *req) 910b48c312bSPavel Begunkov { 911ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 912493108d9SPavel Begunkov struct io_async_msghdr *io; 913b48c312bSPavel Begunkov 914493108d9SPavel Begunkov if (req_has_async_data(req)) { 915493108d9SPavel Begunkov io = req->async_data; 9164c17a496SPavel Begunkov /* might be ->fast_iov if *msg_copy_hdr failed */ 9174c17a496SPavel Begunkov if (io->free_iov != io->fast_iov) 918493108d9SPavel Begunkov kfree(io->free_iov); 919493108d9SPavel Begunkov } 920a75155faSPavel Begunkov if (zc->notif) { 921b48c312bSPavel Begunkov io_notif_flush(zc->notif); 922b48c312bSPavel Begunkov zc->notif = NULL; 923b48c312bSPavel Begunkov } 924a75155faSPavel Begunkov } 925b48c312bSPavel Begunkov 926b0e9b551SPavel Begunkov int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 92706a5464bSPavel Begunkov { 928ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 92910c7d33eSPavel Begunkov struct io_ring_ctx *ctx = req->ctx; 930b48c312bSPavel Begunkov struct io_kiocb *notif; 93106a5464bSPavel Begunkov 932493108d9SPavel Begunkov if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) 933b48c312bSPavel Begunkov return -EINVAL; 934b48c312bSPavel Begunkov /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 935b48c312bSPavel Begunkov if (req->flags & REQ_F_CQE_SKIP) 93606a5464bSPavel Begunkov return -EINVAL; 93706a5464bSPavel Begunkov 93806a5464bSPavel Begunkov zc->flags = READ_ONCE(sqe->ioprio); 93963809137SPavel Begunkov if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | 94057f33224SPavel Begunkov IORING_RECVSEND_FIXED_BUF)) 94106a5464bSPavel Begunkov return -EINVAL; 942b48c312bSPavel Begunkov notif = zc->notif = io_alloc_notif(ctx); 943b48c312bSPavel Begunkov if (!notif) 944b48c312bSPavel Begunkov return -ENOMEM; 945b48c312bSPavel Begunkov notif->cqe.user_data = req->cqe.user_data; 946b48c312bSPavel Begunkov notif->cqe.res = 0; 947b48c312bSPavel Begunkov notif->cqe.flags = IORING_CQE_F_NOTIF; 948b48c312bSPavel Begunkov req->flags |= REQ_F_NEED_CLEANUP; 949e3366e02SPavel Begunkov if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 950e3366e02SPavel Begunkov unsigned idx = READ_ONCE(sqe->buf_index); 951e3366e02SPavel Begunkov 952e3366e02SPavel Begunkov if (unlikely(idx >= ctx->nr_user_bufs)) 953e3366e02SPavel Begunkov return -EFAULT; 954e3366e02SPavel Begunkov idx = array_index_nospec(idx, ctx->nr_user_bufs); 955e3366e02SPavel Begunkov req->imu = READ_ONCE(ctx->user_bufs[idx]); 956e3366e02SPavel Begunkov io_req_set_rsrc_node(notif, ctx, 0); 957e3366e02SPavel Begunkov } 95806a5464bSPavel Begunkov 959493108d9SPavel Begunkov if (req->opcode == IORING_OP_SEND_ZC) { 960493108d9SPavel Begunkov if (READ_ONCE(sqe->__pad3[0])) 961493108d9SPavel Begunkov return -EINVAL; 962493108d9SPavel Begunkov zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 963493108d9SPavel Begunkov zc->addr_len = READ_ONCE(sqe->addr_len); 964493108d9SPavel Begunkov } else { 965493108d9SPavel Begunkov if (unlikely(sqe->addr2 || sqe->file_index)) 966493108d9SPavel Begunkov return -EINVAL; 967493108d9SPavel Begunkov if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF)) 968493108d9SPavel Begunkov return -EINVAL; 969493108d9SPavel Begunkov } 970493108d9SPavel Begunkov 97106a5464bSPavel Begunkov zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 97206a5464bSPavel Begunkov zc->len = READ_ONCE(sqe->len); 97306a5464bSPavel Begunkov zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 97406a5464bSPavel Begunkov if (zc->msg_flags & MSG_DONTWAIT) 97506a5464bSPavel Begunkov req->flags |= REQ_F_NOWAIT; 976092aeedbSPavel Begunkov 9774a933e62SPavel Begunkov zc->done_io = 0; 978092aeedbSPavel Begunkov 97906a5464bSPavel Begunkov #ifdef CONFIG_COMPAT 98006a5464bSPavel Begunkov if (req->ctx->compat) 98106a5464bSPavel Begunkov zc->msg_flags |= MSG_CMSG_COMPAT; 98206a5464bSPavel Begunkov #endif 98306a5464bSPavel Begunkov return 0; 98406a5464bSPavel Begunkov } 98506a5464bSPavel Begunkov 986cd9021e8SPavel Begunkov static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, 987cd9021e8SPavel Begunkov struct iov_iter *from, size_t length) 988cd9021e8SPavel Begunkov { 989cd9021e8SPavel Begunkov skb_zcopy_downgrade_managed(skb); 990cd9021e8SPavel Begunkov return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); 991cd9021e8SPavel Begunkov } 992cd9021e8SPavel Begunkov 9933ff1a0d3SPavel Begunkov static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, 9943ff1a0d3SPavel Begunkov struct iov_iter *from, size_t length) 9953ff1a0d3SPavel Begunkov { 9963ff1a0d3SPavel Begunkov struct skb_shared_info *shinfo = skb_shinfo(skb); 9973ff1a0d3SPavel Begunkov int frag = shinfo->nr_frags; 9983ff1a0d3SPavel Begunkov int ret = 0; 9993ff1a0d3SPavel Begunkov struct bvec_iter bi; 10003ff1a0d3SPavel Begunkov ssize_t copied = 0; 10013ff1a0d3SPavel Begunkov unsigned long truesize = 0; 10023ff1a0d3SPavel Begunkov 1003cd9021e8SPavel Begunkov if (!frag) 10043ff1a0d3SPavel Begunkov shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; 1005cd9021e8SPavel Begunkov else if (unlikely(!skb_zcopy_managed(skb))) 10063ff1a0d3SPavel Begunkov return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); 10073ff1a0d3SPavel Begunkov 10083ff1a0d3SPavel Begunkov bi.bi_size = min(from->count, length); 10093ff1a0d3SPavel Begunkov bi.bi_bvec_done = from->iov_offset; 10103ff1a0d3SPavel Begunkov bi.bi_idx = 0; 10113ff1a0d3SPavel Begunkov 10123ff1a0d3SPavel Begunkov while (bi.bi_size && frag < MAX_SKB_FRAGS) { 10133ff1a0d3SPavel Begunkov struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); 10143ff1a0d3SPavel Begunkov 10153ff1a0d3SPavel Begunkov copied += v.bv_len; 10163ff1a0d3SPavel Begunkov truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); 10173ff1a0d3SPavel Begunkov __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, 10183ff1a0d3SPavel Begunkov v.bv_offset, v.bv_len); 10193ff1a0d3SPavel Begunkov bvec_iter_advance_single(from->bvec, &bi, v.bv_len); 10203ff1a0d3SPavel Begunkov } 10213ff1a0d3SPavel Begunkov if (bi.bi_size) 10223ff1a0d3SPavel Begunkov ret = -EMSGSIZE; 10233ff1a0d3SPavel Begunkov 10243ff1a0d3SPavel Begunkov shinfo->nr_frags = frag; 10253ff1a0d3SPavel Begunkov from->bvec += bi.bi_idx; 10263ff1a0d3SPavel Begunkov from->nr_segs -= bi.bi_idx; 1027dfb58b17SPavel Begunkov from->count -= copied; 10283ff1a0d3SPavel Begunkov from->iov_offset = bi.bi_bvec_done; 10293ff1a0d3SPavel Begunkov 10303ff1a0d3SPavel Begunkov skb->data_len += copied; 10313ff1a0d3SPavel Begunkov skb->len += copied; 10323ff1a0d3SPavel Begunkov skb->truesize += truesize; 10333ff1a0d3SPavel Begunkov 10343ff1a0d3SPavel Begunkov if (sk && sk->sk_type == SOCK_STREAM) { 10353ff1a0d3SPavel Begunkov sk_wmem_queued_add(sk, truesize); 10363ff1a0d3SPavel Begunkov if (!skb_zcopy_pure(skb)) 10373ff1a0d3SPavel Begunkov sk_mem_charge(sk, truesize); 10383ff1a0d3SPavel Begunkov } else { 10393ff1a0d3SPavel Begunkov refcount_add(truesize, &skb->sk->sk_wmem_alloc); 10403ff1a0d3SPavel Begunkov } 10413ff1a0d3SPavel Begunkov return ret; 10423ff1a0d3SPavel Begunkov } 10433ff1a0d3SPavel Begunkov 1044b0e9b551SPavel Begunkov int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 104506a5464bSPavel Begunkov { 10466ae61b7aSPavel Begunkov struct sockaddr_storage __address; 1047ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 104806a5464bSPavel Begunkov struct msghdr msg; 104906a5464bSPavel Begunkov struct iovec iov; 105006a5464bSPavel Begunkov struct socket *sock; 10516ae91ac9SPavel Begunkov unsigned msg_flags; 105206a5464bSPavel Begunkov int ret, min_ret = 0; 105306a5464bSPavel Begunkov 105406a5464bSPavel Begunkov sock = sock_from_file(req->file); 105506a5464bSPavel Begunkov if (unlikely(!sock)) 105606a5464bSPavel Begunkov return -ENOTSOCK; 105706a5464bSPavel Begunkov 105806a5464bSPavel Begunkov msg.msg_name = NULL; 105906a5464bSPavel Begunkov msg.msg_control = NULL; 106006a5464bSPavel Begunkov msg.msg_controllen = 0; 106106a5464bSPavel Begunkov msg.msg_namelen = 0; 106206a5464bSPavel Begunkov 106386dc8f23SPavel Begunkov if (zc->addr) { 1064581711c4SPavel Begunkov if (req_has_async_data(req)) { 1065581711c4SPavel Begunkov struct io_async_msghdr *io = req->async_data; 1066581711c4SPavel Begunkov 10676ae61b7aSPavel Begunkov msg.msg_name = &io->addr; 1068581711c4SPavel Begunkov } else { 1069581711c4SPavel Begunkov ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); 107086dc8f23SPavel Begunkov if (unlikely(ret < 0)) 107186dc8f23SPavel Begunkov return ret; 1072581711c4SPavel Begunkov msg.msg_name = (struct sockaddr *)&__address; 1073581711c4SPavel Begunkov } 107486dc8f23SPavel Begunkov msg.msg_namelen = zc->addr_len; 107586dc8f23SPavel Begunkov } 107686dc8f23SPavel Begunkov 10773c840053SPavel Begunkov if (!(req->flags & REQ_F_POLLED) && 10783c840053SPavel Begunkov (zc->flags & IORING_RECVSEND_POLL_FIRST)) 10796ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 10803c840053SPavel Begunkov 108110c7d33eSPavel Begunkov if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 108210c7d33eSPavel Begunkov ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, 108310c7d33eSPavel Begunkov (u64)(uintptr_t)zc->buf, zc->len); 108410c7d33eSPavel Begunkov if (unlikely(ret)) 108510c7d33eSPavel Begunkov return ret; 1086cd9021e8SPavel Begunkov msg.sg_from_iter = io_sg_from_iter; 108710c7d33eSPavel Begunkov } else { 108810c7d33eSPavel Begunkov ret = import_single_range(WRITE, zc->buf, zc->len, &iov, 108910c7d33eSPavel Begunkov &msg.msg_iter); 109006a5464bSPavel Begunkov if (unlikely(ret)) 109106a5464bSPavel Begunkov return ret; 1092b48c312bSPavel Begunkov ret = io_notif_account_mem(zc->notif, zc->len); 10932e32ba56SPavel Begunkov if (unlikely(ret)) 10942e32ba56SPavel Begunkov return ret; 1095cd9021e8SPavel Begunkov msg.sg_from_iter = io_sg_from_iter_iovec; 109610c7d33eSPavel Begunkov } 109706a5464bSPavel Begunkov 109806a5464bSPavel Begunkov msg_flags = zc->msg_flags | MSG_ZEROCOPY; 109906a5464bSPavel Begunkov if (issue_flags & IO_URING_F_NONBLOCK) 110006a5464bSPavel Begunkov msg_flags |= MSG_DONTWAIT; 110106a5464bSPavel Begunkov if (msg_flags & MSG_WAITALL) 110206a5464bSPavel Begunkov min_ret = iov_iter_count(&msg.msg_iter); 110306a5464bSPavel Begunkov 110406a5464bSPavel Begunkov msg.msg_flags = msg_flags; 1105b48c312bSPavel Begunkov msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 110606a5464bSPavel Begunkov ret = sock_sendmsg(sock, &msg); 110706a5464bSPavel Begunkov 110806a5464bSPavel Begunkov if (unlikely(ret < min_ret)) { 110906a5464bSPavel Begunkov if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 11106ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 1111581711c4SPavel Begunkov 11124a933e62SPavel Begunkov if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { 11134a933e62SPavel Begunkov zc->len -= ret; 11144a933e62SPavel Begunkov zc->buf += ret; 11154a933e62SPavel Begunkov zc->done_io += ret; 11164a933e62SPavel Begunkov req->flags |= REQ_F_PARTIAL_IO; 11176ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 11184a933e62SPavel Begunkov } 11194a933e62SPavel Begunkov if (ret == -ERESTARTSYS) 11204a933e62SPavel Begunkov ret = -EINTR; 11215a848b7cSPavel Begunkov req_set_fail(req); 112206a5464bSPavel Begunkov } 112306a5464bSPavel Begunkov 11244a933e62SPavel Begunkov if (ret >= 0) 11254a933e62SPavel Begunkov ret += zc->done_io; 11264a933e62SPavel Begunkov else if (zc->done_io) 11274a933e62SPavel Begunkov ret = zc->done_io; 1128b48c312bSPavel Begunkov 1129b48c312bSPavel Begunkov io_notif_flush(zc->notif); 1130b48c312bSPavel Begunkov req->flags &= ~REQ_F_NEED_CLEANUP; 11316ae91ac9SPavel Begunkov io_req_set_res(req, ret, IORING_CQE_F_MORE); 113206a5464bSPavel Begunkov return IOU_OK; 113306a5464bSPavel Begunkov } 113406a5464bSPavel Begunkov 1135493108d9SPavel Begunkov int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1136493108d9SPavel Begunkov { 1137493108d9SPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1138493108d9SPavel Begunkov struct io_async_msghdr iomsg, *kmsg; 1139493108d9SPavel Begunkov struct socket *sock; 11406ae91ac9SPavel Begunkov unsigned flags; 1141493108d9SPavel Begunkov int ret, min_ret = 0; 1142493108d9SPavel Begunkov 1143493108d9SPavel Begunkov sock = sock_from_file(req->file); 1144493108d9SPavel Begunkov if (unlikely(!sock)) 1145493108d9SPavel Begunkov return -ENOTSOCK; 1146493108d9SPavel Begunkov 1147493108d9SPavel Begunkov if (req_has_async_data(req)) { 1148493108d9SPavel Begunkov kmsg = req->async_data; 1149493108d9SPavel Begunkov } else { 1150493108d9SPavel Begunkov ret = io_sendmsg_copy_hdr(req, &iomsg); 1151493108d9SPavel Begunkov if (ret) 1152493108d9SPavel Begunkov return ret; 1153493108d9SPavel Begunkov kmsg = &iomsg; 1154493108d9SPavel Begunkov } 1155493108d9SPavel Begunkov 1156493108d9SPavel Begunkov if (!(req->flags & REQ_F_POLLED) && 1157493108d9SPavel Begunkov (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1158493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1159493108d9SPavel Begunkov 1160493108d9SPavel Begunkov flags = sr->msg_flags | MSG_ZEROCOPY; 1161493108d9SPavel Begunkov if (issue_flags & IO_URING_F_NONBLOCK) 1162493108d9SPavel Begunkov flags |= MSG_DONTWAIT; 1163493108d9SPavel Begunkov if (flags & MSG_WAITALL) 1164493108d9SPavel Begunkov min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1165493108d9SPavel Begunkov 1166493108d9SPavel Begunkov kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1167493108d9SPavel Begunkov kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1168493108d9SPavel Begunkov ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 1169493108d9SPavel Begunkov 1170493108d9SPavel Begunkov if (unlikely(ret < min_ret)) { 1171493108d9SPavel Begunkov if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1172493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1173493108d9SPavel Begunkov 1174493108d9SPavel Begunkov if (ret > 0 && io_net_retry(sock, flags)) { 1175493108d9SPavel Begunkov sr->done_io += ret; 1176493108d9SPavel Begunkov req->flags |= REQ_F_PARTIAL_IO; 1177493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1178493108d9SPavel Begunkov } 1179493108d9SPavel Begunkov if (ret == -ERESTARTSYS) 1180493108d9SPavel Begunkov ret = -EINTR; 1181493108d9SPavel Begunkov req_set_fail(req); 1182493108d9SPavel Begunkov } 1183493108d9SPavel Begunkov /* fast path, check for non-NULL to avoid function call */ 1184493108d9SPavel Begunkov if (kmsg->free_iov) 1185493108d9SPavel Begunkov kfree(kmsg->free_iov); 1186493108d9SPavel Begunkov 1187493108d9SPavel Begunkov io_netmsg_recycle(req, issue_flags); 1188493108d9SPavel Begunkov if (ret >= 0) 1189493108d9SPavel Begunkov ret += sr->done_io; 1190493108d9SPavel Begunkov else if (sr->done_io) 1191493108d9SPavel Begunkov ret = sr->done_io; 1192493108d9SPavel Begunkov 1193493108d9SPavel Begunkov io_notif_flush(sr->notif); 1194493108d9SPavel Begunkov req->flags &= ~REQ_F_NEED_CLEANUP; 11956ae91ac9SPavel Begunkov io_req_set_res(req, ret, IORING_CQE_F_MORE); 1196493108d9SPavel Begunkov return IOU_OK; 1197493108d9SPavel Begunkov } 1198493108d9SPavel Begunkov 11997e6b638eSPavel Begunkov void io_sendrecv_fail(struct io_kiocb *req) 12007e6b638eSPavel Begunkov { 12017e6b638eSPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 12027e6b638eSPavel Begunkov 12037e6b638eSPavel Begunkov if (req->flags & REQ_F_PARTIAL_IO) 12046ae91ac9SPavel Begunkov req->cqe.res = sr->done_io; 12056ae91ac9SPavel Begunkov 1206c4c0009eSPavel Begunkov if ((req->flags & REQ_F_NEED_CLEANUP) && 12076ae91ac9SPavel Begunkov (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 12086ae91ac9SPavel Begunkov req->cqe.flags |= IORING_CQE_F_MORE; 12095693bcceSPavel Begunkov } 12105693bcceSPavel Begunkov 1211f9ead18cSJens Axboe int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1212f9ead18cSJens Axboe { 1213f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1214f9ead18cSJens Axboe unsigned flags; 1215f9ead18cSJens Axboe 1216f9ead18cSJens Axboe if (sqe->len || sqe->buf_index) 1217f9ead18cSJens Axboe return -EINVAL; 1218f9ead18cSJens Axboe 1219f9ead18cSJens Axboe accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1220f9ead18cSJens Axboe accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1221f9ead18cSJens Axboe accept->flags = READ_ONCE(sqe->accept_flags); 1222f9ead18cSJens Axboe accept->nofile = rlimit(RLIMIT_NOFILE); 1223f9ead18cSJens Axboe flags = READ_ONCE(sqe->ioprio); 1224f9ead18cSJens Axboe if (flags & ~IORING_ACCEPT_MULTISHOT) 1225f9ead18cSJens Axboe return -EINVAL; 1226f9ead18cSJens Axboe 1227f9ead18cSJens Axboe accept->file_slot = READ_ONCE(sqe->file_index); 1228f9ead18cSJens Axboe if (accept->file_slot) { 1229f9ead18cSJens Axboe if (accept->flags & SOCK_CLOEXEC) 1230f9ead18cSJens Axboe return -EINVAL; 1231f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT && 1232f9ead18cSJens Axboe accept->file_slot != IORING_FILE_INDEX_ALLOC) 1233f9ead18cSJens Axboe return -EINVAL; 1234f9ead18cSJens Axboe } 1235f9ead18cSJens Axboe if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1236f9ead18cSJens Axboe return -EINVAL; 1237f9ead18cSJens Axboe if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1238f9ead18cSJens Axboe accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1239f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT) 1240f9ead18cSJens Axboe req->flags |= REQ_F_APOLL_MULTISHOT; 1241f9ead18cSJens Axboe return 0; 1242f9ead18cSJens Axboe } 1243f9ead18cSJens Axboe 1244f9ead18cSJens Axboe int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1245f9ead18cSJens Axboe { 1246f9ead18cSJens Axboe struct io_ring_ctx *ctx = req->ctx; 1247f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1248f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1249f9ead18cSJens Axboe unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; 1250f9ead18cSJens Axboe bool fixed = !!accept->file_slot; 1251f9ead18cSJens Axboe struct file *file; 1252f9ead18cSJens Axboe int ret, fd; 1253f9ead18cSJens Axboe 1254f9ead18cSJens Axboe retry: 1255f9ead18cSJens Axboe if (!fixed) { 1256f9ead18cSJens Axboe fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1257f9ead18cSJens Axboe if (unlikely(fd < 0)) 1258f9ead18cSJens Axboe return fd; 1259f9ead18cSJens Axboe } 1260f9ead18cSJens Axboe file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, 1261f9ead18cSJens Axboe accept->flags); 1262f9ead18cSJens Axboe if (IS_ERR(file)) { 1263f9ead18cSJens Axboe if (!fixed) 1264f9ead18cSJens Axboe put_unused_fd(fd); 1265f9ead18cSJens Axboe ret = PTR_ERR(file); 1266f9ead18cSJens Axboe if (ret == -EAGAIN && force_nonblock) { 1267f9ead18cSJens Axboe /* 1268f9ead18cSJens Axboe * if it's multishot and polled, we don't need to 1269f9ead18cSJens Axboe * return EAGAIN to arm the poll infra since it 1270f9ead18cSJens Axboe * has already been done 1271f9ead18cSJens Axboe */ 1272f9ead18cSJens Axboe if ((req->flags & IO_APOLL_MULTI_POLLED) == 1273f9ead18cSJens Axboe IO_APOLL_MULTI_POLLED) 1274f9ead18cSJens Axboe ret = IOU_ISSUE_SKIP_COMPLETE; 1275f9ead18cSJens Axboe return ret; 1276f9ead18cSJens Axboe } 1277f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1278f9ead18cSJens Axboe ret = -EINTR; 1279f9ead18cSJens Axboe req_set_fail(req); 1280f9ead18cSJens Axboe } else if (!fixed) { 1281f9ead18cSJens Axboe fd_install(fd, file); 1282f9ead18cSJens Axboe ret = fd; 1283f9ead18cSJens Axboe } else { 1284f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1285f9ead18cSJens Axboe accept->file_slot); 1286f9ead18cSJens Axboe } 1287f9ead18cSJens Axboe 1288f9ead18cSJens Axboe if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1289f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1290f9ead18cSJens Axboe return IOU_OK; 1291f9ead18cSJens Axboe } 1292f9ead18cSJens Axboe 1293cbd25748SDylan Yudaken if (ret >= 0 && 1294cbd25748SDylan Yudaken io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) 1295d245bca6SPavel Begunkov goto retry; 1296cbd25748SDylan Yudaken 1297cbd25748SDylan Yudaken io_req_set_res(req, ret, 0); 1298cbd25748SDylan Yudaken if (req->flags & REQ_F_POLLED) 1299cbd25748SDylan Yudaken return IOU_STOP_MULTISHOT; 1300cbd25748SDylan Yudaken return IOU_OK; 1301f9ead18cSJens Axboe } 1302f9ead18cSJens Axboe 1303f9ead18cSJens Axboe int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1304f9ead18cSJens Axboe { 1305f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1306f9ead18cSJens Axboe 1307f9ead18cSJens Axboe if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1308f9ead18cSJens Axboe return -EINVAL; 1309f9ead18cSJens Axboe 1310f9ead18cSJens Axboe sock->domain = READ_ONCE(sqe->fd); 1311f9ead18cSJens Axboe sock->type = READ_ONCE(sqe->off); 1312f9ead18cSJens Axboe sock->protocol = READ_ONCE(sqe->len); 1313f9ead18cSJens Axboe sock->file_slot = READ_ONCE(sqe->file_index); 1314f9ead18cSJens Axboe sock->nofile = rlimit(RLIMIT_NOFILE); 1315f9ead18cSJens Axboe 1316f9ead18cSJens Axboe sock->flags = sock->type & ~SOCK_TYPE_MASK; 1317f9ead18cSJens Axboe if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1318f9ead18cSJens Axboe return -EINVAL; 1319f9ead18cSJens Axboe if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1320f9ead18cSJens Axboe return -EINVAL; 1321f9ead18cSJens Axboe return 0; 1322f9ead18cSJens Axboe } 1323f9ead18cSJens Axboe 1324f9ead18cSJens Axboe int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1325f9ead18cSJens Axboe { 1326f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1327f9ead18cSJens Axboe bool fixed = !!sock->file_slot; 1328f9ead18cSJens Axboe struct file *file; 1329f9ead18cSJens Axboe int ret, fd; 1330f9ead18cSJens Axboe 1331f9ead18cSJens Axboe if (!fixed) { 1332f9ead18cSJens Axboe fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1333f9ead18cSJens Axboe if (unlikely(fd < 0)) 1334f9ead18cSJens Axboe return fd; 1335f9ead18cSJens Axboe } 1336f9ead18cSJens Axboe file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1337f9ead18cSJens Axboe if (IS_ERR(file)) { 1338f9ead18cSJens Axboe if (!fixed) 1339f9ead18cSJens Axboe put_unused_fd(fd); 1340f9ead18cSJens Axboe ret = PTR_ERR(file); 1341f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1342f9ead18cSJens Axboe return -EAGAIN; 1343f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1344f9ead18cSJens Axboe ret = -EINTR; 1345f9ead18cSJens Axboe req_set_fail(req); 1346f9ead18cSJens Axboe } else if (!fixed) { 1347f9ead18cSJens Axboe fd_install(fd, file); 1348f9ead18cSJens Axboe ret = fd; 1349f9ead18cSJens Axboe } else { 1350f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1351f9ead18cSJens Axboe sock->file_slot); 1352f9ead18cSJens Axboe } 1353f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1354f9ead18cSJens Axboe return IOU_OK; 1355f9ead18cSJens Axboe } 1356f9ead18cSJens Axboe 1357f9ead18cSJens Axboe int io_connect_prep_async(struct io_kiocb *req) 1358f9ead18cSJens Axboe { 1359f9ead18cSJens Axboe struct io_async_connect *io = req->async_data; 1360f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1361f9ead18cSJens Axboe 1362f9ead18cSJens Axboe return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); 1363f9ead18cSJens Axboe } 1364f9ead18cSJens Axboe 1365f9ead18cSJens Axboe int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1366f9ead18cSJens Axboe { 1367f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1368f9ead18cSJens Axboe 1369f9ead18cSJens Axboe if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1370f9ead18cSJens Axboe return -EINVAL; 1371f9ead18cSJens Axboe 1372f9ead18cSJens Axboe conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1373f9ead18cSJens Axboe conn->addr_len = READ_ONCE(sqe->addr2); 1374f9ead18cSJens Axboe return 0; 1375f9ead18cSJens Axboe } 1376f9ead18cSJens Axboe 1377f9ead18cSJens Axboe int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1378f9ead18cSJens Axboe { 1379f2ccb5aeSStefan Metzmacher struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1380f9ead18cSJens Axboe struct io_async_connect __io, *io; 1381f9ead18cSJens Axboe unsigned file_flags; 1382f9ead18cSJens Axboe int ret; 1383f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1384f9ead18cSJens Axboe 1385f9ead18cSJens Axboe if (req_has_async_data(req)) { 1386f9ead18cSJens Axboe io = req->async_data; 1387f9ead18cSJens Axboe } else { 1388f9ead18cSJens Axboe ret = move_addr_to_kernel(connect->addr, 1389f9ead18cSJens Axboe connect->addr_len, 1390f9ead18cSJens Axboe &__io.address); 1391f9ead18cSJens Axboe if (ret) 1392f9ead18cSJens Axboe goto out; 1393f9ead18cSJens Axboe io = &__io; 1394f9ead18cSJens Axboe } 1395f9ead18cSJens Axboe 1396f9ead18cSJens Axboe file_flags = force_nonblock ? O_NONBLOCK : 0; 1397f9ead18cSJens Axboe 1398f9ead18cSJens Axboe ret = __sys_connect_file(req->file, &io->address, 1399f9ead18cSJens Axboe connect->addr_len, file_flags); 1400f9ead18cSJens Axboe if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { 1401f9ead18cSJens Axboe if (req_has_async_data(req)) 1402f9ead18cSJens Axboe return -EAGAIN; 1403f9ead18cSJens Axboe if (io_alloc_async_data(req)) { 1404f9ead18cSJens Axboe ret = -ENOMEM; 1405f9ead18cSJens Axboe goto out; 1406f9ead18cSJens Axboe } 1407f9ead18cSJens Axboe memcpy(req->async_data, &__io, sizeof(__io)); 1408f9ead18cSJens Axboe return -EAGAIN; 1409f9ead18cSJens Axboe } 1410f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1411f9ead18cSJens Axboe ret = -EINTR; 1412f9ead18cSJens Axboe out: 1413f9ead18cSJens Axboe if (ret < 0) 1414f9ead18cSJens Axboe req_set_fail(req); 1415f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1416f9ead18cSJens Axboe return IOU_OK; 1417f9ead18cSJens Axboe } 141843e0bbbdSJens Axboe 141943e0bbbdSJens Axboe void io_netmsg_cache_free(struct io_cache_entry *entry) 142043e0bbbdSJens Axboe { 142143e0bbbdSJens Axboe kfree(container_of(entry, struct io_async_msghdr, cache)); 142243e0bbbdSJens Axboe } 1423f9ead18cSJens Axboe #endif 1424