1f9ead18cSJens Axboe // SPDX-License-Identifier: GPL-2.0 2f9ead18cSJens Axboe #include <linux/kernel.h> 3f9ead18cSJens Axboe #include <linux/errno.h> 4f9ead18cSJens Axboe #include <linux/file.h> 5f9ead18cSJens Axboe #include <linux/slab.h> 6f9ead18cSJens Axboe #include <linux/net.h> 7f9ead18cSJens Axboe #include <linux/compat.h> 8f9ead18cSJens Axboe #include <net/compat.h> 9f9ead18cSJens Axboe #include <linux/io_uring.h> 10f9ead18cSJens Axboe 11f9ead18cSJens Axboe #include <uapi/linux/io_uring.h> 12f9ead18cSJens Axboe 13f9ead18cSJens Axboe #include "io_uring.h" 143b77495aSJens Axboe #include "kbuf.h" 1543e0bbbdSJens Axboe #include "alloc_cache.h" 16f9ead18cSJens Axboe #include "net.h" 1706a5464bSPavel Begunkov #include "notif.h" 1810c7d33eSPavel Begunkov #include "rsrc.h" 19f9ead18cSJens Axboe 20f9ead18cSJens Axboe #if defined(CONFIG_NET) 21f9ead18cSJens Axboe struct io_shutdown { 22f9ead18cSJens Axboe struct file *file; 23f9ead18cSJens Axboe int how; 24f9ead18cSJens Axboe }; 25f9ead18cSJens Axboe 26f9ead18cSJens Axboe struct io_accept { 27f9ead18cSJens Axboe struct file *file; 28f9ead18cSJens Axboe struct sockaddr __user *addr; 29f9ead18cSJens Axboe int __user *addr_len; 30f9ead18cSJens Axboe int flags; 31f9ead18cSJens Axboe u32 file_slot; 32f9ead18cSJens Axboe unsigned long nofile; 33f9ead18cSJens Axboe }; 34f9ead18cSJens Axboe 35f9ead18cSJens Axboe struct io_socket { 36f9ead18cSJens Axboe struct file *file; 37f9ead18cSJens Axboe int domain; 38f9ead18cSJens Axboe int type; 39f9ead18cSJens Axboe int protocol; 40f9ead18cSJens Axboe int flags; 41f9ead18cSJens Axboe u32 file_slot; 42f9ead18cSJens Axboe unsigned long nofile; 43f9ead18cSJens Axboe }; 44f9ead18cSJens Axboe 45f9ead18cSJens Axboe struct io_connect { 46f9ead18cSJens Axboe struct file *file; 47f9ead18cSJens Axboe struct sockaddr __user *addr; 48f9ead18cSJens Axboe int addr_len; 49f9ead18cSJens Axboe }; 50f9ead18cSJens Axboe 51f9ead18cSJens Axboe struct io_sr_msg { 52f9ead18cSJens Axboe struct file *file; 53f9ead18cSJens Axboe union { 54f9ead18cSJens Axboe struct compat_msghdr __user *umsg_compat; 55f9ead18cSJens Axboe struct user_msghdr __user *umsg; 56f9ead18cSJens Axboe void __user *buf; 57f9ead18cSJens Axboe }; 580b048557SPavel Begunkov unsigned len; 590b048557SPavel Begunkov unsigned done_io; 60293402e5SPavel Begunkov unsigned msg_flags; 610b048557SPavel Begunkov u16 flags; 62516e82f0SPavel Begunkov /* initialised and used only by !msg send variants */ 630b048557SPavel Begunkov u16 addr_len; 64092aeedbSPavel Begunkov void __user *addr; 65516e82f0SPavel Begunkov /* used only for send zerocopy */ 66b48c312bSPavel Begunkov struct io_kiocb *notif; 6706a5464bSPavel Begunkov }; 6806a5464bSPavel Begunkov 69f9ead18cSJens Axboe #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) 70f9ead18cSJens Axboe 71f9ead18cSJens Axboe int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 72f9ead18cSJens Axboe { 73f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 74f9ead18cSJens Axboe 75f9ead18cSJens Axboe if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 76f9ead18cSJens Axboe sqe->buf_index || sqe->splice_fd_in)) 77f9ead18cSJens Axboe return -EINVAL; 78f9ead18cSJens Axboe 79f9ead18cSJens Axboe shutdown->how = READ_ONCE(sqe->len); 80f9ead18cSJens Axboe return 0; 81f9ead18cSJens Axboe } 82f9ead18cSJens Axboe 83f9ead18cSJens Axboe int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 84f9ead18cSJens Axboe { 85f2ccb5aeSStefan Metzmacher struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 86f9ead18cSJens Axboe struct socket *sock; 87f9ead18cSJens Axboe int ret; 88f9ead18cSJens Axboe 89f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 90f9ead18cSJens Axboe return -EAGAIN; 91f9ead18cSJens Axboe 92f9ead18cSJens Axboe sock = sock_from_file(req->file); 93f9ead18cSJens Axboe if (unlikely(!sock)) 94f9ead18cSJens Axboe return -ENOTSOCK; 95f9ead18cSJens Axboe 96f9ead18cSJens Axboe ret = __sys_shutdown_sock(sock, shutdown->how); 97f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 98f9ead18cSJens Axboe return IOU_OK; 99f9ead18cSJens Axboe } 100f9ead18cSJens Axboe 101f9ead18cSJens Axboe static bool io_net_retry(struct socket *sock, int flags) 102f9ead18cSJens Axboe { 103f9ead18cSJens Axboe if (!(flags & MSG_WAITALL)) 104f9ead18cSJens Axboe return false; 105f9ead18cSJens Axboe return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 106f9ead18cSJens Axboe } 107f9ead18cSJens Axboe 10843e0bbbdSJens Axboe static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 10943e0bbbdSJens Axboe { 11043e0bbbdSJens Axboe struct io_async_msghdr *hdr = req->async_data; 11143e0bbbdSJens Axboe 11206360426SPavel Begunkov if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED) 11343e0bbbdSJens Axboe return; 11443e0bbbdSJens Axboe 11543e0bbbdSJens Axboe /* Let normal cleanup path reap it if we fail adding to the cache */ 11643e0bbbdSJens Axboe if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) { 11743e0bbbdSJens Axboe req->async_data = NULL; 11843e0bbbdSJens Axboe req->flags &= ~REQ_F_ASYNC_DATA; 11943e0bbbdSJens Axboe } 12043e0bbbdSJens Axboe } 12143e0bbbdSJens Axboe 122858c293eSPavel Begunkov static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req, 12343e0bbbdSJens Axboe unsigned int issue_flags) 12443e0bbbdSJens Axboe { 12543e0bbbdSJens Axboe struct io_ring_ctx *ctx = req->ctx; 12643e0bbbdSJens Axboe struct io_cache_entry *entry; 1274c17a496SPavel Begunkov struct io_async_msghdr *hdr; 12843e0bbbdSJens Axboe 12943e0bbbdSJens Axboe if (!(issue_flags & IO_URING_F_UNLOCKED) && 13043e0bbbdSJens Axboe (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) { 13143e0bbbdSJens Axboe hdr = container_of(entry, struct io_async_msghdr, cache); 1324c17a496SPavel Begunkov hdr->free_iov = NULL; 13343e0bbbdSJens Axboe req->flags |= REQ_F_ASYNC_DATA; 13443e0bbbdSJens Axboe req->async_data = hdr; 13543e0bbbdSJens Axboe return hdr; 13643e0bbbdSJens Axboe } 13743e0bbbdSJens Axboe 1384c17a496SPavel Begunkov if (!io_alloc_async_data(req)) { 1394c17a496SPavel Begunkov hdr = req->async_data; 1404c17a496SPavel Begunkov hdr->free_iov = NULL; 1414c17a496SPavel Begunkov return hdr; 1424c17a496SPavel Begunkov } 14343e0bbbdSJens Axboe return NULL; 14443e0bbbdSJens Axboe } 14543e0bbbdSJens Axboe 146858c293eSPavel Begunkov static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req) 147858c293eSPavel Begunkov { 148858c293eSPavel Begunkov /* ->prep_async is always called from the submission context */ 149858c293eSPavel Begunkov return io_msg_alloc_async(req, 0); 150858c293eSPavel Begunkov } 151858c293eSPavel Begunkov 152f9ead18cSJens Axboe static int io_setup_async_msg(struct io_kiocb *req, 15343e0bbbdSJens Axboe struct io_async_msghdr *kmsg, 15443e0bbbdSJens Axboe unsigned int issue_flags) 155f9ead18cSJens Axboe { 1563f743e9bSPavel Begunkov struct io_async_msghdr *async_msg; 157f9ead18cSJens Axboe 1583f743e9bSPavel Begunkov if (req_has_async_data(req)) 159f9ead18cSJens Axboe return -EAGAIN; 160858c293eSPavel Begunkov async_msg = io_msg_alloc_async(req, issue_flags); 16143e0bbbdSJens Axboe if (!async_msg) { 162f9ead18cSJens Axboe kfree(kmsg->free_iov); 163f9ead18cSJens Axboe return -ENOMEM; 164f9ead18cSJens Axboe } 165f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 166f9ead18cSJens Axboe memcpy(async_msg, kmsg, sizeof(*kmsg)); 167f9ead18cSJens Axboe async_msg->msg.msg_name = &async_msg->addr; 168f9ead18cSJens Axboe /* if were using fast_iov, set it to the new one */ 169f9ead18cSJens Axboe if (!async_msg->free_iov) 170f9ead18cSJens Axboe async_msg->msg.msg_iter.iov = async_msg->fast_iov; 171f9ead18cSJens Axboe 172f9ead18cSJens Axboe return -EAGAIN; 173f9ead18cSJens Axboe } 174f9ead18cSJens Axboe 175f9ead18cSJens Axboe static int io_sendmsg_copy_hdr(struct io_kiocb *req, 176f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 177f9ead18cSJens Axboe { 178f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 179f9ead18cSJens Axboe 180f9ead18cSJens Axboe iomsg->msg.msg_name = &iomsg->addr; 181f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 182f9ead18cSJens Axboe return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, 183f9ead18cSJens Axboe &iomsg->free_iov); 184f9ead18cSJens Axboe } 185f9ead18cSJens Axboe 186516e82f0SPavel Begunkov int io_send_prep_async(struct io_kiocb *req) 187581711c4SPavel Begunkov { 188ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 189581711c4SPavel Begunkov struct io_async_msghdr *io; 190581711c4SPavel Begunkov int ret; 191581711c4SPavel Begunkov 192581711c4SPavel Begunkov if (!zc->addr || req_has_async_data(req)) 193581711c4SPavel Begunkov return 0; 1946bf8ad25SPavel Begunkov io = io_msg_alloc_async_prep(req); 1956bf8ad25SPavel Begunkov if (!io) 196581711c4SPavel Begunkov return -ENOMEM; 197581711c4SPavel Begunkov ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); 198581711c4SPavel Begunkov return ret; 199581711c4SPavel Begunkov } 200581711c4SPavel Begunkov 201581711c4SPavel Begunkov static int io_setup_async_addr(struct io_kiocb *req, 2026ae61b7aSPavel Begunkov struct sockaddr_storage *addr_storage, 203581711c4SPavel Begunkov unsigned int issue_flags) 204581711c4SPavel Begunkov { 2056ae61b7aSPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 206581711c4SPavel Begunkov struct io_async_msghdr *io; 207581711c4SPavel Begunkov 2086ae61b7aSPavel Begunkov if (!sr->addr || req_has_async_data(req)) 209581711c4SPavel Begunkov return -EAGAIN; 2106bf8ad25SPavel Begunkov io = io_msg_alloc_async(req, issue_flags); 2116bf8ad25SPavel Begunkov if (!io) 212581711c4SPavel Begunkov return -ENOMEM; 2136ae61b7aSPavel Begunkov memcpy(&io->addr, addr_storage, sizeof(io->addr)); 214581711c4SPavel Begunkov return -EAGAIN; 215581711c4SPavel Begunkov } 216581711c4SPavel Begunkov 217f9ead18cSJens Axboe int io_sendmsg_prep_async(struct io_kiocb *req) 218f9ead18cSJens Axboe { 219f9ead18cSJens Axboe int ret; 220f9ead18cSJens Axboe 221858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 222858c293eSPavel Begunkov return -ENOMEM; 223f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, req->async_data); 224f9ead18cSJens Axboe if (!ret) 225f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 226f9ead18cSJens Axboe return ret; 227f9ead18cSJens Axboe } 228f9ead18cSJens Axboe 229f9ead18cSJens Axboe void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 230f9ead18cSJens Axboe { 231f9ead18cSJens Axboe struct io_async_msghdr *io = req->async_data; 232f9ead18cSJens Axboe 233f9ead18cSJens Axboe kfree(io->free_iov); 234f9ead18cSJens Axboe } 235f9ead18cSJens Axboe 236f9ead18cSJens Axboe int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 237f9ead18cSJens Axboe { 238f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 239f9ead18cSJens Axboe 240516e82f0SPavel Begunkov if (req->opcode == IORING_OP_SEND) { 241516e82f0SPavel Begunkov if (READ_ONCE(sqe->__pad3[0])) 242f9ead18cSJens Axboe return -EINVAL; 243516e82f0SPavel Begunkov sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 244516e82f0SPavel Begunkov sr->addr_len = READ_ONCE(sqe->addr_len); 245516e82f0SPavel Begunkov } else if (sqe->addr2 || sqe->file_index) { 246516e82f0SPavel Begunkov return -EINVAL; 247516e82f0SPavel Begunkov } 248f9ead18cSJens Axboe 249f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 250f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 251f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 252f9ead18cSJens Axboe if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) 253f9ead18cSJens Axboe return -EINVAL; 254f9ead18cSJens Axboe sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 255f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 256f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 257f9ead18cSJens Axboe 258f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 259f9ead18cSJens Axboe if (req->ctx->compat) 260f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 261f9ead18cSJens Axboe #endif 262f9ead18cSJens Axboe sr->done_io = 0; 263f9ead18cSJens Axboe return 0; 264f9ead18cSJens Axboe } 265f9ead18cSJens Axboe 266f9ead18cSJens Axboe int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 267f9ead18cSJens Axboe { 268f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 269f9ead18cSJens Axboe struct io_async_msghdr iomsg, *kmsg; 270f9ead18cSJens Axboe struct socket *sock; 271f9ead18cSJens Axboe unsigned flags; 272f9ead18cSJens Axboe int min_ret = 0; 273f9ead18cSJens Axboe int ret; 274f9ead18cSJens Axboe 275f9ead18cSJens Axboe sock = sock_from_file(req->file); 276f9ead18cSJens Axboe if (unlikely(!sock)) 277f9ead18cSJens Axboe return -ENOTSOCK; 278f9ead18cSJens Axboe 279f9ead18cSJens Axboe if (req_has_async_data(req)) { 280f9ead18cSJens Axboe kmsg = req->async_data; 281f9ead18cSJens Axboe } else { 282f9ead18cSJens Axboe ret = io_sendmsg_copy_hdr(req, &iomsg); 283f9ead18cSJens Axboe if (ret) 284f9ead18cSJens Axboe return ret; 285f9ead18cSJens Axboe kmsg = &iomsg; 286f9ead18cSJens Axboe } 287f9ead18cSJens Axboe 288f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 289f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 29043e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 291f9ead18cSJens Axboe 292f9ead18cSJens Axboe flags = sr->msg_flags; 293f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 294f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 295f9ead18cSJens Axboe if (flags & MSG_WAITALL) 296f9ead18cSJens Axboe min_ret = iov_iter_count(&kmsg->msg.msg_iter); 297f9ead18cSJens Axboe 298f9ead18cSJens Axboe ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 299f9ead18cSJens Axboe 300f9ead18cSJens Axboe if (ret < min_ret) { 301f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 30243e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 303f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 304f9ead18cSJens Axboe sr->done_io += ret; 305f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 30643e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 307f9ead18cSJens Axboe } 30895eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 30995eafc74SPavel Begunkov ret = -EINTR; 310f9ead18cSJens Axboe req_set_fail(req); 311f9ead18cSJens Axboe } 312f9ead18cSJens Axboe /* fast path, check for non-NULL to avoid function call */ 313f9ead18cSJens Axboe if (kmsg->free_iov) 314f9ead18cSJens Axboe kfree(kmsg->free_iov); 315f9ead18cSJens Axboe req->flags &= ~REQ_F_NEED_CLEANUP; 31643e0bbbdSJens Axboe io_netmsg_recycle(req, issue_flags); 317f9ead18cSJens Axboe if (ret >= 0) 318f9ead18cSJens Axboe ret += sr->done_io; 319f9ead18cSJens Axboe else if (sr->done_io) 320f9ead18cSJens Axboe ret = sr->done_io; 321f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 322f9ead18cSJens Axboe return IOU_OK; 323f9ead18cSJens Axboe } 324f9ead18cSJens Axboe 325f9ead18cSJens Axboe int io_send(struct io_kiocb *req, unsigned int issue_flags) 326f9ead18cSJens Axboe { 327516e82f0SPavel Begunkov struct sockaddr_storage __address; 328f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 329f9ead18cSJens Axboe struct msghdr msg; 330f9ead18cSJens Axboe struct iovec iov; 331f9ead18cSJens Axboe struct socket *sock; 332f9ead18cSJens Axboe unsigned flags; 333f9ead18cSJens Axboe int min_ret = 0; 334f9ead18cSJens Axboe int ret; 335f9ead18cSJens Axboe 336*04360d3eSPavel Begunkov msg.msg_name = NULL; 337*04360d3eSPavel Begunkov msg.msg_control = NULL; 338*04360d3eSPavel Begunkov msg.msg_controllen = 0; 339*04360d3eSPavel Begunkov msg.msg_namelen = 0; 340*04360d3eSPavel Begunkov msg.msg_ubuf = NULL; 341*04360d3eSPavel Begunkov 342516e82f0SPavel Begunkov if (sr->addr) { 343516e82f0SPavel Begunkov if (req_has_async_data(req)) { 344516e82f0SPavel Begunkov struct io_async_msghdr *io = req->async_data; 345516e82f0SPavel Begunkov 346516e82f0SPavel Begunkov msg.msg_name = &io->addr; 347516e82f0SPavel Begunkov } else { 348516e82f0SPavel Begunkov ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address); 349516e82f0SPavel Begunkov if (unlikely(ret < 0)) 350516e82f0SPavel Begunkov return ret; 351516e82f0SPavel Begunkov msg.msg_name = (struct sockaddr *)&__address; 352516e82f0SPavel Begunkov } 353516e82f0SPavel Begunkov msg.msg_namelen = sr->addr_len; 354516e82f0SPavel Begunkov } 355516e82f0SPavel Begunkov 356f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 357f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 358516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 359f9ead18cSJens Axboe 360f9ead18cSJens Axboe sock = sock_from_file(req->file); 361f9ead18cSJens Axboe if (unlikely(!sock)) 362f9ead18cSJens Axboe return -ENOTSOCK; 363f9ead18cSJens Axboe 364f9ead18cSJens Axboe ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); 365f9ead18cSJens Axboe if (unlikely(ret)) 366f9ead18cSJens Axboe return ret; 367f9ead18cSJens Axboe 368f9ead18cSJens Axboe flags = sr->msg_flags; 369f9ead18cSJens Axboe if (issue_flags & IO_URING_F_NONBLOCK) 370f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 371f9ead18cSJens Axboe if (flags & MSG_WAITALL) 372f9ead18cSJens Axboe min_ret = iov_iter_count(&msg.msg_iter); 373f9ead18cSJens Axboe 374f9ead18cSJens Axboe msg.msg_flags = flags; 375f9ead18cSJens Axboe ret = sock_sendmsg(sock, &msg); 376f9ead18cSJens Axboe if (ret < min_ret) { 377f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 378516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 379516e82f0SPavel Begunkov 380f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 381f9ead18cSJens Axboe sr->len -= ret; 382f9ead18cSJens Axboe sr->buf += ret; 383f9ead18cSJens Axboe sr->done_io += ret; 384f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 385516e82f0SPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 386f9ead18cSJens Axboe } 38795eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 38895eafc74SPavel Begunkov ret = -EINTR; 389f9ead18cSJens Axboe req_set_fail(req); 390f9ead18cSJens Axboe } 391f9ead18cSJens Axboe if (ret >= 0) 392f9ead18cSJens Axboe ret += sr->done_io; 393f9ead18cSJens Axboe else if (sr->done_io) 394f9ead18cSJens Axboe ret = sr->done_io; 395f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 396f9ead18cSJens Axboe return IOU_OK; 397f9ead18cSJens Axboe } 398f9ead18cSJens Axboe 3999bb66906SDylan Yudaken static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) 4009bb66906SDylan Yudaken { 4019b0fc3c0SDylan Yudaken int hdr; 4029bb66906SDylan Yudaken 4039b0fc3c0SDylan Yudaken if (iomsg->namelen < 0) 4049bb66906SDylan Yudaken return true; 4059b0fc3c0SDylan Yudaken if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), 4069b0fc3c0SDylan Yudaken iomsg->namelen, &hdr)) 4079bb66906SDylan Yudaken return true; 4089b0fc3c0SDylan Yudaken if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) 4099bb66906SDylan Yudaken return true; 4109bb66906SDylan Yudaken 4119bb66906SDylan Yudaken return false; 4129bb66906SDylan Yudaken } 4139bb66906SDylan Yudaken 414f9ead18cSJens Axboe static int __io_recvmsg_copy_hdr(struct io_kiocb *req, 415f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 416f9ead18cSJens Axboe { 417f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 4187fa875b8SDylan Yudaken struct user_msghdr msg; 419f9ead18cSJens Axboe int ret; 420f9ead18cSJens Axboe 4217fa875b8SDylan Yudaken if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) 4227fa875b8SDylan Yudaken return -EFAULT; 4237fa875b8SDylan Yudaken 4247fa875b8SDylan Yudaken ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 425f9ead18cSJens Axboe if (ret) 426f9ead18cSJens Axboe return ret; 427f9ead18cSJens Axboe 428f9ead18cSJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 4297fa875b8SDylan Yudaken if (msg.msg_iovlen == 0) { 4305702196eSDylan Yudaken sr->len = iomsg->fast_iov[0].iov_len = 0; 4315702196eSDylan Yudaken iomsg->fast_iov[0].iov_base = NULL; 4325702196eSDylan Yudaken iomsg->free_iov = NULL; 4337fa875b8SDylan Yudaken } else if (msg.msg_iovlen > 1) { 434f9ead18cSJens Axboe return -EINVAL; 4355702196eSDylan Yudaken } else { 4367fa875b8SDylan Yudaken if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) 437f9ead18cSJens Axboe return -EFAULT; 438f9ead18cSJens Axboe sr->len = iomsg->fast_iov[0].iov_len; 439f9ead18cSJens Axboe iomsg->free_iov = NULL; 4405702196eSDylan Yudaken } 4419bb66906SDylan Yudaken 4429bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 4439bb66906SDylan Yudaken iomsg->namelen = msg.msg_namelen; 4449bb66906SDylan Yudaken iomsg->controllen = msg.msg_controllen; 4459bb66906SDylan Yudaken if (io_recvmsg_multishot_overflow(iomsg)) 4469bb66906SDylan Yudaken return -EOVERFLOW; 4479bb66906SDylan Yudaken } 448f9ead18cSJens Axboe } else { 449f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 4507fa875b8SDylan Yudaken ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, 451f9ead18cSJens Axboe &iomsg->free_iov, &iomsg->msg.msg_iter, 452f9ead18cSJens Axboe false); 453f9ead18cSJens Axboe if (ret > 0) 454f9ead18cSJens Axboe ret = 0; 455f9ead18cSJens Axboe } 456f9ead18cSJens Axboe 457f9ead18cSJens Axboe return ret; 458f9ead18cSJens Axboe } 459f9ead18cSJens Axboe 460f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 461f9ead18cSJens Axboe static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, 462f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 463f9ead18cSJens Axboe { 464f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 46572c531f8SDylan Yudaken struct compat_msghdr msg; 466f9ead18cSJens Axboe struct compat_iovec __user *uiov; 467f9ead18cSJens Axboe int ret; 468f9ead18cSJens Axboe 46972c531f8SDylan Yudaken if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) 47072c531f8SDylan Yudaken return -EFAULT; 47172c531f8SDylan Yudaken 4724f6a94d3SJens Axboe ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 473f9ead18cSJens Axboe if (ret) 474f9ead18cSJens Axboe return ret; 475f9ead18cSJens Axboe 47672c531f8SDylan Yudaken uiov = compat_ptr(msg.msg_iov); 477f9ead18cSJens Axboe if (req->flags & REQ_F_BUFFER_SELECT) { 478f9ead18cSJens Axboe compat_ssize_t clen; 479f9ead18cSJens Axboe 48072c531f8SDylan Yudaken if (msg.msg_iovlen == 0) { 4816d2f75a0SDylan Yudaken sr->len = 0; 48272c531f8SDylan Yudaken } else if (msg.msg_iovlen > 1) { 483f9ead18cSJens Axboe return -EINVAL; 4846d2f75a0SDylan Yudaken } else { 485f9ead18cSJens Axboe if (!access_ok(uiov, sizeof(*uiov))) 486f9ead18cSJens Axboe return -EFAULT; 487f9ead18cSJens Axboe if (__get_user(clen, &uiov->iov_len)) 488f9ead18cSJens Axboe return -EFAULT; 489f9ead18cSJens Axboe if (clen < 0) 490f9ead18cSJens Axboe return -EINVAL; 491f9ead18cSJens Axboe sr->len = clen; 4926d2f75a0SDylan Yudaken } 4939bb66906SDylan Yudaken 4949bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 4959bb66906SDylan Yudaken iomsg->namelen = msg.msg_namelen; 4969bb66906SDylan Yudaken iomsg->controllen = msg.msg_controllen; 4979bb66906SDylan Yudaken if (io_recvmsg_multishot_overflow(iomsg)) 4989bb66906SDylan Yudaken return -EOVERFLOW; 4999bb66906SDylan Yudaken } 500f9ead18cSJens Axboe } else { 501f9ead18cSJens Axboe iomsg->free_iov = iomsg->fast_iov; 50272c531f8SDylan Yudaken ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen, 503f9ead18cSJens Axboe UIO_FASTIOV, &iomsg->free_iov, 504f9ead18cSJens Axboe &iomsg->msg.msg_iter, true); 505f9ead18cSJens Axboe if (ret < 0) 506f9ead18cSJens Axboe return ret; 507f9ead18cSJens Axboe } 508f9ead18cSJens Axboe 509f9ead18cSJens Axboe return 0; 510f9ead18cSJens Axboe } 511f9ead18cSJens Axboe #endif 512f9ead18cSJens Axboe 513f9ead18cSJens Axboe static int io_recvmsg_copy_hdr(struct io_kiocb *req, 514f9ead18cSJens Axboe struct io_async_msghdr *iomsg) 515f9ead18cSJens Axboe { 516f9ead18cSJens Axboe iomsg->msg.msg_name = &iomsg->addr; 517f9ead18cSJens Axboe 518f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 519f9ead18cSJens Axboe if (req->ctx->compat) 520f9ead18cSJens Axboe return __io_compat_recvmsg_copy_hdr(req, iomsg); 521f9ead18cSJens Axboe #endif 522f9ead18cSJens Axboe 523f9ead18cSJens Axboe return __io_recvmsg_copy_hdr(req, iomsg); 524f9ead18cSJens Axboe } 525f9ead18cSJens Axboe 526f9ead18cSJens Axboe int io_recvmsg_prep_async(struct io_kiocb *req) 527f9ead18cSJens Axboe { 528f9ead18cSJens Axboe int ret; 529f9ead18cSJens Axboe 530858c293eSPavel Begunkov if (!io_msg_alloc_async_prep(req)) 531858c293eSPavel Begunkov return -ENOMEM; 532f9ead18cSJens Axboe ret = io_recvmsg_copy_hdr(req, req->async_data); 533f9ead18cSJens Axboe if (!ret) 534f9ead18cSJens Axboe req->flags |= REQ_F_NEED_CLEANUP; 535f9ead18cSJens Axboe return ret; 536f9ead18cSJens Axboe } 537f9ead18cSJens Axboe 538b3fdea6eSDylan Yudaken #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT) 539b3fdea6eSDylan Yudaken 540f9ead18cSJens Axboe int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 541f9ead18cSJens Axboe { 542f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 543f9ead18cSJens Axboe 544f9ead18cSJens Axboe if (unlikely(sqe->file_index || sqe->addr2)) 545f9ead18cSJens Axboe return -EINVAL; 546f9ead18cSJens Axboe 547f9ead18cSJens Axboe sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 548f9ead18cSJens Axboe sr->len = READ_ONCE(sqe->len); 549f9ead18cSJens Axboe sr->flags = READ_ONCE(sqe->ioprio); 550b3fdea6eSDylan Yudaken if (sr->flags & ~(RECVMSG_FLAGS)) 551f9ead18cSJens Axboe return -EINVAL; 552f9ead18cSJens Axboe sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 553f9ead18cSJens Axboe if (sr->msg_flags & MSG_DONTWAIT) 554f9ead18cSJens Axboe req->flags |= REQ_F_NOWAIT; 555f9ead18cSJens Axboe if (sr->msg_flags & MSG_ERRQUEUE) 556f9ead18cSJens Axboe req->flags |= REQ_F_CLEAR_POLLIN; 557b3fdea6eSDylan Yudaken if (sr->flags & IORING_RECV_MULTISHOT) { 558b3fdea6eSDylan Yudaken if (!(req->flags & REQ_F_BUFFER_SELECT)) 559b3fdea6eSDylan Yudaken return -EINVAL; 560b3fdea6eSDylan Yudaken if (sr->msg_flags & MSG_WAITALL) 561b3fdea6eSDylan Yudaken return -EINVAL; 562b3fdea6eSDylan Yudaken if (req->opcode == IORING_OP_RECV && sr->len) 563b3fdea6eSDylan Yudaken return -EINVAL; 564b3fdea6eSDylan Yudaken req->flags |= REQ_F_APOLL_MULTISHOT; 565b3fdea6eSDylan Yudaken } 566f9ead18cSJens Axboe 567f9ead18cSJens Axboe #ifdef CONFIG_COMPAT 568f9ead18cSJens Axboe if (req->ctx->compat) 569f9ead18cSJens Axboe sr->msg_flags |= MSG_CMSG_COMPAT; 570f9ead18cSJens Axboe #endif 571f9ead18cSJens Axboe sr->done_io = 0; 572f9ead18cSJens Axboe return 0; 573f9ead18cSJens Axboe } 574f9ead18cSJens Axboe 575b3fdea6eSDylan Yudaken static inline void io_recv_prep_retry(struct io_kiocb *req) 576b3fdea6eSDylan Yudaken { 577f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 578b3fdea6eSDylan Yudaken 579b3fdea6eSDylan Yudaken sr->done_io = 0; 580b3fdea6eSDylan Yudaken sr->len = 0; /* get from the provided buffer */ 581b3fdea6eSDylan Yudaken } 582b3fdea6eSDylan Yudaken 583b3fdea6eSDylan Yudaken /* 5849bb66906SDylan Yudaken * Finishes io_recv and io_recvmsg. 585b3fdea6eSDylan Yudaken * 586b3fdea6eSDylan Yudaken * Returns true if it is actually finished, or false if it should run 587b3fdea6eSDylan Yudaken * again (for multishot). 588b3fdea6eSDylan Yudaken */ 5899bb66906SDylan Yudaken static inline bool io_recv_finish(struct io_kiocb *req, int *ret, 5909bb66906SDylan Yudaken unsigned int cflags, bool mshot_finished) 591b3fdea6eSDylan Yudaken { 592b3fdea6eSDylan Yudaken if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 593b3fdea6eSDylan Yudaken io_req_set_res(req, *ret, cflags); 594b3fdea6eSDylan Yudaken *ret = IOU_OK; 595b3fdea6eSDylan Yudaken return true; 596b3fdea6eSDylan Yudaken } 597b3fdea6eSDylan Yudaken 5989bb66906SDylan Yudaken if (!mshot_finished) { 599b3fdea6eSDylan Yudaken if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret, 600b3fdea6eSDylan Yudaken cflags | IORING_CQE_F_MORE, false)) { 601b3fdea6eSDylan Yudaken io_recv_prep_retry(req); 602b3fdea6eSDylan Yudaken return false; 603b3fdea6eSDylan Yudaken } 604b3fdea6eSDylan Yudaken /* 605b3fdea6eSDylan Yudaken * Otherwise stop multishot but use the current result. 606b3fdea6eSDylan Yudaken * Probably will end up going into overflow, but this means 607b3fdea6eSDylan Yudaken * we cannot trust the ordering anymore 608b3fdea6eSDylan Yudaken */ 609b3fdea6eSDylan Yudaken } 610b3fdea6eSDylan Yudaken 611b3fdea6eSDylan Yudaken io_req_set_res(req, *ret, cflags); 612b3fdea6eSDylan Yudaken 613b3fdea6eSDylan Yudaken if (req->flags & REQ_F_POLLED) 614b3fdea6eSDylan Yudaken *ret = IOU_STOP_MULTISHOT; 615e2df2ccbSDylan Yudaken else 616e2df2ccbSDylan Yudaken *ret = IOU_OK; 617b3fdea6eSDylan Yudaken return true; 618b3fdea6eSDylan Yudaken } 619b3fdea6eSDylan Yudaken 6209bb66906SDylan Yudaken static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 6219bb66906SDylan Yudaken struct io_sr_msg *sr, void __user **buf, 6229bb66906SDylan Yudaken size_t *len) 6239bb66906SDylan Yudaken { 6249bb66906SDylan Yudaken unsigned long ubuf = (unsigned long) *buf; 6259bb66906SDylan Yudaken unsigned long hdr; 6269bb66906SDylan Yudaken 6279bb66906SDylan Yudaken hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 6289bb66906SDylan Yudaken kmsg->controllen; 6299bb66906SDylan Yudaken if (*len < hdr) 6309bb66906SDylan Yudaken return -EFAULT; 6319bb66906SDylan Yudaken 6329bb66906SDylan Yudaken if (kmsg->controllen) { 6339bb66906SDylan Yudaken unsigned long control = ubuf + hdr - kmsg->controllen; 6349bb66906SDylan Yudaken 635d1f6222cSDylan Yudaken kmsg->msg.msg_control_user = (void __user *) control; 6369bb66906SDylan Yudaken kmsg->msg.msg_controllen = kmsg->controllen; 6379bb66906SDylan Yudaken } 6389bb66906SDylan Yudaken 6399bb66906SDylan Yudaken sr->buf = *buf; /* stash for later copy */ 640d1f6222cSDylan Yudaken *buf = (void __user *) (ubuf + hdr); 6419bb66906SDylan Yudaken kmsg->payloadlen = *len = *len - hdr; 6429bb66906SDylan Yudaken return 0; 6439bb66906SDylan Yudaken } 6449bb66906SDylan Yudaken 6459bb66906SDylan Yudaken struct io_recvmsg_multishot_hdr { 6469bb66906SDylan Yudaken struct io_uring_recvmsg_out msg; 6479bb66906SDylan Yudaken struct sockaddr_storage addr; 6489bb66906SDylan Yudaken }; 6499bb66906SDylan Yudaken 6509bb66906SDylan Yudaken static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 6519bb66906SDylan Yudaken struct io_async_msghdr *kmsg, 6529bb66906SDylan Yudaken unsigned int flags, bool *finished) 6539bb66906SDylan Yudaken { 6549bb66906SDylan Yudaken int err; 6559bb66906SDylan Yudaken int copy_len; 6569bb66906SDylan Yudaken struct io_recvmsg_multishot_hdr hdr; 6579bb66906SDylan Yudaken 6589bb66906SDylan Yudaken if (kmsg->namelen) 6599bb66906SDylan Yudaken kmsg->msg.msg_name = &hdr.addr; 6609bb66906SDylan Yudaken kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 6619bb66906SDylan Yudaken kmsg->msg.msg_namelen = 0; 6629bb66906SDylan Yudaken 6639bb66906SDylan Yudaken if (sock->file->f_flags & O_NONBLOCK) 6649bb66906SDylan Yudaken flags |= MSG_DONTWAIT; 6659bb66906SDylan Yudaken 6669bb66906SDylan Yudaken err = sock_recvmsg(sock, &kmsg->msg, flags); 6679bb66906SDylan Yudaken *finished = err <= 0; 6689bb66906SDylan Yudaken if (err < 0) 6699bb66906SDylan Yudaken return err; 6709bb66906SDylan Yudaken 6719bb66906SDylan Yudaken hdr.msg = (struct io_uring_recvmsg_out) { 6729bb66906SDylan Yudaken .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 6739bb66906SDylan Yudaken .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 6749bb66906SDylan Yudaken }; 6759bb66906SDylan Yudaken 6769bb66906SDylan Yudaken hdr.msg.payloadlen = err; 6779bb66906SDylan Yudaken if (err > kmsg->payloadlen) 6789bb66906SDylan Yudaken err = kmsg->payloadlen; 6799bb66906SDylan Yudaken 6809bb66906SDylan Yudaken copy_len = sizeof(struct io_uring_recvmsg_out); 6819bb66906SDylan Yudaken if (kmsg->msg.msg_namelen > kmsg->namelen) 6829bb66906SDylan Yudaken copy_len += kmsg->namelen; 6839bb66906SDylan Yudaken else 6849bb66906SDylan Yudaken copy_len += kmsg->msg.msg_namelen; 6859bb66906SDylan Yudaken 6869bb66906SDylan Yudaken /* 6879bb66906SDylan Yudaken * "fromlen shall refer to the value before truncation.." 6889bb66906SDylan Yudaken * 1003.1g 6899bb66906SDylan Yudaken */ 6909bb66906SDylan Yudaken hdr.msg.namelen = kmsg->msg.msg_namelen; 6919bb66906SDylan Yudaken 6929bb66906SDylan Yudaken /* ensure that there is no gap between hdr and sockaddr_storage */ 6939bb66906SDylan Yudaken BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 6949bb66906SDylan Yudaken sizeof(struct io_uring_recvmsg_out)); 6959bb66906SDylan Yudaken if (copy_to_user(io->buf, &hdr, copy_len)) { 6969bb66906SDylan Yudaken *finished = true; 6979bb66906SDylan Yudaken return -EFAULT; 6989bb66906SDylan Yudaken } 6999bb66906SDylan Yudaken 7009bb66906SDylan Yudaken return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 7019bb66906SDylan Yudaken kmsg->controllen + err; 7029bb66906SDylan Yudaken } 7039bb66906SDylan Yudaken 704f9ead18cSJens Axboe int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 705f9ead18cSJens Axboe { 706f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 707f9ead18cSJens Axboe struct io_async_msghdr iomsg, *kmsg; 708f9ead18cSJens Axboe struct socket *sock; 709f9ead18cSJens Axboe unsigned int cflags; 710f9ead18cSJens Axboe unsigned flags; 711f9ead18cSJens Axboe int ret, min_ret = 0; 712f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 7139bb66906SDylan Yudaken bool mshot_finished = true; 714f9ead18cSJens Axboe 715f9ead18cSJens Axboe sock = sock_from_file(req->file); 716f9ead18cSJens Axboe if (unlikely(!sock)) 717f9ead18cSJens Axboe return -ENOTSOCK; 718f9ead18cSJens Axboe 719f9ead18cSJens Axboe if (req_has_async_data(req)) { 720f9ead18cSJens Axboe kmsg = req->async_data; 721f9ead18cSJens Axboe } else { 722f9ead18cSJens Axboe ret = io_recvmsg_copy_hdr(req, &iomsg); 723f9ead18cSJens Axboe if (ret) 724f9ead18cSJens Axboe return ret; 725f9ead18cSJens Axboe kmsg = &iomsg; 726f9ead18cSJens Axboe } 727f9ead18cSJens Axboe 728f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 729f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 73043e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 731f9ead18cSJens Axboe 7329bb66906SDylan Yudaken retry_multishot: 733f9ead18cSJens Axboe if (io_do_buffer_select(req)) { 734f9ead18cSJens Axboe void __user *buf; 7359bb66906SDylan Yudaken size_t len = sr->len; 736f9ead18cSJens Axboe 7379bb66906SDylan Yudaken buf = io_buffer_select(req, &len, issue_flags); 738f9ead18cSJens Axboe if (!buf) 739f9ead18cSJens Axboe return -ENOBUFS; 7409bb66906SDylan Yudaken 7419bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) { 7429bb66906SDylan Yudaken ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); 7439bb66906SDylan Yudaken if (ret) { 7449bb66906SDylan Yudaken io_kbuf_recycle(req, issue_flags); 7459bb66906SDylan Yudaken return ret; 7469bb66906SDylan Yudaken } 7479bb66906SDylan Yudaken } 7489bb66906SDylan Yudaken 749f9ead18cSJens Axboe kmsg->fast_iov[0].iov_base = buf; 7509bb66906SDylan Yudaken kmsg->fast_iov[0].iov_len = len; 751f9ead18cSJens Axboe iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1, 7529bb66906SDylan Yudaken len); 753f9ead18cSJens Axboe } 754f9ead18cSJens Axboe 755f9ead18cSJens Axboe flags = sr->msg_flags; 756f9ead18cSJens Axboe if (force_nonblock) 757f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 758f9ead18cSJens Axboe if (flags & MSG_WAITALL) 759f9ead18cSJens Axboe min_ret = iov_iter_count(&kmsg->msg.msg_iter); 760f9ead18cSJens Axboe 761f9ead18cSJens Axboe kmsg->msg.msg_get_inq = 1; 7629bb66906SDylan Yudaken if (req->flags & REQ_F_APOLL_MULTISHOT) 7639bb66906SDylan Yudaken ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 7649bb66906SDylan Yudaken &mshot_finished); 7659bb66906SDylan Yudaken else 7669bb66906SDylan Yudaken ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 7679bb66906SDylan Yudaken kmsg->uaddr, flags); 7689bb66906SDylan Yudaken 769f9ead18cSJens Axboe if (ret < min_ret) { 7709bb66906SDylan Yudaken if (ret == -EAGAIN && force_nonblock) { 7719bb66906SDylan Yudaken ret = io_setup_async_msg(req, kmsg, issue_flags); 7729bb66906SDylan Yudaken if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) == 7739bb66906SDylan Yudaken IO_APOLL_MULTI_POLLED) { 7749bb66906SDylan Yudaken io_kbuf_recycle(req, issue_flags); 7759bb66906SDylan Yudaken return IOU_ISSUE_SKIP_COMPLETE; 7769bb66906SDylan Yudaken } 7779bb66906SDylan Yudaken return ret; 7789bb66906SDylan Yudaken } 779f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 780f9ead18cSJens Axboe sr->done_io += ret; 781f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 78243e0bbbdSJens Axboe return io_setup_async_msg(req, kmsg, issue_flags); 783f9ead18cSJens Axboe } 78495eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 78595eafc74SPavel Begunkov ret = -EINTR; 786f9ead18cSJens Axboe req_set_fail(req); 787f9ead18cSJens Axboe } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 788f9ead18cSJens Axboe req_set_fail(req); 789f9ead18cSJens Axboe } 790f9ead18cSJens Axboe 791d4e097daSDylan Yudaken if (ret > 0) 792f9ead18cSJens Axboe ret += sr->done_io; 793f9ead18cSJens Axboe else if (sr->done_io) 794f9ead18cSJens Axboe ret = sr->done_io; 795d4e097daSDylan Yudaken else 796d4e097daSDylan Yudaken io_kbuf_recycle(req, issue_flags); 797d4e097daSDylan Yudaken 798f9ead18cSJens Axboe cflags = io_put_kbuf(req, issue_flags); 799f9ead18cSJens Axboe if (kmsg->msg.msg_inq) 800f9ead18cSJens Axboe cflags |= IORING_CQE_F_SOCK_NONEMPTY; 801b3fdea6eSDylan Yudaken 8029bb66906SDylan Yudaken if (!io_recv_finish(req, &ret, cflags, mshot_finished)) 8039bb66906SDylan Yudaken goto retry_multishot; 8049bb66906SDylan Yudaken 8059bb66906SDylan Yudaken if (mshot_finished) { 8069bb66906SDylan Yudaken io_netmsg_recycle(req, issue_flags); 8079bb66906SDylan Yudaken /* fast path, check for non-NULL to avoid function call */ 8089bb66906SDylan Yudaken if (kmsg->free_iov) 8099bb66906SDylan Yudaken kfree(kmsg->free_iov); 8109bb66906SDylan Yudaken req->flags &= ~REQ_F_NEED_CLEANUP; 8119bb66906SDylan Yudaken } 8129bb66906SDylan Yudaken 8139bb66906SDylan Yudaken return ret; 814f9ead18cSJens Axboe } 815f9ead18cSJens Axboe 816f9ead18cSJens Axboe int io_recv(struct io_kiocb *req, unsigned int issue_flags) 817f9ead18cSJens Axboe { 818f2ccb5aeSStefan Metzmacher struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 819f9ead18cSJens Axboe struct msghdr msg; 820f9ead18cSJens Axboe struct socket *sock; 821f9ead18cSJens Axboe struct iovec iov; 822f9ead18cSJens Axboe unsigned int cflags; 823f9ead18cSJens Axboe unsigned flags; 824f9ead18cSJens Axboe int ret, min_ret = 0; 825f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 826b3fdea6eSDylan Yudaken size_t len = sr->len; 827f9ead18cSJens Axboe 828f9ead18cSJens Axboe if (!(req->flags & REQ_F_POLLED) && 829f9ead18cSJens Axboe (sr->flags & IORING_RECVSEND_POLL_FIRST)) 830f9ead18cSJens Axboe return -EAGAIN; 831f9ead18cSJens Axboe 832f9ead18cSJens Axboe sock = sock_from_file(req->file); 833f9ead18cSJens Axboe if (unlikely(!sock)) 834f9ead18cSJens Axboe return -ENOTSOCK; 835f9ead18cSJens Axboe 836b3fdea6eSDylan Yudaken retry_multishot: 837f9ead18cSJens Axboe if (io_do_buffer_select(req)) { 838f9ead18cSJens Axboe void __user *buf; 839f9ead18cSJens Axboe 840b3fdea6eSDylan Yudaken buf = io_buffer_select(req, &len, issue_flags); 841f9ead18cSJens Axboe if (!buf) 842f9ead18cSJens Axboe return -ENOBUFS; 843f9ead18cSJens Axboe sr->buf = buf; 844f9ead18cSJens Axboe } 845f9ead18cSJens Axboe 846b3fdea6eSDylan Yudaken ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter); 847f9ead18cSJens Axboe if (unlikely(ret)) 848f9ead18cSJens Axboe goto out_free; 849f9ead18cSJens Axboe 850f9ead18cSJens Axboe msg.msg_name = NULL; 851f9ead18cSJens Axboe msg.msg_namelen = 0; 852f9ead18cSJens Axboe msg.msg_control = NULL; 853f9ead18cSJens Axboe msg.msg_get_inq = 1; 854f9ead18cSJens Axboe msg.msg_flags = 0; 855f9ead18cSJens Axboe msg.msg_controllen = 0; 856f9ead18cSJens Axboe msg.msg_iocb = NULL; 857e02b6651SPavel Begunkov msg.msg_ubuf = NULL; 858f9ead18cSJens Axboe 859f9ead18cSJens Axboe flags = sr->msg_flags; 860f9ead18cSJens Axboe if (force_nonblock) 861f9ead18cSJens Axboe flags |= MSG_DONTWAIT; 862f9ead18cSJens Axboe if (flags & MSG_WAITALL) 863f9ead18cSJens Axboe min_ret = iov_iter_count(&msg.msg_iter); 864f9ead18cSJens Axboe 865f9ead18cSJens Axboe ret = sock_recvmsg(sock, &msg, flags); 866f9ead18cSJens Axboe if (ret < min_ret) { 867b3fdea6eSDylan Yudaken if (ret == -EAGAIN && force_nonblock) { 868b3fdea6eSDylan Yudaken if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) { 869b3fdea6eSDylan Yudaken io_kbuf_recycle(req, issue_flags); 870b3fdea6eSDylan Yudaken return IOU_ISSUE_SKIP_COMPLETE; 871b3fdea6eSDylan Yudaken } 872b3fdea6eSDylan Yudaken 873f9ead18cSJens Axboe return -EAGAIN; 874b3fdea6eSDylan Yudaken } 875f9ead18cSJens Axboe if (ret > 0 && io_net_retry(sock, flags)) { 876f9ead18cSJens Axboe sr->len -= ret; 877f9ead18cSJens Axboe sr->buf += ret; 878f9ead18cSJens Axboe sr->done_io += ret; 879f9ead18cSJens Axboe req->flags |= REQ_F_PARTIAL_IO; 880f9ead18cSJens Axboe return -EAGAIN; 881f9ead18cSJens Axboe } 88295eafc74SPavel Begunkov if (ret == -ERESTARTSYS) 88395eafc74SPavel Begunkov ret = -EINTR; 884f9ead18cSJens Axboe req_set_fail(req); 885f9ead18cSJens Axboe } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 886f9ead18cSJens Axboe out_free: 887f9ead18cSJens Axboe req_set_fail(req); 888f9ead18cSJens Axboe } 889f9ead18cSJens Axboe 890d4e097daSDylan Yudaken if (ret > 0) 891f9ead18cSJens Axboe ret += sr->done_io; 892f9ead18cSJens Axboe else if (sr->done_io) 893f9ead18cSJens Axboe ret = sr->done_io; 894d4e097daSDylan Yudaken else 895d4e097daSDylan Yudaken io_kbuf_recycle(req, issue_flags); 896d4e097daSDylan Yudaken 897f9ead18cSJens Axboe cflags = io_put_kbuf(req, issue_flags); 898f9ead18cSJens Axboe if (msg.msg_inq) 899f9ead18cSJens Axboe cflags |= IORING_CQE_F_SOCK_NONEMPTY; 900b3fdea6eSDylan Yudaken 9019bb66906SDylan Yudaken if (!io_recv_finish(req, &ret, cflags, ret <= 0)) 902b3fdea6eSDylan Yudaken goto retry_multishot; 903b3fdea6eSDylan Yudaken 904b3fdea6eSDylan Yudaken return ret; 905f9ead18cSJens Axboe } 906f9ead18cSJens Axboe 907b0e9b551SPavel Begunkov void io_send_zc_cleanup(struct io_kiocb *req) 908b48c312bSPavel Begunkov { 909ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 910493108d9SPavel Begunkov struct io_async_msghdr *io; 911b48c312bSPavel Begunkov 912493108d9SPavel Begunkov if (req_has_async_data(req)) { 913493108d9SPavel Begunkov io = req->async_data; 9144c17a496SPavel Begunkov /* might be ->fast_iov if *msg_copy_hdr failed */ 9154c17a496SPavel Begunkov if (io->free_iov != io->fast_iov) 916493108d9SPavel Begunkov kfree(io->free_iov); 917493108d9SPavel Begunkov } 918a75155faSPavel Begunkov if (zc->notif) { 919b48c312bSPavel Begunkov io_notif_flush(zc->notif); 920b48c312bSPavel Begunkov zc->notif = NULL; 921b48c312bSPavel Begunkov } 922a75155faSPavel Begunkov } 923b48c312bSPavel Begunkov 924b0e9b551SPavel Begunkov int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 92506a5464bSPavel Begunkov { 926ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 92710c7d33eSPavel Begunkov struct io_ring_ctx *ctx = req->ctx; 928b48c312bSPavel Begunkov struct io_kiocb *notif; 92906a5464bSPavel Begunkov 930493108d9SPavel Begunkov if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) 931b48c312bSPavel Begunkov return -EINVAL; 932b48c312bSPavel Begunkov /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 933b48c312bSPavel Begunkov if (req->flags & REQ_F_CQE_SKIP) 93406a5464bSPavel Begunkov return -EINVAL; 93506a5464bSPavel Begunkov 93606a5464bSPavel Begunkov zc->flags = READ_ONCE(sqe->ioprio); 93763809137SPavel Begunkov if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | 93857f33224SPavel Begunkov IORING_RECVSEND_FIXED_BUF)) 93906a5464bSPavel Begunkov return -EINVAL; 940b48c312bSPavel Begunkov notif = zc->notif = io_alloc_notif(ctx); 941b48c312bSPavel Begunkov if (!notif) 942b48c312bSPavel Begunkov return -ENOMEM; 943b48c312bSPavel Begunkov notif->cqe.user_data = req->cqe.user_data; 944b48c312bSPavel Begunkov notif->cqe.res = 0; 945b48c312bSPavel Begunkov notif->cqe.flags = IORING_CQE_F_NOTIF; 946b48c312bSPavel Begunkov req->flags |= REQ_F_NEED_CLEANUP; 947e3366e02SPavel Begunkov if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 948e3366e02SPavel Begunkov unsigned idx = READ_ONCE(sqe->buf_index); 949e3366e02SPavel Begunkov 950e3366e02SPavel Begunkov if (unlikely(idx >= ctx->nr_user_bufs)) 951e3366e02SPavel Begunkov return -EFAULT; 952e3366e02SPavel Begunkov idx = array_index_nospec(idx, ctx->nr_user_bufs); 953e3366e02SPavel Begunkov req->imu = READ_ONCE(ctx->user_bufs[idx]); 954e3366e02SPavel Begunkov io_req_set_rsrc_node(notif, ctx, 0); 955e3366e02SPavel Begunkov } 95606a5464bSPavel Begunkov 957493108d9SPavel Begunkov if (req->opcode == IORING_OP_SEND_ZC) { 958493108d9SPavel Begunkov if (READ_ONCE(sqe->__pad3[0])) 959493108d9SPavel Begunkov return -EINVAL; 960493108d9SPavel Begunkov zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 961493108d9SPavel Begunkov zc->addr_len = READ_ONCE(sqe->addr_len); 962493108d9SPavel Begunkov } else { 963493108d9SPavel Begunkov if (unlikely(sqe->addr2 || sqe->file_index)) 964493108d9SPavel Begunkov return -EINVAL; 965493108d9SPavel Begunkov if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF)) 966493108d9SPavel Begunkov return -EINVAL; 967493108d9SPavel Begunkov } 968493108d9SPavel Begunkov 96906a5464bSPavel Begunkov zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 97006a5464bSPavel Begunkov zc->len = READ_ONCE(sqe->len); 97106a5464bSPavel Begunkov zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 97206a5464bSPavel Begunkov if (zc->msg_flags & MSG_DONTWAIT) 97306a5464bSPavel Begunkov req->flags |= REQ_F_NOWAIT; 974092aeedbSPavel Begunkov 9754a933e62SPavel Begunkov zc->done_io = 0; 976092aeedbSPavel Begunkov 97706a5464bSPavel Begunkov #ifdef CONFIG_COMPAT 97806a5464bSPavel Begunkov if (req->ctx->compat) 97906a5464bSPavel Begunkov zc->msg_flags |= MSG_CMSG_COMPAT; 98006a5464bSPavel Begunkov #endif 98106a5464bSPavel Begunkov return 0; 98206a5464bSPavel Begunkov } 98306a5464bSPavel Begunkov 984cd9021e8SPavel Begunkov static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, 985cd9021e8SPavel Begunkov struct iov_iter *from, size_t length) 986cd9021e8SPavel Begunkov { 987cd9021e8SPavel Begunkov skb_zcopy_downgrade_managed(skb); 988cd9021e8SPavel Begunkov return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); 989cd9021e8SPavel Begunkov } 990cd9021e8SPavel Begunkov 9913ff1a0d3SPavel Begunkov static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, 9923ff1a0d3SPavel Begunkov struct iov_iter *from, size_t length) 9933ff1a0d3SPavel Begunkov { 9943ff1a0d3SPavel Begunkov struct skb_shared_info *shinfo = skb_shinfo(skb); 9953ff1a0d3SPavel Begunkov int frag = shinfo->nr_frags; 9963ff1a0d3SPavel Begunkov int ret = 0; 9973ff1a0d3SPavel Begunkov struct bvec_iter bi; 9983ff1a0d3SPavel Begunkov ssize_t copied = 0; 9993ff1a0d3SPavel Begunkov unsigned long truesize = 0; 10003ff1a0d3SPavel Begunkov 1001cd9021e8SPavel Begunkov if (!frag) 10023ff1a0d3SPavel Begunkov shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; 1003cd9021e8SPavel Begunkov else if (unlikely(!skb_zcopy_managed(skb))) 10043ff1a0d3SPavel Begunkov return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); 10053ff1a0d3SPavel Begunkov 10063ff1a0d3SPavel Begunkov bi.bi_size = min(from->count, length); 10073ff1a0d3SPavel Begunkov bi.bi_bvec_done = from->iov_offset; 10083ff1a0d3SPavel Begunkov bi.bi_idx = 0; 10093ff1a0d3SPavel Begunkov 10103ff1a0d3SPavel Begunkov while (bi.bi_size && frag < MAX_SKB_FRAGS) { 10113ff1a0d3SPavel Begunkov struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); 10123ff1a0d3SPavel Begunkov 10133ff1a0d3SPavel Begunkov copied += v.bv_len; 10143ff1a0d3SPavel Begunkov truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); 10153ff1a0d3SPavel Begunkov __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, 10163ff1a0d3SPavel Begunkov v.bv_offset, v.bv_len); 10173ff1a0d3SPavel Begunkov bvec_iter_advance_single(from->bvec, &bi, v.bv_len); 10183ff1a0d3SPavel Begunkov } 10193ff1a0d3SPavel Begunkov if (bi.bi_size) 10203ff1a0d3SPavel Begunkov ret = -EMSGSIZE; 10213ff1a0d3SPavel Begunkov 10223ff1a0d3SPavel Begunkov shinfo->nr_frags = frag; 10233ff1a0d3SPavel Begunkov from->bvec += bi.bi_idx; 10243ff1a0d3SPavel Begunkov from->nr_segs -= bi.bi_idx; 1025dfb58b17SPavel Begunkov from->count -= copied; 10263ff1a0d3SPavel Begunkov from->iov_offset = bi.bi_bvec_done; 10273ff1a0d3SPavel Begunkov 10283ff1a0d3SPavel Begunkov skb->data_len += copied; 10293ff1a0d3SPavel Begunkov skb->len += copied; 10303ff1a0d3SPavel Begunkov skb->truesize += truesize; 10313ff1a0d3SPavel Begunkov 10323ff1a0d3SPavel Begunkov if (sk && sk->sk_type == SOCK_STREAM) { 10333ff1a0d3SPavel Begunkov sk_wmem_queued_add(sk, truesize); 10343ff1a0d3SPavel Begunkov if (!skb_zcopy_pure(skb)) 10353ff1a0d3SPavel Begunkov sk_mem_charge(sk, truesize); 10363ff1a0d3SPavel Begunkov } else { 10373ff1a0d3SPavel Begunkov refcount_add(truesize, &skb->sk->sk_wmem_alloc); 10383ff1a0d3SPavel Begunkov } 10393ff1a0d3SPavel Begunkov return ret; 10403ff1a0d3SPavel Begunkov } 10413ff1a0d3SPavel Begunkov 1042b0e9b551SPavel Begunkov int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 104306a5464bSPavel Begunkov { 10446ae61b7aSPavel Begunkov struct sockaddr_storage __address; 1045ac9e5784SPavel Begunkov struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 104606a5464bSPavel Begunkov struct msghdr msg; 104706a5464bSPavel Begunkov struct iovec iov; 104806a5464bSPavel Begunkov struct socket *sock; 10496ae91ac9SPavel Begunkov unsigned msg_flags; 105006a5464bSPavel Begunkov int ret, min_ret = 0; 105106a5464bSPavel Begunkov 105206a5464bSPavel Begunkov sock = sock_from_file(req->file); 105306a5464bSPavel Begunkov if (unlikely(!sock)) 105406a5464bSPavel Begunkov return -ENOTSOCK; 105506a5464bSPavel Begunkov 105606a5464bSPavel Begunkov msg.msg_name = NULL; 105706a5464bSPavel Begunkov msg.msg_control = NULL; 105806a5464bSPavel Begunkov msg.msg_controllen = 0; 105906a5464bSPavel Begunkov msg.msg_namelen = 0; 106006a5464bSPavel Begunkov 106186dc8f23SPavel Begunkov if (zc->addr) { 1062581711c4SPavel Begunkov if (req_has_async_data(req)) { 1063581711c4SPavel Begunkov struct io_async_msghdr *io = req->async_data; 1064581711c4SPavel Begunkov 10656ae61b7aSPavel Begunkov msg.msg_name = &io->addr; 1066581711c4SPavel Begunkov } else { 1067581711c4SPavel Begunkov ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); 106886dc8f23SPavel Begunkov if (unlikely(ret < 0)) 106986dc8f23SPavel Begunkov return ret; 1070581711c4SPavel Begunkov msg.msg_name = (struct sockaddr *)&__address; 1071581711c4SPavel Begunkov } 107286dc8f23SPavel Begunkov msg.msg_namelen = zc->addr_len; 107386dc8f23SPavel Begunkov } 107486dc8f23SPavel Begunkov 10753c840053SPavel Begunkov if (!(req->flags & REQ_F_POLLED) && 10763c840053SPavel Begunkov (zc->flags & IORING_RECVSEND_POLL_FIRST)) 10776ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 10783c840053SPavel Begunkov 107910c7d33eSPavel Begunkov if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 108010c7d33eSPavel Begunkov ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, 108110c7d33eSPavel Begunkov (u64)(uintptr_t)zc->buf, zc->len); 108210c7d33eSPavel Begunkov if (unlikely(ret)) 108310c7d33eSPavel Begunkov return ret; 1084cd9021e8SPavel Begunkov msg.sg_from_iter = io_sg_from_iter; 108510c7d33eSPavel Begunkov } else { 108610c7d33eSPavel Begunkov ret = import_single_range(WRITE, zc->buf, zc->len, &iov, 108710c7d33eSPavel Begunkov &msg.msg_iter); 108806a5464bSPavel Begunkov if (unlikely(ret)) 108906a5464bSPavel Begunkov return ret; 1090b48c312bSPavel Begunkov ret = io_notif_account_mem(zc->notif, zc->len); 10912e32ba56SPavel Begunkov if (unlikely(ret)) 10922e32ba56SPavel Begunkov return ret; 1093cd9021e8SPavel Begunkov msg.sg_from_iter = io_sg_from_iter_iovec; 109410c7d33eSPavel Begunkov } 109506a5464bSPavel Begunkov 109606a5464bSPavel Begunkov msg_flags = zc->msg_flags | MSG_ZEROCOPY; 109706a5464bSPavel Begunkov if (issue_flags & IO_URING_F_NONBLOCK) 109806a5464bSPavel Begunkov msg_flags |= MSG_DONTWAIT; 109906a5464bSPavel Begunkov if (msg_flags & MSG_WAITALL) 110006a5464bSPavel Begunkov min_ret = iov_iter_count(&msg.msg_iter); 110106a5464bSPavel Begunkov 110206a5464bSPavel Begunkov msg.msg_flags = msg_flags; 1103b48c312bSPavel Begunkov msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 110406a5464bSPavel Begunkov ret = sock_sendmsg(sock, &msg); 110506a5464bSPavel Begunkov 110606a5464bSPavel Begunkov if (unlikely(ret < min_ret)) { 110706a5464bSPavel Begunkov if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 11086ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 1109581711c4SPavel Begunkov 11104a933e62SPavel Begunkov if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { 11114a933e62SPavel Begunkov zc->len -= ret; 11124a933e62SPavel Begunkov zc->buf += ret; 11134a933e62SPavel Begunkov zc->done_io += ret; 11144a933e62SPavel Begunkov req->flags |= REQ_F_PARTIAL_IO; 11156ae61b7aSPavel Begunkov return io_setup_async_addr(req, &__address, issue_flags); 11164a933e62SPavel Begunkov } 11174a933e62SPavel Begunkov if (ret == -ERESTARTSYS) 11184a933e62SPavel Begunkov ret = -EINTR; 11195a848b7cSPavel Begunkov req_set_fail(req); 112006a5464bSPavel Begunkov } 112106a5464bSPavel Begunkov 11224a933e62SPavel Begunkov if (ret >= 0) 11234a933e62SPavel Begunkov ret += zc->done_io; 11244a933e62SPavel Begunkov else if (zc->done_io) 11254a933e62SPavel Begunkov ret = zc->done_io; 1126b48c312bSPavel Begunkov 1127b48c312bSPavel Begunkov io_notif_flush(zc->notif); 1128b48c312bSPavel Begunkov req->flags &= ~REQ_F_NEED_CLEANUP; 11296ae91ac9SPavel Begunkov io_req_set_res(req, ret, IORING_CQE_F_MORE); 113006a5464bSPavel Begunkov return IOU_OK; 113106a5464bSPavel Begunkov } 113206a5464bSPavel Begunkov 1133493108d9SPavel Begunkov int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1134493108d9SPavel Begunkov { 1135493108d9SPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1136493108d9SPavel Begunkov struct io_async_msghdr iomsg, *kmsg; 1137493108d9SPavel Begunkov struct socket *sock; 11386ae91ac9SPavel Begunkov unsigned flags; 1139493108d9SPavel Begunkov int ret, min_ret = 0; 1140493108d9SPavel Begunkov 1141493108d9SPavel Begunkov sock = sock_from_file(req->file); 1142493108d9SPavel Begunkov if (unlikely(!sock)) 1143493108d9SPavel Begunkov return -ENOTSOCK; 1144493108d9SPavel Begunkov 1145493108d9SPavel Begunkov if (req_has_async_data(req)) { 1146493108d9SPavel Begunkov kmsg = req->async_data; 1147493108d9SPavel Begunkov } else { 1148493108d9SPavel Begunkov ret = io_sendmsg_copy_hdr(req, &iomsg); 1149493108d9SPavel Begunkov if (ret) 1150493108d9SPavel Begunkov return ret; 1151493108d9SPavel Begunkov kmsg = &iomsg; 1152493108d9SPavel Begunkov } 1153493108d9SPavel Begunkov 1154493108d9SPavel Begunkov if (!(req->flags & REQ_F_POLLED) && 1155493108d9SPavel Begunkov (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1156493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1157493108d9SPavel Begunkov 1158493108d9SPavel Begunkov flags = sr->msg_flags | MSG_ZEROCOPY; 1159493108d9SPavel Begunkov if (issue_flags & IO_URING_F_NONBLOCK) 1160493108d9SPavel Begunkov flags |= MSG_DONTWAIT; 1161493108d9SPavel Begunkov if (flags & MSG_WAITALL) 1162493108d9SPavel Begunkov min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1163493108d9SPavel Begunkov 1164493108d9SPavel Begunkov kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1165493108d9SPavel Begunkov kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1166493108d9SPavel Begunkov ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 1167493108d9SPavel Begunkov 1168493108d9SPavel Begunkov if (unlikely(ret < min_ret)) { 1169493108d9SPavel Begunkov if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1170493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1171493108d9SPavel Begunkov 1172493108d9SPavel Begunkov if (ret > 0 && io_net_retry(sock, flags)) { 1173493108d9SPavel Begunkov sr->done_io += ret; 1174493108d9SPavel Begunkov req->flags |= REQ_F_PARTIAL_IO; 1175493108d9SPavel Begunkov return io_setup_async_msg(req, kmsg, issue_flags); 1176493108d9SPavel Begunkov } 1177493108d9SPavel Begunkov if (ret == -ERESTARTSYS) 1178493108d9SPavel Begunkov ret = -EINTR; 1179493108d9SPavel Begunkov req_set_fail(req); 1180493108d9SPavel Begunkov } 1181493108d9SPavel Begunkov /* fast path, check for non-NULL to avoid function call */ 1182493108d9SPavel Begunkov if (kmsg->free_iov) 1183493108d9SPavel Begunkov kfree(kmsg->free_iov); 1184493108d9SPavel Begunkov 1185493108d9SPavel Begunkov io_netmsg_recycle(req, issue_flags); 1186493108d9SPavel Begunkov if (ret >= 0) 1187493108d9SPavel Begunkov ret += sr->done_io; 1188493108d9SPavel Begunkov else if (sr->done_io) 1189493108d9SPavel Begunkov ret = sr->done_io; 1190493108d9SPavel Begunkov 1191493108d9SPavel Begunkov io_notif_flush(sr->notif); 1192493108d9SPavel Begunkov req->flags &= ~REQ_F_NEED_CLEANUP; 11936ae91ac9SPavel Begunkov io_req_set_res(req, ret, IORING_CQE_F_MORE); 1194493108d9SPavel Begunkov return IOU_OK; 1195493108d9SPavel Begunkov } 1196493108d9SPavel Begunkov 11977e6b638eSPavel Begunkov void io_sendrecv_fail(struct io_kiocb *req) 11987e6b638eSPavel Begunkov { 11997e6b638eSPavel Begunkov struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 12007e6b638eSPavel Begunkov 12017e6b638eSPavel Begunkov if (req->flags & REQ_F_PARTIAL_IO) 12026ae91ac9SPavel Begunkov req->cqe.res = sr->done_io; 12036ae91ac9SPavel Begunkov 1204c4c0009eSPavel Begunkov if ((req->flags & REQ_F_NEED_CLEANUP) && 12056ae91ac9SPavel Begunkov (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 12066ae91ac9SPavel Begunkov req->cqe.flags |= IORING_CQE_F_MORE; 12075693bcceSPavel Begunkov } 12085693bcceSPavel Begunkov 1209f9ead18cSJens Axboe int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1210f9ead18cSJens Axboe { 1211f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1212f9ead18cSJens Axboe unsigned flags; 1213f9ead18cSJens Axboe 1214f9ead18cSJens Axboe if (sqe->len || sqe->buf_index) 1215f9ead18cSJens Axboe return -EINVAL; 1216f9ead18cSJens Axboe 1217f9ead18cSJens Axboe accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1218f9ead18cSJens Axboe accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1219f9ead18cSJens Axboe accept->flags = READ_ONCE(sqe->accept_flags); 1220f9ead18cSJens Axboe accept->nofile = rlimit(RLIMIT_NOFILE); 1221f9ead18cSJens Axboe flags = READ_ONCE(sqe->ioprio); 1222f9ead18cSJens Axboe if (flags & ~IORING_ACCEPT_MULTISHOT) 1223f9ead18cSJens Axboe return -EINVAL; 1224f9ead18cSJens Axboe 1225f9ead18cSJens Axboe accept->file_slot = READ_ONCE(sqe->file_index); 1226f9ead18cSJens Axboe if (accept->file_slot) { 1227f9ead18cSJens Axboe if (accept->flags & SOCK_CLOEXEC) 1228f9ead18cSJens Axboe return -EINVAL; 1229f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT && 1230f9ead18cSJens Axboe accept->file_slot != IORING_FILE_INDEX_ALLOC) 1231f9ead18cSJens Axboe return -EINVAL; 1232f9ead18cSJens Axboe } 1233f9ead18cSJens Axboe if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1234f9ead18cSJens Axboe return -EINVAL; 1235f9ead18cSJens Axboe if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1236f9ead18cSJens Axboe accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1237f9ead18cSJens Axboe if (flags & IORING_ACCEPT_MULTISHOT) 1238f9ead18cSJens Axboe req->flags |= REQ_F_APOLL_MULTISHOT; 1239f9ead18cSJens Axboe return 0; 1240f9ead18cSJens Axboe } 1241f9ead18cSJens Axboe 1242f9ead18cSJens Axboe int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1243f9ead18cSJens Axboe { 1244f9ead18cSJens Axboe struct io_ring_ctx *ctx = req->ctx; 1245f2ccb5aeSStefan Metzmacher struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1246f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1247f9ead18cSJens Axboe unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; 1248f9ead18cSJens Axboe bool fixed = !!accept->file_slot; 1249f9ead18cSJens Axboe struct file *file; 1250f9ead18cSJens Axboe int ret, fd; 1251f9ead18cSJens Axboe 1252f9ead18cSJens Axboe retry: 1253f9ead18cSJens Axboe if (!fixed) { 1254f9ead18cSJens Axboe fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1255f9ead18cSJens Axboe if (unlikely(fd < 0)) 1256f9ead18cSJens Axboe return fd; 1257f9ead18cSJens Axboe } 1258f9ead18cSJens Axboe file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, 1259f9ead18cSJens Axboe accept->flags); 1260f9ead18cSJens Axboe if (IS_ERR(file)) { 1261f9ead18cSJens Axboe if (!fixed) 1262f9ead18cSJens Axboe put_unused_fd(fd); 1263f9ead18cSJens Axboe ret = PTR_ERR(file); 1264f9ead18cSJens Axboe if (ret == -EAGAIN && force_nonblock) { 1265f9ead18cSJens Axboe /* 1266f9ead18cSJens Axboe * if it's multishot and polled, we don't need to 1267f9ead18cSJens Axboe * return EAGAIN to arm the poll infra since it 1268f9ead18cSJens Axboe * has already been done 1269f9ead18cSJens Axboe */ 1270f9ead18cSJens Axboe if ((req->flags & IO_APOLL_MULTI_POLLED) == 1271f9ead18cSJens Axboe IO_APOLL_MULTI_POLLED) 1272f9ead18cSJens Axboe ret = IOU_ISSUE_SKIP_COMPLETE; 1273f9ead18cSJens Axboe return ret; 1274f9ead18cSJens Axboe } 1275f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1276f9ead18cSJens Axboe ret = -EINTR; 1277f9ead18cSJens Axboe req_set_fail(req); 1278f9ead18cSJens Axboe } else if (!fixed) { 1279f9ead18cSJens Axboe fd_install(fd, file); 1280f9ead18cSJens Axboe ret = fd; 1281f9ead18cSJens Axboe } else { 1282f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1283f9ead18cSJens Axboe accept->file_slot); 1284f9ead18cSJens Axboe } 1285f9ead18cSJens Axboe 1286f9ead18cSJens Axboe if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1287f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1288f9ead18cSJens Axboe return IOU_OK; 1289f9ead18cSJens Axboe } 1290f9ead18cSJens Axboe 1291cbd25748SDylan Yudaken if (ret >= 0 && 1292cbd25748SDylan Yudaken io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) 1293d245bca6SPavel Begunkov goto retry; 1294cbd25748SDylan Yudaken 1295cbd25748SDylan Yudaken io_req_set_res(req, ret, 0); 1296cbd25748SDylan Yudaken if (req->flags & REQ_F_POLLED) 1297cbd25748SDylan Yudaken return IOU_STOP_MULTISHOT; 1298cbd25748SDylan Yudaken return IOU_OK; 1299f9ead18cSJens Axboe } 1300f9ead18cSJens Axboe 1301f9ead18cSJens Axboe int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1302f9ead18cSJens Axboe { 1303f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1304f9ead18cSJens Axboe 1305f9ead18cSJens Axboe if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1306f9ead18cSJens Axboe return -EINVAL; 1307f9ead18cSJens Axboe 1308f9ead18cSJens Axboe sock->domain = READ_ONCE(sqe->fd); 1309f9ead18cSJens Axboe sock->type = READ_ONCE(sqe->off); 1310f9ead18cSJens Axboe sock->protocol = READ_ONCE(sqe->len); 1311f9ead18cSJens Axboe sock->file_slot = READ_ONCE(sqe->file_index); 1312f9ead18cSJens Axboe sock->nofile = rlimit(RLIMIT_NOFILE); 1313f9ead18cSJens Axboe 1314f9ead18cSJens Axboe sock->flags = sock->type & ~SOCK_TYPE_MASK; 1315f9ead18cSJens Axboe if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1316f9ead18cSJens Axboe return -EINVAL; 1317f9ead18cSJens Axboe if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1318f9ead18cSJens Axboe return -EINVAL; 1319f9ead18cSJens Axboe return 0; 1320f9ead18cSJens Axboe } 1321f9ead18cSJens Axboe 1322f9ead18cSJens Axboe int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1323f9ead18cSJens Axboe { 1324f2ccb5aeSStefan Metzmacher struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1325f9ead18cSJens Axboe bool fixed = !!sock->file_slot; 1326f9ead18cSJens Axboe struct file *file; 1327f9ead18cSJens Axboe int ret, fd; 1328f9ead18cSJens Axboe 1329f9ead18cSJens Axboe if (!fixed) { 1330f9ead18cSJens Axboe fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1331f9ead18cSJens Axboe if (unlikely(fd < 0)) 1332f9ead18cSJens Axboe return fd; 1333f9ead18cSJens Axboe } 1334f9ead18cSJens Axboe file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1335f9ead18cSJens Axboe if (IS_ERR(file)) { 1336f9ead18cSJens Axboe if (!fixed) 1337f9ead18cSJens Axboe put_unused_fd(fd); 1338f9ead18cSJens Axboe ret = PTR_ERR(file); 1339f9ead18cSJens Axboe if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1340f9ead18cSJens Axboe return -EAGAIN; 1341f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1342f9ead18cSJens Axboe ret = -EINTR; 1343f9ead18cSJens Axboe req_set_fail(req); 1344f9ead18cSJens Axboe } else if (!fixed) { 1345f9ead18cSJens Axboe fd_install(fd, file); 1346f9ead18cSJens Axboe ret = fd; 1347f9ead18cSJens Axboe } else { 1348f9ead18cSJens Axboe ret = io_fixed_fd_install(req, issue_flags, file, 1349f9ead18cSJens Axboe sock->file_slot); 1350f9ead18cSJens Axboe } 1351f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1352f9ead18cSJens Axboe return IOU_OK; 1353f9ead18cSJens Axboe } 1354f9ead18cSJens Axboe 1355f9ead18cSJens Axboe int io_connect_prep_async(struct io_kiocb *req) 1356f9ead18cSJens Axboe { 1357f9ead18cSJens Axboe struct io_async_connect *io = req->async_data; 1358f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1359f9ead18cSJens Axboe 1360f9ead18cSJens Axboe return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); 1361f9ead18cSJens Axboe } 1362f9ead18cSJens Axboe 1363f9ead18cSJens Axboe int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1364f9ead18cSJens Axboe { 1365f2ccb5aeSStefan Metzmacher struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1366f9ead18cSJens Axboe 1367f9ead18cSJens Axboe if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1368f9ead18cSJens Axboe return -EINVAL; 1369f9ead18cSJens Axboe 1370f9ead18cSJens Axboe conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1371f9ead18cSJens Axboe conn->addr_len = READ_ONCE(sqe->addr2); 1372f9ead18cSJens Axboe return 0; 1373f9ead18cSJens Axboe } 1374f9ead18cSJens Axboe 1375f9ead18cSJens Axboe int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1376f9ead18cSJens Axboe { 1377f2ccb5aeSStefan Metzmacher struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1378f9ead18cSJens Axboe struct io_async_connect __io, *io; 1379f9ead18cSJens Axboe unsigned file_flags; 1380f9ead18cSJens Axboe int ret; 1381f9ead18cSJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1382f9ead18cSJens Axboe 1383f9ead18cSJens Axboe if (req_has_async_data(req)) { 1384f9ead18cSJens Axboe io = req->async_data; 1385f9ead18cSJens Axboe } else { 1386f9ead18cSJens Axboe ret = move_addr_to_kernel(connect->addr, 1387f9ead18cSJens Axboe connect->addr_len, 1388f9ead18cSJens Axboe &__io.address); 1389f9ead18cSJens Axboe if (ret) 1390f9ead18cSJens Axboe goto out; 1391f9ead18cSJens Axboe io = &__io; 1392f9ead18cSJens Axboe } 1393f9ead18cSJens Axboe 1394f9ead18cSJens Axboe file_flags = force_nonblock ? O_NONBLOCK : 0; 1395f9ead18cSJens Axboe 1396f9ead18cSJens Axboe ret = __sys_connect_file(req->file, &io->address, 1397f9ead18cSJens Axboe connect->addr_len, file_flags); 1398f9ead18cSJens Axboe if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { 1399f9ead18cSJens Axboe if (req_has_async_data(req)) 1400f9ead18cSJens Axboe return -EAGAIN; 1401f9ead18cSJens Axboe if (io_alloc_async_data(req)) { 1402f9ead18cSJens Axboe ret = -ENOMEM; 1403f9ead18cSJens Axboe goto out; 1404f9ead18cSJens Axboe } 1405f9ead18cSJens Axboe memcpy(req->async_data, &__io, sizeof(__io)); 1406f9ead18cSJens Axboe return -EAGAIN; 1407f9ead18cSJens Axboe } 1408f9ead18cSJens Axboe if (ret == -ERESTARTSYS) 1409f9ead18cSJens Axboe ret = -EINTR; 1410f9ead18cSJens Axboe out: 1411f9ead18cSJens Axboe if (ret < 0) 1412f9ead18cSJens Axboe req_set_fail(req); 1413f9ead18cSJens Axboe io_req_set_res(req, ret, 0); 1414f9ead18cSJens Axboe return IOU_OK; 1415f9ead18cSJens Axboe } 141643e0bbbdSJens Axboe 141743e0bbbdSJens Axboe void io_netmsg_cache_free(struct io_cache_entry *entry) 141843e0bbbdSJens Axboe { 141943e0bbbdSJens Axboe kfree(container_of(entry, struct io_async_msghdr, cache)); 142043e0bbbdSJens Axboe } 1421f9ead18cSJens Axboe #endif 1422