// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	unsigned			msg_flags;
	unsigned			flags;
	size_t				len;
	size_t				done_io;
};

struct io_sendzc {
	struct file			*file;
	void __user			*buf;
	size_t				len;
	u16				slot_idx;
	unsigned			msg_flags;
	unsigned			flags;
	unsigned			addr_len;
	void __user			*addr;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
						      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		struct io_async_msghdr *hdr;

		hdr = container_of(entry, struct io_async_msghdr, cache);
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req))
		return req->async_data;

	return NULL;
}

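/*
 * Stash the on-stack msghdr into (cached or freshly allocated) async data so
 * the request can be retried later. Returns -EAGAIN so the caller punts the
 * request, or -ENOMEM if no async data could be set up.
 */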
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg = req->async_data;

	if (async_msg)
		return -EAGAIN;
	async_msg = io_recvmsg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!async_msg->free_iov)
		async_msg->msg.msg_iter.iov = async_msg->fast_iov;

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

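/*
 * Non-vectored send: imports a single user buffer instead of copying a
 * struct msghdr, but otherwise mirrors io_sendmsg().
 */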
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

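/*
 * For multishot recvmsg the selected provided buffer is carved up as
 * struct io_uring_recvmsg_out, then space for the source address and
 * control data, with whatever remains used for the payload.
 */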
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

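/*
 * Receive straight into the payload area of the selected buffer, then copy
 * the io_uring_recvmsg_out header and the (possibly truncated) source
 * address in front of it so userspace sees one contiguous result.
 */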
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
			      len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
					IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}

int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;

	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH))
		return -EINVAL;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(req, ctx, 0);
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	zc->slot_idx = READ_ONCE(sqe->notification_idx);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	zc->addr_len = READ_ONCE(sqe->addr_len);

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

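/*
 * Append pages from the source bvec directly as skb frags, relying on
 * managed frag references instead of per-page refcounting; iterators or
 * skbs that can't take this path fall back to __zerocopy_sg_from_iter().
 */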
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!shinfo->nr_frags)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;

	if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
		skb_zcopy_downgrade_managed(skb);
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
	}

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count = bi.bi_size;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

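/*
 * Zero-copy send: the payload pages are referenced by the skb rather than
 * copied, with msg_ubuf pointing at the notification's ubuf_info so the
 * buffer-reuse completion is posted via the slot named in the SQE.
 */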
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage address;
	struct io_ring_ctx *ctx = req->ctx;
	struct io_sendzc *zc = io_kiocb_to_cmd(req);
	struct io_notif_slot *notif_slot;
	struct io_kiocb *notif;
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (issue_flags & IO_URING_F_UNLOCKED)
		return -EAGAIN;
	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
	if (!notif_slot)
		return -EINVAL;
	notif = io_get_notif(ctx, notif_slot);
	if (!notif)
		return -ENOMEM;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
				      (u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(notif, zc->len);
		if (unlikely(ret))
			return ret;
	}

	if (zc->addr) {
		ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address);
		if (unlikely(ret < 0))
			return ret;
		msg.msg_name = (struct sockaddr *)&address;
		msg.msg_namelen = zc->addr_len;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
	msg.sg_from_iter = io_sg_from_iter;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		return ret == -ERESTARTSYS ? -EINTR : ret;
	}

	if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH)
		io_notif_slot_flush_submit(notif_slot, 0);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

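/*
 * As with socket(2), the type field may also carry SOCK_NONBLOCK and
 * SOCK_CLOEXEC; those bits are split out into sock->flags here.
 */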
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif