// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;
	unsigned msg_flags;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	u16 buf_group;
	void __user *addr;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

static inline bool io_check_multishot(struct io_kiocb *req,
				      unsigned int issue_flags)
{
	/*
	 * When ->locked_cq is set we only allow to post CQEs from the original
	 * task context. Usual request completions will be handled in other
	 * generic paths but multipoll may decide to post extra cqes.
	 */
	return !(issue_flags & IO_URING_F_IOWQ) ||
		!(issue_flags & IO_URING_F_MULTISHOT) ||
		!req->ctx->task_complete;
}

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

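/*
 * Get an io_async_msghdr for the request: reuse an entry from the ring's
 * netmsg_cache when the submission lock is held (IO_URING_F_UNLOCKED not
 * set), otherwise fall back to a plain async-data allocation.
 */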
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->netmsg_cache);
		if (entry) {
			hdr = container_of(entry, struct io_async_msghdr, cache);
			hdr->free_iov = NULL;
			req->flags |= REQ_F_ASYNC_DATA;
			req->async_data = hdr;
			return hdr;
		}
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;
	/* if we were using fast_iov, set it to the new one */
	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				  &iomsg->free_iov);
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control;
	return ret;
}

int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr_storage,
			       unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

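/*
 * Prepare SEND/SENDMSG. IORING_OP_SEND may carry a destination address in
 * sqe->addr2/addr_len; IORING_OP_SENDMSG must leave addr2 and file_index
 * unused.
 */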
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
		kmsg->msg.msg_control = sr->msg_control;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

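/*
 * IORING_OP_SEND: plain send without a user msghdr. Any destination address
 * either lives in the request's async data (set up after a previous -EAGAIN)
 * or is copied to an on-stack sockaddr_storage; partial progress is tracked
 * in sr->done_io so a retry resumes where the last attempt stopped.
 */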
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

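/*
 * Compat (32-bit) variant of the recvmsg header copy: the iovec is a
 * compat_iovec, and with provided buffers only its length is read here.
 */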
#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		iomsg->free_iov = NULL;
		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished,
				  unsigned issue_flags)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
			       req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
			io_recv_prep_retry(req);
			return false;
		}
		/* Otherwise stop multishot but use the current result. */
	}

	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

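/*
 * Multishot recvmsg lays out struct io_uring_recvmsg_out, the source address
 * and any control data at the front of the selected buffer; the rest of the
 * buffer receives the payload.
 */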
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

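/*
 * Do one receive for multishot recvmsg and copy the io_uring_recvmsg_out
 * header plus the (possibly truncated) source address into the region that
 * io_recvmsg_prep_multishot() reserved at the start of the buffer. Returns
 * the total space consumed in the buffer: header, name and control areas
 * plus the received payload.
 */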
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	if (!io_check_multishot(req, issue_flags))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		io_netmsg_recycle(req, issue_flags);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

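/*
 * IORING_OP_RECV: buffer-based receive, optionally multishot with provided
 * buffers. Partial receives on stream/seqpacket sockets are retried from the
 * current offset via sr->done_io.
 */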
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

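/*
 * Zero-copy send prep: allocate the notification request whose completion
 * (flagged IORING_CQE_F_NOTIF) tells userspace the kernel is done with the
 * buffer, and validate the zero-copy flags and fixed-buffer index.
 */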
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			io_notif_set_extended(notif);
			io_notif_to_data(notif)->zc_report = true;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

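/*
 * IORING_OP_SEND_ZC issue path: the payload is mapped either from a
 * registered (fixed) buffer or from the user pointer, sent with MSG_ZEROCOPY
 * through msg_ubuf, and the notification is flushed here unless the request
 * is running from io-wq.
 */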
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
				      (u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		io_notif_set_extended(zc->notif);
		ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

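/*
 * IORING_OP_SENDMSG_ZC: msghdr-based zero-copy send. It shares the
 * notification and retry handling with io_send_zc() but always imports the
 * iovec via io_sendmsg_copy_hdr().
 */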
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	io_notif_set_extended(sr->notif);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

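/*
 * Accept, optionally multishot: each accepted socket posts a CQE carrying
 * IORING_CQE_F_MORE, and the request stays armed until an error occurs or
 * the completion can no longer be posted.
 */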
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;
retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
		       req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
		goto retry;

	return -ECANCELED;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

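/*
 * Connect: the address is copied to kernel space at prep time (or on first
 * issue), -EINPROGRESS turns the request into a poll-driven wait that later
 * reports the socket error state, and a first -ECONNABORTED is retried once.
 */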
int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct socket *socket;

		ret = -ENOTSOCK;
		socket = sock_from_file(req->file);
		if (socket)
			ret = sock_error(socket->sk);
		goto out;
	}

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
			return -EAGAIN;
		}
		if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif