// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;
	unsigned msg_flags;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	void __user *addr;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		hdr = container_of(entry, struct io_async_msghdr, cache);
		hdr->free_iov = NULL;
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!kmsg->free_iov) {
		size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
		async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr_storage,
			       unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
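/*
 * Multishot recvmsg: the helpers below carve each selected provided buffer
 * into a fixed header region followed by the payload:
 *
 *	struct io_uring_recvmsg_out | name (namelen) | control (controllen) | payload
 *
 * io_recvmsg_prep_multishot() reserves the header region and points the
 * receive at the payload; io_recvmsg_multishot() copies the header (and any
 * received source address) back to the user buffer once the receive
 * completes.
 */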
static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished,
				  unsigned issue_flags)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 * 1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
			      len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF))
		return -EINVAL;
	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
				      (u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = false;
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct socket *socket;

		ret = -ENOTSOCK;
		socket = sock_from_file(req->file);
		if (socket)
			ret = sock_error(socket->sk);
		goto out;
	}

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else {
			if (req_has_async_data(req))
				return -EAGAIN;
			if (io_alloc_async_data(req)) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(req->async_data, &__io, sizeof(__io));
		}
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif