// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;
	unsigned msg_flags;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	void __user *addr;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

static inline bool io_check_multishot(struct io_kiocb *req,
				      unsigned int issue_flags)
{
	/*
	 * When ->locked_cq is set we only allow posting CQEs from the original
	 * task context. Usual request completions will be handled in other
	 * generic paths but multipoll may decide to post extra cqes.
	 */
	return !(issue_flags & IO_URING_F_IOWQ) ||
	       !(issue_flags & IO_URING_F_MULTISHOT) ||
	       !req->ctx->task_complete;
}

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
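		/*
		 * The ring lock is held here (IO_URING_F_UNLOCKED is not
		 * set), so the per-ring netmsg_cache can be used without
		 * further locking; otherwise fall back to a plain
		 * allocation below.
		 */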
		entry = io_alloc_cache_get(&ctx->netmsg_cache);
		if (entry) {
			hdr = container_of(entry, struct io_async_msghdr, cache);
			hdr->free_iov = NULL;
			req->flags |= REQ_F_ASYNC_DATA;
			req->async_data = hdr;
			return hdr;
		}
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!kmsg->free_iov) {
		size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
		async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr_storage,
			       unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(ITER_SOURCE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished,
				  unsigned issue_flags)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
			       req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
			io_recv_prep_retry(req);
			return false;
		}
		/* Otherwise stop multishot but use the current result. */
	}

	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	if (!io_check_multishot(req, issue_flags))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

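	/*
	 * io_recv_finish() returned true above: the completion result has
	 * been recorded via io_req_set_res() and ret now holds an IOU_*
	 * code (IOU_OK or IOU_STOP_MULTISHOT) for the io_uring core.
	 */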
	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(ITER_DEST, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			io_notif_set_extended(notif);
			io_notif_to_data(notif)->zc_report = true;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		io_notif_set_extended(zc->notif);
		ret = import_single_range(ITER_SOURCE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	io_notif_set_extended(sr->notif);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;
retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
		       req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
		goto retry;

	return -ECANCELED;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = false;
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct socket *socket;

		ret = -ENOTSOCK;
		socket = sock_from_file(req->file);
		if (socket)
			ret = sock_error(socket->sk);
		goto out;
	}

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else {
			if (req_has_async_data(req))
				return -EAGAIN;
			if (io_alloc_async_data(req)) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(req->async_data, &__io, sizeof(__io));
		}
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif