// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
/* Per-request command data for IORING_OP_SHUTDOWN. */
struct io_shutdown {
	struct file *file;
	int how;	/* SHUT_RD/SHUT_WR/SHUT_RDWR, taken from sqe->len */
};

/* Per-request command data for IORING_OP_ACCEPT. */
struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;		/* fixed-file slot for direct accept, if set */
	unsigned long nofile;	/* RLIMIT_NOFILE snapshot for fd allocation */
};

/* Per-request command data for IORING_OP_SOCKET. */
struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;		/* fixed-file slot for direct socket, if set */
	unsigned long nofile;	/* RLIMIT_NOFILE snapshot for fd allocation */
};

/* Per-request command data for IORING_OP_CONNECT. */
struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;	/* connect returned -EINPROGRESS earlier */
	bool seen_econnaborted;	/* saw ECONNABORTED once; retry before failing */
};

/*
 * Shared command data for all send/recv variants (SEND, SENDMSG, RECV,
 * RECVMSG, SEND_ZC, SENDMSG_ZC). Which fields are live depends on the
 * opcode, as noted below.
 */
struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;	/* bytes already transferred across partial retries */
	unsigned msg_flags;
	unsigned nr_multishot_loops;	/* inline retries done for this multishot recv */
	u16 flags;		/* IORING_RECVSEND_* / IORING_RECV_* flags from sqe->ioprio */
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	u16 buf_group;		/* saved buffer group for multishot retries */
	void __user *addr;
	void __user *msg_control;	/* saved, as sys_sendmsg() clobbers it */
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

static inline bool io_check_multishot(struct io_kiocb *req,
				      unsigned int issue_flags)
{
	/*
	 * When ->locked_cq is set we only allow to post CQEs from the original
	 * task context.
	 * Usual request completions will be handled in other
	 * generic paths but multipoll may decide to post extra cqes.
	 */
	return !(issue_flags & IO_URING_F_IOWQ) ||
	       !(issue_flags & IO_URING_F_MULTISHOT) ||
	       !req->ctx->task_complete;
}

/* Prep for IORING_OP_SHUTDOWN: only sqe->len ('how') is meaningful. */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	/* shutdown can block; always punt to io-wq */
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

/* Issue IORING_OP_SHUTDOWN; always runs from io-wq (forced async at prep). */
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/*
 * Whether a short transfer should be retried to satisfy MSG_WAITALL.
 * Only makes sense for stream-like sockets where partial progress can
 * be continued.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

/*
 * Return the request's async msghdr to the per-ring cache. Only legal
 * when the ring lock is held (!IO_URING_F_UNLOCKED).
 */
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

/*
 * Allocate async msghdr storage for this request, preferring the ring's
 * cache when the ring lock is held. Returns NULL on allocation failure.
 */
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->netmsg_cache);
		if (entry) {
			hdr = container_of(entry, struct io_async_msghdr, cache);
			hdr->free_iov = NULL;
			req->flags |= REQ_F_ASYNC_DATA;
			req->async_data = hdr;
			return hdr;
		}
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

/*
 * Copy the on-stack msghdr state into request-owned async storage so the
 * request can be retried later. Returns -EAGAIN on success (caller arms a
 * retry), or -ENOMEM if the async copy could not be allocated.
 */
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	/* re-point msg_name at the copy's own addr storage */
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;

	if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
		return -EAGAIN;

	/* if were using fast_iov, set it to the new one */
	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

/*
 * Check that the multishot recvmsg header (recvmsg_out + name + control)
 * fits in an int without overflow.
 */
static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

#ifdef CONFIG_COMPAT
/*
 * Compat (32-bit userspace) variant of msghdr import. With provided
 * buffers (REQ_F_BUFFER_SELECT), at most one iovec is allowed and only
 * its length is consumed; otherwise the full iovec array is imported.
 */
static int __io_compat_msg_copy_hdr(struct io_kiocb *req,
				    struct io_async_msghdr *iomsg,
				    struct sockaddr __user **addr, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		iomsg->free_iov = NULL;
		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}

		return 0;
	}

	iomsg->free_iov = iomsg->fast_iov;
	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg.msg_iovlen,
			     UIO_FASTIOV, &iomsg->free_iov,
			     &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return 0;
}
#endif

/*
 * Native msghdr import. Mirrors the compat variant: with provided buffers
 * at most one iovec is allowed; otherwise import the full iovec array.
 */
static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     struct sockaddr __user **addr, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if
			   (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov,
					   sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}

		return 0;
	}

	iomsg->free_iov = iomsg->fast_iov;
	ret = __import_iovec(ddir, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
			     &iomsg->free_iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return 0;
}

/*
 * Import a user msghdr into kernel async state, dispatching to the
 * compat or native variant based on the ring's compat flag.
 */
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct sockaddr __user **addr, int ddir)
{
	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_msg_copy_hdr(req, iomsg, addr, ddir);
#endif

	return __io_msg_copy_hdr(req, iomsg, addr, ddir);
}

/* Import a sendmsg msghdr and stash msg_control for possible retries. */
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, NULL, ITER_SOURCE);
	if (ret)
		return ret;

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

/*
 * ->prep_async for IORING_OP_SEND: copy the (optional) destination
 * address into kernel memory before the request goes async.
 */
int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

/*
 * Preserve an on-stack sockaddr into async storage so the request can
 * be retried. Returns -EAGAIN on success, -ENOMEM on allocation failure.
 */
static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr_storage,
			       unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

/* ->prep_async for IORING_OP_SENDMSG: import the msghdr up front. */
int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

/* Free an iovec allocated during msghdr import (NULL-safe via kfree). */
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

/* Prep for IORING_OP_SEND and IORING_OP_SENDMSG. */
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		/* SEND may carry an optional destination address */
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

/* Issue IORING_OP_SENDMSG. */
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
		/* restore msg_control saved at import time */
		kmsg->msg.msg_control_user = sr->msg_control;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* control data was consumed; don't resend it */
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/* Issue IORING_OP_SEND (single-buffer send, optional destination addr). */
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct
			       io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			/* advance past the bytes already sent, then retry */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/* Import a recvmsg msghdr; also captures the user sockaddr pointer. */
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	return io_msg_copy_hdr(req, iomsg, &iomsg->uaddr, ITER_DEST);
}

/* ->prep_async for IORING_OP_RECVMSG: import the msghdr up front. */
int io_recvmsg_prep_async(struct io_kiocb *req)
{
	struct io_async_msghdr *iomsg;
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	iomsg = req->async_data;
	ret = io_recvmsg_copy_hdr(req, iomsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

/* Prep for IORING_OP_RECV and IORING_OP_RECVMSG, including multishot. */
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		/* multishot requires provided buffers ... */
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		/* ... is incompatible with MSG_WAITALL ... */
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		/* ... and RECV must not carry an explicit length */
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	sr->nr_multishot_loops = 0;
	return 0;
}

/* Reset per-shot state before re-arming a multishot receive. */
static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct msghdr *msg, bool mshot_finished,
				  unsigned issue_flags)
{
	unsigned int cflags;

	cflags = io_put_kbuf(req, issue_flags);
	/* msg_inq == -1 means "unknown"; only flag a definite non-empty */
	if (msg->msg_inq && msg->msg_inq != -1)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (mshot_finished)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
				*ret, cflags | IORING_CQE_F_MORE)) {
		struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_recv_prep_retry(req);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}
	/* Otherwise stop multishot but use the current result.
	 */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

/*
 * Carve a provided buffer into the multishot recvmsg layout:
 * [io_uring_recvmsg_out][name][control][payload]. Adjusts *buf/*len to
 * point at the payload area. -EFAULT if the buffer can't fit the header.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

/* Kernel-side staging for the multishot recvmsg header + source address. */
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

/*
 * One shot of a multishot recvmsg: receive into the prepared buffer and
 * copy the io_uring_recvmsg_out header (plus truncated name) to userspace.
 * Returns total bytes placed in the user buffer, or a negative error.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 * 1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

/* Issue IORING_OP_RECVMSG, including the multishot variant. */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	/* can't post CQEs from this context; punt back for a retry */
	if (!io_check_multishot(req, issue_flags))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
		goto retry_multishot;

	if (mshot_finished) {
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		io_netmsg_recycle(req, issue_flags);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

/* Issue IORING_OP_RECV (single-buffer receive, including multishot). */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags &
	      IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	/* can't post CQEs from this context; punt back for a retry */
	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = len;
	}

	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_inq = -1;
	msg.msg_flags = 0;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* advance past the bytes already received, then retry */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

/* Cleanup for zerocopy sends: free the iovec and flush a pending notif. */
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

/* Prep for IORING_OP_SEND_ZC and IORING_OP_SENDMSG_ZC. */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	/* allocate the notification request that carries the F_NOTIF CQE */
	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			io_notif_set_extended(notif);
			io_notif_to_data(notif)->zc_report = true;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		/* fixed buffers are not supported for SENDMSG_ZC */
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

/* sg_from_iter callback for userspace (iovec/ubuf) zerocopy sends. */
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

/*
 * sg_from_iter callback for fixed-buffer (bvec) zerocopy sends: map the
 * pinned pages straight into skb frags without taking page references.
 */
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	/* commit the iterator advance back into 'from' */
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
1125 skb->len += copied; 1126 skb->truesize += truesize; 1127 1128 if (sk && sk->sk_type == SOCK_STREAM) { 1129 sk_wmem_queued_add(sk, truesize); 1130 if (!skb_zcopy_pure(skb)) 1131 sk_mem_charge(sk, truesize); 1132 } else { 1133 refcount_add(truesize, &skb->sk->sk_wmem_alloc); 1134 } 1135 return ret; 1136 } 1137 1138 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 1139 { 1140 struct sockaddr_storage __address; 1141 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1142 struct msghdr msg; 1143 struct socket *sock; 1144 unsigned msg_flags; 1145 int ret, min_ret = 0; 1146 1147 sock = sock_from_file(req->file); 1148 if (unlikely(!sock)) 1149 return -ENOTSOCK; 1150 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1151 return -EOPNOTSUPP; 1152 1153 msg.msg_name = NULL; 1154 msg.msg_control = NULL; 1155 msg.msg_controllen = 0; 1156 msg.msg_namelen = 0; 1157 1158 if (zc->addr) { 1159 if (req_has_async_data(req)) { 1160 struct io_async_msghdr *io = req->async_data; 1161 1162 msg.msg_name = &io->addr; 1163 } else { 1164 ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); 1165 if (unlikely(ret < 0)) 1166 return ret; 1167 msg.msg_name = (struct sockaddr *)&__address; 1168 } 1169 msg.msg_namelen = zc->addr_len; 1170 } 1171 1172 if (!(req->flags & REQ_F_POLLED) && 1173 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1174 return io_setup_async_addr(req, &__address, issue_flags); 1175 1176 if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 1177 ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu, 1178 (u64)(uintptr_t)zc->buf, zc->len); 1179 if (unlikely(ret)) 1180 return ret; 1181 msg.sg_from_iter = io_sg_from_iter; 1182 } else { 1183 io_notif_set_extended(zc->notif); 1184 ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter); 1185 if (unlikely(ret)) 1186 return ret; 1187 ret = io_notif_account_mem(zc->notif, zc->len); 1188 if (unlikely(ret)) 1189 return ret; 1190 msg.sg_from_iter = io_sg_from_iter_iovec; 1191 } 1192 1193 msg_flags = 
zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	/* MSG_WAITALL: only a full-length send counts as success */
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			/* partial send: remember progress, retry the remainder */
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	/* fold bytes sent on earlier attempts into the final result */
	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	/* CQE_F_MORE: the notif posts a second CQE when the buffer is released */
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

/*
 * IORING_OP_SENDMSG_ZC: zerocopy sendmsg taking a full user msghdr.
 */
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	io_notif_set_extended(sr->notif);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	/* reuse a previously copied msghdr, or copy it from userspace now */
	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags &
REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	/* MSG_WAITALL: only a full-length send counts as success */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			/* partial send: record progress and retry */
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	/* fold bytes sent on earlier attempts into the final result */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

/*
 * Failure hook shared by send/recv requests: report partial progress in
 * the CQE, and keep CQE_F_MORE set for zc sends whose notification CQE
 * is still outstanding (REQ_F_NEED_CLEANUP still set).
 */
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	/* sqe->ioprio carries io_uring-private accept flags */
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		/* fixed-file installs bypass the fd table: CLOEXEC is meaningless */
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		/* multishot with fixed files must auto-allocate slots */
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	/* translate SOCK_NONBLOCK where it differs from O_NONBLOCK */
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

/*
 * IORING_OP_ACCEPT: accept one connection (into a normal fd or a fixed
 * file slot), or, in multishot mode, keep accepting and posting CQEs
 * until the socket would block.
 */
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ?
O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

	if (!io_check_multishot(req, issue_flags))
		return -EAGAIN;
retry:
	/* reserve an fd up front so a successful accept can't fail to install */
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	/* single-shot accept: complete with the result directly */
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	/* multishot: post a CQE_F_MORE completion and try to accept again */
	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
				ret, IORING_CQE_F_MORE))
		goto retry;

	/* couldn't post the aux CQE: end the multishot with a final CQE */
	io_req_set_res(req, ret, 0);
	return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	/* type may also carry SOCK_CLOEXEC/SOCK_NONBLOCK, as with socket(2) */
	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	/* fixed-file installs bypass the fd table: CLOEXEC is meaningless */
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return
-EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

/*
 * IORING_OP_SOCKET: create a socket and install it as a normal fd or
 * into a fixed file slot.
 */
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	/* reserve an fd up front so success can't fail at install time */
	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/* async prep: copy the user sockaddr into the request's async context */
int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	/* retry-state flags, set during issue */
	conn->in_progress = conn->seen_econnaborted = false;
	return 0;
}

/*
 * IORING_OP_CONNECT: connect the request's socket to the user-supplied
 * address, going async on nonblocking EINPROGRESS/ECONNABORTED.
 */
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool
force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	/* reuse the kernel copy of the address if this is a retried issue */
	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			/* only retry a single ECONNABORTED */
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		if (req_has_async_data(req))
			return -EAGAIN;
		/* stash the copied address so the retry can reuse it */
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/* release a cached async msghdr entry back to the allocator */
void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif