// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"

struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};

static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

	return !timeout->off;
}

static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}

static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_tw_post_queue(req, status, 0);
		return true;
	}
	return false;
}

__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
	__must_hold(&ctx->completion_lock)
{
	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}

static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
{
	io_tw_lock(link->ctx, locked);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, locked);
		link = nxt;
	}
}

static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}

static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}

bool io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;
	bool posted = false;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_tw_post_queue(link, -ECANCELED, 0);
			posted = true;
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link) {
			posted = true;
			io_req_tw_post_queue(link, -ECANCELED, 0);
		}
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK))) {
		posted |= (req->link != NULL);
		io_fail_links(req);
	}
	return posted;
}

struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}

static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
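	/*
	 * Note: the hrtimer callback runs in hard-irq context, so the final
	 * completion is punted to task work rather than posted here.
	 */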
	req->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
		    cd->data != tmp->cqe.user_data)
			continue;
		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
			if (cd->seq == tmp->work.cancel_seq)
				continue;
			tmp->work.cancel_seq = cd->seq;
		}
		req = tmp;
		break;
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}

int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}

static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_complete_post(req);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_complete_post(req);
	}
}

static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty, that will only happen if we
	 * race with the completion of the linked work.
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}

static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}

static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ?
			HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
}

/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}

int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}

int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events that need to occur for this
	 * timeout event to be satisfied. If it isn't set, then this is
	 * a pure timeout request, sequence isn't used.
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}

void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to setup the timer
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}

static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/* Returns true if we found and killed one or more timeouts */
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	io_cq_lock(ctx);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	io_cq_unlock_post(ctx);
	return canceled != 0;
}