// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"

struct io_timeout {
	struct file *file;
	u32 off;
	u32 target_seq;
	struct list_head list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb *head;
	/* for linked completions */
	struct io_kiocb *prev;
};

struct io_timeout_rem {
	struct file *file;
	u64 addr;

	/* timeout update */
	struct timespec64 ts;
	u32 flags;
	bool ltimeout;
};

static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req);

	return !timeout->off;
}

static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}

static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			   atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_tw_post_queue(req, status, 0);
		return true;
	}
	return false;
}

__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
	__must_hold(&ctx->completion_lock)
{
	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
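		 *
		 * A concrete example of the wrap case: with
		 * cq_last_tm_flush == 0xfffffff0, target_seq == 0xfffffffc
		 * and seq wrapped around to 8, events_needed == 12 and
		 * events_got == 24 in u32 arithmetic, so the timeout fires,
		 * even though comparing target_seq and seq directly would
		 * wrongly suggest the target hasn't been reached yet.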
		 */
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}

static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *nxt, *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	req->link = NULL;
	while (link) {
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;

		nxt = link->link;
		link->link = NULL;

		trace_io_uring_fail_link(req, link);

		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		io_req_set_res(link, res, 0);
		__io_req_complete_post(link);
		link = nxt;
	}
}

static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}

bool io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;
	bool posted = false;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_tw_post_queue(link, -ECANCELED, 0);
			posted = true;
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link) {
			posted = true;
			io_req_tw_post_queue(link, -ECANCELED, 0);
		}
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK))) {
		posted |= (req->link != NULL);
		io_fail_links(req);
	}
	return posted;
}

struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}

static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		   atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

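	/*
	 * Find the first pending timeout matching the cancel request: match
	 * on user_data unless IORING_ASYNC_CANCEL_ANY allows any entry, and
	 * for ALL/ANY cancellations skip entries already visited in this
	 * pass, as tracked by work.cancel_seq.
	 */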
	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
		    cd->data != tmp->cqe.user_data)
			continue;
		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
			if (cd->seq == tmp->work.cancel_seq)
				continue;
			tmp->work.cancel_seq = cd->seq;
		}
		req = tmp;
		break;
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req);
	list_del_init(&timeout->list);
	return req;
}

int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}

static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx = req->ctx,
				.data = prev->cqe.user_data,
			};

			ret = io_try_cancel(req, &cd, issue_flags);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_complete_post(req);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_complete_post(req);
	}
}

static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty; that will only happen if we
	 * race with the completion of the linked work.
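	 *
	 * If there is a linked request left to cancel, take a reference on
	 * it so it can't be freed before the cancellation runs from task
	 * work. If its refcount already hit zero, the request is being
	 * completed and there is nothing left to cancel.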
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}

static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout;
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout = io_kiocb_to_cmd(req);
	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}

static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}

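/*
 * Note the locking asymmetry in io_timeout_remove() below: cancelling goes
 * through io_timeout_cancel(), which must be called under ->completion_lock,
 * while updating only re-arms the timer under ->timeout_lock.
 */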
/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}

int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}

int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events need to occur for this timeout
	 * event to be satisfied. If it isn't set, then this is a pure
	 * timeout request and the sequence isn't used.
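	 *
	 * For example, off == 4 arms a timeout that completes successfully
	 * once four other, non-timeout completions have been posted, or
	 * fails with -ETIME if the timer expires first; with off == 0 only
	 * timer expiry can complete it.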
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise it would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}

void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to set up the timer.
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
			      data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}

static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&head->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/* Returns true if we found and killed one or more timeouts */
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	spin_lock(&ctx->completion_lock);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	io_commit_cqring(ctx);
	spin_unlock(&ctx->completion_lock);
	if (canceled != 0)
		io_cqring_ev_posted(ctx);
	return canceled != 0;
}