xref: /openbmc/linux/io_uring/timeout.c (revision ba3cdb6f)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/io_uring.h>
6 
7 #include <trace/events/io_uring.h>
8 
9 #include <uapi/linux/io_uring.h>
10 
11 #include "io_uring.h"
12 #include "refs.h"
13 #include "cancel.h"
14 #include "timeout.h"
15 
16 struct io_timeout {
17 	struct file			*file;
18 	u32				off;
19 	u32				target_seq;
20 	struct list_head		list;
21 	/* head of the link, used by linked timeouts only */
22 	struct io_kiocb			*head;
23 	/* for linked completions */
24 	struct io_kiocb			*prev;
25 };
26 
27 struct io_timeout_rem {
28 	struct file			*file;
29 	u64				addr;
30 
31 	/* timeout update */
32 	struct timespec64		ts;
33 	u32				flags;
34 	bool				ltimeout;
35 };
36 
37 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
38 {
39 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
40 
41 	return !timeout->off;
42 }
43 
/*
 * Drop one reference on @req. When that was the last one, queue whatever
 * follows it and free the request.
 */
static inline void io_put_req(struct io_kiocb *req)
{
	if (!req_ref_put_and_test(req))
		return;

	io_queue_next(req);
	io_free_req(req);
}
51 
52 static bool io_kill_timeout(struct io_kiocb *req, int status)
53 	__must_hold(&req->ctx->completion_lock)
54 	__must_hold(&req->ctx->timeout_lock)
55 {
56 	struct io_timeout_data *io = req->async_data;
57 
58 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
59 		struct io_timeout *timeout = io_kiocb_to_cmd(req);
60 
61 		if (status)
62 			req_set_fail(req);
63 		atomic_set(&req->ctx->cq_timeouts,
64 			atomic_read(&req->ctx->cq_timeouts) + 1);
65 		list_del_init(&timeout->list);
66 		io_req_tw_post_queue(req, status, 0);
67 		return true;
68 	}
69 	return false;
70 }
71 
72 __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
73 	__must_hold(&ctx->completion_lock)
74 {
75 	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
76 	struct io_timeout *timeout, *tmp;
77 
78 	spin_lock_irq(&ctx->timeout_lock);
79 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
80 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
81 		u32 events_needed, events_got;
82 
83 		if (io_is_timeout_noseq(req))
84 			break;
85 
86 		/*
87 		 * Since seq can easily wrap around over time, subtract
88 		 * the last seq at which timeouts were flushed before comparing.
89 		 * Assuming not more than 2^31-1 events have happened since,
90 		 * these subtractions won't have wrapped, so we can check if
91 		 * target is in [last_seq, current_seq] by comparing the two.
92 		 */
93 		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
94 		events_got = seq - ctx->cq_last_tm_flush;
95 		if (events_got < events_needed)
96 			break;
97 
98 		io_kill_timeout(req, 0);
99 	}
100 	ctx->cq_last_tm_flush = seq;
101 	spin_unlock_irq(&ctx->timeout_lock);
102 }
103 
104 static void io_fail_links(struct io_kiocb *req)
105 	__must_hold(&req->ctx->completion_lock)
106 {
107 	struct io_kiocb *nxt, *link = req->link;
108 	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
109 
110 	req->link = NULL;
111 	while (link) {
112 		long res = -ECANCELED;
113 
114 		if (link->flags & REQ_F_FAIL)
115 			res = link->cqe.res;
116 
117 		nxt = link->link;
118 		link->link = NULL;
119 
120 		trace_io_uring_fail_link(req, link);
121 
122 		if (ignore_cqes)
123 			link->flags |= REQ_F_CQE_SKIP;
124 		else
125 			link->flags &= ~REQ_F_CQE_SKIP;
126 		io_req_set_res(link, res, 0);
127 		__io_req_complete_post(link);
128 		link = nxt;
129 	}
130 }
131 
132 static inline void io_remove_next_linked(struct io_kiocb *req)
133 {
134 	struct io_kiocb *nxt = req->link;
135 
136 	req->link = nxt->link;
137 	nxt->link = NULL;
138 }
139 
140 bool io_disarm_next(struct io_kiocb *req)
141 	__must_hold(&req->ctx->completion_lock)
142 {
143 	struct io_kiocb *link = NULL;
144 	bool posted = false;
145 
146 	if (req->flags & REQ_F_ARM_LTIMEOUT) {
147 		link = req->link;
148 		req->flags &= ~REQ_F_ARM_LTIMEOUT;
149 		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
150 			io_remove_next_linked(req);
151 			io_req_tw_post_queue(link, -ECANCELED, 0);
152 			posted = true;
153 		}
154 	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
155 		struct io_ring_ctx *ctx = req->ctx;
156 
157 		spin_lock_irq(&ctx->timeout_lock);
158 		link = io_disarm_linked_timeout(req);
159 		spin_unlock_irq(&ctx->timeout_lock);
160 		if (link) {
161 			posted = true;
162 			io_req_tw_post_queue(link, -ECANCELED, 0);
163 		}
164 	}
165 	if (unlikely((req->flags & REQ_F_FAIL) &&
166 		     !(req->flags & REQ_F_HARDLINK))) {
167 		posted |= (req->link != NULL);
168 		io_fail_links(req);
169 	}
170 	return posted;
171 }
172 
173 struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
174 					    struct io_kiocb *link)
175 	__must_hold(&req->ctx->completion_lock)
176 	__must_hold(&req->ctx->timeout_lock)
177 {
178 	struct io_timeout_data *io = link->async_data;
179 	struct io_timeout *timeout = io_kiocb_to_cmd(link);
180 
181 	io_remove_next_linked(req);
182 	timeout->head = NULL;
183 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
184 		list_del(&timeout->list);
185 		return link;
186 	}
187 
188 	return NULL;
189 }
190 
191 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
192 {
193 	struct io_timeout_data *data = container_of(timer,
194 						struct io_timeout_data, timer);
195 	struct io_kiocb *req = data->req;
196 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
197 	struct io_ring_ctx *ctx = req->ctx;
198 	unsigned long flags;
199 
200 	spin_lock_irqsave(&ctx->timeout_lock, flags);
201 	list_del_init(&timeout->list);
202 	atomic_set(&req->ctx->cq_timeouts,
203 		atomic_read(&req->ctx->cq_timeouts) + 1);
204 	spin_unlock_irqrestore(&ctx->timeout_lock, flags);
205 
206 	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
207 		req_set_fail(req);
208 
209 	io_req_set_res(req, -ETIME, 0);
210 	req->io_task_work.func = io_req_task_complete;
211 	io_req_task_work_add(req);
212 	return HRTIMER_NORESTART;
213 }
214 
215 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
216 					   struct io_cancel_data *cd)
217 	__must_hold(&ctx->timeout_lock)
218 {
219 	struct io_timeout *timeout;
220 	struct io_timeout_data *io;
221 	struct io_kiocb *req = NULL;
222 
223 	list_for_each_entry(timeout, &ctx->timeout_list, list) {
224 		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
225 
226 		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
227 		    cd->data != tmp->cqe.user_data)
228 			continue;
229 		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
230 			if (cd->seq == tmp->work.cancel_seq)
231 				continue;
232 			tmp->work.cancel_seq = cd->seq;
233 		}
234 		req = tmp;
235 		break;
236 	}
237 	if (!req)
238 		return ERR_PTR(-ENOENT);
239 
240 	io = req->async_data;
241 	if (hrtimer_try_to_cancel(&io->timer) == -1)
242 		return ERR_PTR(-EALREADY);
243 	timeout = io_kiocb_to_cmd(req);
244 	list_del_init(&timeout->list);
245 	return req;
246 }
247 
248 int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
249 	__must_hold(&ctx->completion_lock)
250 {
251 	struct io_kiocb *req;
252 
253 	spin_lock_irq(&ctx->timeout_lock);
254 	req = io_timeout_extract(ctx, cd);
255 	spin_unlock_irq(&ctx->timeout_lock);
256 
257 	if (IS_ERR(req))
258 		return PTR_ERR(req);
259 	io_req_task_queue_fail(req, -ECANCELED);
260 	return 0;
261 }
262 
263 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
264 {
265 	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
266 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
267 	struct io_kiocb *prev = timeout->prev;
268 	int ret = -ENOENT;
269 
270 	if (prev) {
271 		if (!(req->task->flags & PF_EXITING)) {
272 			struct io_cancel_data cd = {
273 				.ctx		= req->ctx,
274 				.data		= prev->cqe.user_data,
275 			};
276 
277 			ret = io_try_cancel(req, &cd, issue_flags);
278 		}
279 		io_req_set_res(req, ret ?: -ETIME, 0);
280 		io_req_complete_post(req);
281 		io_put_req(prev);
282 	} else {
283 		io_req_set_res(req, -ETIME, 0);
284 		io_req_complete_post(req);
285 	}
286 }
287 
288 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
289 {
290 	struct io_timeout_data *data = container_of(timer,
291 						struct io_timeout_data, timer);
292 	struct io_kiocb *prev, *req = data->req;
293 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
294 	struct io_ring_ctx *ctx = req->ctx;
295 	unsigned long flags;
296 
297 	spin_lock_irqsave(&ctx->timeout_lock, flags);
298 	prev = timeout->head;
299 	timeout->head = NULL;
300 
301 	/*
302 	 * We don't expect the list to be empty, that will only happen if we
303 	 * race with the completion of the linked work.
304 	 */
305 	if (prev) {
306 		io_remove_next_linked(prev);
307 		if (!req_ref_inc_not_zero(prev))
308 			prev = NULL;
309 	}
310 	list_del(&timeout->list);
311 	timeout->prev = prev;
312 	spin_unlock_irqrestore(&ctx->timeout_lock, flags);
313 
314 	req->io_task_work.func = io_req_task_link_timeout;
315 	io_req_task_work_add(req);
316 	return HRTIMER_NORESTART;
317 }
318 
319 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
320 {
321 	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
322 	case IORING_TIMEOUT_BOOTTIME:
323 		return CLOCK_BOOTTIME;
324 	case IORING_TIMEOUT_REALTIME:
325 		return CLOCK_REALTIME;
326 	default:
327 		/* can't happen, vetted at prep time */
328 		WARN_ON_ONCE(1);
329 		fallthrough;
330 	case 0:
331 		return CLOCK_MONOTONIC;
332 	}
333 }
334 
335 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
336 				    struct timespec64 *ts, enum hrtimer_mode mode)
337 	__must_hold(&ctx->timeout_lock)
338 {
339 	struct io_timeout_data *io;
340 	struct io_timeout *timeout;
341 	struct io_kiocb *req = NULL;
342 
343 	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
344 		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
345 
346 		if (user_data == tmp->cqe.user_data) {
347 			req = tmp;
348 			break;
349 		}
350 	}
351 	if (!req)
352 		return -ENOENT;
353 
354 	io = req->async_data;
355 	if (hrtimer_try_to_cancel(&io->timer) == -1)
356 		return -EALREADY;
357 	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
358 	io->timer.function = io_link_timeout_fn;
359 	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
360 	return 0;
361 }
362 
363 static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
364 			     struct timespec64 *ts, enum hrtimer_mode mode)
365 	__must_hold(&ctx->timeout_lock)
366 {
367 	struct io_cancel_data cd = { .data = user_data, };
368 	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
369 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
370 	struct io_timeout_data *data;
371 
372 	if (IS_ERR(req))
373 		return PTR_ERR(req);
374 
375 	timeout->off = 0; /* noseq */
376 	data = req->async_data;
377 	list_add_tail(&timeout->list, &ctx->timeout_list);
378 	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
379 	data->timer.function = io_timeout_fn;
380 	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
381 	return 0;
382 }
383 
384 int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
385 {
386 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);
387 
388 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
389 		return -EINVAL;
390 	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
391 		return -EINVAL;
392 
393 	tr->ltimeout = false;
394 	tr->addr = READ_ONCE(sqe->addr);
395 	tr->flags = READ_ONCE(sqe->timeout_flags);
396 	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
397 		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
398 			return -EINVAL;
399 		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
400 			tr->ltimeout = true;
401 		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
402 			return -EINVAL;
403 		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
404 			return -EFAULT;
405 		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
406 			return -EINVAL;
407 	} else if (tr->flags) {
408 		/* timeout removal doesn't support flags */
409 		return -EINVAL;
410 	}
411 
412 	return 0;
413 }
414 
415 static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
416 {
417 	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
418 					    : HRTIMER_MODE_REL;
419 }
420 
421 /*
422  * Remove or update an existing timeout command
423  */
424 int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
425 {
426 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);
427 	struct io_ring_ctx *ctx = req->ctx;
428 	int ret;
429 
430 	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
431 		struct io_cancel_data cd = { .data = tr->addr, };
432 
433 		spin_lock(&ctx->completion_lock);
434 		ret = io_timeout_cancel(ctx, &cd);
435 		spin_unlock(&ctx->completion_lock);
436 	} else {
437 		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
438 
439 		spin_lock_irq(&ctx->timeout_lock);
440 		if (tr->ltimeout)
441 			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
442 		else
443 			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
444 		spin_unlock_irq(&ctx->timeout_lock);
445 	}
446 
447 	if (ret < 0)
448 		req_set_fail(req);
449 	io_req_set_res(req, ret, 0);
450 	return IOU_OK;
451 }
452 
453 static int __io_timeout_prep(struct io_kiocb *req,
454 			     const struct io_uring_sqe *sqe,
455 			     bool is_timeout_link)
456 {
457 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
458 	struct io_timeout_data *data;
459 	unsigned flags;
460 	u32 off = READ_ONCE(sqe->off);
461 
462 	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
463 		return -EINVAL;
464 	if (off && is_timeout_link)
465 		return -EINVAL;
466 	flags = READ_ONCE(sqe->timeout_flags);
467 	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
468 		      IORING_TIMEOUT_ETIME_SUCCESS))
469 		return -EINVAL;
470 	/* more than one clock specified is invalid, obviously */
471 	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
472 		return -EINVAL;
473 
474 	INIT_LIST_HEAD(&timeout->list);
475 	timeout->off = off;
476 	if (unlikely(off && !req->ctx->off_timeout_used))
477 		req->ctx->off_timeout_used = true;
478 
479 	if (WARN_ON_ONCE(req_has_async_data(req)))
480 		return -EFAULT;
481 	if (io_alloc_async_data(req))
482 		return -ENOMEM;
483 
484 	data = req->async_data;
485 	data->req = req;
486 	data->flags = flags;
487 
488 	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
489 		return -EFAULT;
490 
491 	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
492 		return -EINVAL;
493 
494 	INIT_LIST_HEAD(&timeout->list);
495 	data->mode = io_translate_timeout_mode(flags);
496 	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
497 
498 	if (is_timeout_link) {
499 		struct io_submit_link *link = &req->ctx->submit_state.link;
500 
501 		if (!link->head)
502 			return -EINVAL;
503 		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
504 			return -EINVAL;
505 		timeout->head = link->last;
506 		link->last->flags |= REQ_F_ARM_LTIMEOUT;
507 	}
508 	return 0;
509 }
510 
511 int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
512 {
513 	return __io_timeout_prep(req, sqe, false);
514 }
515 
516 int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
517 {
518 	return __io_timeout_prep(req, sqe, true);
519 }
520 
521 int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
522 {
523 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
524 	struct io_ring_ctx *ctx = req->ctx;
525 	struct io_timeout_data *data = req->async_data;
526 	struct list_head *entry;
527 	u32 tail, off = timeout->off;
528 
529 	spin_lock_irq(&ctx->timeout_lock);
530 
531 	/*
532 	 * sqe->off holds how many events that need to occur for this
533 	 * timeout event to be satisfied. If it isn't set, then this is
534 	 * a pure timeout request, sequence isn't used.
535 	 */
536 	if (io_is_timeout_noseq(req)) {
537 		entry = ctx->timeout_list.prev;
538 		goto add;
539 	}
540 
541 	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
542 	timeout->target_seq = tail + off;
543 
544 	/* Update the last seq here in case io_flush_timeouts() hasn't.
545 	 * This is safe because ->completion_lock is held, and submissions
546 	 * and completions are never mixed in the same ->completion_lock section.
547 	 */
548 	ctx->cq_last_tm_flush = tail;
549 
550 	/*
551 	 * Insertion sort, ensuring the first entry in the list is always
552 	 * the one we need first.
553 	 */
554 	list_for_each_prev(entry, &ctx->timeout_list) {
555 		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
556 		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);
557 
558 		if (io_is_timeout_noseq(nxt))
559 			continue;
560 		/* nxt.seq is behind @tail, otherwise would've been completed */
561 		if (off >= nextt->target_seq - tail)
562 			break;
563 	}
564 add:
565 	list_add(&timeout->list, entry);
566 	data->timer.function = io_timeout_fn;
567 	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
568 	spin_unlock_irq(&ctx->timeout_lock);
569 	return IOU_ISSUE_SKIP_COMPLETE;
570 }
571 
572 void io_queue_linked_timeout(struct io_kiocb *req)
573 {
574 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
575 	struct io_ring_ctx *ctx = req->ctx;
576 
577 	spin_lock_irq(&ctx->timeout_lock);
578 	/*
579 	 * If the back reference is NULL, then our linked request finished
580 	 * before we got a chance to setup the timer
581 	 */
582 	if (timeout->head) {
583 		struct io_timeout_data *data = req->async_data;
584 
585 		data->timer.function = io_link_timeout_fn;
586 		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
587 				data->mode);
588 		list_add_tail(&timeout->list, &ctx->ltimeout_list);
589 	}
590 	spin_unlock_irq(&ctx->timeout_lock);
591 	/* drop submission reference */
592 	io_put_req(req);
593 }
594 
595 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
596 			  bool cancel_all)
597 	__must_hold(&req->ctx->timeout_lock)
598 {
599 	struct io_kiocb *req;
600 
601 	if (task && head->task != task)
602 		return false;
603 	if (cancel_all)
604 		return true;
605 
606 	io_for_each_link(req, head) {
607 		if (req->flags & REQ_F_INFLIGHT)
608 			return true;
609 	}
610 	return false;
611 }
612 
613 /* Returns true if we found and killed one or more timeouts */
614 __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
615 			     bool cancel_all)
616 {
617 	struct io_timeout *timeout, *tmp;
618 	int canceled = 0;
619 
620 	spin_lock(&ctx->completion_lock);
621 	spin_lock_irq(&ctx->timeout_lock);
622 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
623 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
624 
625 		if (io_match_task(req, tsk, cancel_all) &&
626 		    io_kill_timeout(req, -ECANCELED))
627 			canceled++;
628 	}
629 	spin_unlock_irq(&ctx->timeout_lock);
630 	io_commit_cqring(ctx);
631 	spin_unlock(&ctx->completion_lock);
632 	if (canceled != 0)
633 		io_cqring_ev_posted(ctx);
634 	return canceled != 0;
635 }
636