1f3b44f92SJens Axboe // SPDX-License-Identifier: GPL-2.0
2f3b44f92SJens Axboe #include <linux/kernel.h>
3f3b44f92SJens Axboe #include <linux/errno.h>
4f3b44f92SJens Axboe #include <linux/fs.h>
5f3b44f92SJens Axboe #include <linux/file.h>
6f3b44f92SJens Axboe #include <linux/blk-mq.h>
7f3b44f92SJens Axboe #include <linux/mm.h>
8f3b44f92SJens Axboe #include <linux/slab.h>
9f3b44f92SJens Axboe #include <linux/fsnotify.h>
10f3b44f92SJens Axboe #include <linux/poll.h>
11f3b44f92SJens Axboe #include <linux/nospec.h>
12f3b44f92SJens Axboe #include <linux/compat.h>
13f3b44f92SJens Axboe #include <linux/io_uring.h>
14f3b44f92SJens Axboe
15f3b44f92SJens Axboe #include <uapi/linux/io_uring.h>
16f3b44f92SJens Axboe
17f3b44f92SJens Axboe #include "io_uring.h"
18f3b44f92SJens Axboe #include "opdef.h"
19f3b44f92SJens Axboe #include "kbuf.h"
20f3b44f92SJens Axboe #include "rsrc.h"
21f3b44f92SJens Axboe #include "rw.h"
22f3b44f92SJens Axboe
23f3b44f92SJens Axboe struct io_rw {
24f3b44f92SJens Axboe /* NOTE: kiocb has the file as the first member, so don't do it here */
25f3b44f92SJens Axboe struct kiocb kiocb;
26f3b44f92SJens Axboe u64 addr;
27f3b44f92SJens Axboe u32 len;
28f3b44f92SJens Axboe rwf_t flags;
29f3b44f92SJens Axboe };
30f3b44f92SJens Axboe
io_file_supports_nowait(struct io_kiocb * req)31f3b44f92SJens Axboe static inline bool io_file_supports_nowait(struct io_kiocb *req)
32f3b44f92SJens Axboe {
33f3b44f92SJens Axboe return req->flags & REQ_F_SUPPORT_NOWAIT;
34f3b44f92SJens Axboe }
35f3b44f92SJens Axboe
364ab9d465SDylan Yudaken #ifdef CONFIG_COMPAT
io_iov_compat_buffer_select_prep(struct io_rw * rw)374ab9d465SDylan Yudaken static int io_iov_compat_buffer_select_prep(struct io_rw *rw)
384ab9d465SDylan Yudaken {
394ab9d465SDylan Yudaken struct compat_iovec __user *uiov;
404ab9d465SDylan Yudaken compat_ssize_t clen;
414ab9d465SDylan Yudaken
424ab9d465SDylan Yudaken uiov = u64_to_user_ptr(rw->addr);
434ab9d465SDylan Yudaken if (!access_ok(uiov, sizeof(*uiov)))
444ab9d465SDylan Yudaken return -EFAULT;
454ab9d465SDylan Yudaken if (__get_user(clen, &uiov->iov_len))
464ab9d465SDylan Yudaken return -EFAULT;
474ab9d465SDylan Yudaken if (clen < 0)
484ab9d465SDylan Yudaken return -EINVAL;
494ab9d465SDylan Yudaken
504ab9d465SDylan Yudaken rw->len = clen;
514ab9d465SDylan Yudaken return 0;
524ab9d465SDylan Yudaken }
534ab9d465SDylan Yudaken #endif
544ab9d465SDylan Yudaken
io_iov_buffer_select_prep(struct io_kiocb * req)554ab9d465SDylan Yudaken static int io_iov_buffer_select_prep(struct io_kiocb *req)
564ab9d465SDylan Yudaken {
574ab9d465SDylan Yudaken struct iovec __user *uiov;
584ab9d465SDylan Yudaken struct iovec iov;
594ab9d465SDylan Yudaken struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
604ab9d465SDylan Yudaken
614ab9d465SDylan Yudaken if (rw->len != 1)
624ab9d465SDylan Yudaken return -EINVAL;
634ab9d465SDylan Yudaken
644ab9d465SDylan Yudaken #ifdef CONFIG_COMPAT
654ab9d465SDylan Yudaken if (req->ctx->compat)
664ab9d465SDylan Yudaken return io_iov_compat_buffer_select_prep(rw);
674ab9d465SDylan Yudaken #endif
684ab9d465SDylan Yudaken
694ab9d465SDylan Yudaken uiov = u64_to_user_ptr(rw->addr);
704ab9d465SDylan Yudaken if (copy_from_user(&iov, uiov, sizeof(*uiov)))
714ab9d465SDylan Yudaken return -EFAULT;
724ab9d465SDylan Yudaken rw->len = iov.iov_len;
734ab9d465SDylan Yudaken return 0;
744ab9d465SDylan Yudaken }
754ab9d465SDylan Yudaken
/*
 * Common prep for all read/write opcodes: pull offset, buffer address,
 * length, rw flags and ioprio out of the SQE, resolve the registered
 * buffer for the FIXED variants, and validate the single-iovec case for
 * buffer-selected readv.
 */
int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	unsigned ioprio;
	int ret;

	rw->kiocb.ki_pos = READ_ONCE(sqe->off);
	/* used for fixed read/write too - just read unconditionally */
	req->buf_index = READ_ONCE(sqe->buf_index);

	if (req->opcode == IORING_OP_READ_FIXED ||
	    req->opcode == IORING_OP_WRITE_FIXED) {
		struct io_ring_ctx *ctx = req->ctx;
		u16 index;

		if (unlikely(req->buf_index >= ctx->nr_user_bufs))
			return -EFAULT;
		/* clamp the index under speculation before the array load */
		index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
		req->imu = ctx->user_bufs[index];
		io_req_set_rsrc_node(req, ctx, 0);
	}

	ioprio = READ_ONCE(sqe->ioprio);
	if (ioprio) {
		ret = ioprio_check_cap(ioprio);
		if (ret)
			return ret;

		rw->kiocb.ki_ioprio = ioprio;
	} else {
		/* no explicit priority: inherit the submitter's */
		rw->kiocb.ki_ioprio = get_current_ioprio();
	}
	rw->kiocb.dio_complete = NULL;

	rw->addr = READ_ONCE(sqe->addr);
	rw->len = READ_ONCE(sqe->len);
	rw->flags = READ_ONCE(sqe->rw_flags);

	/* Have to do this validation here, as this is in io_read() rw->len might
	 * have changed due to buffer selection
	 */
	if (req->opcode == IORING_OP_READV && req->flags & REQ_F_BUFFER_SELECT) {
		ret = io_iov_buffer_select_prep(req);
		if (ret)
			return ret;
	}

	return 0;
}
125f3b44f92SJens Axboe
io_readv_writev_cleanup(struct io_kiocb * req)126f3b44f92SJens Axboe void io_readv_writev_cleanup(struct io_kiocb *req)
127f3b44f92SJens Axboe {
128f3b44f92SJens Axboe struct io_async_rw *io = req->async_data;
129f3b44f92SJens Axboe
130f3b44f92SJens Axboe kfree(io->free_iovec);
131f3b44f92SJens Axboe }
132f3b44f92SJens Axboe
/*
 * Hand a read/write result to the kiocb completion handler.
 * -EIOCBQUEUED means the IO is in flight and will complete via the
 * kiocb's callback later, so nothing is done here.
 */
static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
{
	switch (ret) {
	case -EIOCBQUEUED:
		break;
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/*
		 * We can't just restart the syscall, since previously
		 * submitted sqes may already be in progress. Just fail this
		 * IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		kiocb->ki_complete(kiocb, ret);
	}
}
153f3b44f92SJens Axboe
io_kiocb_update_pos(struct io_kiocb * req)154f3b44f92SJens Axboe static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
155f3b44f92SJens Axboe {
156f2ccb5aeSStefan Metzmacher struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
157f3b44f92SJens Axboe
158f3b44f92SJens Axboe if (rw->kiocb.ki_pos != -1)
159f3b44f92SJens Axboe return &rw->kiocb.ki_pos;
160f3b44f92SJens Axboe
161f3b44f92SJens Axboe if (!(req->file->f_mode & FMODE_STREAM)) {
162f3b44f92SJens Axboe req->flags |= REQ_F_CUR_POS;
163f3b44f92SJens Axboe rw->kiocb.ki_pos = req->file->f_pos;
164f3b44f92SJens Axboe return &rw->kiocb.ki_pos;
165f3b44f92SJens Axboe }
166f3b44f92SJens Axboe
167f3b44f92SJens Axboe rw->kiocb.ki_pos = 0;
168f3b44f92SJens Axboe return NULL;
169f3b44f92SJens Axboe }
170f3b44f92SJens Axboe
171f3b44f92SJens Axboe #ifdef CONFIG_BLOCK
/*
 * Prepare a request for resubmission. If async data already exists, roll
 * the iterator back to its saved pre-attempt state; otherwise try to set
 * up the async state now. Returns true if the request can be reissued.
 */
static bool io_resubmit_prep(struct io_kiocb *req)
{
	struct io_async_rw *io = req->async_data;

	if (!req_has_async_data(req))
		return !io_req_prep_async(req);
	iov_iter_restore(&io->s.iter, &io->s.iter_state);
	return true;
}
181f3b44f92SJens Axboe
/*
 * Decide whether an IO that came back short/-EAGAIN is safe to re-import
 * and reissue from the current context.
 */
static bool io_rw_should_reissue(struct io_kiocb *req)
{
	umode_t mode = file_inode(req->file)->i_mode;
	struct io_ring_ctx *ctx = req->ctx;

	/* only regular files and block devices are considered */
	if (!S_ISBLK(mode) && !S_ISREG(mode))
		return false;
	if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
	    !(ctx->flags & IORING_SETUP_IOPOLL)))
		return false;
	/*
	 * If ref is dying, we might be running poll reap from the exit work.
	 * Don't attempt to reissue from that path, just let it fail with
	 * -EAGAIN.
	 */
	if (percpu_ref_is_dying(&ctx->refs))
		return false;
	/*
	 * Play it safe and assume not safe to re-import and reissue if we're
	 * not in the original thread group (or in task context).
	 */
	if (!same_thread_group(req->task, current) || !in_task())
		return false;
	return true;
}
207f3b44f92SJens Axboe #else
/* Without CONFIG_BLOCK, failed IO is never reissued. */
static bool io_resubmit_prep(struct io_kiocb *req)
{
	return false;
}
static bool io_rw_should_reissue(struct io_kiocb *req)
{
	return false;
}
216f3b44f92SJens Axboe #endif
217f3b44f92SJens Axboe
io_req_end_write(struct io_kiocb * req)218a370167fSAmir Goldstein static void io_req_end_write(struct io_kiocb *req)
219f3b44f92SJens Axboe {
220f3b44f92SJens Axboe if (req->flags & REQ_F_ISREG) {
221e484fd73SAmir Goldstein struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
222f3b44f92SJens Axboe
223e484fd73SAmir Goldstein kiocb_end_write(&rw->kiocb);
224f3b44f92SJens Axboe }
225f3b44f92SJens Axboe }
226f3b44f92SJens Axboe
2272ec33a6cSJens Axboe /*
2282ec33a6cSJens Axboe * Trigger the notifications after having done some IO, and finish the write
2292ec33a6cSJens Axboe * accounting, if any.
2302ec33a6cSJens Axboe */
io_req_io_end(struct io_kiocb * req)2312ec33a6cSJens Axboe static void io_req_io_end(struct io_kiocb *req)
2322ec33a6cSJens Axboe {
2332ec33a6cSJens Axboe struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
2342ec33a6cSJens Axboe
2352ec33a6cSJens Axboe if (rw->kiocb.ki_flags & IOCB_WRITE) {
236a370167fSAmir Goldstein io_req_end_write(req);
2372ec33a6cSJens Axboe fsnotify_modify(req->file);
2382ec33a6cSJens Axboe } else {
2392ec33a6cSJens Axboe fsnotify_access(req->file);
2402ec33a6cSJens Axboe }
2412ec33a6cSJens Axboe }
2422ec33a6cSJens Axboe
/*
 * Common completion handling. Returns true if the request was flagged for
 * reissue and the caller must not complete it now. A mismatched result
 * that can't be reissued marks the request failed and records the result.
 */
static bool __io_complete_rw_common(struct io_kiocb *req, long res)
{
	if (unlikely(res != req->cqe.res)) {
		if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
		    io_rw_should_reissue(req)) {
			/*
			 * Reissue will start accounting again, finish the
			 * current cycle.
			 */
			io_req_io_end(req);
			req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
			return true;
		}
		req_set_fail(req);
		req->cqe.res = res;
	}
	return false;
}
261f3b44f92SJens Axboe
io_fixup_rw_res(struct io_kiocb * req,long res)26262bb0647SPavel Begunkov static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
2634d9cb92cSPavel Begunkov {
2644d9cb92cSPavel Begunkov struct io_async_rw *io = req->async_data;
2654d9cb92cSPavel Begunkov
2664d9cb92cSPavel Begunkov /* add previously done IO, if any */
2674d9cb92cSPavel Begunkov if (req_has_async_data(req) && io->bytes_done > 0) {
2684d9cb92cSPavel Begunkov if (res < 0)
2694d9cb92cSPavel Begunkov res = io->bytes_done;
2704d9cb92cSPavel Begunkov else
2714d9cb92cSPavel Begunkov res += io->bytes_done;
2724d9cb92cSPavel Begunkov }
2734d9cb92cSPavel Begunkov return res;
2744d9cb92cSPavel Begunkov }
2754d9cb92cSPavel Begunkov
/*
 * Task-work completion for a read/write. If the lower layer deferred its
 * completion (IOCB_DIO_CALLER_COMP), run dio_complete() now to get the
 * final result, then finish notifications/accounting, release any selected
 * buffer, and post the CQE.
 */
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct kiocb *kiocb = &rw->kiocb;

	if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
		long res = kiocb->dio_complete(rw->kiocb.private);

		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
	}

	io_req_io_end(req);

	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
		/* tell the kbuf code whether the ring lock is held here */
		unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;

		req->cqe.flags |= io_put_kbuf(req, issue_flags);
	}
	io_req_task_complete(req, ts);
}
296b000145eSJens Axboe
/*
 * kiocb completion callback for non-polled IO. Unless completion is
 * deferred via IOCB_DIO_CALLER_COMP, resolve the result here (possibly
 * flagging a reissue and returning early), then punt final completion to
 * task work.
 */
static void io_complete_rw(struct kiocb *kiocb, long res)
{
	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
	struct io_kiocb *req = cmd_to_io_kiocb(rw);

	if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
		if (__io_complete_rw_common(req, res))
			return;
		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
	}
	req->io_task_work.func = io_req_rw_complete;
	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
310f3b44f92SJens Axboe
/*
 * kiocb completion callback for polled (IOPOLL) IO. No CQE is posted here;
 * the iopoll reaper picks the request up once ->iopoll_completed is set.
 */
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
{
	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
	struct io_kiocb *req = cmd_to_io_kiocb(rw);

	if (kiocb->ki_flags & IOCB_WRITE)
		io_req_end_write(req);
	if (unlikely(res != req->cqe.res)) {
		if (res == -EAGAIN && io_rw_should_reissue(req)) {
			/* leave iopoll_completed clear; request is reissued */
			req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
			return;
		}
		req->cqe.res = res;
	}

	/* order with io_iopoll_complete() checking ->iopoll_completed */
	smp_store_release(&req->iopoll_completed, 1);
}
329f3b44f92SJens Axboe
/*
 * Finish an inline (submission-path) read/write attempt. Synchronous,
 * non-polled completions are posted directly and return IOU_OK; everything
 * else is routed through io_rw_done(). A request flagged for reissue
 * either retries (-EAGAIN) or is failed via task work.
 */
static int kiocb_done(struct io_kiocb *req, ssize_t ret,
		      unsigned int issue_flags)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	unsigned final_ret = io_fixup_rw_res(req, ret);

	/* write back f_pos if we consumed the file's current position */
	if (ret >= 0 && req->flags & REQ_F_CUR_POS)
		req->file->f_pos = rw->kiocb.ki_pos;
	if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
		if (!__io_complete_rw_common(req, ret)) {
			/*
			 * Safe to call io_end from here as we're inline
			 * from the submission path.
			 */
			io_req_io_end(req);
			io_req_set_res(req, final_ret,
				       io_put_kbuf(req, issue_flags));
			return IOU_OK;
		}
	} else {
		io_rw_done(&rw->kiocb, ret);
	}

	if (req->flags & REQ_F_REISSUE) {
		req->flags &= ~REQ_F_REISSUE;
		if (io_resubmit_prep(req))
			return -EAGAIN;
		else
			io_req_task_queue_fail(req, final_ret);
	}
	return IOU_ISSUE_SKIP_COMPLETE;
}
362f3b44f92SJens Axboe
/*
 * Build the iov_iter for this request. FIXED opcodes map the registered
 * buffer; READ/WRITE (and buffer-selected requests) use a single user
 * buffer, picking one via buffer select when requested; the vectored
 * opcodes import a full iovec array. Returns an ERR_PTR on failure, NULL
 * when nothing needs freeing, or an allocated iovec the caller owns.
 */
static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
				       struct io_rw_state *s,
				       unsigned int issue_flags)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct iov_iter *iter = &s->iter;
	u8 opcode = req->opcode;
	struct iovec *iovec;
	void __user *buf;
	size_t sqe_len;
	ssize_t ret;

	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
		ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	buf = u64_to_user_ptr(rw->addr);
	sqe_len = rw->len;

	if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE ||
	    (req->flags & REQ_F_BUFFER_SELECT)) {
		if (io_do_buffer_select(req)) {
			buf = io_buffer_select(req, &sqe_len, issue_flags);
			if (!buf)
				return ERR_PTR(-ENOBUFS);
			/* record the chosen buffer so retries reuse it */
			rw->addr = (unsigned long) buf;
			rw->len = sqe_len;
		}

		ret = import_ubuf(ddir, buf, sqe_len, iter);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	iovec = s->fast_iov;
	ret = __import_iovec(ddir, buf, sqe_len, UIO_FASTIOV, &iovec, iter,
			     req->ctx->compat);
	if (unlikely(ret < 0))
		return ERR_PTR(ret);
	return iovec;
}
408f3b44f92SJens Axboe
io_import_iovec(int rw,struct io_kiocb * req,struct iovec ** iovec,struct io_rw_state * s,unsigned int issue_flags)409f3b44f92SJens Axboe static inline int io_import_iovec(int rw, struct io_kiocb *req,
410f3b44f92SJens Axboe struct iovec **iovec, struct io_rw_state *s,
411f3b44f92SJens Axboe unsigned int issue_flags)
412f3b44f92SJens Axboe {
413f3b44f92SJens Axboe *iovec = __io_import_iovec(rw, req, s, issue_flags);
41481594e7eSDmitrii Bundin if (IS_ERR(*iovec))
415f3b44f92SJens Axboe return PTR_ERR(*iovec);
416f3b44f92SJens Axboe
417f3b44f92SJens Axboe iov_iter_save_state(&s->iter, &s->iter_state);
418f3b44f92SJens Axboe return 0;
419f3b44f92SJens Axboe }
420f3b44f92SJens Axboe
io_kiocb_ppos(struct kiocb * kiocb)421f3b44f92SJens Axboe static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
422f3b44f92SJens Axboe {
423f3b44f92SJens Axboe return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
424f3b44f92SJens Axboe }
425f3b44f92SJens Axboe
/*
 * For files that don't have ->read_iter() and ->write_iter(), handle them
 * by looping over ->read() or ->write() manually.
 */
static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
{
	struct kiocb *kiocb = &rw->kiocb;
	struct file *file = kiocb->ki_filp;
	ssize_t ret = 0;
	loff_t *ppos;

	/*
	 * Don't support polled IO through this interface, and we can't
	 * support non-blocking either. For the latter, this just causes
	 * the kiocb to be handled from an async context.
	 */
	if (kiocb->ki_flags & IOCB_HIPRI)
		return -EOPNOTSUPP;
	if ((kiocb->ki_flags & IOCB_NOWAIT) &&
	    !(kiocb->ki_filp->f_flags & O_NONBLOCK))
		return -EAGAIN;

	ppos = io_kiocb_ppos(kiocb);

	while (iov_iter_count(iter)) {
		void __user *addr;
		size_t len;
		ssize_t nr;

		/* pick the next contiguous chunk described by the iterator */
		if (iter_is_ubuf(iter)) {
			addr = iter->ubuf + iter->iov_offset;
			len = iov_iter_count(iter);
		} else if (!iov_iter_is_bvec(iter)) {
			addr = iter_iov_addr(iter);
			len = iter_iov_len(iter);
		} else {
			/* bvec case: rw->addr/rw->len track progress below */
			addr = u64_to_user_ptr(rw->addr);
			len = rw->len;
		}

		if (ddir == READ)
			nr = file->f_op->read(file, addr, len, ppos);
		else
			nr = file->f_op->write(file, addr, len, ppos);

		if (nr < 0) {
			/* report the error only if nothing was transferred */
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (!iov_iter_is_bvec(iter)) {
			iov_iter_advance(iter, nr);
		} else {
			rw->addr += nr;
			rw->len -= nr;
			if (!rw->len)
				break;
		}
		/* short transfer: stop instead of spinning on the rest */
		if (nr != len)
			break;
	}

	return ret;
}
491f3b44f92SJens Axboe
/*
 * Copy the live iterator (and the iovec backing it, if needed) into the
 * request's async data so the IO can be retried from another context.
 * @iovec is NULL when the iter was backed by the on-stack fast_iov.
 */
static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
			  const struct iovec *fast_iov, struct iov_iter *iter)
{
	struct io_async_rw *io = req->async_data;

	memcpy(&io->s.iter, iter, sizeof(*iter));
	io->free_iovec = iovec;
	io->bytes_done = 0;
	/* can only be fixed buffers, no need to do anything */
	if (iov_iter_is_bvec(iter) || iter_is_ubuf(iter))
		return;
	if (!iovec) {
		unsigned iov_off = 0;

		/*
		 * The iter pointed into the caller's fast_iov; repoint the
		 * copy at the async fast_iov, preserving any offset, and
		 * copy the remaining segments over.
		 */
		io->s.iter.__iov = io->s.fast_iov;
		if (iter->__iov != fast_iov) {
			iov_off = iter_iov(iter) - fast_iov;
			io->s.iter.__iov += iov_off;
		}
		if (io->s.fast_iov != fast_iov)
			memcpy(io->s.fast_iov + iov_off, fast_iov + iov_off,
			       sizeof(struct iovec) * iter->nr_segs);
	} else {
		/* ownership of the allocated iovec moves to async data */
		req->flags |= REQ_F_NEED_CLEANUP;
	}
}
518f3b44f92SJens Axboe
/*
 * Ensure the request has async data holding the iter/iovec, allocating
 * and populating it if not already present. Frees @iovec on allocation
 * failure. @force skips the opcode's prep_async gate.
 */
static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
			     struct io_rw_state *s, bool force)
{
	if (!force && !io_cold_defs[req->opcode].prep_async)
		return 0;
	if (!req_has_async_data(req)) {
		struct io_async_rw *iorw;

		if (io_alloc_async_data(req)) {
			kfree(iovec);
			return -ENOMEM;
		}

		io_req_map_rw(req, iovec, s->fast_iov, &s->iter);
		iorw = req->async_data;
		/* we've copied and mapped the iter, ensure state is saved */
		iov_iter_save_state(&iorw->s.iter, &iorw->s.iter_state);
	}
	return 0;
}
539f3b44f92SJens Axboe
/*
 * Import the iovec straight into the request's already-allocated async
 * data; shared by the readv/writev prep_async handlers. Any allocated
 * iovec is stashed for later cleanup.
 */
static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
{
	struct io_async_rw *iorw = req->async_data;
	struct iovec *iov;
	int ret;

	iorw->bytes_done = 0;
	iorw->free_iovec = NULL;

	/* submission path, ->uring_lock should already be taken */
	ret = io_import_iovec(rw, req, &iov, &iorw->s, 0);
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		iorw->free_iovec = iov;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	return 0;
}
561f3b44f92SJens Axboe
/* Async prep for IORING_OP_READV: import the iovec as a read destination. */
int io_readv_prep_async(struct io_kiocb *req)
{
	return io_rw_prep_async(req, ITER_DEST);
}
566f3b44f92SJens Axboe
/* Async prep for IORING_OP_WRITEV: import the iovec as a write source. */
int io_writev_prep_async(struct io_kiocb *req)
{
	return io_rw_prep_async(req, ITER_SOURCE);
}
571f3b44f92SJens Axboe
572f3b44f92SJens Axboe /*
573f3b44f92SJens Axboe * This is our waitqueue callback handler, registered through __folio_lock_async()
574f3b44f92SJens Axboe * when we initially tried to do the IO with the iocb armed our waitqueue.
575f3b44f92SJens Axboe * This gets called when the page is unlocked, and we generally expect that to
576f3b44f92SJens Axboe * happen when the page IO is completed and the page is now uptodate. This will
577f3b44f92SJens Axboe * queue a task_work based retry of the operation, attempting to copy the data
578f3b44f92SJens Axboe * again. If the latter fails because the page was NOT uptodate, then we will
579f3b44f92SJens Axboe * do a thread based blocking retry of the operation. That's the unexpected
580f3b44f92SJens Axboe * slow path.
581f3b44f92SJens Axboe */
static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
			     int sync, void *arg)
{
	struct wait_page_queue *wpq;
	struct io_kiocb *req = wait->private;
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct wait_page_key *key = arg;

	wpq = container_of(wait, struct wait_page_queue, wait);

	/* not the page we're waiting on: stay queued */
	if (!wake_page_match(wpq, key))
		return 0;

	/* drop waitqueue mode and queue a task_work retry of the read */
	rw->kiocb.ki_flags &= ~IOCB_WAITQ;
	list_del_init(&wait->entry);
	io_req_task_queue(req);
	return 1;
}
600f3b44f92SJens Axboe
601f3b44f92SJens Axboe /*
602f3b44f92SJens Axboe * This controls whether a given IO request should be armed for async page
603f3b44f92SJens Axboe * based retry. If we return false here, the request is handed to the async
604f3b44f92SJens Axboe * worker threads for retry. If we're doing buffered reads on a regular file,
605f3b44f92SJens Axboe * we prepare a private wait_page_queue entry and retry the operation. This
606f3b44f92SJens Axboe * will either succeed because the page is now uptodate and unlocked, or it
607f3b44f92SJens Axboe * will register a callback when the page is unlocked at IO completion. Through
608f3b44f92SJens Axboe * that callback, io_uring uses task_work to setup a retry of the operation.
609f3b44f92SJens Axboe * That retry will attempt the buffered read again. The retry will generally
610f3b44f92SJens Axboe * succeed, or in rare cases where it fails, we then fall back to using the
611f3b44f92SJens Axboe * async worker threads for a blocking retry.
612f3b44f92SJens Axboe */
static bool io_rw_should_retry(struct io_kiocb *req)
{
	struct io_async_rw *io = req->async_data;
	struct wait_page_queue *wait = &io->wpq;
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct kiocb *kiocb = &rw->kiocb;

	/* never retry for NOWAIT, we just complete with -EAGAIN */
	if (req->flags & REQ_F_NOWAIT)
		return false;

	/* Only for buffered IO */
	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
		return false;

	/*
	 * just use poll if we can, and don't attempt if the fs doesn't
	 * support callback based unlocks
	 */
	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
		return false;

	/* arm the page-unlock waitqueue callback for this request */
	wait->wait.func = io_async_buf_func;
	wait->wait.private = req;
	wait->wait.flags = 0;
	INIT_LIST_HEAD(&wait->wait.entry);
	kiocb->ki_flags |= IOCB_WAITQ;
	kiocb->ki_flags &= ~IOCB_NOWAIT;
	kiocb->ki_waitq = wait;
	return true;
}
644f3b44f92SJens Axboe
io_iter_do_read(struct io_rw * rw,struct iov_iter * iter)645f3b44f92SJens Axboe static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
646f3b44f92SJens Axboe {
647f3b44f92SJens Axboe struct file *file = rw->kiocb.ki_filp;
648f3b44f92SJens Axboe
649f3b44f92SJens Axboe if (likely(file->f_op->read_iter))
650f3b44f92SJens Axboe return call_read_iter(file, &rw->kiocb, iter);
651f3b44f92SJens Axboe else if (file->f_op->read)
652f3b44f92SJens Axboe return loop_rw_iter(READ, rw, iter);
653f3b44f92SJens Axboe else
654f3b44f92SJens Axboe return -EINVAL;
655f3b44f92SJens Axboe }
656f3b44f92SJens Axboe
need_complete_io(struct io_kiocb * req)6574e17aaabSStefan Roesch static bool need_complete_io(struct io_kiocb *req)
658f3b44f92SJens Axboe {
659f3b44f92SJens Axboe return req->flags & REQ_F_ISREG ||
660f3b44f92SJens Axboe S_ISBLK(file_inode(req->file)->i_mode);
661f3b44f92SJens Axboe }
662f3b44f92SJens Axboe
/*
 * Per-issue initialisation of the kiocb for a read or write.  Validates
 * that the file is open for @mode, applies the RWF_* flags from the SQE,
 * and wires up the completion callback appropriate for IOPOLL vs normal
 * rings.  Returns 0 on success or a negative errno.
 */
static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct kiocb *kiocb = &rw->kiocb;
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = req->file;
	int ret;

	if (unlikely(!file || !(file->f_mode & mode)))
		return -EBADF;

	/* fixed files had their flags resolved at registration time */
	if (!(req->flags & REQ_F_FIXED_FILE))
		req->flags |= io_file_get_flags(file);

	kiocb->ki_flags = file->f_iocb_flags;
	ret = kiocb_set_rw_flags(kiocb, rw->flags);
	if (unlikely(ret))
		return ret;
	kiocb->ki_flags |= IOCB_ALLOC_CACHE;

	/*
	 * If the file is marked O_NONBLOCK, still allow retry for it if it
	 * supports async. Otherwise it's impossible to use O_NONBLOCK files
	 * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
	 */
	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
	    ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
		req->flags |= REQ_F_NOWAIT;

	if (ctx->flags & IORING_SETUP_IOPOLL) {
		/* IOPOLL rings require direct I/O and an ->iopoll file op */
		if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
			return -EOPNOTSUPP;

		kiocb->private = NULL;
		kiocb->ki_flags |= IOCB_HIPRI;
		kiocb->ki_complete = io_complete_rw_iopoll;
		req->iopoll_completed = 0;
	} else {
		/* conversely, HIPRI (polled) I/O needs an IOPOLL ring */
		if (kiocb->ki_flags & IOCB_HIPRI)
			return -EINVAL;
		kiocb->ki_complete = io_complete_rw;
	}

	return 0;
}
708f3b44f92SJens Axboe
/*
 * Core read path.  Imports (or re-imports) the iovec, issues the read,
 * and handles the various retry outcomes: -EAGAIN punts, IOCB_WAITQ
 * buffered retries for partial reads, and -EIOCBQUEUED for async
 * completion.  Returns the byte count, a negative errno, or
 * IOU_ISSUE_SKIP_COMPLETE when completion will be posted elsewhere.
 */
static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct io_rw_state __s, *s = &__s;
	struct iovec *iovec;
	struct kiocb *kiocb = &rw->kiocb;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	struct io_async_rw *io;
	ssize_t ret, ret2;
	loff_t *ppos;

	if (!req_has_async_data(req)) {
		/* first attempt: map the user iovec into on-stack state */
		ret = io_import_iovec(ITER_DEST, req, &iovec, s, issue_flags);
		if (unlikely(ret < 0))
			return ret;
	} else {
		/* retry: use the persistent state saved on the request */
		io = req->async_data;
		s = &io->s;

		/*
		 * Safe and required to re-import if we're using provided
		 * buffers, as we dropped the selected one before retry.
		 */
		if (io_do_buffer_select(req)) {
			ret = io_import_iovec(ITER_DEST, req, &iovec, s, issue_flags);
			if (unlikely(ret < 0))
				return ret;
		}

		/*
		 * We come here from an earlier attempt, restore our state to
		 * match in case it doesn't. It's cheap enough that we don't
		 * need to make this conditional.
		 */
		iov_iter_restore(&s->iter, &s->iter_state);
		iovec = NULL;
	}
	ret = io_rw_init_file(req, FMODE_READ);
	if (unlikely(ret)) {
		kfree(iovec);
		return ret;
	}
	req->cqe.res = iov_iter_count(&s->iter);

	if (force_nonblock) {
		/* If the file doesn't support async, just async punt */
		if (unlikely(!io_file_supports_nowait(req))) {
			ret = io_setup_async_rw(req, iovec, s, true);
			return ret ?: -EAGAIN;
		}
		kiocb->ki_flags |= IOCB_NOWAIT;
	} else {
		/* Ensure we clear previously set non-block flag */
		kiocb->ki_flags &= ~IOCB_NOWAIT;
	}

	ppos = io_kiocb_update_pos(req);

	ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
	if (unlikely(ret)) {
		kfree(iovec);
		return ret;
	}

	ret = io_iter_do_read(rw, &s->iter);

	/*
	 * Some file systems like to return -EOPNOTSUPP for an IOCB_NOWAIT
	 * issue, even though they should be returning -EAGAIN. To be safe,
	 * retry from blocking context for either.
	 */
	if (ret == -EOPNOTSUPP && force_nonblock)
		ret = -EAGAIN;

	if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
		req->flags &= ~REQ_F_REISSUE;
		/* if we can poll, just do that */
		if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
			return -EAGAIN;
		/* IOPOLL retry should happen for io-wq threads */
		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
			goto done;
		/* no retry on NONBLOCK nor RWF_NOWAIT */
		if (req->flags & REQ_F_NOWAIT)
			goto done;
		/* fall through to the blocking waitq-retry loop below */
		ret = 0;
	} else if (ret == -EIOCBQUEUED) {
		/* async in flight; completion posted by io_complete_rw* */
		req->flags |= REQ_F_PARTIAL_IO;
		io_kbuf_recycle(req, issue_flags);
		if (iovec)
			kfree(iovec);
		return IOU_ISSUE_SKIP_COMPLETE;
	} else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
		   (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) {
		/* read all, failed, already did sync or don't want to retry */
		goto done;
	}

	/*
	 * Don't depend on the iter state matching what was consumed, or being
	 * untouched in case of error. Restore it and we'll advance it
	 * manually if we need to.
	 */
	iov_iter_restore(&s->iter, &s->iter_state);

	ret2 = io_setup_async_rw(req, iovec, s, true);
	iovec = NULL;
	if (ret2) {
		/* prefer the partial byte count over the setup error */
		ret = ret > 0 ? ret : ret2;
		goto done;
	}

	req->flags |= REQ_F_PARTIAL_IO;
	io_kbuf_recycle(req, issue_flags);

	io = req->async_data;
	s = &io->s;
	/*
	 * Now use our persistent iterator and state, if we aren't already.
	 * We've restored and mapped the iter to match.
	 */

	do {
		/*
		 * We end up here because of a partial read, either from
		 * above or inside this loop. Advance the iter by the bytes
		 * that were consumed.
		 */
		iov_iter_advance(&s->iter, ret);
		if (!iov_iter_count(&s->iter))
			break;
		io->bytes_done += ret;
		iov_iter_save_state(&s->iter, &s->iter_state);

		/* if we can retry, do so with the callbacks armed */
		if (!io_rw_should_retry(req)) {
			kiocb->ki_flags &= ~IOCB_WAITQ;
			return -EAGAIN;
		}

		req->cqe.res = iov_iter_count(&s->iter);
		/*
		 * Now retry read with the IOCB_WAITQ parts set in the iocb. If
		 * we get -EIOCBQUEUED, then we'll get a notification when the
		 * desired page gets unlocked. We can also get a partial read
		 * here, and if we do, then just retry at the new offset.
		 */
		ret = io_iter_do_read(rw, &s->iter);
		if (ret == -EIOCBQUEUED)
			return IOU_ISSUE_SKIP_COMPLETE;
		/* we got some bytes, but not all. retry. */
		kiocb->ki_flags &= ~IOCB_WAITQ;
		iov_iter_restore(&s->iter, &s->iter_state);
	} while (ret > 0);
done:
	/* it's faster to check here then delegate to kfree */
	if (iovec)
		kfree(iovec);
	return ret;
}
8696c27fc6aSJens Axboe
/*
 * Entry point for IORING_OP_READ{,V,_FIXED}: run the core read and, on a
 * non-negative byte count, finish the request via kiocb_done().
 */
int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
	int ret = __io_read(req, issue_flags);

	/* negative results (-EAGAIN etc.) propagate for retry/punt */
	if (ret < 0)
		return ret;
	return kiocb_done(req, ret, issue_flags);
}
880f3b44f92SJens Axboe
io_kiocb_start_write(struct io_kiocb * req,struct kiocb * kiocb)881003d2996SJens Axboe static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
882003d2996SJens Axboe {
883003d2996SJens Axboe struct inode *inode;
884003d2996SJens Axboe bool ret;
885003d2996SJens Axboe
886003d2996SJens Axboe if (!(req->flags & REQ_F_ISREG))
887003d2996SJens Axboe return true;
888003d2996SJens Axboe if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
889003d2996SJens Axboe kiocb_start_write(kiocb);
890003d2996SJens Axboe return true;
891003d2996SJens Axboe }
892003d2996SJens Axboe
893003d2996SJens Axboe inode = file_inode(kiocb->ki_filp);
894003d2996SJens Axboe ret = sb_start_write_trylock(inode->i_sb);
895003d2996SJens Axboe if (ret)
896003d2996SJens Axboe __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
897003d2996SJens Axboe return ret;
898003d2996SJens Axboe }
899003d2996SJens Axboe
io_write(struct io_kiocb * req,unsigned int issue_flags)900f3b44f92SJens Axboe int io_write(struct io_kiocb *req, unsigned int issue_flags)
901f3b44f92SJens Axboe {
902f2ccb5aeSStefan Metzmacher struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
903f3b44f92SJens Axboe struct io_rw_state __s, *s = &__s;
904f3b44f92SJens Axboe struct iovec *iovec;
905f3b44f92SJens Axboe struct kiocb *kiocb = &rw->kiocb;
906f3b44f92SJens Axboe bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
907f3b44f92SJens Axboe ssize_t ret, ret2;
908f3b44f92SJens Axboe loff_t *ppos;
909f3b44f92SJens Axboe
910f3b44f92SJens Axboe if (!req_has_async_data(req)) {
911de4eda9dSAl Viro ret = io_import_iovec(ITER_SOURCE, req, &iovec, s, issue_flags);
912f3b44f92SJens Axboe if (unlikely(ret < 0))
913f3b44f92SJens Axboe return ret;
914f3b44f92SJens Axboe } else {
915f3b44f92SJens Axboe struct io_async_rw *io = req->async_data;
916f3b44f92SJens Axboe
917f3b44f92SJens Axboe s = &io->s;
918f3b44f92SJens Axboe iov_iter_restore(&s->iter, &s->iter_state);
919f3b44f92SJens Axboe iovec = NULL;
920f3b44f92SJens Axboe }
921f3b44f92SJens Axboe ret = io_rw_init_file(req, FMODE_WRITE);
922f3b44f92SJens Axboe if (unlikely(ret)) {
923f3b44f92SJens Axboe kfree(iovec);
924f3b44f92SJens Axboe return ret;
925f3b44f92SJens Axboe }
926f3b44f92SJens Axboe req->cqe.res = iov_iter_count(&s->iter);
927f3b44f92SJens Axboe
928f3b44f92SJens Axboe if (force_nonblock) {
929f3b44f92SJens Axboe /* If the file doesn't support async, just async punt */
930f3b44f92SJens Axboe if (unlikely(!io_file_supports_nowait(req)))
931f3b44f92SJens Axboe goto copy_iov;
932f3b44f92SJens Axboe
9334e17aaabSStefan Roesch /* File path supports NOWAIT for non-direct_IO only for block devices. */
9344e17aaabSStefan Roesch if (!(kiocb->ki_flags & IOCB_DIRECT) &&
9354e17aaabSStefan Roesch !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) &&
936f3b44f92SJens Axboe (req->flags & REQ_F_ISREG))
937f3b44f92SJens Axboe goto copy_iov;
938f3b44f92SJens Axboe
939f3b44f92SJens Axboe kiocb->ki_flags |= IOCB_NOWAIT;
940f3b44f92SJens Axboe } else {
941f3b44f92SJens Axboe /* Ensure we clear previously set non-block flag */
942f3b44f92SJens Axboe kiocb->ki_flags &= ~IOCB_NOWAIT;
943f3b44f92SJens Axboe }
944f3b44f92SJens Axboe
945f3b44f92SJens Axboe ppos = io_kiocb_update_pos(req);
946f3b44f92SJens Axboe
947f3b44f92SJens Axboe ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
948df9830d8SPavel Begunkov if (unlikely(ret)) {
949df9830d8SPavel Begunkov kfree(iovec);
950df9830d8SPavel Begunkov return ret;
951df9830d8SPavel Begunkov }
952f3b44f92SJens Axboe
953003d2996SJens Axboe if (unlikely(!io_kiocb_start_write(req, kiocb)))
954003d2996SJens Axboe return -EAGAIN;
955f3b44f92SJens Axboe kiocb->ki_flags |= IOCB_WRITE;
956f3b44f92SJens Axboe
957f3b44f92SJens Axboe if (likely(req->file->f_op->write_iter))
958f3b44f92SJens Axboe ret2 = call_write_iter(req->file, kiocb, &s->iter);
959f3b44f92SJens Axboe else if (req->file->f_op->write)
960f3b44f92SJens Axboe ret2 = loop_rw_iter(WRITE, rw, &s->iter);
961f3b44f92SJens Axboe else
962f3b44f92SJens Axboe ret2 = -EINVAL;
963f3b44f92SJens Axboe
964*644636eeSPavel Begunkov if (ret2 == -EIOCBQUEUED) {
965*644636eeSPavel Begunkov req->flags |= REQ_F_PARTIAL_IO;
966*644636eeSPavel Begunkov io_kbuf_recycle(req, issue_flags);
967*644636eeSPavel Begunkov }
968*644636eeSPavel Begunkov
969f3b44f92SJens Axboe if (req->flags & REQ_F_REISSUE) {
970f3b44f92SJens Axboe req->flags &= ~REQ_F_REISSUE;
971f3b44f92SJens Axboe ret2 = -EAGAIN;
972f3b44f92SJens Axboe }
973f3b44f92SJens Axboe
974f3b44f92SJens Axboe /*
975f3b44f92SJens Axboe * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
976f3b44f92SJens Axboe * retry them without IOCB_NOWAIT.
977f3b44f92SJens Axboe */
978f3b44f92SJens Axboe if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
979f3b44f92SJens Axboe ret2 = -EAGAIN;
980f3b44f92SJens Axboe /* no retry on NONBLOCK nor RWF_NOWAIT */
981f3b44f92SJens Axboe if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
982f3b44f92SJens Axboe goto done;
983f3b44f92SJens Axboe if (!force_nonblock || ret2 != -EAGAIN) {
984f3b44f92SJens Axboe /* IOPOLL retry should happen for io-wq threads */
985f3b44f92SJens Axboe if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL))
986f3b44f92SJens Axboe goto copy_iov;
9874e17aaabSStefan Roesch
9884e17aaabSStefan Roesch if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) {
989c86416c6SStefan Roesch struct io_async_rw *io;
9904e17aaabSStefan Roesch
9911c849b48SStefan Roesch trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2,
9921c849b48SStefan Roesch req->cqe.res, ret2);
9931c849b48SStefan Roesch
9944e17aaabSStefan Roesch /* This is a partial write. The file pos has already been
9954e17aaabSStefan Roesch * updated, setup the async struct to complete the request
9964e17aaabSStefan Roesch * in the worker. Also update bytes_done to account for
9974e17aaabSStefan Roesch * the bytes already written.
9984e17aaabSStefan Roesch */
9994e17aaabSStefan Roesch iov_iter_save_state(&s->iter, &s->iter_state);
10004e17aaabSStefan Roesch ret = io_setup_async_rw(req, iovec, s, true);
10014e17aaabSStefan Roesch
1002c86416c6SStefan Roesch io = req->async_data;
1003c86416c6SStefan Roesch if (io)
1004c86416c6SStefan Roesch io->bytes_done += ret2;
10054e17aaabSStefan Roesch
1006e053aaf4SJens Axboe if (kiocb->ki_flags & IOCB_WRITE)
1007a370167fSAmir Goldstein io_req_end_write(req);
10084e17aaabSStefan Roesch return ret ? ret : -EAGAIN;
10094e17aaabSStefan Roesch }
1010f3b44f92SJens Axboe done:
1011df9830d8SPavel Begunkov ret = kiocb_done(req, ret2, issue_flags);
1012f3b44f92SJens Axboe } else {
1013f3b44f92SJens Axboe copy_iov:
1014f3b44f92SJens Axboe iov_iter_restore(&s->iter, &s->iter_state);
1015f3b44f92SJens Axboe ret = io_setup_async_rw(req, iovec, s, false);
1016e053aaf4SJens Axboe if (!ret) {
1017e053aaf4SJens Axboe if (kiocb->ki_flags & IOCB_WRITE)
1018a370167fSAmir Goldstein io_req_end_write(req);
1019e053aaf4SJens Axboe return -EAGAIN;
1020e053aaf4SJens Axboe }
1021e053aaf4SJens Axboe return ret;
1022f3b44f92SJens Axboe }
1023f3b44f92SJens Axboe /* it's reportedly faster than delegating the null check to kfree() */
1024f3b44f92SJens Axboe if (iovec)
1025f3b44f92SJens Axboe kfree(iovec);
1026f3b44f92SJens Axboe return ret;
1027f3b44f92SJens Axboe }
1028f3b44f92SJens Axboe
io_rw_fail(struct io_kiocb * req)102947b4c686SPavel Begunkov void io_rw_fail(struct io_kiocb *req)
103047b4c686SPavel Begunkov {
103147b4c686SPavel Begunkov int res;
103247b4c686SPavel Begunkov
103347b4c686SPavel Begunkov res = io_fixup_rw_res(req, req->cqe.res);
103447b4c686SPavel Begunkov io_req_set_res(req, res, req->cqe.flags);
103547b4c686SPavel Begunkov }
103647b4c686SPavel Begunkov
/*
 * Reap completions for an IOPOLL ring.  Walks ctx->iopoll_list, polls
 * each request's device (batched via an io_comp_batch), then flushes
 * every request whose iopoll_completed flag became set.  Returns the
 * number of completions posted, or a negative errno from the poll op.
 */
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
{
	struct io_wq_work_node *pos, *start, *prev;
	unsigned int poll_flags = 0;
	DEFINE_IO_COMP_BATCH(iob);
	int nr_events = 0;

	/*
	 * Only spin for completions if we don't have multiple devices hanging
	 * off our complete list.
	 */
	if (ctx->poll_multi_queue || force_nonspin)
		poll_flags |= BLK_POLL_ONESHOT;

	wq_list_for_each(pos, start, &ctx->iopoll_list) {
		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
		struct file *file = req->file;
		int ret;

		/*
		 * Move completed and retryable entries to our local lists.
		 * If we find a request that requires polling, break out
		 * and complete those lists first, if we have entries there.
		 */
		if (READ_ONCE(req->iopoll_completed))
			break;

		/* uring_cmd and rw requests store their kiocb differently */
		if (req->opcode == IORING_OP_URING_CMD) {
			struct io_uring_cmd *ioucmd;

			ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
			ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
							   poll_flags);
		} else {
			struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);

			ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
		}
		if (unlikely(ret < 0))
			return ret;
		else if (ret)
			poll_flags |= BLK_POLL_ONESHOT;

		/* iopoll may have completed current req */
		if (!rq_list_empty(iob.req_list) ||
		    READ_ONCE(req->iopoll_completed))
			break;
	}

	if (!rq_list_empty(iob.req_list))
		iob.complete(&iob);
	else if (!pos)
		return 0;

	/* second pass: count and flag everything that completed */
	prev = start;
	wq_list_for_each_resume(pos, prev) {
		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);

		/* order with io_complete_rw_iopoll(), e.g. ->result updates */
		if (!smp_load_acquire(&req->iopoll_completed))
			break;
		nr_events++;
		req->cqe.flags = io_put_kbuf(req, 0);
	}
	if (unlikely(!nr_events))
		return 0;

	/* detach the completed span and flush it through the CQ */
	pos = start ? start->next : ctx->iopoll_list.first;
	wq_list_cut(&ctx->iopoll_list, prev, start);

	if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
		return 0;
	ctx->submit_state.compl_reqs.first = pos;
	__io_submit_flush_completions(ctx);
	return nr_events;
}
1113