136404b09SJens Axboe // SPDX-License-Identifier: GPL-2.0
236404b09SJens Axboe #include <linux/kernel.h>
336404b09SJens Axboe #include <linux/errno.h>
436404b09SJens Axboe #include <linux/file.h>
536404b09SJens Axboe #include <linux/slab.h>
6e6130ebaSJens Axboe #include <linux/nospec.h>
736404b09SJens Axboe #include <linux/io_uring.h>
836404b09SJens Axboe
936404b09SJens Axboe #include <uapi/linux/io_uring.h>
1036404b09SJens Axboe
1136404b09SJens Axboe #include "io_uring.h"
12e6130ebaSJens Axboe #include "rsrc.h"
13e6130ebaSJens Axboe #include "filetable.h"
1436404b09SJens Axboe #include "msg_ring.h"
1536404b09SJens Axboe
16cbeb47a7SBreno Leitao
17cbeb47a7SBreno Leitao /* All valid masks for MSG_RING */
18cbeb47a7SBreno Leitao #define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
19cbeb47a7SBreno Leitao IORING_MSG_RING_FLAGS_PASS)
20cbeb47a7SBreno Leitao
struct io_msg {
	struct file			*file;
	/* file being passed to the target ring (IORING_MSG_SEND_FD) */
	struct file			*src_file;
	/* task_work item used to complete on the target ring's task */
	struct callback_head		tw;
	/* user_data to post in the target ring's CQE */
	u64 user_data;
	/* res value for the posted CQE (must be 0 for SEND_FD) */
	u32 len;
	/* IORING_MSG_DATA or IORING_MSG_SEND_FD */
	u32 cmd;
	/* fixed-file index in the source ring (SEND_FD) */
	u32 src_fd;
	union {
		/* destination fixed-file slot in the target ring (SEND_FD) */
		u32 dst_fd;
		/* CQE flags to pass through (IORING_MSG_RING_FLAGS_PASS) */
		u32 cqe_flags;
	};
	/* IORING_MSG_RING_* flags from sqe->msg_ring_flags */
	u32 flags;
};
3536404b09SJens Axboe
/* Drop the target ring's uring_lock taken via io_double_lock_ctx() */
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}
40423d5081SJens Axboe
io_double_lock_ctx(struct io_ring_ctx * octx,unsigned int issue_flags)41423d5081SJens Axboe static int io_double_lock_ctx(struct io_ring_ctx *octx,
42423d5081SJens Axboe unsigned int issue_flags)
43423d5081SJens Axboe {
44423d5081SJens Axboe /*
45423d5081SJens Axboe * To ensure proper ordering between the two ctxs, we can only
46423d5081SJens Axboe * attempt a trylock on the target. If that fails and we already have
47423d5081SJens Axboe * the source ctx lock, punt to io-wq.
48423d5081SJens Axboe */
49423d5081SJens Axboe if (!(issue_flags & IO_URING_F_UNLOCKED)) {
50423d5081SJens Axboe if (!mutex_trylock(&octx->uring_lock))
51423d5081SJens Axboe return -EAGAIN;
52423d5081SJens Axboe return 0;
53423d5081SJens Axboe }
54423d5081SJens Axboe mutex_lock(&octx->uring_lock);
55423d5081SJens Axboe return 0;
56423d5081SJens Axboe }
57423d5081SJens Axboe
io_msg_ring_cleanup(struct io_kiocb * req)5811373026SPavel Begunkov void io_msg_ring_cleanup(struct io_kiocb *req)
5911373026SPavel Begunkov {
6011373026SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
6111373026SPavel Begunkov
6211373026SPavel Begunkov if (WARN_ON_ONCE(!msg->src_file))
6311373026SPavel Begunkov return;
6411373026SPavel Begunkov
6511373026SPavel Begunkov fput(msg->src_file);
6611373026SPavel Begunkov msg->src_file = NULL;
6711373026SPavel Begunkov }
6811373026SPavel Begunkov
io_msg_need_remote(struct io_ring_ctx * target_ctx)6956d8e318SPavel Begunkov static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
7056d8e318SPavel Begunkov {
7156d8e318SPavel Begunkov if (!target_ctx->task_complete)
7256d8e318SPavel Begunkov return false;
7356d8e318SPavel Begunkov return current != target_ctx->submitter_task;
7456d8e318SPavel Begunkov }
7556d8e318SPavel Begunkov
/*
 * Queue @func as task_work on the target ring's submitter task so the
 * completion is posted from the right task context.
 *
 * Returns IOU_ISSUE_SKIP_COMPLETE on success — the task_work callback now
 * owns completing the request — or -EOWNERDEAD if the target's submitter
 * task is gone (ring going away).
 */
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	/*
	 * Use the snapshot taken above: re-reading ctx->submitter_task here
	 * could race with it being cleared after our NULL check.
	 */
	if (task_work_add(task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}
9156d8e318SPavel Begunkov
/*
 * task_work callback run on the target ring's submitter task: post the
 * IORING_MSG_DATA CQE on the target ring and complete the source request.
 */
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	/* target task is exiting; don't touch its ring, just fail */
	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		/* optionally pass sender-supplied flags through to the CQE */
		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	/* complete the source request with the outcome of the post */
	io_req_queue_tw_complete(req, ret);
}
1256d043ee1SPavel Begunkov
io_msg_ring_data(struct io_kiocb * req,unsigned int issue_flags)126e12d7a46SJens Axboe static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
127e6130ebaSJens Axboe {
128e6130ebaSJens Axboe struct io_ring_ctx *target_ctx = req->file->private_data;
129f2ccb5aeSStefan Metzmacher struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
130cbeb47a7SBreno Leitao u32 flags = 0;
131e12d7a46SJens Axboe int ret;
132e6130ebaSJens Axboe
133cbeb47a7SBreno Leitao if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
134cbeb47a7SBreno Leitao return -EINVAL;
135cbeb47a7SBreno Leitao if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
136e6130ebaSJens Axboe return -EINVAL;
1378579538cSPavel Begunkov if (target_ctx->flags & IORING_SETUP_R_DISABLED)
1388579538cSPavel Begunkov return -EBADFD;
139e6130ebaSJens Axboe
14056d8e318SPavel Begunkov if (io_msg_need_remote(target_ctx))
14156d8e318SPavel Begunkov return io_msg_exec_remote(req, io_msg_tw_complete);
1426d043ee1SPavel Begunkov
143cbeb47a7SBreno Leitao if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
144cbeb47a7SBreno Leitao flags = msg->cqe_flags;
145cbeb47a7SBreno Leitao
146e12d7a46SJens Axboe ret = -EOVERFLOW;
147e12d7a46SJens Axboe if (target_ctx->flags & IORING_SETUP_IOPOLL) {
148e12d7a46SJens Axboe if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
149e12d7a46SJens Axboe return -EAGAIN;
150cbeb47a7SBreno Leitao if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
151e12d7a46SJens Axboe ret = 0;
152e12d7a46SJens Axboe io_double_unlock_ctx(target_ctx);
153e12d7a46SJens Axboe } else {
154cbeb47a7SBreno Leitao if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
155e12d7a46SJens Axboe ret = 0;
156e12d7a46SJens Axboe }
157e12d7a46SJens Axboe return ret;
158e6130ebaSJens Axboe }
159e6130ebaSJens Axboe
io_msg_grab_file(struct io_kiocb * req,unsigned int issue_flags)16011373026SPavel Begunkov static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
16111373026SPavel Begunkov {
16211373026SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
16311373026SPavel Begunkov struct io_ring_ctx *ctx = req->ctx;
16411373026SPavel Begunkov struct file *file = NULL;
16511373026SPavel Begunkov int idx = msg->src_fd;
16611373026SPavel Begunkov
16711373026SPavel Begunkov io_ring_submit_lock(ctx, issue_flags);
16811373026SPavel Begunkov if (likely(idx < ctx->nr_user_files)) {
16911373026SPavel Begunkov idx = array_index_nospec(idx, ctx->nr_user_files);
17011373026SPavel Begunkov file = io_file_from_index(&ctx->file_table, idx);
17111373026SPavel Begunkov if (file)
17211373026SPavel Begunkov get_file(file);
17311373026SPavel Begunkov }
17411373026SPavel Begunkov io_ring_submit_unlock(ctx, issue_flags);
17511373026SPavel Begunkov return file;
17611373026SPavel Begunkov }
17711373026SPavel Begunkov
/*
 * Install msg->src_file into the target ring's fixed file table at slot
 * msg->dst_fd (or an allocated slot), then notify the target with an aux
 * CQE unless IORING_MSG_RING_CQE_SKIP was requested. On success the file
 * reference is owned by the target's table, so cleanup is disarmed.
 */
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	/* trylock only if our own ring is still locked; -EAGAIN punts to io-wq */
	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	/* ret >= 0 is the slot index the file was installed into */
	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	/* reference now owned by the target's file table, don't fput it */
	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}
209e6130ebaSJens Axboe
io_msg_tw_fd_complete(struct callback_head * head)210e6130ebaSJens Axboe static void io_msg_tw_fd_complete(struct callback_head *head)
211e6130ebaSJens Axboe {
2126d043ee1SPavel Begunkov struct io_msg *msg = container_of(head, struct io_msg, tw);
2136d043ee1SPavel Begunkov struct io_kiocb *req = cmd_to_io_kiocb(msg);
2146d043ee1SPavel Begunkov int ret = -EOWNERDEAD;
2156d043ee1SPavel Begunkov
2166d043ee1SPavel Begunkov if (!(current->flags & PF_EXITING))
2176d043ee1SPavel Begunkov ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
2186d043ee1SPavel Begunkov if (ret < 0)
2196d043ee1SPavel Begunkov req_set_fail(req);
2206d043ee1SPavel Begunkov io_req_queue_tw_complete(req, ret);
2216d043ee1SPavel Begunkov }
2226d043ee1SPavel Begunkov
io_msg_send_fd(struct io_kiocb * req,unsigned int issue_flags)2236d043ee1SPavel Begunkov static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
2246d043ee1SPavel Begunkov {
22517211310SPavel Begunkov struct io_ring_ctx *target_ctx = req->file->private_data;
22617211310SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
22717211310SPavel Begunkov struct io_ring_ctx *ctx = req->ctx;
22817211310SPavel Begunkov struct file *src_file = msg->src_file;
22917211310SPavel Begunkov
23017211310SPavel Begunkov if (msg->len)
23117211310SPavel Begunkov return -EINVAL;
232*5da28eddSPavel Begunkov if (target_ctx == ctx)
233*5da28eddSPavel Begunkov return -EINVAL;
23417211310SPavel Begunkov if (target_ctx->flags & IORING_SETUP_R_DISABLED)
23517211310SPavel Begunkov return -EBADFD;
2368579538cSPavel Begunkov if (!src_file) {
2378579538cSPavel Begunkov src_file = io_msg_grab_file(req, issue_flags);
23817211310SPavel Begunkov if (!src_file)
23917211310SPavel Begunkov return -EBADF;
24017211310SPavel Begunkov msg->src_file = src_file;
24117211310SPavel Begunkov req->flags |= REQ_F_NEED_CLEANUP;
24217211310SPavel Begunkov }
24317211310SPavel Begunkov
24417211310SPavel Begunkov if (io_msg_need_remote(target_ctx))
2456d043ee1SPavel Begunkov return io_msg_exec_remote(req, io_msg_tw_fd_complete);
24656d8e318SPavel Begunkov return io_msg_install_complete(req, issue_flags);
24756d8e318SPavel Begunkov }
24817211310SPavel Begunkov
io_msg_ring_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)24917211310SPavel Begunkov int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
25017211310SPavel Begunkov {
25136404b09SJens Axboe struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
25236404b09SJens Axboe
253f2ccb5aeSStefan Metzmacher if (unlikely(sqe->buf_index || sqe->personality))
25436404b09SJens Axboe return -EINVAL;
255e6130ebaSJens Axboe
25636404b09SJens Axboe msg->src_file = NULL;
25736404b09SJens Axboe msg->user_data = READ_ONCE(sqe->off);
25811373026SPavel Begunkov msg->len = READ_ONCE(sqe->len);
25936404b09SJens Axboe msg->cmd = READ_ONCE(sqe->addr);
26036404b09SJens Axboe msg->src_fd = READ_ONCE(sqe->addr3);
261e6130ebaSJens Axboe msg->dst_fd = READ_ONCE(sqe->file_index);
262e6130ebaSJens Axboe msg->flags = READ_ONCE(sqe->msg_ring_flags);
263e6130ebaSJens Axboe if (msg->flags & ~IORING_MSG_RING_MASK)
264e6130ebaSJens Axboe return -EINVAL;
265cbeb47a7SBreno Leitao
266e6130ebaSJens Axboe return 0;
267e6130ebaSJens Axboe }
26836404b09SJens Axboe
io_msg_ring(struct io_kiocb * req,unsigned int issue_flags)26936404b09SJens Axboe int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
27036404b09SJens Axboe {
27136404b09SJens Axboe struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
27236404b09SJens Axboe int ret;
273f2ccb5aeSStefan Metzmacher
27436404b09SJens Axboe ret = -EBADFD;
27536404b09SJens Axboe if (!io_is_uring_fops(req->file))
27636404b09SJens Axboe goto done;
27736404b09SJens Axboe
27836404b09SJens Axboe switch (msg->cmd) {
27936404b09SJens Axboe case IORING_MSG_DATA:
280e6130ebaSJens Axboe ret = io_msg_ring_data(req, issue_flags);
281e6130ebaSJens Axboe break;
282e12d7a46SJens Axboe case IORING_MSG_SEND_FD:
283e6130ebaSJens Axboe ret = io_msg_send_fd(req, issue_flags);
284e6130ebaSJens Axboe break;
285e6130ebaSJens Axboe default:
286e6130ebaSJens Axboe ret = -EINVAL;
287e6130ebaSJens Axboe break;
288e6130ebaSJens Axboe }
289e6130ebaSJens Axboe
290e6130ebaSJens Axboe done:
29136404b09SJens Axboe if (ret < 0) {
29236404b09SJens Axboe if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
2936d043ee1SPavel Begunkov return ret;
2946d043ee1SPavel Begunkov req_set_fail(req);
2956d043ee1SPavel Begunkov }
29636404b09SJens Axboe io_req_set_res(req, ret, 0);
2976d043ee1SPavel Begunkov return IOU_OK;
29836404b09SJens Axboe }
29936404b09SJens Axboe