xref: /openbmc/linux/io_uring/msg_ring.c (revision f432c8c8)
136404b09SJens Axboe // SPDX-License-Identifier: GPL-2.0
236404b09SJens Axboe #include <linux/kernel.h>
336404b09SJens Axboe #include <linux/errno.h>
436404b09SJens Axboe #include <linux/file.h>
536404b09SJens Axboe #include <linux/slab.h>
6e6130ebaSJens Axboe #include <linux/nospec.h>
736404b09SJens Axboe #include <linux/io_uring.h>
836404b09SJens Axboe 
936404b09SJens Axboe #include <uapi/linux/io_uring.h>
1036404b09SJens Axboe 
1136404b09SJens Axboe #include "io_uring.h"
12e6130ebaSJens Axboe #include "rsrc.h"
13e6130ebaSJens Axboe #include "filetable.h"
1436404b09SJens Axboe #include "msg_ring.h"
1536404b09SJens Axboe 
/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					IORING_MSG_RING_FLAGS_PASS)

/*
 * Per-request state for IORING_OP_MSG_RING, overlaid on the io_kiocb
 * command area (see io_kiocb_to_cmd()).
 */
struct io_msg {
	struct file			*file;		/* the target io_uring file */
	struct file			*src_file;	/* grabbed source file (IORING_MSG_SEND_FD) */
	struct callback_head		tw;		/* task_work for remote (submitter-task) execution */
	u64 user_data;		/* posted as cqe->user_data on the target ring */
	u32 len;		/* posted as cqe->res on the target ring */
	u32 cmd;		/* IORING_MSG_DATA or IORING_MSG_SEND_FD */
	u32 src_fd;		/* fixed-file index in the source ring (SEND_FD) */
	union {
		u32 dst_fd;	/* fixed-file slot in the target ring (SEND_FD) */
		u32 cqe_flags;	/* cqe->flags to pass (MSG_DATA + FLAGS_PASS) */
	};
	u32 flags;		/* IORING_MSG_RING_* flags from the SQE */
};
3536404b09SJens Axboe 
/* Drop the target ring's uring_lock taken via io_double_lock_ctx() */
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}
40423d5081SJens Axboe 
io_double_lock_ctx(struct io_ring_ctx * octx,unsigned int issue_flags)41423d5081SJens Axboe static int io_double_lock_ctx(struct io_ring_ctx *octx,
42423d5081SJens Axboe 			      unsigned int issue_flags)
43423d5081SJens Axboe {
44423d5081SJens Axboe 	/*
45423d5081SJens Axboe 	 * To ensure proper ordering between the two ctxs, we can only
46423d5081SJens Axboe 	 * attempt a trylock on the target. If that fails and we already have
47423d5081SJens Axboe 	 * the source ctx lock, punt to io-wq.
48423d5081SJens Axboe 	 */
49423d5081SJens Axboe 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
50423d5081SJens Axboe 		if (!mutex_trylock(&octx->uring_lock))
51423d5081SJens Axboe 			return -EAGAIN;
52423d5081SJens Axboe 		return 0;
53423d5081SJens Axboe 	}
54423d5081SJens Axboe 	mutex_lock(&octx->uring_lock);
55423d5081SJens Axboe 	return 0;
56423d5081SJens Axboe }
57423d5081SJens Axboe 
/*
 * Request teardown hook (REQ_F_NEED_CLEANUP): drop the reference on the
 * grabbed source file if the fd transfer never completed.
 */
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	/* cleanup is only queued once a source file was grabbed */
	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}
6811373026SPavel Begunkov 
io_msg_need_remote(struct io_ring_ctx * target_ctx)6956d8e318SPavel Begunkov static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
7056d8e318SPavel Begunkov {
7156d8e318SPavel Begunkov 	if (!target_ctx->task_complete)
7256d8e318SPavel Begunkov 		return false;
7356d8e318SPavel Begunkov 	return current != target_ctx->submitter_task;
7456d8e318SPavel Begunkov }
7556d8e318SPavel Begunkov 
/*
 * Queue @func as task_work on the target ring's submitter task.
 *
 * Returns IOU_ISSUE_SKIP_COMPLETE on success (the callback will complete
 * the request), or -EOWNERDEAD if the submitter task is gone or exiting.
 */
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	/* the target ring may already be tearing down */
	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	/*
	 * Use the snapshot taken above: re-reading ctx->submitter_task here
	 * could observe NULL (or a different value) after the NULL check.
	 */
	if (task_work_add(task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}
9156d8e318SPavel Begunkov 
/*
 * task_work callback, runs in the target ring's submitter task: post the
 * IORING_MSG_DATA CQE on the target ring, then complete the MSG_RING
 * request itself.
 */
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	/* submitter is exiting: don't touch the target ring anymore */
	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}
1256d043ee1SPavel Begunkov 
/*
 * IORING_MSG_DATA: post a CQE carrying user_data/len (and, with
 * IORING_MSG_RING_FLAGS_PASS, the sender-supplied cqe flags) on the
 * target ring.
 */
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	/* src_fd is SEND_FD-only; dst_fd aliases cqe_flags, so it may only
	 * be set when FLAGS_PASS is requested */
	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	/* refuse rings that were set up disabled and never enabled */
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	/* DEFER_TASKRUN target: the CQE must be posted by its submitter */
	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		/* IOPOLL rings need uring_lock held to post completions */
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}
159e6130ebaSJens Axboe 
/*
 * Look up the fixed file at index msg->src_fd in the source ring's file
 * table and take a reference on it. Returns NULL if the index is out of
 * range or the slot is empty; the caller owns the returned reference.
 */
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	int idx = msg->src_fd;

	/* the ring lock protects the file table against concurrent updates */
	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		/* clamp the index under speculation (Spectre-v1 hardening) */
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file = io_file_from_index(&ctx->file_table, idx);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}
177e6130ebaSJens Axboe 
/*
 * Install the grabbed source file into the target ring's fixed file table
 * and, unless IORING_MSG_RING_CQE_SKIP was requested, post a notification
 * CQE on the target. Takes and releases the target uring_lock.
 *
 * Returns the installed slot index (>= 0), -EAGAIN if the lock could not
 * be taken, or a negative error.
 */
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	/* on success, the file reference is owned by the target's table */
	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	/* transfer done: no cleanup fput() needed anymore */
	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}
209e6130ebaSJens Axboe 
io_msg_tw_fd_complete(struct callback_head * head)2106d043ee1SPavel Begunkov static void io_msg_tw_fd_complete(struct callback_head *head)
2116d043ee1SPavel Begunkov {
2126d043ee1SPavel Begunkov 	struct io_msg *msg = container_of(head, struct io_msg, tw);
2136d043ee1SPavel Begunkov 	struct io_kiocb *req = cmd_to_io_kiocb(msg);
2146d043ee1SPavel Begunkov 	int ret = -EOWNERDEAD;
2156d043ee1SPavel Begunkov 
2166d043ee1SPavel Begunkov 	if (!(current->flags & PF_EXITING))
2176d043ee1SPavel Begunkov 		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
2186d043ee1SPavel Begunkov 	if (ret < 0)
2196d043ee1SPavel Begunkov 		req_set_fail(req);
2206d043ee1SPavel Begunkov 	io_req_queue_tw_complete(req, ret);
2216d043ee1SPavel Begunkov }
2226d043ee1SPavel Begunkov 
/*
 * IORING_MSG_SEND_FD: transfer a fixed file from the source ring's table
 * into a slot in the target ring's table.
 */
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	/* len is not used for SEND_FD */
	if (msg->len)
		return -EINVAL;
	/* can't send a file to the ring itself */
	if (target_ctx == ctx)
		return -EINVAL;
	/* refuse rings that were set up disabled and never enabled */
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	/* may already hold a grabbed file from a previous -EAGAIN pass */
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	/* DEFER_TASKRUN target: install must run in its submitter task */
	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}
24817211310SPavel Begunkov 
io_msg_ring_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)24936404b09SJens Axboe int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
25036404b09SJens Axboe {
251f2ccb5aeSStefan Metzmacher 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
25236404b09SJens Axboe 
253e6130ebaSJens Axboe 	if (unlikely(sqe->buf_index || sqe->personality))
25436404b09SJens Axboe 		return -EINVAL;
25536404b09SJens Axboe 
25611373026SPavel Begunkov 	msg->src_file = NULL;
25736404b09SJens Axboe 	msg->user_data = READ_ONCE(sqe->off);
25836404b09SJens Axboe 	msg->len = READ_ONCE(sqe->len);
259e6130ebaSJens Axboe 	msg->cmd = READ_ONCE(sqe->addr);
260e6130ebaSJens Axboe 	msg->src_fd = READ_ONCE(sqe->addr3);
261e6130ebaSJens Axboe 	msg->dst_fd = READ_ONCE(sqe->file_index);
262e6130ebaSJens Axboe 	msg->flags = READ_ONCE(sqe->msg_ring_flags);
263cbeb47a7SBreno Leitao 	if (msg->flags & ~IORING_MSG_RING_MASK)
264e6130ebaSJens Axboe 		return -EINVAL;
265e6130ebaSJens Axboe 
26636404b09SJens Axboe 	return 0;
26736404b09SJens Axboe }
26836404b09SJens Axboe 
io_msg_ring(struct io_kiocb * req,unsigned int issue_flags)26936404b09SJens Axboe int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
27036404b09SJens Axboe {
271f2ccb5aeSStefan Metzmacher 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
27236404b09SJens Axboe 	int ret;
27336404b09SJens Axboe 
27436404b09SJens Axboe 	ret = -EBADFD;
27536404b09SJens Axboe 	if (!io_is_uring_fops(req->file))
27636404b09SJens Axboe 		goto done;
27736404b09SJens Axboe 
278e6130ebaSJens Axboe 	switch (msg->cmd) {
279e6130ebaSJens Axboe 	case IORING_MSG_DATA:
280e12d7a46SJens Axboe 		ret = io_msg_ring_data(req, issue_flags);
281e6130ebaSJens Axboe 		break;
282e6130ebaSJens Axboe 	case IORING_MSG_SEND_FD:
283e6130ebaSJens Axboe 		ret = io_msg_send_fd(req, issue_flags);
284e6130ebaSJens Axboe 		break;
285e6130ebaSJens Axboe 	default:
286e6130ebaSJens Axboe 		ret = -EINVAL;
287e6130ebaSJens Axboe 		break;
288e6130ebaSJens Axboe 	}
28936404b09SJens Axboe 
29036404b09SJens Axboe done:
2916d043ee1SPavel Begunkov 	if (ret < 0) {
2926d043ee1SPavel Begunkov 		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
2936d043ee1SPavel Begunkov 			return ret;
29436404b09SJens Axboe 		req_set_fail(req);
2956d043ee1SPavel Begunkov 	}
29636404b09SJens Axboe 	io_req_set_res(req, ret, 0);
29736404b09SJens Axboe 	return IOU_OK;
29836404b09SJens Axboe }
299