/* xref: /openbmc/linux/io_uring/msg_ring.c (revision 423d5081) */
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	u32 dst_fd;
	u32 flags;
};

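/*
 * Helpers for taking and dropping the target ring's uring_lock while the
 * sender's own ring lock may already be held; see the ordering comment in
 * io_double_lock_ctx().
 */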
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

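/*
 * Cleanup handler: drop the reference on a source file that was grabbed for
 * IORING_MSG_SEND_FD but never handed over to the target ring.
 */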
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

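/*
 * Task work callback for IORING_MSG_DATA, run in the context of the target
 * ring's submitter task: post the CQE to the target ring unless that task
 * is already exiting.
 */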
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING)
		ret = -EOWNERDEAD;
	else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
		ret = -EOVERFLOW;

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

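/*
 * IORING_MSG_DATA: post a CQE carrying msg->user_data and msg->len to the
 * target ring. If the target requires completions to be posted from its
 * submitter task, punt the posting to task_work on that task instead of
 * doing it inline.
 */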
static int io_msg_ring_data(struct io_kiocb *req)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (msg->src_fd || msg->dst_fd || msg->flags)
		return -EINVAL;

	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
		init_task_work(&msg->tw, io_msg_tw_complete);
		if (task_work_add(target_ctx->submitter_task, &msg->tw,
				  TWA_SIGNAL_NO_IPI))
			return -EOWNERDEAD;

		atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
		return IOU_ISSUE_SKIP_COMPLETE;
	}

	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
		return 0;

	return -EOVERFLOW;
}

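/*
 * Look up the fixed file at slot msg->src_fd in the sender's file table and
 * take a reference to it. Returns NULL if the slot is out of range or empty.
 */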
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	unsigned long file_ptr;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
		file = (struct file *) (file_ptr & FFS_MASK);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

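/*
 * Install the grabbed source file into the target ring's fixed file table
 * (slot taken from msg->dst_fd), then post a CQE to the target ring unless
 * the sender asked for it to be skipped with IORING_MSG_RING_CQE_SKIP.
 */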
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

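/*
 * Task work callback for IORING_MSG_SEND_FD, run in the context of the
 * target ring's submitter task: do the file install and completion there
 * unless that task is already exiting.
 */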
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

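/*
 * IORING_MSG_SEND_FD: transfer a fixed file from the sender's file table to
 * the target ring. The source file reference is grabbed on first issue, and
 * the install is punted to the target submitter's task_work if the target
 * requires completions from its submitter task.
 */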
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (target_ctx == ctx)
		return -EINVAL;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
		init_task_work(&msg->tw, io_msg_tw_fd_complete);
		if (task_work_add(target_ctx->submitter_task, &msg->tw,
				  TWA_SIGNAL))
			return -EOWNERDEAD;

		return IOU_ISSUE_SKIP_COMPLETE;
	}
	return io_msg_install_complete(req, issue_flags);
}

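/*
 * Prepare IORING_OP_MSG_RING. The SQE fields are used as follows:
 *
 *	sqe->fd			target io_uring file descriptor (req->file)
 *	sqe->addr		command (IORING_MSG_DATA or IORING_MSG_SEND_FD)
 *	sqe->off		user_data to post in the target CQE
 *	sqe->len		res value to post in the target CQE
 *	sqe->addr3		source fixed file slot (IORING_MSG_SEND_FD)
 *	sqe->file_index		destination fixed file slot (IORING_MSG_SEND_FD)
 *	sqe->msg_ring_flags	IORING_MSG_RING_CQE_SKIP is the only valid flag
 */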
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_CQE_SKIP)
		return -EINVAL;

	return 0;
}

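/*
 * Issue IORING_OP_MSG_RING: validate that the target file is an io_uring
 * instance and dispatch on the command. -EAGAIN and IOU_ISSUE_SKIP_COMPLETE
 * are passed back unchanged so the core can retry from io-wq or defer the
 * completion to task_work, respectively.
 */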
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
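
/*
 * Userspace sketch (not part of this file; the liburing helper name below is
 * an assumption): a ring can signal another ring by submitting
 * IORING_OP_MSG_RING against the target ring's file descriptor, e.g.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&src_ring);
 *
 *	io_uring_prep_msg_ring(sqe, target_ring_fd, 0, 0xcafe, 0);
 *	io_uring_submit(&src_ring);
 *
 * The target ring then observes a CQE with user_data == 0xcafe and res == 0,
 * per the IORING_MSG_DATA path above.
 */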