// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"


/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}

static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}

static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	unsigned long file_ptr;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
		file = (struct file *) (file_ptr & FFS_MASK);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}

int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
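
/*
 * Editor's usage sketch (not part of the kernel source, guarded out with
 * "#if 0" so it can never be built into this file): a minimal, hedged
 * example of how userspace typically drives IORING_OP_MSG_RING via liburing,
 * assuming a liburing version that provides io_uring_prep_msg_ring(). It
 * maps onto io_msg_ring_prep() above: the target ring fd lands in sqe->fd,
 * 'data' in sqe->off (msg->user_data), 'len' in sqe->len, and 'flags' in
 * sqe->msg_ring_flags. The helper name msg_ring_example is invented here
 * purely for illustration.
 */
#if 0	/* illustrative userspace sketch only */
#include <liburing.h>

/* Post a data-only message (IORING_MSG_DATA) to another task's ring. */
static int msg_ring_example(struct io_uring *src_ring, int target_ring_fd)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	sqe = io_uring_get_sqe(src_ring);
	if (!sqe)
		return -EBUSY;

	/* Receiver gets a CQE with user_data == 0xcafe and res == 0x10. */
	io_uring_prep_msg_ring(sqe, target_ring_fd, 0x10, 0xcafe, 0);

	ret = io_uring_submit(src_ring);
	if (ret < 0)
		return ret;

	/* Sender's own CQE: 0 on success, e.g. -EOVERFLOW if the target CQ was full. */
	ret = io_uring_wait_cqe(src_ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;
	io_uring_cqe_seen(src_ring, cqe);
	return ret;
}
#endif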