// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring_types.h"
#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, the request is
 * free and we can bump it to acquire ownership. Modifying a request while not
 * owning it is disallowed, which prevents races when enqueueing task_work and
 * between arming the poll entry and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
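
/*
 * Example of how the ownership handshake plays out with two concurrent
 * wakeups (illustrative, derived from the helpers above):
 *
 *   poll_refs == 0              request is idle, nobody owns it
 *   wakeup A: fetch_inc -> 0    A saw the refs part as 0, so A now owns the
 *                               request and queues task_work for it
 *   wakeup B: fetch_inc -> 1    B sees a nonzero refs part and does nothing
 *                               beyond the increment
 *   task_work: io_poll_check_events() processes events and finally does an
 *              atomic_sub_return() of everything it read; a nonzero result
 *              means B's increment arrived in the meantime, so it loops once
 *              more instead of losing that wakeup.
 *
 * IO_POLL_CANCEL_FLAG lives outside IO_POLL_REF_MASK, so marking a request
 * cancelled never looks like an ownership grab.
 */
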
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req);
	return &req->apoll->poll;
}

static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct hlist_head *list;

	list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
	hlist_add_head(&req->hash_node, list);
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}
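
/*
 * The smp_load_acquire() of poll->head above pairs with the
 * smp_store_release(&poll->head, NULL) in the POLLFREE branch of
 * io_poll_wake(): observing NULL here guarantees the entry has already been
 * detached from the (possibly about-to-be-freed) waitqueue, so there is
 * nothing left to lock.
 */
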
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. >0 when no action is required, which
 * means either a spurious wakeup or a multishot CQE was served. 0 when it's
 * done with the request, in which case the mask is stored in req->cqe.res.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return 0;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;

		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if (unlikely(!req->cqe.res))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return 0;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);
			bool filled;

			spin_lock(&ctx->completion_lock);
			filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
						 mask, IORING_CQE_F_MORE);
			io_commit_cqring(ctx);
			spin_unlock(&ctx->completion_lock);
			if (filled) {
				io_cqring_ev_posted(ctx);
				continue;
			}
			return -ECANCELED;
		}

		ret = io_poll_issue(req, locked);
		if (ret)
			return ret;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return 1;
}
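
/*
 * The two task_work handlers below consume io_poll_check_events() as
 * follows: ret > 0 means nothing further to do (spurious wakeup, or a
 * multishot CQE was already posted); ret == 0 means the poll fired, so
 * io_poll_task_func() turns req->cqe.res into the userspace mask and
 * completes the request, while io_apoll_task_func() re-issues the original
 * request; ret < 0 fails the request with that error.
 */
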
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret > 0)
		return;

	if (!ret) {
		struct io_poll *poll = io_kiocb_to_cmd(req);

		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	spin_lock(&ctx->completion_lock);
	hash_del(&req->hash_node);
	req->cqe.flags = 0;
	__io_req_complete_post(req);
	io_commit_cqring(ctx);
	spin_unlock(&ctx->completion_lock);
	io_cqring_ev_posted(ctx);
}

static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret > 0)
		return;

	io_poll_remove_entries(req);
	spin_lock(&ctx->completion_lock);
	hash_del(&req->hash_node);
	spin_unlock(&ctx->completion_lock);

	if (!ret)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}

static void __io_poll_execute(struct io_kiocb *req, int mask,
			      __poll_t __maybe_unused events)
{
	io_req_set_res(req, mask, 0);
	/*
	 * This is useful for poll that is armed on behalf of another
	 * request, and where the wakeup path could be on a different
	 * CPU. We want to avoid pulling in req->apoll->events for that
	 * case.
	 */
	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res,
				   __poll_t events)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res, events);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0, 0);
}

#define wqe_to_req(wait)	((void *)((unsigned long) (wait)->private & ~1))
#define wqe_is_double(wait)	((unsigned long) (wait)->private & 1)
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)
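
/*
 * The waitqueue entry's ->private field carries the owning io_kiocb pointer
 * with bit 0 used as a tag: __io_queue_proc() sets it for the second
 * (double) poll entry. io_kiocb allocations are at least pointer aligned,
 * so bit 0 is never part of a real address, and wqe_to_req()/wqe_is_double()
 * can split the two apart again in io_poll_wake().
 */
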
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE)) {
		io_poll_mark_cancelled(req);
		/* we have to kick tw in case it's not already */
		io_poll_execute(req, 0, poll->events);

		/*
		 * If the waitqueue is being freed early but someone already
		 * holds ownership over it, we have to tear down the request as
		 * best we can. That means immediately removing the request from
		 * its waitqueue and preventing all further accesses to the
		 * waitqueue via the request.
		 */
		list_del_init(&poll->wait.entry);

		/*
		 * Careful: this *must* be the last step, since as soon
		 * as req->head is NULL'ed out, the request can be
		 * completed and freed, since aio_poll_complete_work()
		 * will no longer need to take the waitqueue lock.
		 */
		smp_store_release(&poll->head, NULL);
		return 1;
	}

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask, poll->events);
	}
	return 1;
}
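
/*
 * Note that a wakeup which loses the io_poll_get_ownership() race above
 * still returns 1: its increment of ->poll_refs is not lost, because the
 * current owner re-checks for events before dropping all references at the
 * end of io_poll_check_events() and will observe the extra reference there.
 */
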
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}
		/* mark as double wq entry */
		wqe_private |= 1;
		req->flags |= REQ_F_DOUBLE_POLL;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		*poll_ptr = poll;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
	}

	req->flags |= REQ_F_SINGLE_POLL;
	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}
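
/*
 * For IORING_OP_POLL_ADD the second entry allocated above is stashed in
 * req->async_data (hence REQ_F_ASYNC_DATA), which is exactly where
 * io_poll_get_double() looks for it later; async poll keeps it in
 * apoll->double_poll instead, see io_async_queue_proc() below.
 */
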
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;

	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;

	/*
	 * Take the ownership to delay any tw execution up until we're done
	 * with poll arming. see io_poll_get_ownership().
	 */
	atomic_set(&req->poll_refs, 1);
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (mask && (poll->events & EPOLLONESHOT)) {
		io_poll_remove_entries(req);
		/* no one else has access to the req, forget about the ref */
		return mask;
	}
	if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);
		if (!ipt->error)
			ipt->error = -EINVAL;
		return 0;
	}

	spin_lock(&ctx->completion_lock);
	io_poll_req_insert(req);
	spin_unlock(&ctx->completion_lock);

	if (mask) {
		/* can't multishot if failed, just queue the event we've got */
		if (unlikely(ipt->error || !ipt->nr_entries)) {
			poll->events |= EPOLLONESHOT;
			req->apoll_events |= EPOLLONESHOT;
			ipt->error = 0;
		}
		__io_poll_execute(req, mask, poll->events);
		return 0;
	}

	/*
	 * Release ownership. If someone tried to queue a tw while it was
	 * locked, kick it off for them.
	 */
	v = atomic_dec_return(&req->poll_refs);
	if (unlikely(v & IO_POLL_REF_MASK))
		__io_poll_execute(req, 0, poll->events);
	return 0;
}
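
/*
 * __io_arm_poll_handler() returns a nonzero mask only for the oneshot
 * "already ready" case above, where the request was never published and the
 * caller completes it inline with that mask. A return of 0 means either
 * that arming failed (ipt->error is set) or that the request has been
 * hashed for cancellation, with task_work kicked right away if events were
 * already pending.
 */
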
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR;
	int ret;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;
	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
		   !list_empty(&ctx->apoll_cache)) {
		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
					 poll.wait.entry);
		list_del_init(&apoll->poll.wait.entry);
	} else {
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return IO_APOLL_ABORTED;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
	if (ret || ipt.error)
		return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;

	trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
				mask, apoll->poll.events);
	return IO_APOLL_OK;
}
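
/*
 * Summarising the return values computed above: IO_APOLL_ABORTED means poll
 * can't be used for this request (the opcode isn't pollable, the file
 * doesn't support poll, it was already polled without partial IO, or arming
 * failed), IO_APOLL_READY means the requested events fired while arming so
 * the issue can simply be retried, and IO_APOLL_OK means the request is now
 * parked on the file's waitqueue(s).
 */
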
/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	spin_lock(&ctx->completion_lock);
	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
		struct hlist_head *list;

		list = &ctx->cancel_hash[i];
		hlist_for_each_entry_safe(req, tmp, list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
	}
	spin_unlock(&ctx->completion_lock);
	return found;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct hlist_head *list;
	struct io_kiocb *req;

	list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
	hlist_for_each_entry(req, list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		return req;
	}
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;
	int i;

	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
		struct hlist_head *list;

		list = &ctx->cancel_hash[i];
		hlist_for_each_entry(req, list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			return req;
		}
	}
	return NULL;
}

static bool io_poll_disarm(struct io_kiocb *req)
	__must_hold(&ctx->completion_lock)
{
	if (!io_poll_get_ownership(req))
		return false;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return true;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd);
	else
		req = io_poll_find(ctx, false, cd);
	if (!req)
		return -ENOENT;
	io_poll_cancel_req(req);
	return 0;
}
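
/*
 * Two cancellation styles are used above: io_poll_cancel_req() only marks
 * the request cancelled and kicks task_work, which then completes it with
 * -ECANCELED, while io_poll_disarm() (used by poll update below) grabs
 * ownership and detaches the request so the caller can re-arm or complete
 * it itself.
 */
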
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	io_req_set_refcount(req);
	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
	if (ret) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}
	if (ipt.error) {
		req_set_fail(req);
		return ipt.error;
	}

	return IOU_ISSUE_SKIP_COMPLETE;
}
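
/*
 * IORING_OP_POLL_REMOVE / poll update: find the original poll request by
 * user_data, take ownership of it via io_poll_disarm(), optionally rewrite
 * its event mask and/or user_data, and re-arm it through io_poll_add(). If
 * the target can't be found or is already being completed, the update
 * request fails with -ENOENT or -EALREADY; if no re-arm is requested (plain
 * remove) or re-arming fails, the old request is completed with -ECANCELED.
 */
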
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	spin_lock(&ctx->completion_lock);
	preq = io_poll_find(ctx, true, &cd);
	if (!preq || !io_poll_disarm(preq)) {
		spin_unlock(&ctx->completion_lock);
		ret = preq ? -EALREADY : -ENOENT;
		goto out;
	}
	spin_unlock(&ctx->completion_lock);

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only update the event mask, keep the behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}