// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring_types.h"
#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, the request is
 * free and we can bump the refs to acquire ownership. Modifying a request
 * while not owning it is disallowed; that prevents races when enqueueing
 * task_work and between arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
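/*
 * Illustrative walk-through of the poll_refs protocol (editor's note, not
 * from the original source): arming sets poll_refs to 1, so the armer owns
 * the request. A wakeup calls io_poll_get_ownership(); if fetch_inc() saw a
 * zero refs part, the waker won ownership and may queue task_work. The
 * task_work handler later drops all refs it observed in one go; if the count
 * doesn't hit zero, another wakeup raced in and the handler loops instead of
 * releasing the request (see io_poll_check_events()).
 */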
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req);
	return &req->apoll->poll;
}

static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits);
	struct io_hash_bucket *hb = &ctx->cancel_hash[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits);
	spinlock_t *lock = &ctx->cancel_hash[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}
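/*
 * Editor's note (inferred from reading the code): the smp_load_acquire() on
 * ->head above pairs with the smp_store_release() in the POLLFREE branch of
 * io_poll_wake(), so observing NULL here guarantees the freeing side is done
 * touching this wait entry.
 */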
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags is set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. >0 when no action is required, which
 * means either a spurious wakeup or a multishot CQE was served. 0 when it's
 * done with the request, in which case the mask is stored in req->cqe.res.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return 0;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;

		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if (unlikely(!req->cqe.res))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return 0;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);
			bool filled;

			spin_lock(&ctx->completion_lock);
			filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
						 mask, IORING_CQE_F_MORE);
			io_commit_cqring(ctx);
			spin_unlock(&ctx->completion_lock);
			if (filled) {
				io_cqring_ev_posted(ctx);
				continue;
			}
			return -ECANCELED;
		}

		ret = io_poll_issue(req, locked);
		if (ret)
			return ret;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return 1;
}
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret > 0)
		return;

	if (!ret) {
		struct io_poll *poll = io_kiocb_to_cmd(req);

		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_req_delete(req, ctx);
	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret > 0)
		return;

	io_poll_remove_entries(req);
	io_poll_req_delete(req, req->ctx);

	if (!ret)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}
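/*
 * Editor's note: the two task_work handlers above differ only in what "done"
 * means. Pure poll (io_poll_task_func) posts the poll mask as the request's
 * final result, while apoll (io_apoll_task_func) re-submits the request the
 * poll was armed on behalf of.
 */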
static void __io_poll_execute(struct io_kiocb *req, int mask,
			      __poll_t __maybe_unused events)
{
	io_req_set_res(req, mask, 0);
	/*
	 * This is useful for poll that is armed on behalf of another
	 * request, and where the wakeup path could be on a different
	 * CPU. We want to avoid pulling in req->apoll->events for that
	 * case.
	 */
	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res,
				   __poll_t events)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res, events);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0, 0);
}

#define wqe_to_req(wait)	((void *)((unsigned long) (wait)->private & ~1))
#define wqe_is_double(wait)	((unsigned long) (wait)->private & 1)
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE)) {
		io_poll_mark_cancelled(req);
		/* we have to kick tw in case it's not already */
		io_poll_execute(req, 0, poll->events);

		/*
		 * If the waitqueue is being freed early but someone already
		 * holds ownership over it, we have to tear down the request as
		 * best we can. That means immediately removing the request from
		 * its waitqueue and preventing all further accesses to the
		 * waitqueue via the request.
		 */
		list_del_init(&poll->wait.entry);

		/*
		 * Careful: this *must* be the last step, since as soon
		 * as req->head is NULL'ed out, the request can be
		 * completed and freed, as aio_poll_complete_work()
		 * will no longer need to take the waitqueue lock.
		 */
		smp_store_release(&poll->head, NULL);
		return 1;
	}

	/* for instances that support it, check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask, poll->events);
	}
	return 1;
}
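/*
 * Editor's note: ->wait.private carries the req pointer with bit 0 doubling
 * as a "this is the double-poll entry" tag (see wqe_to_req()/wqe_is_double()
 * above and the wqe_private |= 1 in __io_queue_proc() below). This relies on
 * io_kiocb allocations being at least 2-byte aligned, so bit 0 of the
 * pointer is always free.
 */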
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Set up a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}
		/* mark as double wq entry */
		wqe_private |= 1;
		req->flags |= REQ_F_DOUBLE_POLL;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		*poll_ptr = poll;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
	}

	req->flags |= REQ_F_SINGLE_POLL;
	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;

	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;

	/*
	 * Take the ownership to delay any tw execution up until we're done
	 * with poll arming. See io_poll_get_ownership().
	 */
	atomic_set(&req->poll_refs, 1);
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		io_poll_remove_entries(req);
		/* no one else has access to the req, forget about the ref */
		return mask;
	}

	if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);
		if (!ipt->error)
			ipt->error = -EINVAL;
		return 0;
	}

	io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET)) {
		/* can't multishot if failed, just queue the event we've got */
		if (unlikely(ipt->error || !ipt->nr_entries)) {
			poll->events |= EPOLLONESHOT;
			req->apoll_events |= EPOLLONESHOT;
			ipt->error = 0;
		}
		__io_poll_execute(req, mask, poll->events);
		return 0;
	}

	/*
	 * Release ownership. If someone tried to queue a tw while it was
	 * locked, kick it off for them.
	 */
	v = atomic_dec_return(&req->poll_refs);
	if (unlikely(v & IO_POLL_REF_MASK))
		__io_poll_execute(req, 0, poll->events);
	return 0;
}
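/*
 * Editor's summary of __io_arm_poll_handler()'s return convention, inferred
 * from the callers below: a non-zero return means the poll completed inline
 * and the value is the event mask; 0 with ipt->error set means arming
 * failed; 0 without an error means the request is now queued on the file's
 * waitqueue(s).
 */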
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;
	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
		   !list_empty(&ctx->apoll_cache)) {
		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
					 poll.wait.entry);
		list_del_init(&apoll->poll.wait.entry);
	} else {
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return IO_APOLL_ABORTED;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
	if (ret || ipt.error)
		return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;

	trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
				mask, apoll->poll.events);
	return IO_APOLL_OK;
}
/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
		struct io_hash_bucket *hb = &ctx->cancel_hash[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, ctx->cancel_hash_bits);
	struct io_hash_bucket *hb = &ctx->cancel_hash[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}
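/*
 * Editor's note on the locking convention, inferred from the callers: on a
 * successful lookup, io_poll_find() above and io_poll_file_find() below
 * return with the matching bucket's spinlock still held and *out_bucket set,
 * so the caller acts on the request and drops the lock itself.
 */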
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
		struct io_hash_bucket *hb = &ctx->cancel_hash[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

static bool io_poll_disarm(struct io_kiocb *req)
{
	if (!io_poll_get_ownership(req))
		return false;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return true;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, &bucket);
	else
		req = io_poll_find(ctx, false, cd, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
	       (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
	if (ret) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}
	if (ipt.error) {
		req_set_fail(req);
		return ipt.error;
	}

	return IOU_ISSUE_SKIP_COMPLETE;
}
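/*
 * Editor's note: IOU_ISSUE_SKIP_COMPLETE above hands completion off to the
 * poll machinery; the CQE is posted later from task_work once the poll fires
 * or is cancelled, not by the issue path.
 */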
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	preq = io_poll_find(ctx, true, &cd, &bucket);
	if (preq)
		ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);

	if (!preq) {
		ret = -ENOENT;
		goto out;
	}
	if (!ret2) {
		ret = -EALREADY;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the event mask, keep the behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}