// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

#define IO_WQE_F_DOUBLE		1

static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

/*
 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
 * bump it and acquire ownership. Modifying a request while not owning it is
 * disallowed; this prevents races when enqueueing task_work and between
 * arming the poll and wakeups.
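 *
 * Bit 31 (IO_POLL_CANCEL_FLAG) marks the request as cancelled, while the
 * low 31 bits (IO_POLL_REF_MASK) hold the reference count. Ownership is
 * won only by the caller whose increment takes the count from zero.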
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req);
	return &req->apoll->poll;
}

static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, locked);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
};

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which means either a spurious wakeup or a multishot CQE was
 * served. IOU_POLL_DONE when it's done with the request, then the mask is
 * stored in req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove
 * the multishot poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return IOU_POLL_DONE;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if (unlikely(!req->cqe.res))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_post_aux_cqe(ctx, req->cqe.user_data,
					     mask, IORING_CQE_F_MORE, false)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			ret = io_poll_issue(req, locked);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
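			/* any other error terminates the poll and fails the request */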
			if (ret < 0)
				return ret;
		}

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return IOU_POLL_NO_ACTION;
}

static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	if (ret == IOU_POLL_DONE) {
		struct io_poll *poll = io_kiocb_to_cmd(req);
		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
		io_req_complete_post(req);
	else if (ret == IOU_POLL_DONE)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}

static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);
	/*
	 * This is useful for poll that is armed on behalf of another
	 * request, and where the wakeup path could be on a different
	 * CPU. We want to avoid pulling in req->apoll->events for that
	 * case.
	 */
	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}

#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already holds
	 * ownership over it, we have to tear down the request as best we
	 * can. That means immediately removing the request from its
	 * waitqueue and preventing all further accesses to the waitqueue
	 * via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as poll->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

static void io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm may not hold ownership and so race with
	 * io_poll_wake() by modifying req->flags. There is only one
	 * poll entry queued, serialise with it by taking its head lock.
	 */
	if (head)
		spin_lock_irq(&head->lock);

	req->flags |= REQ_F_DOUBLE_POLL;
	if (req->opcode == IORING_OP_POLL_ADD)
		req->flags |= REQ_F_ASYNC_DATA;

	if (head)
		spin_unlock_irq(&head->lock);
	rcu_read_unlock();
}

static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		io_poll_double_prepare(req);
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt))
			return 0;
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

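	/*
	 * Edge-triggered with events already pending: a fresh edge may never
	 * arrive, so hand the mask straight to tw instead of waiting.
	 */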
	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Release ownership. If someone tried to queue a tw while it was
		 * locked, kick it off for them.
		 */
		v = atomic_dec_return(&req->poll_refs);
		if (unlikely(v & IO_POLL_REF_MASK))
			__io_poll_execute(req, 0);
	}
	return 0;
}

static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
		   (entry = io_alloc_cache_get(&ctx->apoll_cache)) != NULL) {
		apoll = container_of(entry, struct async_poll, cache);
	} else {
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	return apoll;
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

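	/* sweep both the spinlock-protected and the uring_lock-backed tables */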
	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
	       (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
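	/* a non-zero event mask is only valid together with UPDATE_EVENTS */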
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}

int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the low event mask, keep the behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}