// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

/*
 * Stashed in the low bit of wait_queue_entry::private to tell the wakeup
 * path whether this entry is the second ("double") poll entry.
 */
#define IO_WQE_F_DOUBLE		1

/* Recover the request pointer by stripping the IO_WQE_F_DOUBLE marker bit. */
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

/* True if this wait entry is the second (double) poll entry. */
static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

/*
 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
 * bump it and acquire ownership. It's disallowed to modify requests while not
 * owning it, that prevents from races for enqueueing task_work's and b/w
 * arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/* Flag the request cancelled; checked by io_poll_check_events(). */
static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

/* Pure poll keeps its io_poll in the command area; apoll keeps it in ->apoll. */
static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req);
	return &req->apoll->poll;
}

/* Insert into the per-bucket-spinlock cancel hash, keyed by cqe.user_data. */
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

/* Remove from the per-bucket-spinlock cancel hash. */
static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

/*
 * Insert into ->cancel_table_locked, which is serialised by ->uring_lock
 * rather than per-bucket spinlocks (see io_poll_tw_hash_eject()).
 */
static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

/* Remove the request from whichever cancel hash it was inserted into. */
static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, locked);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

/* Initialise an io_poll entry with the wanted event mask and wake callback. */
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

/*
 * Unlink one poll entry from its waitqueue, if still queued. The acquire
 * load of ->head pairs with the smp_store_release() in io_pollfree_wake().
 */
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
};

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require,
 * which is either spurious wakeup or multishot CQE is served.
 * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res.
 * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result
 * is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return IOU_POLL_DONE;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		/* no interesting events pending, drop refs and maybe retry */
		if ((unlikely(!req->cqe.res)))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_post_aux_cqe(ctx, req->cqe.user_data,
					     mask, IORING_CQE_F_MORE, false)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			ret = io_poll_issue(req, locked);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			if (ret < 0)
				return ret;
		}

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return IOU_POLL_NO_ACTION;
}

/* task_work completion handler for IORING_OP_POLL_ADD requests. */
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	if (ret == IOU_POLL_DONE) {
		struct io_poll *poll = io_kiocb_to_cmd(req);
		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

/* task_work handler for requests polled internally (async armed poll). */
static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
		io_req_complete_post(req);
	else if (ret == IOU_POLL_DONE)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}

/* Stash the wake mask in cqe.res and punt processing to task_work. */
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);
	/*
	 * This is useful for poll that is armed on behalf of another
	 * request, and where the wakeup path could be on a different
	 * CPU. We want to avoid pulling in req->apoll->events for that
	 * case.
	 */
	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

/* Only queue task_work if we successfully took ownership of the request. */
static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}

#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

/* The waitqueue is going away (POLLFREE); detach immediately. */
static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone is already
	 * holds ownership over it, we have to tear down the request as
	 * best we can. That means immediately removing the request from
	 * its waitqueue and preventing all further accesses to the
	 * waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

/* Waitqueue wake callback installed by io_init_poll_iocb(). */
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

/* Mark the request double-polled, serialised against a concurrent wakeup. */
static void io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm may not hold ownership and so race with
	 * io_poll_wake() by modifying req->flags. There is only one
	 * poll entry queued, serialise with it by taking its head lock.
	 */
	if (head)
		spin_lock_irq(&head->lock);

	req->flags |= REQ_F_DOUBLE_POLL;
	if (req->opcode == IORING_OP_POLL_ADD)
		req->flags |= REQ_F_ASYNC_DATA;

	if (head)
		spin_unlock_irq(&head->lock);
	rcu_read_unlock();
}

/*
 * poll_table queue callback body: add a wait entry on @head, allocating a
 * second io_poll on demand when the file polls multiple waitqueues.
 */
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		io_poll_double_prepare(req);
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

/* poll_table callback for IORING_OP_POLL_ADD; double entry in ->async_data. */
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

/* We may complete inline if we already own the poll ref or can take it. */
static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the requests if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;

	atomic_set(&req->poll_refs, (int)ipt->owning);
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	/* queueing failed (or nothing was queued): tear down what we added */
	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt))
			return 0;
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Release ownership. If someone tried to queue a tw while it was
		 * locked, kick it off for them.
		 */
		v = atomic_dec_return(&req->poll_refs);
		if (unlikely(v & IO_POLL_REF_MASK))
			__io_poll_execute(req, 0);
	}
	return 0;
}

/* poll_table callback for internally armed poll; double entry in the apoll. */
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

/*
 * Get an async_poll entry: reuse the request's previous one if it was
 * already polled, else take from the ctx cache (only safe when the ring
 * lock is held, i.e. !IO_URING_F_UNLOCKED), else kmalloc. Returns NULL
 * on allocation failure.
 */
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
		   !list_empty(&ctx->apoll_cache)) {
		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
					 poll.wait.entry);
		list_del_init(&apoll->poll.wait.entry);
	} else {
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	return apoll;
}

/*
 * Arm poll-driven retry for a request that would otherwise block.
 * Returns IO_APOLL_OK when armed, IO_APOLL_READY when the file is already
 * ready (caller should retry the op), or IO_APOLL_ABORTED when apoll can't
 * be used for this request.
 */
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED))
		req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

/*
 * Cancel every matching poll request in one hash table; returns true if
 * any request was found and cancelled.
 */
static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	/* sweep both the spinlock-backed and the uring_lock-backed tables */
	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

/*
 * Look up a hashed poll request by user_data (cd->data) in @table.
 *
 * On success the matching bucket's spinlock is left HELD and *out_bucket is
 * set so the caller can unlock it; on failure *out_bucket is NULL and no
 * lock is held.
 */
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			/* skip entries already visited in this cancel pass */
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;		/* bucket lock intentionally kept held */
	}
	spin_unlock(&hb->lock);
	return NULL;
}

/*
 * Look up a hashed poll request by file (or any request, with
 * IORING_ASYNC_CANCEL_ANY) by scanning every bucket of @table.
 *
 * Same locking contract as io_poll_find(): on success the bucket lock is
 * left held and *out_bucket set for the caller to unlock.
 */
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			/* skip entries already visited in this cancel pass */
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;	/* bucket lock intentionally kept held */
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

/*
 * Take ownership of @req and remove it from its waitqueues and the cancel
 * hash.  Returns 0 on success, -ENOENT if @req is NULL, -EALREADY if
 * someone else owns the poll refs.
 */
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

/*
 * Find a poll request in @table per @cd and cancel it.  Returns 0 on
 * success, -ENOENT if nothing matched.
 */
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	/* the find helpers return with the bucket lock held on success */
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

/*
 * Cancel a poll request, trying the lock-free-lookup table first and only
 * taking ->uring_lock to search the mutex-backed table on a miss.
 */
int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

/*
 * Translate the userspace poll mask in @sqe into kernel __poll_t form,
 * folding in behaviour flags (oneshot / edge-triggered) derived from the
 * submission @flags.
 */
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	/* userspace supplies the mask in little-endian halfword order */
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	/* keep only valid event bits plus the behaviour flags */
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

/*
 * Prepare an IORING_OP_POLL_REMOVE request: validate the sqe and record
 * what should be updated (events and/or user_data) or cancelled.
 */
int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	/* new_user_data only makes sense with UPDATE_USER_DATA */
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

/*
 * Prepare an IORING_OP_POLL_ADD request: validate the sqe and parse the
 * requested poll mask.
 */
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
		return -EINVAL;
	/* multishot poll must post a CQE per event, so CQE_SKIP is invalid */
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

/*
 * Issue an IORING_OP_POLL_ADD: arm the poll.  Completes inline (IOU_OK)
 * if the file was already ready, otherwise the request stays queued and
 * completes from the wakeup path.
 */
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (req->ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_SINGLE_ISSUER)))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		/* >0: already ready, result_mask holds the triggered events */
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}

/*
 * Issue an IORING_OP_POLL_REMOVE: find the target poll request by its old
 * user_data (searching both cancel tables), disarm it, then either update
 * and re-arm it or complete it with -ECANCELED.
 */
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	/* first try the table that doesn't need ->uring_lock */
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	/* io_poll_find() returned with the bucket lock held on success */
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	/* not there: retry against the mutex-backed table */
	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only mask one event flags, keep behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	/* not re-armed: complete the target request as cancelled */
	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}