// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

#define IO_WQE_F_DOUBLE		1
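
/*
 * A wait queue entry's ->private stores the io_kiocb pointer with its low
 * bit borrowed as a tag: set for the second (double) poll entry, clear for
 * the first. The two helpers below pack and unpack that tagged pointer.
 */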
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, the request
 * is free and we can bump it to acquire ownership. Modifying a request
 * while not owning it is disallowed; that prevents races when enqueueing
 * task_work and between arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}
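
/*
 * Poll requests are hashed by ->cqe.user_data for cancelation lookups.
 * They go into either ctx->cancel_table, whose buckets are protected by
 * per-bucket spinlocks, or ctx->cancel_table_locked, which is protected by
 * ->uring_lock (see REQ_F_HASH_LOCKED and io_poll_tw_hash_eject()).
 */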
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, locked);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}
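
/*
 * Unhook one poll entry from its waitqueue. ->head is read with acquire
 * semantics, pairing with the release store in io_pollfree_wake(); the
 * RCU discussion in io_poll_remove_entries() below explains why the
 * waitqueue memory is safe to touch here.
 */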
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags is set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
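
/* tri-state return codes for io_poll_check_events() below */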
enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
};

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action is
 * required, i.e. either a spurious wakeup or a multishot CQE was served.
 * IOU_POLL_DONE when it's done with the request, then the mask is stored in
 * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove the
 * multishot poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return IOU_POLL_DONE;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;
		/*
		 * cqe.res contains only events of the first wake up and all
		 * others will be lost. Redo vfs_poll() to get up to date state.
		 */
		if ((v & IO_POLL_REF_MASK) != 1)
			req->cqe.res = 0;

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if (unlikely(!req->cqe.res))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_post_aux_cqe(ctx, req->cqe.user_data,
					     mask, IORING_CQE_F_MORE, false)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			ret = io_poll_issue(req, locked);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return IOU_POLL_NO_ACTION;
}
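
/*
 * Task-work completion path for IORING_OP_POLL_ADD: translate the outcome
 * of io_poll_check_events() into a CQE result, unhook the request from its
 * waitqueue(s) and the cancelation hash, and complete it.
 */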
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	if (ret == IOU_POLL_DONE) {
		struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);

		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	io_tw_lock(req->ctx, locked);
	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
		io_req_task_complete(req, locked);
	else if (ret == IOU_POLL_DONE)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}

static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);

	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}

#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)
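
/*
 * The waitqueue is going away (POLLFREE, e.g. signalfd teardown): mark the
 * request cancelled and detach it from the dying queue before the queue
 * memory is freed.
 */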
static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already holds
	 * ownership over it, we have to tear down the request as best we
	 * can. That means immediately removing the request from its
	 * waitqueue and preventing all further accesses to the waitqueue
	 * via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}
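
/*
 * Note on the wakeup path above: io_poll_wake() either takes ownership and
 * schedules the tw handler via __io_poll_execute(), or its poll_refs
 * increment forces the current owner to re-poll before dropping ownership
 * in io_poll_check_events().
 */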
/* fails only when polling is already being completed by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}
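
/*
 * Queue-proc callback invoked by vfs_poll() for each waitqueue the file
 * asks us to wait on. The first entry uses the io_poll passed in; if the
 * file uses a second waitqueue, another io_poll is allocated and hung off
 * *poll_ptr (->async_data for plain poll, apoll->double_poll for internal
 * async poll).
 */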
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Set up a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}
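
/*
 * Inline completion is only allowed while we own the request: either we
 * armed it as the owner (io-wq path, pt->owning) or we can take ownership
 * here.
 */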
static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt))
			return 0;
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Release ownership. If someone tried to queue a tw while it was
		 * locked, kick it off for them.
		 */
		v = atomic_dec_return(&req->poll_refs);
		if (unlikely(v & IO_POLL_REF_MASK))
			__io_poll_execute(req, 0);
	}
	return 0;
}
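
/*
 * Callers map the tri-state result above as follows: io_poll_add() turns
 * >0 into an immediate completion using ipt.result_mask, while
 * io_arm_poll_handler() turns it into IO_APOLL_READY; 0 means the poll
 * machinery now owns the request.
 */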
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->apoll_cache);
		if (entry == NULL)
			goto alloc_apoll;
		apoll = container_of(entry, struct async_poll, cache);
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	return apoll;
}
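
/*
 * Arm an internal poll handler for a request that would otherwise block:
 * returns IO_APOLL_OK if the request is now waiting on the file,
 * IO_APOLL_READY if the file is already ready and the request should be
 * retried, and IO_APOLL_ABORTED if the request can't be poll driven, in
 * which case the caller is expected to fall back to io-wq.
 */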
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}
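
/*
 * Walk every bucket of one cancelation hash and cancel all requests
 * matching @tsk (or every request if @cancel_all is set).
 */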
static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}
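
/*
 * Find a matching request in @table and kick off its cancelation. The find
 * helpers return with the bucket lock held on success; it is dropped here
 * once the request has been marked for cancelation.
 */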
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
	       (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}
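
/*
 * Prepare IORING_OP_POLL_REMOVE: sqe->addr carries the user_data of the
 * poll request to cancel or update, sqe->off an optional replacement
 * user_data, and sqe->len the update flags.
 */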
int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
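
/*
 * IORING_OP_POLL_REMOVE: look the target up in both cancelation hashes
 * (taking ->uring_lock for the locked one), disarm it, then either cancel
 * it or re-arm it with the updated events/user_data.
 */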
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the event mask, keep the behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}