1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/fs.h> 5 #include <linux/file.h> 6 #include <linux/mm.h> 7 #include <linux/slab.h> 8 #include <linux/poll.h> 9 #include <linux/hashtable.h> 10 #include <linux/io_uring.h> 11 12 #include <trace/events/io_uring.h> 13 14 #include <uapi/linux/io_uring.h> 15 16 #include "io_uring.h" 17 #include "refs.h" 18 #include "opdef.h" 19 #include "kbuf.h" 20 #include "poll.h" 21 #include "cancel.h" 22 23 struct io_poll_update { 24 struct file *file; 25 u64 old_user_data; 26 u64 new_user_data; 27 __poll_t events; 28 bool update_events; 29 bool update_user_data; 30 }; 31 32 struct io_poll_table { 33 struct poll_table_struct pt; 34 struct io_kiocb *req; 35 int nr_entries; 36 int error; 37 bool owning; 38 /* output value, set only if arm poll returns >0 */ 39 __poll_t result_mask; 40 }; 41 42 #define IO_POLL_CANCEL_FLAG BIT(31) 43 #define IO_POLL_REF_MASK GENMASK(30, 0) 44 45 #define IO_WQE_F_DOUBLE 1 46 47 static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe) 48 { 49 unsigned long priv = (unsigned long)wqe->private; 50 51 return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE); 52 } 53 54 static inline bool wqe_is_double(struct wait_queue_entry *wqe) 55 { 56 unsigned long priv = (unsigned long)wqe->private; 57 58 return priv & IO_WQE_F_DOUBLE; 59 } 60 61 /* 62 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can 63 * bump it and acquire ownership. It's disallowed to modify requests while not 64 * owning it, that prevents from races for enqueueing task_work's and b/w 65 * arming poll and wakeups. 66 */ 67 static inline bool io_poll_get_ownership(struct io_kiocb *req) 68 { 69 return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); 70 } 71 72 static void io_poll_mark_cancelled(struct io_kiocb *req) 73 { 74 atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); 75 } 76 77 static struct io_poll *io_poll_get_double(struct io_kiocb *req) 78 { 79 /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ 80 if (req->opcode == IORING_OP_POLL_ADD) 81 return req->async_data; 82 return req->apoll->double_poll; 83 } 84 85 static struct io_poll *io_poll_get_single(struct io_kiocb *req) 86 { 87 if (req->opcode == IORING_OP_POLL_ADD) 88 return io_kiocb_to_cmd(req, struct io_poll); 89 return &req->apoll->poll; 90 } 91 92 static void io_poll_req_insert(struct io_kiocb *req) 93 { 94 struct io_hash_table *table = &req->ctx->cancel_table; 95 u32 index = hash_long(req->cqe.user_data, table->hash_bits); 96 struct io_hash_bucket *hb = &table->hbs[index]; 97 98 spin_lock(&hb->lock); 99 hlist_add_head(&req->hash_node, &hb->list); 100 spin_unlock(&hb->lock); 101 } 102 103 static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx) 104 { 105 struct io_hash_table *table = &req->ctx->cancel_table; 106 u32 index = hash_long(req->cqe.user_data, table->hash_bits); 107 spinlock_t *lock = &table->hbs[index].lock; 108 109 spin_lock(lock); 110 hash_del(&req->hash_node); 111 spin_unlock(lock); 112 } 113 114 static void io_poll_req_insert_locked(struct io_kiocb *req) 115 { 116 struct io_hash_table *table = &req->ctx->cancel_table_locked; 117 u32 index = hash_long(req->cqe.user_data, table->hash_bits); 118 119 hlist_add_head(&req->hash_node, &table->hbs[index].list); 120 } 121 122 static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked) 123 { 124 struct io_ring_ctx *ctx = req->ctx; 125 126 if (req->flags & REQ_F_HASH_LOCKED) { 127 /* 128 * ->cancel_table_locked is protected by ->uring_lock in 129 * contrast to per bucket spinlocks. Likely, tctx_task_work() 130 * already grabbed the mutex for us, but there is a chance it 131 * failed. 132 */ 133 io_tw_lock(ctx, locked); 134 hash_del(&req->hash_node); 135 req->flags &= ~REQ_F_HASH_LOCKED; 136 } else { 137 io_poll_req_delete(req, ctx); 138 } 139 } 140 141 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events, 142 wait_queue_func_t wake_func) 143 { 144 poll->head = NULL; 145 #define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) 146 /* mask in events that we always want/need */ 147 poll->events = events | IO_POLL_UNMASK; 148 INIT_LIST_HEAD(&poll->wait.entry); 149 init_waitqueue_func_entry(&poll->wait, wake_func); 150 } 151 152 static inline void io_poll_remove_entry(struct io_poll *poll) 153 { 154 struct wait_queue_head *head = smp_load_acquire(&poll->head); 155 156 if (head) { 157 spin_lock_irq(&head->lock); 158 list_del_init(&poll->wait.entry); 159 poll->head = NULL; 160 spin_unlock_irq(&head->lock); 161 } 162 } 163 164 static void io_poll_remove_entries(struct io_kiocb *req) 165 { 166 /* 167 * Nothing to do if neither of those flags are set. Avoid dipping 168 * into the poll/apoll/double cachelines if we can. 169 */ 170 if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL))) 171 return; 172 173 /* 174 * While we hold the waitqueue lock and the waitqueue is nonempty, 175 * wake_up_pollfree() will wait for us. However, taking the waitqueue 176 * lock in the first place can race with the waitqueue being freed. 177 * 178 * We solve this as eventpoll does: by taking advantage of the fact that 179 * all users of wake_up_pollfree() will RCU-delay the actual free. If 180 * we enter rcu_read_lock() and see that the pointer to the queue is 181 * non-NULL, we can then lock it without the memory being freed out from 182 * under us. 183 * 184 * Keep holding rcu_read_lock() as long as we hold the queue lock, in 185 * case the caller deletes the entry from the queue, leaving it empty. 186 * In that case, only RCU prevents the queue memory from being freed. 187 */ 188 rcu_read_lock(); 189 if (req->flags & REQ_F_SINGLE_POLL) 190 io_poll_remove_entry(io_poll_get_single(req)); 191 if (req->flags & REQ_F_DOUBLE_POLL) 192 io_poll_remove_entry(io_poll_get_double(req)); 193 rcu_read_unlock(); 194 } 195 196 enum { 197 IOU_POLL_DONE = 0, 198 IOU_POLL_NO_ACTION = 1, 199 IOU_POLL_REMOVE_POLL_USE_RES = 2, 200 }; 201 202 /* 203 * All poll tw should go through this. Checks for poll events, manages 204 * references, does rewait, etc. 205 * 206 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require, 207 * which is either spurious wakeup or multishot CQE is served. 208 * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res. 209 * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result 210 * is stored in req->cqe. 211 */ 212 static int io_poll_check_events(struct io_kiocb *req, bool *locked) 213 { 214 struct io_ring_ctx *ctx = req->ctx; 215 int v, ret; 216 217 /* req->task == current here, checking PF_EXITING is safe */ 218 if (unlikely(req->task->flags & PF_EXITING)) 219 return -ECANCELED; 220 221 do { 222 v = atomic_read(&req->poll_refs); 223 224 /* tw handler should be the owner, and so have some references */ 225 if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) 226 return IOU_POLL_DONE; 227 if (v & IO_POLL_CANCEL_FLAG) 228 return -ECANCELED; 229 230 /* the mask was stashed in __io_poll_execute */ 231 if (!req->cqe.res) { 232 struct poll_table_struct pt = { ._key = req->apoll_events }; 233 req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; 234 } 235 236 if ((unlikely(!req->cqe.res))) 237 continue; 238 if (req->apoll_events & EPOLLONESHOT) 239 return IOU_POLL_DONE; 240 241 /* multishot, just fill a CQE and proceed */ 242 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 243 __poll_t mask = mangle_poll(req->cqe.res & 244 req->apoll_events); 245 246 if (!io_post_aux_cqe(ctx, req->cqe.user_data, 247 mask, IORING_CQE_F_MORE, false)) { 248 io_req_set_res(req, mask, 0); 249 return IOU_POLL_REMOVE_POLL_USE_RES; 250 } 251 } else { 252 ret = io_poll_issue(req, locked); 253 if (ret == IOU_STOP_MULTISHOT) 254 return IOU_POLL_REMOVE_POLL_USE_RES; 255 if (ret < 0) 256 return ret; 257 } 258 259 /* 260 * Release all references, retry if someone tried to restart 261 * task_work while we were executing it. 262 */ 263 } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); 264 265 return IOU_POLL_NO_ACTION; 266 } 267 268 static void io_poll_task_func(struct io_kiocb *req, bool *locked) 269 { 270 int ret; 271 272 ret = io_poll_check_events(req, locked); 273 if (ret == IOU_POLL_NO_ACTION) 274 return; 275 276 if (ret == IOU_POLL_DONE) { 277 struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll); 278 req->cqe.res = mangle_poll(req->cqe.res & poll->events); 279 } else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) { 280 req->cqe.res = ret; 281 req_set_fail(req); 282 } 283 284 io_poll_remove_entries(req); 285 io_poll_tw_hash_eject(req, locked); 286 287 io_req_set_res(req, req->cqe.res, 0); 288 io_req_task_complete(req, locked); 289 } 290 291 static void io_apoll_task_func(struct io_kiocb *req, bool *locked) 292 { 293 int ret; 294 295 ret = io_poll_check_events(req, locked); 296 if (ret == IOU_POLL_NO_ACTION) 297 return; 298 299 io_poll_remove_entries(req); 300 io_poll_tw_hash_eject(req, locked); 301 302 if (ret == IOU_POLL_REMOVE_POLL_USE_RES) 303 io_req_complete_post(req); 304 else if (ret == IOU_POLL_DONE) 305 io_req_task_submit(req, locked); 306 else 307 io_req_complete_failed(req, ret); 308 } 309 310 static void __io_poll_execute(struct io_kiocb *req, int mask) 311 { 312 io_req_set_res(req, mask, 0); 313 /* 314 * This is useful for poll that is armed on behalf of another 315 * request, and where the wakeup path could be on a different 316 * CPU. We want to avoid pulling in req->apoll->events for that 317 * case. 318 */ 319 if (req->opcode == IORING_OP_POLL_ADD) 320 req->io_task_work.func = io_poll_task_func; 321 else 322 req->io_task_work.func = io_apoll_task_func; 323 324 trace_io_uring_task_add(req, mask); 325 io_req_task_work_add(req); 326 } 327 328 static inline void io_poll_execute(struct io_kiocb *req, int res) 329 { 330 if (io_poll_get_ownership(req)) 331 __io_poll_execute(req, res); 332 } 333 334 static void io_poll_cancel_req(struct io_kiocb *req) 335 { 336 io_poll_mark_cancelled(req); 337 /* kick tw, which should complete the request */ 338 io_poll_execute(req, 0); 339 } 340 341 #define IO_ASYNC_POLL_COMMON (EPOLLONESHOT | EPOLLPRI) 342 343 static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll) 344 { 345 io_poll_mark_cancelled(req); 346 /* we have to kick tw in case it's not already */ 347 io_poll_execute(req, 0); 348 349 /* 350 * If the waitqueue is being freed early but someone is already 351 * holds ownership over it, we have to tear down the request as 352 * best we can. That means immediately removing the request from 353 * its waitqueue and preventing all further accesses to the 354 * waitqueue via the request. 355 */ 356 list_del_init(&poll->wait.entry); 357 358 /* 359 * Careful: this *must* be the last step, since as soon 360 * as req->head is NULL'ed out, the request can be 361 * completed and freed, since aio_poll_complete_work() 362 * will no longer need to take the waitqueue lock. 363 */ 364 smp_store_release(&poll->head, NULL); 365 return 1; 366 } 367 368 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, 369 void *key) 370 { 371 struct io_kiocb *req = wqe_to_req(wait); 372 struct io_poll *poll = container_of(wait, struct io_poll, wait); 373 __poll_t mask = key_to_poll(key); 374 375 if (unlikely(mask & POLLFREE)) 376 return io_pollfree_wake(req, poll); 377 378 /* for instances that support it check for an event match first */ 379 if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON))) 380 return 0; 381 382 if (io_poll_get_ownership(req)) { 383 /* optional, saves extra locking for removal in tw handler */ 384 if (mask && poll->events & EPOLLONESHOT) { 385 list_del_init(&poll->wait.entry); 386 poll->head = NULL; 387 if (wqe_is_double(wait)) 388 req->flags &= ~REQ_F_DOUBLE_POLL; 389 else 390 req->flags &= ~REQ_F_SINGLE_POLL; 391 } 392 __io_poll_execute(req, mask); 393 } 394 return 1; 395 } 396 397 static void io_poll_double_prepare(struct io_kiocb *req) 398 { 399 struct wait_queue_head *head; 400 struct io_poll *poll = io_poll_get_single(req); 401 402 /* head is RCU protected, see io_poll_remove_entries() comments */ 403 rcu_read_lock(); 404 head = smp_load_acquire(&poll->head); 405 /* 406 * poll arm may not hold ownership and so race with 407 * io_poll_wake() by modifying req->flags. There is only one 408 * poll entry queued, serialise with it by taking its head lock. 409 */ 410 if (head) 411 spin_lock_irq(&head->lock); 412 413 req->flags |= REQ_F_DOUBLE_POLL; 414 if (req->opcode == IORING_OP_POLL_ADD) 415 req->flags |= REQ_F_ASYNC_DATA; 416 417 if (head) 418 spin_unlock_irq(&head->lock); 419 rcu_read_unlock(); 420 } 421 422 static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, 423 struct wait_queue_head *head, 424 struct io_poll **poll_ptr) 425 { 426 struct io_kiocb *req = pt->req; 427 unsigned long wqe_private = (unsigned long) req; 428 429 /* 430 * The file being polled uses multiple waitqueues for poll handling 431 * (e.g. one for read, one for write). Setup a separate io_poll 432 * if this happens. 433 */ 434 if (unlikely(pt->nr_entries)) { 435 struct io_poll *first = poll; 436 437 /* double add on the same waitqueue head, ignore */ 438 if (first->head == head) 439 return; 440 /* already have a 2nd entry, fail a third attempt */ 441 if (*poll_ptr) { 442 if ((*poll_ptr)->head == head) 443 return; 444 pt->error = -EINVAL; 445 return; 446 } 447 448 poll = kmalloc(sizeof(*poll), GFP_ATOMIC); 449 if (!poll) { 450 pt->error = -ENOMEM; 451 return; 452 } 453 454 /* mark as double wq entry */ 455 wqe_private |= IO_WQE_F_DOUBLE; 456 io_init_poll_iocb(poll, first->events, first->wait.func); 457 io_poll_double_prepare(req); 458 *poll_ptr = poll; 459 } else { 460 /* fine to modify, there is no poll queued to race with us */ 461 req->flags |= REQ_F_SINGLE_POLL; 462 } 463 464 pt->nr_entries++; 465 poll->head = head; 466 poll->wait.private = (void *) wqe_private; 467 468 if (poll->events & EPOLLEXCLUSIVE) 469 add_wait_queue_exclusive(head, &poll->wait); 470 else 471 add_wait_queue(head, &poll->wait); 472 } 473 474 static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, 475 struct poll_table_struct *p) 476 { 477 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); 478 struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll); 479 480 __io_queue_proc(poll, pt, head, 481 (struct io_poll **) &pt->req->async_data); 482 } 483 484 static bool io_poll_can_finish_inline(struct io_kiocb *req, 485 struct io_poll_table *pt) 486 { 487 return pt->owning || io_poll_get_ownership(req); 488 } 489 490 /* 491 * Returns 0 when it's handed over for polling. The caller owns the requests if 492 * it returns non-zero, but otherwise should not touch it. Negative values 493 * contain an error code. When the result is >0, the polling has completed 494 * inline and ipt.result_mask is set to the mask. 495 */ 496 static int __io_arm_poll_handler(struct io_kiocb *req, 497 struct io_poll *poll, 498 struct io_poll_table *ipt, __poll_t mask, 499 unsigned issue_flags) 500 { 501 struct io_ring_ctx *ctx = req->ctx; 502 int v; 503 504 INIT_HLIST_NODE(&req->hash_node); 505 req->work.cancel_seq = atomic_read(&ctx->cancel_seq); 506 io_init_poll_iocb(poll, mask, io_poll_wake); 507 poll->file = req->file; 508 req->apoll_events = poll->events; 509 510 ipt->pt._key = mask; 511 ipt->req = req; 512 ipt->error = 0; 513 ipt->nr_entries = 0; 514 /* 515 * Polling is either completed here or via task_work, so if we're in the 516 * task context we're naturally serialised with tw by merit of running 517 * the same task. When it's io-wq, take the ownership to prevent tw 518 * from running. However, when we're in the task context, skip taking 519 * it as an optimisation. 520 * 521 * Note: even though the request won't be completed/freed, without 522 * ownership we still can race with io_poll_wake(). 523 * io_poll_can_finish_inline() tries to deal with that. 524 */ 525 ipt->owning = issue_flags & IO_URING_F_UNLOCKED; 526 atomic_set(&req->poll_refs, (int)ipt->owning); 527 528 /* io-wq doesn't hold uring_lock */ 529 if (issue_flags & IO_URING_F_UNLOCKED) 530 req->flags &= ~REQ_F_HASH_LOCKED; 531 532 mask = vfs_poll(req->file, &ipt->pt) & poll->events; 533 534 if (unlikely(ipt->error || !ipt->nr_entries)) { 535 io_poll_remove_entries(req); 536 537 if (!io_poll_can_finish_inline(req, ipt)) { 538 io_poll_mark_cancelled(req); 539 return 0; 540 } else if (mask && (poll->events & EPOLLET)) { 541 ipt->result_mask = mask; 542 return 1; 543 } 544 return ipt->error ?: -EINVAL; 545 } 546 547 if (mask && 548 ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { 549 if (!io_poll_can_finish_inline(req, ipt)) 550 return 0; 551 io_poll_remove_entries(req); 552 ipt->result_mask = mask; 553 /* no one else has access to the req, forget about the ref */ 554 return 1; 555 } 556 557 if (req->flags & REQ_F_HASH_LOCKED) 558 io_poll_req_insert_locked(req); 559 else 560 io_poll_req_insert(req); 561 562 if (mask && (poll->events & EPOLLET) && 563 io_poll_can_finish_inline(req, ipt)) { 564 __io_poll_execute(req, mask); 565 return 0; 566 } 567 568 if (ipt->owning) { 569 /* 570 * Release ownership. If someone tried to queue a tw while it was 571 * locked, kick it off for them. 572 */ 573 v = atomic_dec_return(&req->poll_refs); 574 if (unlikely(v & IO_POLL_REF_MASK)) 575 __io_poll_execute(req, 0); 576 } 577 return 0; 578 } 579 580 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, 581 struct poll_table_struct *p) 582 { 583 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); 584 struct async_poll *apoll = pt->req->apoll; 585 586 __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); 587 } 588 589 static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, 590 unsigned issue_flags) 591 { 592 struct io_ring_ctx *ctx = req->ctx; 593 struct io_cache_entry *entry; 594 struct async_poll *apoll; 595 596 if (req->flags & REQ_F_POLLED) { 597 apoll = req->apoll; 598 kfree(apoll->double_poll); 599 } else if (!(issue_flags & IO_URING_F_UNLOCKED) && 600 (entry = io_alloc_cache_get(&ctx->apoll_cache)) != NULL) { 601 apoll = container_of(entry, struct async_poll, cache); 602 } else { 603 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); 604 if (unlikely(!apoll)) 605 return NULL; 606 } 607 apoll->double_poll = NULL; 608 req->apoll = apoll; 609 return apoll; 610 } 611 612 int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) 613 { 614 const struct io_op_def *def = &io_op_defs[req->opcode]; 615 struct async_poll *apoll; 616 struct io_poll_table ipt; 617 __poll_t mask = POLLPRI | POLLERR | EPOLLET; 618 int ret; 619 620 /* 621 * apoll requests already grab the mutex to complete in the tw handler, 622 * so removal from the mutex-backed hash is free, use it by default. 623 */ 624 req->flags |= REQ_F_HASH_LOCKED; 625 626 if (!def->pollin && !def->pollout) 627 return IO_APOLL_ABORTED; 628 if (!file_can_poll(req->file)) 629 return IO_APOLL_ABORTED; 630 if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) 631 return IO_APOLL_ABORTED; 632 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) 633 mask |= EPOLLONESHOT; 634 635 if (def->pollin) { 636 mask |= EPOLLIN | EPOLLRDNORM; 637 638 /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ 639 if (req->flags & REQ_F_CLEAR_POLLIN) 640 mask &= ~EPOLLIN; 641 } else { 642 mask |= EPOLLOUT | EPOLLWRNORM; 643 } 644 if (def->poll_exclusive) 645 mask |= EPOLLEXCLUSIVE; 646 647 apoll = io_req_alloc_apoll(req, issue_flags); 648 if (!apoll) 649 return IO_APOLL_ABORTED; 650 req->flags |= REQ_F_POLLED; 651 ipt.pt._qproc = io_async_queue_proc; 652 653 io_kbuf_recycle(req, issue_flags); 654 655 ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags); 656 if (ret) 657 return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED; 658 trace_io_uring_poll_arm(req, mask, apoll->poll.events); 659 return IO_APOLL_OK; 660 } 661 662 static __cold bool io_poll_remove_all_table(struct task_struct *tsk, 663 struct io_hash_table *table, 664 bool cancel_all) 665 { 666 unsigned nr_buckets = 1U << table->hash_bits; 667 struct hlist_node *tmp; 668 struct io_kiocb *req; 669 bool found = false; 670 int i; 671 672 for (i = 0; i < nr_buckets; i++) { 673 struct io_hash_bucket *hb = &table->hbs[i]; 674 675 spin_lock(&hb->lock); 676 hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) { 677 if (io_match_task_safe(req, tsk, cancel_all)) { 678 hlist_del_init(&req->hash_node); 679 io_poll_cancel_req(req); 680 found = true; 681 } 682 } 683 spin_unlock(&hb->lock); 684 } 685 return found; 686 } 687 688 /* 689 * Returns true if we found and killed one or more poll requests 690 */ 691 __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, 692 bool cancel_all) 693 __must_hold(&ctx->uring_lock) 694 { 695 bool ret; 696 697 ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all); 698 ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all); 699 return ret; 700 } 701 702 static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, 703 struct io_cancel_data *cd, 704 struct io_hash_table *table, 705 struct io_hash_bucket **out_bucket) 706 { 707 struct io_kiocb *req; 708 u32 index = hash_long(cd->data, table->hash_bits); 709 struct io_hash_bucket *hb = &table->hbs[index]; 710 711 *out_bucket = NULL; 712 713 spin_lock(&hb->lock); 714 hlist_for_each_entry(req, &hb->list, hash_node) { 715 if (cd->data != req->cqe.user_data) 716 continue; 717 if (poll_only && req->opcode != IORING_OP_POLL_ADD) 718 continue; 719 if (cd->flags & IORING_ASYNC_CANCEL_ALL) { 720 if (cd->seq == req->work.cancel_seq) 721 continue; 722 req->work.cancel_seq = cd->seq; 723 } 724 *out_bucket = hb; 725 return req; 726 } 727 spin_unlock(&hb->lock); 728 return NULL; 729 } 730 731 static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, 732 struct io_cancel_data *cd, 733 struct io_hash_table *table, 734 struct io_hash_bucket **out_bucket) 735 { 736 unsigned nr_buckets = 1U << table->hash_bits; 737 struct io_kiocb *req; 738 int i; 739 740 *out_bucket = NULL; 741 742 for (i = 0; i < nr_buckets; i++) { 743 struct io_hash_bucket *hb = &table->hbs[i]; 744 745 spin_lock(&hb->lock); 746 hlist_for_each_entry(req, &hb->list, hash_node) { 747 if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && 748 req->file != cd->file) 749 continue; 750 if (cd->seq == req->work.cancel_seq) 751 continue; 752 req->work.cancel_seq = cd->seq; 753 *out_bucket = hb; 754 return req; 755 } 756 spin_unlock(&hb->lock); 757 } 758 return NULL; 759 } 760 761 static int io_poll_disarm(struct io_kiocb *req) 762 { 763 if (!req) 764 return -ENOENT; 765 if (!io_poll_get_ownership(req)) 766 return -EALREADY; 767 io_poll_remove_entries(req); 768 hash_del(&req->hash_node); 769 return 0; 770 } 771 772 static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, 773 struct io_hash_table *table) 774 { 775 struct io_hash_bucket *bucket; 776 struct io_kiocb *req; 777 778 if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) 779 req = io_poll_file_find(ctx, cd, table, &bucket); 780 else 781 req = io_poll_find(ctx, false, cd, table, &bucket); 782 783 if (req) 784 io_poll_cancel_req(req); 785 if (bucket) 786 spin_unlock(&bucket->lock); 787 return req ? 0 : -ENOENT; 788 } 789 790 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, 791 unsigned issue_flags) 792 { 793 int ret; 794 795 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table); 796 if (ret != -ENOENT) 797 return ret; 798 799 io_ring_submit_lock(ctx, issue_flags); 800 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked); 801 io_ring_submit_unlock(ctx, issue_flags); 802 return ret; 803 } 804 805 static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, 806 unsigned int flags) 807 { 808 u32 events; 809 810 events = READ_ONCE(sqe->poll32_events); 811 #ifdef __BIG_ENDIAN 812 events = swahw32(events); 813 #endif 814 if (!(flags & IORING_POLL_ADD_MULTI)) 815 events |= EPOLLONESHOT; 816 if (!(flags & IORING_POLL_ADD_LEVEL)) 817 events |= EPOLLET; 818 return demangle_poll(events) | 819 (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET)); 820 } 821 822 int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 823 { 824 struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update); 825 u32 flags; 826 827 if (sqe->buf_index || sqe->splice_fd_in) 828 return -EINVAL; 829 flags = READ_ONCE(sqe->len); 830 if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | 831 IORING_POLL_ADD_MULTI)) 832 return -EINVAL; 833 /* meaningless without update */ 834 if (flags == IORING_POLL_ADD_MULTI) 835 return -EINVAL; 836 837 upd->old_user_data = READ_ONCE(sqe->addr); 838 upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; 839 upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; 840 841 upd->new_user_data = READ_ONCE(sqe->off); 842 if (!upd->update_user_data && upd->new_user_data) 843 return -EINVAL; 844 if (upd->update_events) 845 upd->events = io_poll_parse_events(sqe, flags); 846 else if (sqe->poll32_events) 847 return -EINVAL; 848 849 return 0; 850 } 851 852 int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 853 { 854 struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll); 855 u32 flags; 856 857 if (sqe->buf_index || sqe->off || sqe->addr) 858 return -EINVAL; 859 flags = READ_ONCE(sqe->len); 860 if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL)) 861 return -EINVAL; 862 if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) 863 return -EINVAL; 864 865 poll->events = io_poll_parse_events(sqe, flags); 866 return 0; 867 } 868 869 int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) 870 { 871 struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll); 872 struct io_poll_table ipt; 873 int ret; 874 875 ipt.pt._qproc = io_poll_queue_proc; 876 877 /* 878 * If sqpoll or single issuer, there is no contention for ->uring_lock 879 * and we'll end up holding it in tw handlers anyway. 880 */ 881 if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER)) 882 req->flags |= REQ_F_HASH_LOCKED; 883 884 ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags); 885 if (ret > 0) { 886 io_req_set_res(req, ipt.result_mask, 0); 887 return IOU_OK; 888 } 889 return ret ?: IOU_ISSUE_SKIP_COMPLETE; 890 } 891 892 int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) 893 { 894 struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update); 895 struct io_cancel_data cd = { .data = poll_update->old_user_data, }; 896 struct io_ring_ctx *ctx = req->ctx; 897 struct io_hash_bucket *bucket; 898 struct io_kiocb *preq; 899 int ret2, ret = 0; 900 bool locked; 901 902 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket); 903 ret2 = io_poll_disarm(preq); 904 if (bucket) 905 spin_unlock(&bucket->lock); 906 if (!ret2) 907 goto found; 908 if (ret2 != -ENOENT) { 909 ret = ret2; 910 goto out; 911 } 912 913 io_ring_submit_lock(ctx, issue_flags); 914 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket); 915 ret2 = io_poll_disarm(preq); 916 if (bucket) 917 spin_unlock(&bucket->lock); 918 io_ring_submit_unlock(ctx, issue_flags); 919 if (ret2) { 920 ret = ret2; 921 goto out; 922 } 923 924 found: 925 if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) { 926 ret = -EFAULT; 927 goto out; 928 } 929 930 if (poll_update->update_events || poll_update->update_user_data) { 931 /* only mask one event flags, keep behavior flags */ 932 if (poll_update->update_events) { 933 struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll); 934 935 poll->events &= ~0xffff; 936 poll->events |= poll_update->events & 0xffff; 937 poll->events |= IO_POLL_UNMASK; 938 } 939 if (poll_update->update_user_data) 940 preq->cqe.user_data = poll_update->new_user_data; 941 942 ret2 = io_poll_add(preq, issue_flags); 943 /* successfully updated, don't complete poll request */ 944 if (!ret2 || ret2 == -EIOCBQUEUED) 945 goto out; 946 } 947 948 req_set_fail(preq); 949 io_req_set_res(preq, -ECANCELED, 0); 950 locked = !(issue_flags & IO_URING_F_UNLOCKED); 951 io_req_task_complete(preq, &locked); 952 out: 953 if (ret < 0) { 954 req_set_fail(req); 955 return ret; 956 } 957 /* complete update request, we're done with it */ 958 io_req_set_res(req, ret, 0); 959 return IOU_OK; 960 } 961 962 void io_apoll_cache_free(struct io_cache_entry *entry) 963 { 964 kfree(container_of(entry, struct async_poll, cache)); 965 } 966