// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

/* parsed form of an IORING_OP_POLL_REMOVE request */
struct io_poll_update {
	struct file			*file;
	u64				old_user_data;
	u64				new_user_data;
	__poll_t			events;
	bool				update_events;
	bool				update_user_data;
};

/* per-arming state passed through vfs_poll()'s queue proc callbacks */
struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

/* ->poll_refs layout: top bit flags cancellation, the rest is a refcount */
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)

/*
 * Tag bit stashed in the low bit of wait_queue_entry ->private, marking the
 * second (double) poll entry as opposed to the first one.
 */
#define IO_WQE_F_DOUBLE		1

static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

/*
 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
 * bump it and acquire ownership. It's disallowed to modify requests while not
 * owning it, that prevents from races for enqueueing task_work's and b/w
 * arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}

/* hash the request into the spinlock-per-bucket cancellation table */
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

/* insert into the uring_lock protected table, see REQ_F_HASH_LOCKED */
static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

/* remove the request from whichever cancellation hash it was put into */
static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, locked);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
};

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which is either spurious wakeup or multishot CQE is served.
 * IOU_POLL_DONE when it's done with the request, then the mask is stored in
 * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot
 * poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		/* tw handler should be the owner, and so have some references */
		if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
			return IOU_POLL_DONE;
		if (v & IO_POLL_CANCEL_FLAG)
			return -ECANCELED;
		/*
		 * cqe.res contains only events of the first wake up
		 * and all others will be lost. Redo vfs_poll() to get
		 * up to date state.
		 */
		if ((v & IO_POLL_REF_MASK) != 1)
			req->cqe.res = 0;

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if ((unlikely(!req->cqe.res)))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;
		if (io_is_uring_fops(req->file))
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_post_aux_cqe(ctx, req->cqe.user_data,
					     mask, IORING_CQE_F_MORE, false)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			ret = io_poll_issue(req, locked);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));

	return IOU_POLL_NO_ACTION;
}

/* task_work completion path for IORING_OP_POLL_ADD requests */
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	if (ret == IOU_POLL_DONE) {
		struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

/* task_work path for internally armed (async) poll, retries the opcode */
static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
		io_req_complete_post(req);
	else if (ret == IOU_POLL_DONE)
		io_req_task_submit(req, locked);
	else
		io_req_complete_failed(req, ret);
}

/* stash the wakeup mask and punt processing to task_work */
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);
	/*
	 * This is useful for poll that is armed on behalf of another
	 * request, and where the wakeup path could be on a different
	 * CPU. We want to avoid pulling in req->apoll->events for that
	 * case.
	 */
	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}

#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

/* the waitqueue is going away (POLLFREE), tear the entry down immediately */
static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already
	 * holds ownership over it, we have to tear down the request as
	 * best we can. That means immediately removing the request from
	 * its waitqueue and preventing all further accesses to the
	 * waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

/* waitqueue wakeup callback registered by io_init_poll_iocb() */
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

/* fails only when polling is already completing by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}

/* add a wait entry to @head, allocating a second io_poll if needed */
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the requests if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt))
			return 0;
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Release ownership. If someone tried to queue a tw while it was
		 * locked, kick it off for them.
		 */
		v = atomic_dec_return(&req->poll_refs);
		if (unlikely(v & IO_POLL_REF_MASK))
			__io_poll_execute(req, 0);
	}
	return 0;
}

static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

/* reuse a prior apoll, pull one from the cache, or allocate fresh */
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
		   (entry = io_alloc_cache_get(&ctx->apoll_cache)) != NULL) {
		apoll = container_of(entry, struct async_poll, cache);
	} else {
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	return apoll;
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

/*
 * Look up a request by user_data in one hash table. On success the matching
 * bucket's lock is left held and returned via @out_bucket; the caller must
 * unlock it after it's done with the request.
 */
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}

/*
 * Look up a request by file (or any request with CANCEL_ANY). As with
 * io_poll_find(), a successful lookup returns with the bucket lock held.
 */
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

/* take ownership and detach the request from poll; caller completes it */
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	/* the locked table requires ->uring_lock, see REQ_F_HASH_LOCKED */
	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

/* translate userspace poll32_events + SQE flags into internal __poll_t */
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}

int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	/* try the per-bucket-locked table first, then the uring_lock one */
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only mask one event flags, keep behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	/* couldn't re-arm: fail the original poll request as cancelled */
	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}