aio.c (4cd81c3dfc4a34e4a0b6fa577860077c8e5b13af, old) | aio.c (db446a08c23d5475e6b08c87acca79ebb20f283c, new) |
---|---|
1/* 2 * An async IO implementation for Linux 3 * Written by Benjamin LaHaise <bcrl@kvack.org> 4 * 5 * Implements an efficient asynchronous io interface. 6 * 7 * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. 8 * --- 52 unchanged lines hidden (view full) --- 61 unsigned header_length; /* size of aio_ring */ 62 63 64 struct io_event io_events[0]; 65}; /* 128 bytes + ring size */ 66 67#define AIO_RING_PAGES 8 68 | 1/* 2 * An async IO implementation for Linux 3 * Written by Benjamin LaHaise <bcrl@kvack.org> 4 * 5 * Implements an efficient asynchronous io interface. 6 * 7 * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. 8 * --- 52 unchanged lines hidden (view full) --- 61 unsigned header_length; /* size of aio_ring */ 62 63 64 struct io_event io_events[0]; 65}; /* 128 bytes + ring size */ 66 67#define AIO_RING_PAGES 8 68 |
 | 69struct kioctx_table { 70 struct rcu_head rcu; 71 unsigned nr; 72 struct kioctx *table[]; 73}; 74 |
69struct kioctx_cpu { 70 unsigned reqs_available; 71}; 72 73struct kioctx { 74 struct percpu_ref users; 75 atomic_t dead; 76 | 75struct kioctx_cpu { 76 unsigned reqs_available; 77}; 78 79struct kioctx { 80 struct percpu_ref users; 81 atomic_t dead; 82 |
77 /* This needs improving */ | |
78 unsigned long user_id; | 83 unsigned long user_id; |
79 struct hlist_node list; | |
80 81 struct __percpu kioctx_cpu *cpu; 82 83 /* 84 * For percpu reqs_available, number of slots we move to/from global 85 * counter at a time: 86 */ 87 unsigned req_batch; --- 42 unchanged lines hidden (view full) --- 130 131 struct { 132 unsigned tail; 133 spinlock_t completion_lock; 134 } ____cacheline_aligned_in_smp; 135 136 struct page *internal_pages[AIO_RING_PAGES]; 137 struct file *aio_ring_file; | 84 85 struct __percpu kioctx_cpu *cpu; 86 87 /* 88 * For percpu reqs_available, number of slots we move to/from global 89 * counter at a time: 90 */ 91 unsigned req_batch; --- 42 unchanged lines hidden (view full) --- 134 135 struct { 136 unsigned tail; 137 spinlock_t completion_lock; 138 } ____cacheline_aligned_in_smp; 139 140 struct page *internal_pages[AIO_RING_PAGES]; 141 struct file *aio_ring_file; |
 | 142 143 unsigned id; |
138}; 139 140/*------ sysctl variables----*/ 141static DEFINE_SPINLOCK(aio_nr_lock); 142unsigned long aio_nr; /* current system wide number of aio requests */ 143unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ 144/*----end sysctl variables---*/ 145 --- 175 unchanged lines hidden (view full) --- 321 return -EAGAIN; 322 } 323 324 ctx->user_id = ctx->mmap_base; 325 ctx->nr_events = nr_events; /* trusted copy */ 326 327 ring = kmap_atomic(ctx->ring_pages[0]); 328 ring->nr = nr_events; /* user copy */ | 144}; 145 146/*------ sysctl variables----*/ 147static DEFINE_SPINLOCK(aio_nr_lock); 148unsigned long aio_nr; /* current system wide number of aio requests */ 149unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ 150/*----end sysctl variables---*/ 151 --- 175 unchanged lines hidden (view full) --- 327 return -EAGAIN; 328 } 329 330 ctx->user_id = ctx->mmap_base; 331 ctx->nr_events = nr_events; /* trusted copy */ 332 333 ring = kmap_atomic(ctx->ring_pages[0]); 334 ring->nr = nr_events; /* user copy */ |
329 ring->id = ctx->user_id; | 335 ring->id = ~0U; |
330 ring->head = ring->tail = 0; 331 ring->magic = AIO_RING_MAGIC; 332 ring->compat_features = AIO_RING_COMPAT_FEATURES; 333 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 334 ring->header_length = sizeof(struct aio_ring); 335 kunmap_atomic(ring); 336 flush_dcache_page(ctx->ring_pages[0]); 337 --- 119 unchanged lines hidden (view full) --- 457static void free_ioctx_ref(struct percpu_ref *ref) 458{ 459 struct kioctx *ctx = container_of(ref, struct kioctx, users); 460 461 INIT_WORK(&ctx->free_work, free_ioctx); 462 schedule_work(&ctx->free_work); 463} 464 | 336 ring->head = ring->tail = 0; 337 ring->magic = AIO_RING_MAGIC; 338 ring->compat_features = AIO_RING_COMPAT_FEATURES; 339 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 340 ring->header_length = sizeof(struct aio_ring); 341 kunmap_atomic(ring); 342 flush_dcache_page(ctx->ring_pages[0]); 343 --- 119 unchanged lines hidden (view full) --- 463static void free_ioctx_ref(struct percpu_ref *ref) 464{ 465 struct kioctx *ctx = container_of(ref, struct kioctx, users); 466 467 INIT_WORK(&ctx->free_work, free_ioctx); 468 schedule_work(&ctx->free_work); 469} 470 |
 | 471static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) 472{ 473 unsigned i, new_nr; 474 struct kioctx_table *table, *old; 475 struct aio_ring *ring; 476 477 spin_lock(&mm->ioctx_lock); 478 table = rcu_dereference(mm->ioctx_table); 479 480 while (1) { 481 if (table) 482 for (i = 0; i < table->nr; i++) 483 if (!table->table[i]) { 484 ctx->id = i; 485 table->table[i] = ctx; 486 spin_unlock(&mm->ioctx_lock); 487 488 ring = kmap_atomic(ctx->ring_pages[0]); 489 ring->id = ctx->id; 490 kunmap_atomic(ring); 491 return 0; 492 } 493 494 new_nr = (table ? table->nr : 1) * 4; 495 496 spin_unlock(&mm->ioctx_lock); 497 498 table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * 499 new_nr, GFP_KERNEL); 500 if (!table) 501 return -ENOMEM; 502 503 table->nr = new_nr; 504 505 spin_lock(&mm->ioctx_lock); 506 old = rcu_dereference(mm->ioctx_table); 507 508 if (!old) { 509 rcu_assign_pointer(mm->ioctx_table, table); 510 } else if (table->nr > old->nr) { 511 memcpy(table->table, old->table, 512 old->nr * sizeof(struct kioctx *)); 513 514 rcu_assign_pointer(mm->ioctx_table, table); 515 kfree_rcu(old, rcu); 516 } else { 517 kfree(table); 518 table = old; 519 } 520 } 521} 522 |
465/* ioctx_alloc 466 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 467 */ 468static struct kioctx *ioctx_alloc(unsigned nr_events) 469{ 470 struct mm_struct *mm = current->mm; 471 struct kioctx *ctx; 472 int err = -ENOMEM; --- 42 unchanged lines hidden (view full) --- 515 516 if (aio_setup_ring(ctx) < 0) 517 goto out_freepcpu; 518 519 atomic_set(&ctx->reqs_available, ctx->nr_events - 1); 520 ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); 521 BUG_ON(!ctx->req_batch); 522 | 523/* ioctx_alloc 524 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 525 */ 526static struct kioctx *ioctx_alloc(unsigned nr_events) 527{ 528 struct mm_struct *mm = current->mm; 529 struct kioctx *ctx; 530 int err = -ENOMEM; --- 42 unchanged lines hidden (view full) --- 573 574 if (aio_setup_ring(ctx) < 0) 575 goto out_freepcpu; 576 577 atomic_set(&ctx->reqs_available, ctx->nr_events - 1); 578 ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); 579 BUG_ON(!ctx->req_batch); 580 |
 | 581 err = ioctx_add_table(ctx, mm); 582 if (err) 583 goto out_cleanup_noerr; 584 |
523 /* limit the number of system wide aios */ 524 spin_lock(&aio_nr_lock); 525 if (aio_nr + nr_events > (aio_max_nr * 2UL) || 526 aio_nr + nr_events < aio_nr) { 527 spin_unlock(&aio_nr_lock); 528 goto out_cleanup; 529 } 530 aio_nr += ctx->max_reqs; 531 spin_unlock(&aio_nr_lock); 532 533 percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ 534 | 585 /* limit the number of system wide aios */ 586 spin_lock(&aio_nr_lock); 587 if (aio_nr + nr_events > (aio_max_nr * 2UL) || 588 aio_nr + nr_events < aio_nr) { 589 spin_unlock(&aio_nr_lock); 590 goto out_cleanup; 591 } 592 aio_nr += ctx->max_reqs; 593 spin_unlock(&aio_nr_lock); 594 595 percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ 596 |
535 /* now link into global list. */ 536 spin_lock(&mm->ioctx_lock); 537 hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); 538 spin_unlock(&mm->ioctx_lock); 539 | |
540 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 541 ctx, ctx->user_id, mm, ctx->nr_events); 542 return ctx; 543 544out_cleanup: 545 err = -EAGAIN; | 597 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 598 ctx, ctx->user_id, mm, ctx->nr_events); 599 return ctx; 600 601out_cleanup: 602 err = -EAGAIN; |
 | 603out_cleanup_noerr: |
546 aio_free_ring(ctx); 547out_freepcpu: 548 free_percpu(ctx->cpu); 549out_freeref: 550 free_percpu(ctx->users.pcpu_count); 551out_freectx: 552 if (ctx->aio_ring_file) 553 fput(ctx->aio_ring_file); 554 kmem_cache_free(kioctx_cachep, ctx); 555 pr_debug("error allocating ioctx %d\n", err); 556 return ERR_PTR(err); 557} 558 559/* kill_ioctx 560 * Cancels all outstanding aio requests on an aio context. Used 561 * when the processes owning a context have all exited to encourage 562 * the rapid destruction of the kioctx. 563 */ | 604 aio_free_ring(ctx); 605out_freepcpu: 606 free_percpu(ctx->cpu); 607out_freeref: 608 free_percpu(ctx->users.pcpu_count); 609out_freectx: 610 if (ctx->aio_ring_file) 611 fput(ctx->aio_ring_file); 612 kmem_cache_free(kioctx_cachep, ctx); 613 pr_debug("error allocating ioctx %d\n", err); 614 return ERR_PTR(err); 615} 616 617/* kill_ioctx 618 * Cancels all outstanding aio requests on an aio context. Used 619 * when the processes owning a context have all exited to encourage 620 * the rapid destruction of the kioctx. 621 */ |
564static void kill_ioctx(struct kioctx *ctx) | 622static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) |
565{ 566 if (!atomic_xchg(&ctx->dead, 1)) { | 623{ 624 if (!atomic_xchg(&ctx->dead, 1)) { |
567 hlist_del_rcu(&ctx->list); | 625 struct kioctx_table *table; 626 627 spin_lock(&mm->ioctx_lock); 628 table = rcu_dereference(mm->ioctx_table); 629 630 WARN_ON(ctx != table->table[ctx->id]); 631 table->table[ctx->id] = NULL; 632 spin_unlock(&mm->ioctx_lock); 633 |
568 /* percpu_ref_kill() will do the necessary call_rcu() */ 569 wake_up_all(&ctx->wait); 570 571 /* 572 * It'd be more correct to do this in free_ioctx(), after all 573 * the outstanding kiocbs have finished - but by then io_destroy 574 * has already returned, so io_setup() could potentially return 575 * -EAGAIN with no ioctxs actually in use (as far as userspace --- 32 unchanged lines hidden (view full) --- 608 * no way for any new requests to be submitted or any of the io_* syscalls to be 609 * called on the context. 610 * 611 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on 612 * them. 613 */ 614void exit_aio(struct mm_struct *mm) 615{ | 634 /* percpu_ref_kill() will do the necessary call_rcu() */ 635 wake_up_all(&ctx->wait); 636 637 /* 638 * It'd be more correct to do this in free_ioctx(), after all 639 * the outstanding kiocbs have finished - but by then io_destroy 640 * has already returned, so io_setup() could potentially return 641 * -EAGAIN with no ioctxs actually in use (as far as userspace --- 32 unchanged lines hidden (view full) --- 674 * no way for any new requests to be submitted or any of the io_* syscalls to be 675 * called on the context. 676 * 677 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on 678 * them. 679 */ 680void exit_aio(struct mm_struct *mm) 681{ |
 | 682 struct kioctx_table *table; |
616 struct kioctx *ctx; | 683 struct kioctx *ctx; |
617 struct hlist_node *n; | 684 unsigned i = 0; |
618 | 685 |
619 hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) { | 686 while (1) { 687 rcu_read_lock(); 688 table = rcu_dereference(mm->ioctx_table); 689 690 do { 691 if (!table || i >= table->nr) { 692 rcu_read_unlock(); 693 rcu_assign_pointer(mm->ioctx_table, NULL); 694 if (table) 695 kfree(table); 696 return; 697 } 698 699 ctx = table->table[i++]; 700 } while (!ctx); 701 702 rcu_read_unlock(); 703 |
620 /* 621 * We don't need to bother with munmap() here - 622 * exit_mmap(mm) is coming and it'll unmap everything. 623 * Since aio_free_ring() uses non-zero ->mmap_size 624 * as indicator that it needs to unmap the area, 625 * just set it to 0; aio_free_ring() is the only 626 * place that uses ->mmap_size, so it's safe. 627 */ 628 ctx->mmap_size = 0; 629 | 704 /* 705 * We don't need to bother with munmap() here - 706 * exit_mmap(mm) is coming and it'll unmap everything. 707 * Since aio_free_ring() uses non-zero ->mmap_size 708 * as indicator that it needs to unmap the area, 709 * just set it to 0; aio_free_ring() is the only 710 * place that uses ->mmap_size, so it's safe. 711 */ 712 ctx->mmap_size = 0; 713 |
630 kill_ioctx(ctx); | 714 kill_ioctx(mm, ctx); |
631 } 632} 633 634static void put_reqs_available(struct kioctx *ctx, unsigned nr) 635{ 636 struct kioctx_cpu *kcpu; 637 638 preempt_disable(); --- 66 unchanged lines hidden (view full) --- 705 fput(req->ki_filp); 706 if (req->ki_eventfd != NULL) 707 eventfd_ctx_put(req->ki_eventfd); 708 kmem_cache_free(kiocb_cachep, req); 709} 710 711static struct kioctx *lookup_ioctx(unsigned long ctx_id) 712{ | 715 } 716} 717 718static void put_reqs_available(struct kioctx *ctx, unsigned nr) 719{ 720 struct kioctx_cpu *kcpu; 721 722 preempt_disable(); --- 66 unchanged lines hidden (view full) --- 789 fput(req->ki_filp); 790 if (req->ki_eventfd != NULL) 791 eventfd_ctx_put(req->ki_eventfd); 792 kmem_cache_free(kiocb_cachep, req); 793} 794 795static struct kioctx *lookup_ioctx(unsigned long ctx_id) 796{ |
 | 797 struct aio_ring __user *ring = (void __user *)ctx_id; |
713 struct mm_struct *mm = current->mm; 714 struct kioctx *ctx, *ret = NULL; | 798 struct mm_struct *mm = current->mm; 799 struct kioctx *ctx, *ret = NULL; |
 | 800 struct kioctx_table *table; 801 unsigned id; |
715 | 802 |
 | 803 if (get_user(id, &ring->id)) 804 return NULL; 805 |
716 rcu_read_lock(); | 806 rcu_read_lock(); |
 | 807 table = rcu_dereference(mm->ioctx_table); |
717 | 808 |
718 hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { 719 if (ctx->user_id == ctx_id) { 720 percpu_ref_get(&ctx->users); 721 ret = ctx; 722 break; 723 } 724 } | 809 if (!table || id >= table->nr) 810 goto out; |
725 | 811 |
 | 812 ctx = table->table[id]; 813 if (ctx->user_id == ctx_id) { 814 percpu_ref_get(&ctx->users); 815 ret = ctx; 816 } 817out: |
726 rcu_read_unlock(); 727 return ret; 728} 729 730/* aio_complete 731 * Called when the io request on the given iocb is complete. 732 */ 733void aio_complete(struct kiocb *iocb, long res, long res2) --- 259 unchanged lines hidden (view full) --- 993 goto out; 994 } 995 996 ioctx = ioctx_alloc(nr_events); 997 ret = PTR_ERR(ioctx); 998 if (!IS_ERR(ioctx)) { 999 ret = put_user(ioctx->user_id, ctxp); 1000 if (ret) | 818 rcu_read_unlock(); 819 return ret; 820} 821 822/* aio_complete 823 * Called when the io request on the given iocb is complete. 824 */ 825void aio_complete(struct kiocb *iocb, long res, long res2) --- 259 unchanged lines hidden (view full) --- 1085 goto out; 1086 } 1087 1088 ioctx = ioctx_alloc(nr_events); 1089 ret = PTR_ERR(ioctx); 1090 if (!IS_ERR(ioctx)) { 1091 ret = put_user(ioctx->user_id, ctxp); 1092 if (ret) |
1001 kill_ioctx(ioctx); | 1093 kill_ioctx(current->mm, ioctx); |
1002 percpu_ref_put(&ioctx->users); 1003 } 1004 1005out: 1006 return ret; 1007} 1008 1009/* sys_io_destroy: 1010 * Destroy the aio_context specified. May cancel any outstanding 1011 * AIOs and block on completion. Will fail with -ENOSYS if not 1012 * implemented. May fail with -EINVAL if the context pointed to 1013 * is invalid. 1014 */ 1015SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) 1016{ 1017 struct kioctx *ioctx = lookup_ioctx(ctx); 1018 if (likely(NULL != ioctx)) { | 1094 percpu_ref_put(&ioctx->users); 1095 } 1096 1097out: 1098 return ret; 1099} 1100 1101/* sys_io_destroy: 1102 * Destroy the aio_context specified. May cancel any outstanding 1103 * AIOs and block on completion. Will fail with -ENOSYS if not 1104 * implemented. May fail with -EINVAL if the context pointed to 1105 * is invalid. 1106 */ 1107SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) 1108{ 1109 struct kioctx *ioctx = lookup_ioctx(ctx); 1110 if (likely(NULL != ioctx)) { |
1019 kill_ioctx(ioctx); | 1111 kill_ioctx(current->mm, ioctx); |
1020 percpu_ref_put(&ioctx->users); 1021 return 0; 1022 } 1023 pr_debug("EINVAL: io_destroy: invalid context id\n"); 1024 return -EINVAL; 1025} 1026 1027typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, --- 394 unchanged lines hidden --- | 1112 percpu_ref_put(&ioctx->users); 1113 return 0; 1114 } 1115 pr_debug("EINVAL: io_destroy: invalid context id\n"); 1116 return -EINVAL; 1117} 1118 1119typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, --- 394 unchanged lines hidden --- |
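
The substance of this diff is the replacement of the per-mm `ioctx_list` hlist, which the old `lookup_ioctx()` walked linearly comparing `ctx->user_id`, with an RCU-managed `mm->ioctx_table` array: `ioctx_add_table()` assigns each context a slot index, mirrors that index into the user-mapped ring header (`ring->id`), and the new `lookup_ioctx()` reads the id back with `get_user()`, bounds-checks it against `table->nr`, and still verifies `ctx->user_id` before taking a reference. The sketch below is a minimal userspace model of that lookup scheme only; the `aio_ctx`/`aio_table` types, the plain array standing in for RCU-protected memory, and the absence of locking are illustrative simplifications, not the kernel's actual code.

```c
/*
 * Userspace model of the lookup change in this diff: a context gets a slot
 * "id" at creation time, and lookup becomes an O(1) bounds-checked array
 * index plus a user_id sanity check, instead of a linked-list walk.
 * Types and names here (aio_ctx, aio_table, ...) are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

struct aio_ctx {
	unsigned long user_id;	/* value userspace passes back as ctx_id */
	unsigned id;		/* slot index, mirrored into ring->id */
};

struct aio_table {
	unsigned nr;
	struct aio_ctx **slots;
};

/* Find a free slot and record its index in the context; if the table is
 * full, quadruple it and retry (the same growth ioctx_add_table() uses). */
static int table_add(struct aio_table *t, struct aio_ctx *ctx)
{
	for (;;) {
		for (unsigned i = 0; i < t->nr; i++) {
			if (!t->slots[i]) {
				t->slots[i] = ctx;
				ctx->id = i;
				return 0;
			}
		}
		unsigned new_nr = t->nr ? t->nr * 4 : 4;
		struct aio_ctx **grown = calloc(new_nr, sizeof(*grown));
		if (!grown)
			return -1;
		for (unsigned i = 0; i < t->nr; i++)
			grown[i] = t->slots[i];
		free(t->slots);
		t->slots = grown;
		t->nr = new_nr;
	}
}

/* O(1) lookup in the spirit of the new lookup_ioctx(): bounds-check the id,
 * then verify user_id so a stale or forged id cannot resolve to a context. */
static struct aio_ctx *table_lookup(const struct aio_table *t, unsigned id,
				    unsigned long ctx_id)
{
	if (id >= t->nr || !t->slots[id])
		return NULL;
	if (t->slots[id]->user_id != ctx_id)
		return NULL;
	return t->slots[id];
}

int main(void)
{
	struct aio_table t = { 0, NULL };
	struct aio_ctx ctx = { .user_id = 0x7f0000001000UL };

	if (table_add(&t, &ctx) != 0)
		return 1;

	printf("slot %u, valid lookup: %s, forged id: %s\n", ctx.id,
	       table_lookup(&t, ctx.id, ctx.user_id) ? "hit" : "miss",
	       table_lookup(&t, 3, ctx.user_id) ? "hit" : "miss");

	free(t.slots);
	return 0;
}
```

The extra `user_id` comparison matters because the id lives in a ring page that userspace can scribble on: a forged or stale `ring->id` must not resolve to someone else's context, which is also why `aio_setup_ring()` in the new version initialises `ring->id` to `~0U` until `ioctx_add_table()` fills in a real slot.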