/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

static void icq_free_icq_rcu(struct rcu_head *head)
{
	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

	kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/* Exit an icq. Called with both ioc and q locked. */
static void ioc_exit_icq(struct io_cq *icq)
{
	struct elevator_type *et = icq->q->elevator->type;

	if (icq->flags & ICQ_EXITED)
		return;

	if (et->ops.elevator_exit_icq_fn)
		et->ops.elevator_exit_icq_fn(icq);

	icq->flags |= ICQ_EXITED;
}

/* Release an icq. Called with both ioc and q locked. */
static void ioc_destroy_icq(struct io_cq *icq)
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);
	lockdep_assert_held(q->queue_lock);

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

	/*
	 * Both setting lookup hint to and clearing it from @icq are done
	 * under queue_lock.  If it's not pointing to @icq now, it never
	 * will.  Hint assignment itself can race safely.
	 */
	if (rcu_dereference_raw(ioc->icq_hint) == icq)
		rcu_assign_pointer(ioc->icq_hint, NULL);

	ioc_exit_icq(icq);

	/*
	 * @icq->q might have gone away by the time RCU callback runs
	 * making it impossible to determine icq_cache.  Record it in @icq.
	 */
	icq->__rcu_icq_cache = et->icq_cache;
	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
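
/*
 * The release paths below need ioc->lock and a queue_lock in the
 * reverse of the usual locking order, so they resort to a
 * trylock-and-retry dance.  A minimal sketch of the pattern, with
 * hypothetical names (outer, inner, work_pending, do_work), shown for
 * illustration only:
 *
 *	spin_lock(&outer);
 *	while (work_pending) {
 *		if (spin_trylock(&inner)) {
 *			do_work();
 *			spin_unlock(&inner);
 *		} else {
 *			spin_unlock(&outer);
 *			cpu_relax();	// let the inner lock holder run
 *			spin_lock(&outer);
 *		}
 *	}
 *	spin_unlock(&outer);
 */
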
/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	unsigned long flags;

	/*
	 * Exiting icq may call into put_io_context() through elevator
	 * which will trigger lockdep warning.  The ioc's are guaranteed to
	 * be different, use a different locking subclass here.  Use
	 * irqsave variant as there's no spin_lock_irq_nested().
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *q = icq->q;

		if (spin_trylock(q->queue_lock)) {
			ioc_destroy_icq(icq);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irqrestore(&ioc->lock, flags);
			cpu_relax();
			spin_lock_irqsave_nested(&ioc->lock, flags, 1);
		}
	}

	spin_unlock_irqrestore(&ioc->lock, flags);

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference on io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
	unsigned long flags;
	bool free_ioc = false;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);

	/*
	 * Releasing ioc requires reverse order double locking and we may
	 * already be holding a queue_lock.  Do it asynchronously from wq.
	 */
	if (atomic_long_dec_and_test(&ioc->refcount)) {
		spin_lock_irqsave(&ioc->lock, flags);
		if (!hlist_empty(&ioc->icq_list))
			queue_work(system_power_efficient_wq,
				   &ioc->release_work);
		else
			free_ioc = true;
		spin_unlock_irqrestore(&ioc->lock, flags);
	}

	if (free_ioc)
		kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL(put_io_context);
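
/*
 * Illustrative sketch, not part of this file's API: a typical caller
 * pairs a counted get with put_io_context().  get_task_io_context()
 * (defined below) returns with ->refcount already elevated; GFP_NOIO
 * and NUMA_NO_NODE are merely example arguments.
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
 *	if (ioc) {
 *		...			// use ioc, e.g. inspect io priority
 *		put_io_context(ioc);	// drop the reference when done
 *	}
 */
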
/**
 * put_io_context_active - put active reference on ioc
 * @ioc: ioc of interest
 *
 * Undo get_io_context_active().  If the active reference reaches zero
 * after the put, @ioc can never issue further IOs and ioscheds are
 * notified.
 */
void put_io_context_active(struct io_context *ioc)
{
	unsigned long flags;
	struct io_cq *icq;

	if (!atomic_dec_and_test(&ioc->active_ref)) {
		put_io_context(ioc);
		return;
	}

	/*
	 * Need ioc lock to walk icq_list and q lock to exit icq.  Perform
	 * reverse double locking.  See the comment in ioc_release_fn() for
	 * an explanation of the nested locking annotation.
	 */
retry:
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
		if (icq->flags & ICQ_EXITED)
			continue;
		if (spin_trylock(icq->q->queue_lock)) {
			ioc_exit_icq(icq);
			spin_unlock(icq->q->queue_lock);
		} else {
			spin_unlock_irqrestore(&ioc->lock, flags);
			cpu_relax();
			goto retry;
		}
	}
	spin_unlock_irqrestore(&ioc->lock, flags);

	put_io_context(ioc);
}

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context_active(ioc);
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
 */
void ioc_clear_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	while (!list_empty(&q->icq_list)) {
		struct io_cq *icq = list_entry(q->icq_list.next,
					       struct io_cq, q_node);
		struct io_context *ioc = icq->ioc;

		spin_lock(&ioc->lock);
		ioc_destroy_icq(icq);
		spin_unlock(&ioc->lock);
	}
}
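
/*
 * Illustrative call-site sketch for ioc_clear_queue() (hedged; the real
 * callers live in the elevator switch and queue teardown paths):
 * @q->queue_lock must already be held, as asserted above.
 *
 *	spin_lock_irq(q->queue_lock);
 *	ioc_clear_queue(q);
 *	spin_unlock_irq(q->queue_lock);
 */
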
int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
{
	struct io_context *ioc;
	int ret;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return -ENOMEM;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	atomic_set(&ioc->active_ref, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/*
	 * Try to install.  The ioc shouldn't be installed if someone else
	 * already installed one, or if @task isn't %current and is
	 * exiting.  Note that we need to allow ioc creation on an exiting
	 * %current, as the exit path may issue IOs from e.g. exit_files().
	 * The exit path is responsible for not issuing IO after
	 * exit_io_context().
	 */
	task_lock(task);
	if (!task->io_context &&
	    (task == current || !(task->flags & PF_EXITING)))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);

	ret = task->io_context ? 0 : -EBUSY;

	task_unlock(task);

	return ret;
}

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return the io_context of @task.  If it doesn't exist, it is created
 * with @gfp_flags and @node.  The returned io_context has its reference
 * count incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (!create_task_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
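
/*
 * Illustrative only, per the kerneldoc note above: a path that knows it
 * is dealing with %current can avoid task_lock(), because only %current
 * ever changes its own ->io_context.  gfp_mask and node stand in for
 * whatever the caller would pass.
 *
 *	struct io_context *ioc = current->io_context;
 *
 *	if (ioc)
 *		get_io_context(ioc);
 *	else
 *		ioc = get_task_io_context(current, gfp_mask, node);
 */
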
/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the @ioc - @q pair in @ioc.  Must be
 * called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
	struct io_cq *icq;

	lockdep_assert_held(q->queue_lock);

	/*
	 * icq's are indexed from @ioc using radix tree and hint pointer,
	 * both of which are protected with RCU.  All removals are done
	 * holding both q and ioc locks, and we're holding q lock - if we
	 * find an icq which points to us, it's guaranteed to be valid.
	 */
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);
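
/*
 * Illustrative sketch of the lookup-then-create pattern a request
 * allocation path might use (hedged; see the actual users in
 * blk-core.c).  ioc_create_icq() is defined below.
 *
 *	spin_lock_irq(q->queue_lock);
 *	icq = ioc_lookup_icq(ioc, q);
 *	spin_unlock_irq(q->queue_lock);
 *
 *	if (!icq)
 *		icq = ioc_create_icq(ioc, q, gfp_mask);
 */
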
/**
 * ioc_create_icq - create and link io_cq
 * @ioc: io_context of interest
 * @q: request_queue of interest
 * @gfp_mask: allocation mask
 *
 * Make sure an io_cq linking @ioc and @q exists.  If the icq doesn't
 * exist, it will be created using @gfp_mask.
 *
 * The caller is responsible for ensuring @ioc won't go away and @q is
 * alive and will stay alive until this function returns.
 */
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
			     gfp_t gfp_mask)
{
	struct elevator_type *et = q->elevator->type;
	struct io_cq *icq;

	/* allocate stuff */
	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

	if (radix_tree_maybe_preload(gfp_mask) < 0) {
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

	/* lock both q and ioc and try to link @icq */
	spin_lock_irq(q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
		if (et->ops.elevator_init_icq_fn)
			et->ops.elevator_init_icq_fn(icq);
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(ioc, q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(q->queue_lock);
	radix_tree_preload_end();
	return icq;
}

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);