/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release.  As the ioc's can't be the same, this is okay but
 * makes lockdep whine.  Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif
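/*
 * Illustration (a sketch, not from the original source): the nesting the
 * subclass guards against is a release that recurses through an iosched
 * callback and drops the last reference to a second, different ioc:
 *
 *	put_io_context(ioc_a, q)
 *	    -> cic->release(cic)
 *	        -> put_io_context(ioc_b, q)	// nested fast path release
 *
 * Both levels acquire an ioc->lock of the same lock class, so without
 * the per-queue release depth as lockdep subclass this would be flagged
 * as recursive locking even though ioc_a != ioc_b.
 */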
/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all cic's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @cic.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because cic's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our cic_list and
	 * then, for each cic, the outer matching queue_lock should be
	 * grabbed, i.e. we need to do reverse-order double lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing double-lock dance for other queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing locking dependency.  This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no cic's left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->cic_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
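/*
 * A minimal usage sketch (not part of the original file; the caller and
 * its locking context are hypothetical).  Per the kernel-doc above, a
 * caller already holding a queue_lock may pass that queue as the hint so
 * the fast path can reuse the held lock:
 *
 *	spin_lock_irq(q->queue_lock);
 *	get_io_context(ioc);		// hold a ref while using @ioc
 *	...				// use @ioc under q->queue_lock
 *	put_io_context(ioc, q);		// q->queue_lock held -> pass hint
 *	spin_unlock_irq(q->queue_lock);
 *
 * Passing %NULL instead of @q is always safe; it merely forfeits the
 * fast-path reuse of the held queue_lock.
 */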
/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->cic_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
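/*
 * get_task_io_context() below loops on create_io_context(), which is not
 * defined in this file.  A rough sketch of that fast-path wrapper (it is
 * believed to live in block/blk.h; treat the exact shape as an
 * assumption, not a quote):
 *
 *	static inline struct io_context *create_io_context(
 *			struct task_struct *task, gfp_t gfp_mask, int node)
 *	{
 *		WARN_ON_ONCE(irqs_disabled());
 *		if (unlikely(!task->io_context))
 *			create_io_context_slowpath(task, gfp_mask, node);
 *		return task->io_context;
 *	}
 *
 * i.e. the slowpath above runs only when no io_context is attached yet,
 * and losing the install race is handled by freeing the extra ioc.
 */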
/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);

void ioc_set_changed(struct io_context *ioc, int which)
{
	struct cfq_io_context *cic;
	struct hlist_node *n;

	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
		set_bit(which, &cic->changed);
}
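/*
 * The two notifiers below only set bits; consuming them is the io
 * scheduler's job on the request issue path.  A hypothetical consumer
 * might look like this sketch (the helper names are made up; the bit
 * handling mirrors how &cic->changed is written above):
 *
 *	if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
 *		reapply_ioprio(cic);		// hypothetical helper
 *	if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
 *		reassociate_cgroup(cic);	// hypothetical helper
 */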
/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
 * cic's.  iosched is responsible for checking the bit and applying it on
 * the request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
 * iosched is responsible for checking the bit and applying it on the
 * request issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);