xref: /openbmc/linux/block/blk-ioc.c (revision f2dbd76a)
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);
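
/*
 * Usage sketch (illustrative only; "req" and "q" are hypothetical): a
 * caller that already owns a reference can hand one to another structure
 * by pairing get_io_context() with a later put_io_context().
 *
 *	get_io_context(ioc);
 *	req->ioc = ioc;
 *	...
 *	put_io_context(req->ioc, q);
 */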

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release.  As the ioc's can't be the same, this is okay but
 * makes lockdep whine.  Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all cic's and then frees the ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @cic.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because cic's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our cic_list and then
	 * for each cic the outer matching queue_lock should be grabbed,
	 * i.e. we need to do reverse-order double lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents us from performing the double-lock dance for other
	 * queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing locking dependency.  This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no cic's left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->cic_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
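
/*
 * Usage sketch (illustrative only; "q" is hypothetical): a caller that
 * happens to hold a queue_lock passes that queue as the hint so the fast
 * path can release cic's belonging to it without trylock; callers not
 * holding any queue_lock simply pass %NULL.
 *
 *	spin_lock_irq(q->queue_lock);
 *	...
 *	put_io_context(ioc, q);
 *	spin_unlock_irq(q->queue_lock);
 *
 *	put_io_context(ioc, NULL);
 */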

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}
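
/*
 * Note that @task->io_context must be non-NULL here: the nr_tasks
 * decrement above would dereference a NULL ioc otherwise.  The exit path
 * is therefore expected to call this only when an io_context is
 * attached, roughly (sketch, not the exact call site):
 *
 *	if (tsk->io_context)
 *		exit_io_context(tsk);
 */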

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->cic_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
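
/*
 * For reference, the create_io_context() fast path used by
 * get_task_io_context() below lives in blk.h; roughly (sketch only, not
 * the exact helper), it skips the slow path when an io_context is
 * already attached and re-reads task->io_context afterwards, which may
 * still be %NULL if allocation failed or the task is exiting:
 *
 *	static inline struct io_context *
 *	create_io_context(struct task_struct *task, gfp_t gfp_mask, int node)
 *	{
 *		if (unlikely(!task->io_context))
 *			create_io_context_slowpath(task, gfp_mask, node);
 *		return task->io_context;
 *	}
 */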

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
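
/*
 * Usage sketch (illustrative; the caller below is hypothetical): grab a
 * task's io_context, creating it if necessary, notify a change, and then
 * drop the reference taken by get_task_io_context().
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 *	if (ioc) {
 *		ioc_ioprio_changed(ioc, ioprio);
 *		put_io_context(ioc, NULL);
 *	}
 */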
27086db1e29SJens Axboe 
271dc86900eSTejun Heo void ioc_set_changed(struct io_context *ioc, int which)
272dc86900eSTejun Heo {
273dc86900eSTejun Heo 	struct cfq_io_context *cic;
274dc86900eSTejun Heo 	struct hlist_node *n;
275dc86900eSTejun Heo 
276dc86900eSTejun Heo 	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
277dc86900eSTejun Heo 		set_bit(which, &cic->changed);
278dc86900eSTejun Heo }

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
 * cic's.  iosched is responsible for checking the bit and applying it on
 * the request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
 * iosched is responsible for checking the bit and applying it on the
 * request issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
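
/*
 * Consumer-side sketch (illustrative): on the request issue path the
 * iosched is expected to test-and-clear the changed bits and apply the
 * new settings.  apply_ioprio()/apply_cgroup() below stand in for the
 * elevator's own handlers.
 *
 *	if (unlikely(cic->changed)) {
 *		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
 *			apply_ioprio(cic);
 *		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
 *			apply_cgroup(cic);
 *	}
 */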

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);