/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);
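
/*
 * Usage sketch (illustrative, not part of the original file): a reference
 * taken with get_io_context() must eventually be dropped with
 * put_io_context().  A typical pattern for handing the ioc to another
 * context looks roughly like:
 *
 *	get_io_context(ioc);
 *	some_async_work->ioc = ioc;	   (hypothetical consumer)
 *	...
 *	put_io_context(some_async_work->ioc, NULL);
 */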

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release.  As the ioc's can't be the same, this is okay but
 * makes lockdep whine.  Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif
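
/*
 * Illustrative note (added, not in the original file): the depth tracked
 * above is consumed as the lockdep subclass when put_io_context() takes
 * ioc->lock, so nested releases triggered from the elevator's exit_icq
 * callback (see ioc_exit_icq()) don't trip the recursive-locking warning.
 * The intended pairing looks like:
 *
 *	spin_lock_irqsave_nested(&ioc->lock, flags,
 *				 ioc_release_depth(locked_q));
 */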

static void icq_free_icq_rcu(struct rcu_head *head)
{
	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

	kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/*
 * Exit and free an icq.  Called with both ioc and q locked.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);
	lockdep_assert_held(q->queue_lock);

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

	/*
	 * The lookup hint is both set to and cleared from @icq under
	 * queue_lock.  If it's not pointing to @icq now, it never will.
	 * Hint assignment itself can race safely.
	 */
	if (rcu_dereference_raw(ioc->icq_hint) == icq)
		rcu_assign_pointer(ioc->icq_hint, NULL);

	if (et->ops.elevator_exit_icq_fn) {
		ioc_release_depth_inc(q);
		et->ops.elevator_exit_icq_fn(icq);
		ioc_release_depth_dec(q);
	}

	/*
	 * @icq->q might have gone away by the time RCU callback runs
	 * making it impossible to determine icq_cache.  Record it in @icq.
	 */
	icq->__rcu_icq_cache = et->icq_cache;
	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
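
/*
 * Descriptive note (added, not in the original file): every caller of
 * ioc_exit_icq() in this file - the put_io_context() fast path,
 * ioc_release_fn() and ioc_clear_queue() - holds both ioc->lock and the
 * matching queue_lock, which is exactly what the lockdep assertions above
 * require.
 */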

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @icq.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_exit_icq(icq);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}
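
/*
 * Descriptive note (added, not in the original file): ioc_release_fn()
 * runs from the workqueue that put_io_context() kicks via schedule_work(),
 * i.e. in process context, where it is safe to wait on queue_locks and to
 * call the possibly-sleeping blk_put_queue().
 */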

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because icq's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our icq_list and then
	 * for each icq the outer matching queue_lock should be grabbed,
	 * i.e. we need to do reverse-order double-lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing the double-lock dance for other queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing a locking dependency.  This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_exit_icq(icq);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no icq is left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->icq_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
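
/*
 * Usage sketch (illustrative, not part of the original file): @locked_q is
 * only a hint, so both of the following are valid for a caller that holds
 * q->queue_lock:
 *
 *	put_io_context(ioc, q);		lets the fast path reuse the lock
 *	put_io_context(ioc, NULL);	also fine, just pessimistic
 *
 * A caller that holds no queue_lock must pass NULL.
 */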

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
 */
void ioc_clear_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	while (!list_empty(&q->icq_list)) {
		struct io_cq *icq = list_entry(q->icq_list.next,
					       struct io_cq, q_node);
		struct io_context *ioc = icq->ioc;

		spin_lock(&ioc->lock);
		ioc_exit_icq(icq);
		spin_unlock(&ioc->lock);
	}
}
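
/*
 * Descriptive note (added, not in the original file): this is intended for
 * teardown paths such as elevator switch or queue release, where the caller
 * already holds q->queue_lock and wants every icq detached before the
 * elevator or queue data it references goes away.
 */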

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
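
/*
 * Sketch (assumption, not part of this file): the fast-path wrapper
 * create_io_context() used below is expected to live in blk.h and to look
 * roughly like the following - check the task first and only fall back to
 * the slowpath above when no io_context is attached yet:
 *
 *	static inline struct io_context *create_io_context(
 *			struct task_struct *task, gfp_t gfp_mask, int node)
 *	{
 *		if (unlikely(!task->io_context))
 *			create_io_context_slowpath(task, gfp_mask, node);
 *		return task->io_context;
 *	}
 */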

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
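
/*
 * Usage sketch (illustrative, not part of the original file): the returned
 * ioc carries a reference that the caller must drop:
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(task, GFP_NOIO, NUMA_NO_NODE);
 *	if (ioc) {
 *		...
 *		put_io_context(ioc, NULL);
 *	}
 */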

/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
 * with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
	struct io_cq *icq;

	lockdep_assert_held(q->queue_lock);

	/*
	 * icq's are indexed from @ioc using a radix tree and a hint
	 * pointer, both of which are protected with RCU.  All removals are
	 * done holding both q and ioc locks, and we're holding q lock - if
	 * we find an icq which points to us, it's guaranteed to be valid.
	 */
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);
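
/*
 * Usage sketch (illustrative, not part of the original file): look up the
 * icq for an ioc on @q, creating it if it doesn't exist yet.  Note that
 * ioc_create_icq() takes q->queue_lock itself, so it must be called with
 * the lock dropped:
 *
 *	spin_lock_irq(q->queue_lock);
 *	icq = ioc_lookup_icq(ioc, q);
 *	spin_unlock_irq(q->queue_lock);
 *	if (!icq)
 *		icq = ioc_create_icq(q, GFP_NOIO);
 */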

/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 * @gfp_mask: allocation mask
 *
 * Make sure an io_cq linking %current->io_context and @q exists.  If
 * either the io_context or the icq doesn't exist, it is created using
 * @gfp_mask.
 *
 * The caller is responsible for ensuring that %current's io_context won't
 * go away and that @q is alive and will stay alive until this function
 * returns.
 */
struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
{
	struct elevator_type *et = q->elevator->type;
	struct io_context *ioc;
	struct io_cq *icq;

	/* allocate stuff */
	ioc = create_io_context(current, gfp_mask, q->node);
	if (!ioc)
		return NULL;

	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

	if (radix_tree_preload(gfp_mask) < 0) {
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

	/* lock both q and ioc and try to link @icq */
	spin_lock_irq(q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
		if (et->ops.elevator_init_icq_fn)
			et->ops.elevator_init_icq_fn(icq);
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(ioc, q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(q->queue_lock);
	radix_tree_preload_end();
	return icq;
}
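
/*
 * Descriptive note (added, not in the original file): the failure branch
 * above handles the case where another task linked an icq for the same
 * ioc/q pair between our allocation and taking the locks - we free our
 * copy and return the one that won the race via ioc_lookup_icq().
 */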

void ioc_set_changed(struct io_context *ioc, int which)
{
	struct io_cq *icq;
	struct hlist_node *n;

	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
		set_bit(which, &icq->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %ICQ_IOPRIO_CHANGED for all
 * icq's.  iosched is responsible for checking the bit and applying it on
 * request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %ICQ_CGROUP_CHANGED for all icq's.
 * iosched is responsible for checking the bit and applying it on request
 * issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
EXPORT_SYMBOL(ioc_cgroup_changed);

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);
476