xref: /openbmc/linux/block/blk-ioc.c (revision fd638368)
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

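/*
 * Note that get_io_context() only bumps an already-held reference (hence
 * the BUG_ON() above).  The initial reference to a task's io_context is
 * obtained through get_task_io_context() further down in this file.
 */
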
/*
 * Releasing an ioc may nest into another put_io_context() leading to a
 * nested fast path release.  As the two iocs can't be the same, this is
 * okay but makes lockdep whine.  Keep track of the nesting depth and use
 * it as the lock subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif

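/* RCU callback: free @icq from the icq_cache recorded by ioc_exit_icq() */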
static void icq_free_icq_rcu(struct rcu_head *head)
{
	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

	kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/*
 * Exit and free an icq.  Called with both ioc and q locked.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);
	lockdep_assert_held(q->queue_lock);

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

	/*
	 * Both setting the lookup hint to @icq and clearing it from @icq
	 * are done under queue_lock.  If it's not pointing to @icq now, it
	 * never will.  Hint assignment itself can race safely.
	 */
	if (rcu_dereference_raw(ioc->icq_hint) == icq)
		rcu_assign_pointer(ioc->icq_hint, NULL);

	if (et->ops.elevator_exit_icq_fn) {
		ioc_release_depth_inc(q);
		et->ops.elevator_exit_icq_fn(icq);
		ioc_release_depth_dec(q);
	}

	/*
	 * @icq->q might have gone away by the time the RCU callback runs,
	 * making it impossible to determine the icq_cache.  Record it in
	 * @icq.
	 */
	icq->__rcu_icq_cache = et->icq_cache;
	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icqs and then frees the ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @icq.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_exit_icq(icq);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - release a reference to io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding the queue_lock of (hint)
 *
 * Decrement the reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding the queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because icqs are chained
	 * from both the ioc and the queue, and ioc->lock nests inside
	 * queue_lock.  The inner ioc->lock should be held to walk our
	 * icq_list and then, for each icq, the outer matching queue_lock
	 * should be grabbed, i.e. we need to do reverse-order double-lock
	 * dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing the double-lock dance for other queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing a locking dependency.  This
	 * can handle most cases, especially if @ioc was performing IO on
	 * only a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_exit_icq(icq);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no icq is left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->icq_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cqs.  Must be called with @q locked.
 */
void ioc_clear_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	while (!list_empty(&q->icq_list)) {
		struct io_cq *icq = list_entry(q->icq_list.next,
					       struct io_cq, q_node);
		struct io_context *ioc = icq->ioc;

		spin_lock(&ioc->lock);
		ioc_exit_icq(icq);
		spin_unlock(&ioc->lock);
	}
}

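/*
 * Slow path of create_io_context().  Allocates and initializes a new
 * io_context and tries to install it on @task; if the install is not
 * possible (see the comment below), the allocation is simply freed.
 */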
void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/*
	 * Try to install.  The ioc shouldn't be installed if someone else
	 * already did, or if @task, which isn't %current, is exiting.  Note
	 * that we need to allow ioc creation on an exiting %current as the
	 * exit path may issue IOs from e.g. exit_files().  The exit path is
	 * responsible for not issuing IO after exit_io_context().
	 */
	task_lock(task);
	if (!task->io_context &&
	    (task == current || !(task->flags & PF_EXITING)))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return the io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock(); for %current it's better
 * to use %current->io_context + get_io_context() directly.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);

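/*
 * Illustrative usage sketch (not a specific in-tree call site):
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
 *	if (ioc) {
 *		... use ioc ...
 *		put_io_context(ioc, NULL);
 *	}
 */
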
/**
 * ioc_lookup_icq - look up io_cq from ioc
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the @ioc - @q pair from @ioc.  Must be
 * called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
	struct io_cq *icq;

	lockdep_assert_held(q->queue_lock);

	/*
	 * icqs are indexed from @ioc using a radix tree and a hint pointer,
	 * both of which are protected with RCU.  All removals are done
	 * holding both the q and ioc locks, and we're holding the q lock -
	 * if we find an icq which points to us, it's guaranteed to be
	 * valid.
	 */
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

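/*
 * Illustrative lookup-then-create pattern: a sketch of how a request
 * allocation path is expected to combine ioc_lookup_icq() with
 * ioc_create_icq() below.  @ioc is assumed to be %current's io_context;
 * the exact call site lives outside this file.
 *
 *	spin_lock_irq(q->queue_lock);
 *	icq = ioc_lookup_icq(ioc, q);
 *	spin_unlock_irq(q->queue_lock);
 *	if (!icq)
 *		icq = ioc_create_icq(q, gfp_mask);
 */
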
/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 * @gfp_mask: allocation mask
 *
 * Make sure the io_cq linking %current->io_context and @q exists.  If the
 * io_context and/or the icq don't exist, they will be created using
 * @gfp_mask.
 *
 * The caller is responsible for ensuring that the io_context won't go away
 * and that @q is alive and will stay alive until this function returns.
 */
struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
{
	struct elevator_type *et = q->elevator->type;
	struct io_context *ioc;
	struct io_cq *icq;

	/* allocate stuff */
	ioc = create_io_context(current, gfp_mask, q->node);
	if (!ioc)
		return NULL;

	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

	if (radix_tree_preload(gfp_mask) < 0) {
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

	/* lock both q and ioc and try to link @icq */
	spin_lock_irq(q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
		if (et->ops.elevator_init_icq_fn)
			et->ops.elevator_init_icq_fn(icq);
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(ioc, q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(q->queue_lock);
	radix_tree_preload_end();
	return icq;
}

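/*
 * Set the @which changed bit on all icqs of @ioc.  The callers (the change
 * notifiers below) hold @ioc->lock while walking the icq_list.
 */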
void ioc_set_changed(struct io_context *ioc, int which)
{
	struct io_cq *icq;
	struct hlist_node *n;

	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
		set_bit(which, &icq->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %ICQ_IOPRIO_CHANGED for all
 * icqs.  The iosched is responsible for checking the bit and applying it
 * on the request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %ICQ_CGROUP_CHANGED for all icqs.  The
 * iosched is responsible for checking the bit and applying it on the
 * request issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
EXPORT_SYMBOL(ioc_cgroup_changed);

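/* set up the slab cache for io_contexts at boot */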
static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);