xref: /openbmc/linux/block/blk-mq-sched.c (revision 5e3d02bb)
/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

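/*
 * Free the per-hctx scheduler data for all hardware queues of @q, invoking
 * the optional @exit callback first for each hctx that has scheduler data.
 */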
void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

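/*
 * Associate the submitting task's io_context with @rq: look up (or, if
 * needed, create) the io_cq for this queue and attach it to the request's
 * elevator data, taking a reference on the io_context. If no icq can be
 * allocated, the request is simply left without one.
 */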
void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_inc(&q->shared_hctx_restart);
	} else
		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

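/*
 * Clear the restart marker on @hctx (dropping the shared restart count if
 * the tag set is shared) and re-run the hardware queue if it still has
 * pending work. Returns true if a queue run was kicked off.
 */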
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return false;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_dec(&q->shared_hctx_restart);
	} else
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (blk_mq_hctx_has_pending(hctx)) {
		blk_mq_run_hw_queue(hctx, true);
		return true;
	}

	return false;
}

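/*
 * Main dispatch path for a hardware queue: drain any leftover requests on
 * the hctx dispatch list first, then feed requests from the I/O scheduler
 * (or, when there is no scheduler dispatch hook, from the software queues)
 * to blk_mq_dispatch_rq_list().
 */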
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	bool do_sched_dispatch = true;
	LIST_HEAD(rq_list);

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests on the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		do_sched_dispatch = blk_mq_dispatch_rq_list(q, &rq_list);
	} else if (!has_sched_dispatch) {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(q, &rq_list);
	}

	/*
	 * We want to dispatch from the scheduler only if there was nothing
	 * on the dispatch list, or if we were able to dispatch everything
	 * that was on it.
	 */
	if (do_sched_dispatch && has_sched_dispatch) {
		do {
			struct request *rq;

			rq = e->type->ops.mq.dispatch_request(hctx);
			if (!rq)
				break;
			list_add(&rq->queuelist, &rq_list);
		} while (blk_mq_dispatch_rq_list(q, &rq_list));
	}
}

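/*
 * Ask the elevator whether @bio can be merged into an existing request on
 * @q and, if so, perform the merge. If that bio merge in turn lets the
 * request be coalesced with a neighbouring request, the now-redundant
 * request is reported via @merged_request so the caller can free it.
 */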
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

/*
 * Walk our software queue in reverse, looking for requests that @bio could
 * potentially be merged with. The scan is capped at a (somewhat arbitrary)
 * limit of 8 entries so we don't spend too much time checking for merges.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	lockdep_assert_held(&ctx->lock);

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		bool merged = false;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			continue;
		}

		if (merged)
			ctx->rq_merged++;
		return merged;
	}

	return false;
}

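/*
 * Try to merge @bio into an already-queued request. If the scheduler
 * provides a bio_merge hook, defer to it; otherwise fall back to the
 * default per-software-queue merge above. Returns true if the bio was
 * merged and no new request is needed.
 */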
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	bool ret = false;

	if (e && e->type->ops.mq.bio_merge) {
		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
		/* default per sw-queue merge */
		spin_lock(&ctx->lock);
		ret = blk_mq_attempt_merge(q, ctx, bio);
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(ctx);
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

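/*
 * Requests that already carry a driver tag (which should only be flush
 * requests) must not go through the I/O scheduler. In that case, put @rq
 * straight on the hctx dispatch list and return true; otherwise mark it
 * RQF_SORTED and return false so the caller inserts it via the scheduler.
 */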
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	if (rq->tag == -1) {
		rq->rq_flags |= RQF_SORTED;
		return false;
	}

	/*
	 * If we already have a real request tag, send directly to
	 * the dispatch list.
	 */
	spin_lock(&hctx->lock);
	list_add(&rq->queuelist, &hctx->dispatch);
	spin_unlock(&hctx->lock);
	return true;
}

/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
			(pos)->member.next, typeof(*pos), member) :	\
	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
	     (pos) != (skip); )

/*
 * Called after a driver tag has been freed to check whether a hctx needs to
 * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
 * queues in a round-robin fashion if the tag set of @hctx is shared with other
 * hardware queues.
 */
void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
{
	struct blk_mq_tags *const tags = hctx->tags;
	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
	struct request_queue *const queue = hctx->queue, *q;
	struct blk_mq_hw_ctx *hctx2;
	unsigned int i, j;

	if (set->flags & BLK_MQ_F_TAG_SHARED) {
		/*
		 * If this is 0, then we know that no hardware queues
		 * have RESTART marked. We're done.
		 */
		if (!atomic_read(&queue->shared_hctx_restart))
			return;

		rcu_read_lock();
		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
					   tag_set_list) {
			queue_for_each_hw_ctx(q, hctx2, i)
				if (hctx2->tags == tags &&
				    blk_mq_sched_restart_hctx(hctx2))
					goto done;
		}
		j = hctx->queue_num + 1;
		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
			if (j == queue->nr_hw_queues)
				j = 0;
			hctx2 = queue->queue_hw_ctx[j];
			if (hctx2->tags == tags &&
			    blk_mq_sched_restart_hctx(hctx2))
				break;
		}
done:
		rcu_read_unlock();
	} else {
		blk_mq_sched_restart_hctx(hctx);
	}
}

/*
 * Add a flush/fua request to the queue. If we fail to get a driver tag,
 * punt the request to the requeue list. The requeue work will re-invoke us
 * from a context that's safe to block in.
 */
static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				      struct request *rq, bool can_block)
{
	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
		blk_insert_flush(rq);
		blk_mq_run_hw_queue(hctx, true);
	} else
		blk_mq_add_to_requeue_list(rq, false, true);
}

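/*
 * Insert a single request: flush/fua requests take the dedicated flush path,
 * requests that already own a driver tag bypass the scheduler, and everything
 * else goes through the scheduler's insert_requests hook (or straight into
 * the software queue when there is no scheduler). Optionally runs the queue.
 */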
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
		blk_mq_sched_insert_flush(hctx, rq, can_block);
		return;
	}

	if (e && blk_mq_sched_bypass_insert(hctx, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

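/*
 * Insert a list of requests destined for one software queue, either through
 * the scheduler's insert_requests hook or directly into the software queue,
 * then run the hardware queue.
 */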
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e) {
		struct request *rq, *next;

		/*
		 * We bypass requests that already have a driver tag assigned,
		 * which should only be flushes. Flushes are only ever inserted
		 * as single requests, so we shouldn't ever hit the
		 * WARN_ON_ONCE() below (but let's handle it just in case).
		 */
		list_for_each_entry_safe(rq, next, list, queuelist) {
			if (WARN_ON_ONCE(rq->tag != -1)) {
				list_del_init(&rq->queuelist);
				blk_mq_sched_bypass_insert(hctx, rq);
			}
		}
	}

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

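/*
 * Helpers for the per-hctx scheduler tag maps: the scheduler gets its own
 * map of q->nr_requests tags (and their backing requests) per hardware
 * queue, separate from the driver tags owned by the tag set.
 */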
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);

	return ret;
}

static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

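/*
 * Set up scheduler state for a single hardware queue: allocate its scheduler
 * tags, call the elevator's init_hctx hook if it has one, and register the
 * hctx with debugfs. A no-op when the queue has no elevator.
 */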
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}

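/*
 * Tear down scheduler state for a single hardware queue: unregister its
 * debugfs entries, call the elevator's exit_hctx hook if scheduler data was
 * set up, and free the scheduler tags.
 */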
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

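/*
 * Attach elevator type @e to @q: pick a default scheduler queue depth,
 * allocate scheduler tags for every hardware queue, and invoke the
 * elevator's init_sched and (optional) per-hctx init_hctx hooks. On failure
 * the scheduler state is torn down and the queue is left without an
 * elevator.
 */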
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned int i;
	int ret;

	if (!e) {
		q->elevator = NULL;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_tags(q, hctx, i);
		if (ret)
			goto err;
	}

	ret = e->ops.mq.init_sched(q, e);
	if (ret)
		goto err;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err:
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
	return ret;
}

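/*
 * Detach elevator @e from @q, undoing blk_mq_init_sched(): run the per-hctx
 * exit hooks and debugfs teardown first, then the elevator-wide exit_sched
 * hook, and finally free the scheduler tag maps.
 */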
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
			e->type->ops.mq.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
}

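/*
 * Pick and initialize the default elevator for @q, serialized against
 * scheduler switches via q->sysfs_lock.
 */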
int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}