/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
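 * The count lets blk_mq_sched_restart() return early when none of the
 * queue's hardware queues are marked.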
 */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_inc(&q->shared_hctx_restart);
	} else
		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return false;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_dec(&q->shared_hctx_restart);
	} else
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (blk_mq_hctx_has_pending(hctx)) {
		blk_mq_run_hw_queue(hctx, true);
		return true;
	}

	return false;
}

void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	bool do_sched_dispatch = true;
	LIST_HEAD(rq_list);

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first
	 * for fairer dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
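	 * The queue is run again once a driver tag is freed (see
	 * blk_mq_sched_restart()).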
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		do_sched_dispatch = blk_mq_dispatch_rq_list(q, &rq_list);
	} else if (!has_sched_dispatch) {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(q, &rq_list);
	}

	/*
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list or we were able to dispatch from the
	 * dispatch list.
	 */
	if (do_sched_dispatch && has_sched_dispatch) {
		do {
			struct request *rq;

			rq = e->type->ops.mq.dispatch_request(hctx);
			if (!rq)
				break;
			list_add(&rq->queuelist, &rq_list);
		} while (blk_mq_dispatch_rq_list(q, &rq_list));
	}
}

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

/*
 * Reverse check our software queue for entries that we could potentially
 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
 * too much time checking for merges.
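 * This is the default per-sw-queue merge path, used when the elevator does
 * not provide a ->bio_merge() hook.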
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	lockdep_assert_held(&ctx->lock);

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		bool merged = false;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			continue;
		}

		if (merged)
			ctx->rq_merged++;
		return merged;
	}

	return false;
}

bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	bool ret = false;

	if (e && e->type->ops.mq.bio_merge) {
		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
		/* default per sw-queue merge */
		spin_lock(&ctx->lock);
		ret = blk_mq_attempt_merge(q, ctx, bio);
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(ctx);
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	if (rq->tag == -1) {
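		/* No driver tag yet: let the I/O scheduler handle the request */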
		rq->rq_flags |= RQF_SORTED;
		return false;
	}

	/*
	 * If we already have a real request tag, send directly to
	 * the dispatch list.
	 */
	spin_lock(&hctx->lock);
	list_add(&rq->queuelist, &hctx->dispatch);
	spin_unlock(&hctx->lock);
	return true;
}

/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
			(pos)->member.next, typeof(*pos), member) :	\
			list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
	     (pos) != (skip); )

/*
 * Called after a driver tag has been freed to check whether a hctx needs to
 * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
 * queues in a round-robin fashion if the tag set of @hctx is shared with other
 * hardware queues.
 */
void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
{
	struct blk_mq_tags *const tags = hctx->tags;
	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
	struct request_queue *const queue = hctx->queue, *q;
	struct blk_mq_hw_ctx *hctx2;
	unsigned int i, j;

	if (set->flags & BLK_MQ_F_TAG_SHARED) {
		/*
		 * If this is 0, then we know that no hardware queues
		 * have RESTART marked. We're done.
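		 * Checking the counter first avoids touching every hardware
		 * queue that shares the tag set.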
		 */
		if (!atomic_read(&queue->shared_hctx_restart))
			return;

		rcu_read_lock();
		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
					   tag_set_list) {
			queue_for_each_hw_ctx(q, hctx2, i)
				if (hctx2->tags == tags &&
				    blk_mq_sched_restart_hctx(hctx2))
					goto done;
		}
		j = hctx->queue_num + 1;
		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
			if (j == queue->nr_hw_queues)
				j = 0;
			hctx2 = queue->queue_hw_ctx[j];
			if (hctx2->tags == tags &&
			    blk_mq_sched_restart_hctx(hctx2))
				break;
		}
 done:
		rcu_read_unlock();
	} else {
		blk_mq_sched_restart_hctx(hctx);
	}
}

/*
 * Add flush/fua to the queue. If we fail getting a driver tag, then
 * punt to the requeue list. Requeue will re-invoke us from a context
 * that's safe to block from.
 */
static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				      struct request *rq, bool can_block)
{
	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
		blk_insert_flush(rq);
		blk_mq_run_hw_queue(hctx, true);
	} else
		blk_mq_add_to_requeue_list(rq, false, true);
}

void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
		blk_mq_sched_insert_flush(hctx, rq, can_block);
		return;
	}

	if (e && blk_mq_sched_bypass_insert(hctx, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

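/*
 * Insert a list of requests: hand the whole list to the elevator's
 * ->insert_requests() hook if there is one, otherwise add the requests to
 * the software queue, then kick the hardware queue.
 */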
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e) {
		struct request *rq, *next;

		/*
		 * We bypass requests that already have a driver tag assigned,
		 * which should only be flushes. Flushes are only ever inserted
		 * as single requests, so we shouldn't ever hit the
		 * WARN_ON_ONCE() below (but let's handle it just in case).
		 */
		list_for_each_entry_safe(rq, next, list, queuelist) {
			if (WARN_ON_ONCE(rq->tag != -1)) {
				list_del_init(&rq->queuelist);
				blk_mq_sched_bypass_insert(hctx, rq);
			}
		}
	}

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);

	return ret;
}

static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

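/*
 * Allocate scheduler tags for a newly created hardware queue and run the
 * elevator's per-hctx init hook, if the queue has an elevator.
 */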
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}

void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned int i;
	int ret;

	if (!e) {
		q->elevator = NULL;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue_depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
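	 * For example, a device with a hardware queue depth of 31 gets 62
	 * scheduler requests per hw queue, while devices with a depth of 128
	 * (BLKDEV_MAX_RQ) or more are capped at 256.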
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_tags(q, hctx, i);
		if (ret)
			goto err;
	}

	ret = e->ops.mq.init_sched(q, e);
	if (ret)
		goto err;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err:
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
	return ret;
}

void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
			e->type->ops.mq.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
}

int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}