/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}
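/*
 * Illustrative caller sketch (an assumption about the core allocation
 * path in blk-mq.c for this era, not a quote of it): the helper above is
 * expected to be invoked roughly like
 *
 *	if (e && e->type->icq_cache && rq_ioc(bio))
 *		blk_mq_sched_assign_ioc(rq, bio);
 *
 * so that icq-based schedulers (e.g. BFQ) find a valid rq->elv.icq.
 */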
/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_inc(&q->shared_hctx_restart);
	} else
		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return false;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_dec(&q->shared_hctx_restart);
	} else
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (blk_mq_hctx_has_pending(hctx)) {
		blk_mq_run_hw_queue(hctx, true);
		return true;
	}

	return false;
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue when .get_budget() fails to get the budget.
 */
static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	LIST_HEAD(rq_list);

	do {
		struct request *rq;

		if (e->type->ops.mq.has_work &&
		    !e->type->ops.mq.has_work(hctx))
			break;

		if (!blk_mq_get_dispatch_budget(hctx))
			break;

		rq = e->type->ops.mq.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		}

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);
	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
}
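/*
 * The loop above hands the driver exactly one request per iteration, so
 * the budget taken from blk_mq_get_dispatch_budget() is either consumed
 * by ->queue_rq() or released explicitly before breaking out. Once the
 * driver fails to accept a request, blk_mq_dispatch_rq_list() parks it
 * on hctx->dispatch and returns false, ending the loop while all other
 * requests stay inside the scheduler where they can still be merged.
 */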
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
					  struct blk_mq_ctx *ctx)
{
	unsigned idx = ctx->index_hw;

	if (++idx == hctx->nr_ctx)
		idx = 0;

	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue when .get_budget() fails to get the budget.
 */
static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);

	do {
		struct request *rq;

		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		if (!blk_mq_get_dispatch_budget(hctx))
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		}

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));

	WRITE_ONCE(hctx->dispatch_from, ctx);
}
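/*
 * Worked example for the round robin above: with software queues
 * {0, 1, 2} and hctx->dispatch_from == 1, requests are dequeued starting
 * at ctx 1, advancing to ctx 2 and then wrapping to ctx 0; the final
 * position is stored back in hctx->dispatch_from so the next run
 * continues where this one stopped instead of always draining ctx 0
 * first.
 */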
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	LIST_HEAD(rq_list);

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first
	 * for more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 *
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list or we were able to dispatch from the
	 * dispatch list.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
			if (has_sched_dispatch)
				blk_mq_do_dispatch_sched(hctx);
			else
				blk_mq_do_dispatch_ctx(hctx);
		}
	} else if (has_sched_dispatch) {
		blk_mq_do_dispatch_sched(hctx);
	} else if (q->mq_ops->get_budget) {
		/*
		 * If we need to get a budget before queueing a request,
		 * dequeue requests one by one from the sw queue, to avoid
		 * messing up I/O merging when dispatch runs out of
		 * resources.
		 *
		 * TODO: get more budgets and dequeue more requests at a
		 * time.
		 */
		blk_mq_do_dispatch_ctx(hctx);
	} else {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(q, &rq_list, false);
	}
}
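/*
 * Example of what the residual-request rule above avoids: with a device
 * queue depth of 32 and a scheduler depth of 64 (see the default in
 * blk_mq_init_sched() below), draining the elevator in one go would
 * strand half the requests on hctx->dispatch, where they can no longer
 * be merged or sorted; servicing the leftovers first keeps that
 * unsortable window as small as possible.
 */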
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
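/*
 * Caller sketch (hedged; modeled on how an mq elevator such as
 * mq-deadline wires this helper into its ->bio_merge hook, and the lock
 * name is illustrative): the scheduler takes its own lock, lets
 * blk_mq_sched_try_merge() do the work, and frees a request that became
 * redundant because a back/front merge collapsed two requests into one:
 *
 *	struct request *free = NULL;
 *	bool ret;
 *
 *	spin_lock(&sched_lock);
 *	ret = blk_mq_sched_try_merge(q, bio, &free);
 *	spin_unlock(&sched_lock);
 *	if (free)
 *		blk_mq_free_request(free);
 *	return ret;
 */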
/*
 * Reverse check our software queue for entries that we could potentially
 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
 * too much time checking for merges.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	lockdep_assert_held(&ctx->lock);

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		bool merged = false;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			continue;
		}

		if (merged)
			ctx->rq_merged++;
		return merged;
	}

	return false;
}

bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	bool ret = false;

	if (e && e->type->ops.mq.bio_merge) {
		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
		/* default per sw-queue merge */
		spin_lock(&ctx->lock);
		ret = blk_mq_attempt_merge(q, ctx, bio);
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(ctx);
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
/* dispatch flush rq directly */
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       bool has_sched,
				       struct request *rq)
{
	if (rq->rq_flags & RQF_FLUSH_SEQ) {
		spin_lock(&hctx->lock);
		list_add(&rq->queuelist, &hctx->dispatch);
		spin_unlock(&hctx->lock);
		return true;
	}

	if (has_sched)
		rq->rq_flags |= RQF_SORTED;

	return false;
}

/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
			(pos)->member.next, typeof(*pos), member) :	\
			list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
	     (pos) != (skip); )
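/*
 * Worked example for the macro above: for a list head H chaining entries
 * a -> b -> c, iterating with @skip == b visits c, wraps past H (the
 * head itself is never returned), visits a, and stops upon reaching b
 * again. This is what lets blk_mq_sched_restart() below start its scan
 * at the queue after the one that freed the tag.
 */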
/*
 * Called after a driver tag has been freed to check whether a hctx needs to
 * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
 * queues in a round-robin fashion if the tag set of @hctx is shared with other
 * hardware queues.
 */
void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
{
	struct blk_mq_tags *const tags = hctx->tags;
	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
	struct request_queue *const queue = hctx->queue, *q;
	struct blk_mq_hw_ctx *hctx2;
	unsigned int i, j;

	if (set->flags & BLK_MQ_F_TAG_SHARED) {
		/*
		 * If this is 0, then we know that no hardware queues
		 * have RESTART marked. We're done.
		 */
		if (!atomic_read(&queue->shared_hctx_restart))
			return;

		rcu_read_lock();
		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
					   tag_set_list) {
			queue_for_each_hw_ctx(q, hctx2, i)
				if (hctx2->tags == tags &&
				    blk_mq_sched_restart_hctx(hctx2))
					goto done;
		}
		j = hctx->queue_num + 1;
		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
			if (j == queue->nr_hw_queues)
				j = 0;
			hctx2 = queue->queue_hw_ctx[j];
			if (hctx2->tags == tags &&
			    blk_mq_sched_restart_hctx(hctx2))
				break;
		}
done:
		rcu_read_unlock();
	} else {
		blk_mq_sched_restart_hctx(hctx);
	}
}

void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	/* a flush rq not yet owned by the flush machinery must be
	 * dispatched through it directly */
	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
		blk_insert_flush(rq);
		goto run;
	}

	WARN_ON(e && (rq->tag != -1));

	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}
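/*
 * Note on the flush handling above: a flush/fua request enters the flush
 * state machine (blk_insert_flush()) exactly once, before RQF_FLUSH_SEQ
 * is set. Once the flush machinery owns the request, RQF_FLUSH_SEQ is
 * set and blk_mq_sched_bypass_insert() sends it straight to
 * hctx->dispatch, so it never passes through the elevator.
 */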
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);

	return ret;
}

static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}
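/*
 * Note (an assumption based on callers outside this file):
 * blk_mq_sched_init_hctx() matters for hardware queues created after an
 * elevator is already attached, e.g. when nr_hw_queues grows; at initial
 * queue setup q->elevator is still NULL and it is a no-op. It therefore
 * sets up the same per-hctx state as blk_mq_init_sched() below: sched
 * tags, the elevator's private data, and the debugfs entry.
 */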
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned int i;
	int ret;

	if (!e) {
		q->elevator = NULL;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue depth and
	 * 128 (BLKDEV_MAX_RQ), since we no longer split the depth into
	 * sync/async halves like the legacy path did. Note that this is
	 * a per-hw-queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_tags(q, hctx, i);
		if (ret)
			goto err;
	}

	ret = e->ops.mq.init_sched(q, e);
	if (ret)
		goto err;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err:
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
	return ret;
}

void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
			e->type->ops.mq.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
}
int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}