/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

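/*
 * Look up the io_cq for this io_context/queue pair, creating one if it
 * doesn't exist yet, and attach it to @rq for the elevator's use. A
 * reference is taken on the io_context.
 */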
void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_inc(&q->shared_hctx_restart);
	} else
		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

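/*
 * If this hardware queue was marked as needing a restart, clear the flag
 * and re-run the queue if it still has pending work.
 */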
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (blk_mq_hctx_has_pending(hctx))
		blk_mq_run_hw_queue(hctx, true);
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_RESOURCE.
 */
static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	LIST_HEAD(rq_list);

	do {
		struct request *rq;
		blk_status_t ret;

		if (e->type->ops.mq.has_work &&
		    !e->type->ops.mq.has_work(hctx))
			break;

		ret = blk_mq_get_dispatch_budget(hctx);
		if (ret == BLK_STS_RESOURCE)
			break;

		rq = e->type->ops.mq.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		} else if (ret != BLK_STS_OK) {
			blk_mq_end_request(rq, ret);
			continue;
		}

		/*
		 * Now this rq owns the budget, which has to be released
		 * if this rq won't be queued to the driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);
	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
}

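/*
 * Return the software queue that follows @ctx on this hardware queue,
 * wrapping around to the first one. Used for round-robin dispatch from
 * the software queues.
 */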
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
					  struct blk_mq_ctx *ctx)
{
	unsigned idx = ctx->index_hw;

	if (++idx == hctx->nr_ctx)
		idx = 0;

	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_RESOURCE.
 */
static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);

	do {
		struct request *rq;
		blk_status_t ret;

		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		ret = blk_mq_get_dispatch_budget(hctx);
		if (ret == BLK_STS_RESOURCE)
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		} else if (ret != BLK_STS_OK) {
			blk_mq_end_request(rq, ret);
			continue;
		}

		/*
		 * Now this rq owns the budget, which has to be released
		 * if this rq won't be queued to the driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));

	WRITE_ONCE(hctx->dispatch_from, ctx);
}

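/*
 * Dispatch pending requests for a hardware queue: leftover requests on the
 * hctx dispatch list are sent to the driver first, then requests are pulled
 * from the I/O scheduler, or directly from the software queues if no
 * scheduler dispatch hook is attached.
 */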
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	LIST_HEAD(rq_list);

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first
	 * for more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 *
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list, or if we were able to dispatch from the
	 * dispatch list.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
			if (has_sched_dispatch)
				blk_mq_do_dispatch_sched(hctx);
			else
				blk_mq_do_dispatch_ctx(hctx);
		}
	} else if (has_sched_dispatch) {
		blk_mq_do_dispatch_sched(hctx);
	} else if (q->mq_ops->get_budget) {
		/*
		 * If we need to get a budget before queuing a request, we
		 * dequeue requests one by one from the sw queue to avoid
		 * messing up I/O merging when dispatch runs out of resources.
		 *
		 * TODO: get more budgets and dequeue more requests in one go.
		 */
		blk_mq_do_dispatch_ctx(hctx);
	} else {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(q, &rq_list, false);
	}
}

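/*
 * Ask the elevator whether @bio can be merged into an existing request and,
 * if so, perform the merge. If the grown request can then be merged with an
 * adjacent request, the now-redundant request is returned through
 * @merged_request.
 */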
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

/*
 * Reverse check our software queue for entries that we could potentially
 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
 * too much time checking for merges.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	lockdep_assert_held(&ctx->lock);

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		bool merged = false;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			continue;
		}

		if (merged)
			ctx->rq_merged++;
		return merged;
	}

	return false;
}

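/*
 * Try to merge @bio into an already-queued request: if the elevator provides
 * a bio_merge hook, use it; otherwise fall back to scanning the current
 * CPU's software queue when BLK_MQ_F_SHOULD_MERGE is set. Returns true if
 * the bio was merged.
 */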
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	bool ret = false;

	if (e && e->type->ops.mq.bio_merge) {
		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
		/* default per sw-queue merge */
		spin_lock(&ctx->lock);
		ret = blk_mq_attempt_merge(q, ctx, bio);
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(ctx);
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	if (rq->tag == -1) {
		rq->rq_flags |= RQF_SORTED;
		return false;
	}

	/*
	 * If we already have a real request tag, send directly to
	 * the dispatch list.
	 */
	spin_lock(&hctx->lock);
	list_add(&rq->queuelist, &hctx->dispatch);
	spin_unlock(&hctx->lock);
	return true;
}

/*
 * Add flush/fua to the queue. If we fail getting a driver tag, then
 * punt to the requeue list. Requeue will re-invoke us from a context
 * that's safe to block from.
 */
static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				      struct request *rq, bool can_block)
{
	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
		blk_insert_flush(rq);
		blk_mq_run_hw_queue(hctx, true);
	} else
		blk_mq_add_to_requeue_list(rq, false, true);
}

void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
		blk_mq_sched_insert_flush(hctx, rq, can_block);
		return;
	}

	if (e && blk_mq_sched_bypass_insert(hctx, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

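/*
 * Insert a list of requests that all belong to the same software queue,
 * either through the elevator's insert_requests hook or directly into the
 * software queue, then kick the hardware queue.
 */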
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e) {
		struct request *rq, *next;

		/*
		 * We bypass requests that already have a driver tag assigned,
		 * which should only be flushes. Flushes are only ever inserted
		 * as single requests, so we shouldn't ever hit the
		 * WARN_ON_ONCE() below (but let's handle it just in case).
		 */
		list_for_each_entry_safe(rq, next, list, queuelist) {
			if (WARN_ON_ONCE(rq->tag != -1)) {
				list_del_init(&rq->queuelist);
				blk_mq_sched_bypass_insert(hctx, rq);
			}
		}
	}

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);

	return ret;
}

static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

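/*
 * Set up scheduler state for a newly created hardware queue: allocate its
 * scheduler tags and invoke the elevator's init_hctx hook, if one exists.
 */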
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}

void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

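/*
 * Attach an elevator to the queue: pick a default queue depth, allocate
 * scheduler tags for each hardware queue, and run the elevator's
 * init_sched and init_hctx hooks.
 */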
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned int i;
	int ret;

	if (!e) {
		q->elevator = NULL;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_tags(q, hctx, i);
		if (ret)
			goto err;
	}

	ret = e->ops.mq.init_sched(q, e);
	if (ret)
		goto err;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err:
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
	return ret;
}

void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
			e->type->ops.mq.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
}

int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}