13dcf60bcSChristoph Hellwig // SPDX-License-Identifier: GPL-2.0 275bb4625SJens Axboe /* 375bb4625SJens Axboe * Block multiqueue core code 475bb4625SJens Axboe * 575bb4625SJens Axboe * Copyright (C) 2013-2014 Jens Axboe 675bb4625SJens Axboe * Copyright (C) 2013-2014 Christoph Hellwig 775bb4625SJens Axboe */ 8320ae51fSJens Axboe #include <linux/kernel.h> 9320ae51fSJens Axboe #include <linux/module.h> 10320ae51fSJens Axboe #include <linux/backing-dev.h> 11320ae51fSJens Axboe #include <linux/bio.h> 12320ae51fSJens Axboe #include <linux/blkdev.h> 13fe45e630SChristoph Hellwig #include <linux/blk-integrity.h> 14f75782e4SCatalin Marinas #include <linux/kmemleak.h> 15320ae51fSJens Axboe #include <linux/mm.h> 16320ae51fSJens Axboe #include <linux/init.h> 17320ae51fSJens Axboe #include <linux/slab.h> 18320ae51fSJens Axboe #include <linux/workqueue.h> 19320ae51fSJens Axboe #include <linux/smp.h> 20e41d12f5SChristoph Hellwig #include <linux/interrupt.h> 21320ae51fSJens Axboe #include <linux/llist.h> 22320ae51fSJens Axboe #include <linux/cpu.h> 23320ae51fSJens Axboe #include <linux/cache.h> 24320ae51fSJens Axboe #include <linux/sched/sysctl.h> 25105ab3d8SIngo Molnar #include <linux/sched/topology.h> 26174cd4b1SIngo Molnar #include <linux/sched/signal.h> 27320ae51fSJens Axboe #include <linux/delay.h> 28aedcd72fSJens Axboe #include <linux/crash_dump.h> 2988c7b2b7SJens Axboe #include <linux/prefetch.h> 30a892c8d5SSatya Tangirala #include <linux/blk-crypto.h> 3182d981d4SChristoph Hellwig #include <linux/part_stat.h> 32320ae51fSJens Axboe 33320ae51fSJens Axboe #include <trace/events/block.h> 34320ae51fSJens Axboe 3554d4e6abSMax Gurtovoy #include <linux/t10-pi.h> 36320ae51fSJens Axboe #include "blk.h" 37320ae51fSJens Axboe #include "blk-mq.h" 389c1051aaSOmar Sandoval #include "blk-mq-debugfs.h" 39986d413bSBart Van Assche #include "blk-pm.h" 40cf43e6beSJens Axboe #include "blk-stat.h" 41bd166ef1SJens Axboe #include "blk-mq-sched.h" 42c1c80384SJosef Bacik #include "blk-rq-qos.h" 4382b74cacSJan Kara #include "blk-ioprio.h" 44320ae51fSJens Axboe 45f9ab4918SSebastian Andrzej Siewior static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); 46c3077b5dSChristoph Hellwig 47710fa378SChristoph Hellwig static void blk_mq_insert_request(struct request *rq, blk_insert_t flags); 48360f2648SChristoph Hellwig static void blk_mq_request_bypass_insert(struct request *rq, 49360f2648SChristoph Hellwig blk_insert_t flags); 5094aa228cSChristoph Hellwig static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, 5194aa228cSChristoph Hellwig struct list_head *list); 52f6c80cffSKeith Busch static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, 53f6c80cffSKeith Busch struct io_comp_batch *iob, unsigned int flags); 543e08773cSChristoph Hellwig 55320ae51fSJens Axboe /* 5685fae294SYufen Yu * Check if any of the ctx, dispatch list or elevator 5785fae294SYufen Yu * have pending work in this hardware queue. 
58320ae51fSJens Axboe */ 5979f720a7SJens Axboe static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) 60320ae51fSJens Axboe { 6179f720a7SJens Axboe return !list_empty_careful(&hctx->dispatch) || 6279f720a7SJens Axboe sbitmap_any_bit_set(&hctx->ctx_map) || 63bd166ef1SJens Axboe blk_mq_sched_has_work(hctx); 64320ae51fSJens Axboe } 65320ae51fSJens Axboe 66320ae51fSJens Axboe /* 67320ae51fSJens Axboe * Mark this ctx as having pending work in this hardware queue 68320ae51fSJens Axboe */ 69320ae51fSJens Axboe static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, 70320ae51fSJens Axboe struct blk_mq_ctx *ctx) 71320ae51fSJens Axboe { 72f31967f0SJens Axboe const int bit = ctx->index_hw[hctx->type]; 73f31967f0SJens Axboe 74f31967f0SJens Axboe if (!sbitmap_test_bit(&hctx->ctx_map, bit)) 75f31967f0SJens Axboe sbitmap_set_bit(&hctx->ctx_map, bit); 761429d7c9SJens Axboe } 771429d7c9SJens Axboe 781429d7c9SJens Axboe static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, 791429d7c9SJens Axboe struct blk_mq_ctx *ctx) 801429d7c9SJens Axboe { 81f31967f0SJens Axboe const int bit = ctx->index_hw[hctx->type]; 82f31967f0SJens Axboe 83f31967f0SJens Axboe sbitmap_clear_bit(&hctx->ctx_map, bit); 84320ae51fSJens Axboe } 85320ae51fSJens Axboe 86f299b7c7SJens Axboe struct mq_inflight { 878446fe92SChristoph Hellwig struct block_device *part; 88a2e80f6fSPavel Begunkov unsigned int inflight[2]; 89f299b7c7SJens Axboe }; 90f299b7c7SJens Axboe 912dd6532eSJohn Garry static bool blk_mq_check_inflight(struct request *rq, void *priv) 92f299b7c7SJens Axboe { 93f299b7c7SJens Axboe struct mq_inflight *mi = priv; 94f299b7c7SJens Axboe 95b81c14caSHaisu Wang if (rq->part && blk_do_io_stat(rq) && 96b81c14caSHaisu Wang (!mi->part->bd_partno || rq->part == mi->part) && 97b0d97557SJeffle Xu blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) 98bb4e6b14SPavel Begunkov mi->inflight[rq_data_dir(rq)]++; 997baa8572SJens Axboe 1007baa8572SJens Axboe return true; 101f299b7c7SJens Axboe } 102f299b7c7SJens Axboe 1038446fe92SChristoph Hellwig unsigned int blk_mq_in_flight(struct request_queue *q, 1048446fe92SChristoph Hellwig struct block_device *part) 105f299b7c7SJens Axboe { 106a2e80f6fSPavel Begunkov struct mq_inflight mi = { .part = part }; 107f299b7c7SJens Axboe 108f299b7c7SJens Axboe blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); 109e016b782SMikulas Patocka 110a2e80f6fSPavel Begunkov return mi.inflight[0] + mi.inflight[1]; 111bf0ddabaSOmar Sandoval } 112bf0ddabaSOmar Sandoval 1138446fe92SChristoph Hellwig void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, 114bf0ddabaSOmar Sandoval unsigned int inflight[2]) 115bf0ddabaSOmar Sandoval { 116a2e80f6fSPavel Begunkov struct mq_inflight mi = { .part = part }; 117bf0ddabaSOmar Sandoval 118bb4e6b14SPavel Begunkov blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); 119a2e80f6fSPavel Begunkov inflight[0] = mi.inflight[0]; 120a2e80f6fSPavel Begunkov inflight[1] = mi.inflight[1]; 121bf0ddabaSOmar Sandoval } 122bf0ddabaSOmar Sandoval 1231671d522SMing Lei void blk_freeze_queue_start(struct request_queue *q) 12443a5e4e2SMing Lei { 1257996a8b5SBob Liu mutex_lock(&q->mq_freeze_lock); 1267996a8b5SBob Liu if (++q->mq_freeze_depth == 1) { 1273ef28e83SDan Williams percpu_ref_kill(&q->q_usage_counter); 1287996a8b5SBob Liu mutex_unlock(&q->mq_freeze_lock); 129344e9ffcSJens Axboe if (queue_is_mq(q)) 130b94ec296SMike Snitzer blk_mq_run_hw_queues(q, false); 1317996a8b5SBob Liu } else { 1327996a8b5SBob Liu mutex_unlock(&q->mq_freeze_lock); 133cddd5d17STejun 
Heo } 134f3af020bSTejun Heo } 1351671d522SMing Lei EXPORT_SYMBOL_GPL(blk_freeze_queue_start); 136f3af020bSTejun Heo 1376bae363eSKeith Busch void blk_mq_freeze_queue_wait(struct request_queue *q) 138f3af020bSTejun Heo { 1393ef28e83SDan Williams wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); 14043a5e4e2SMing Lei } 1416bae363eSKeith Busch EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); 14243a5e4e2SMing Lei 143f91328c4SKeith Busch int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, 144f91328c4SKeith Busch unsigned long timeout) 145f91328c4SKeith Busch { 146f91328c4SKeith Busch return wait_event_timeout(q->mq_freeze_wq, 147f91328c4SKeith Busch percpu_ref_is_zero(&q->q_usage_counter), 148f91328c4SKeith Busch timeout); 149f91328c4SKeith Busch } 150f91328c4SKeith Busch EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); 151320ae51fSJens Axboe 152f3af020bSTejun Heo /* 153f3af020bSTejun Heo * Guarantee no request is in use, so we can change any data structure of 154f3af020bSTejun Heo * the queue afterward. 155f3af020bSTejun Heo */ 1563ef28e83SDan Williams void blk_freeze_queue(struct request_queue *q) 157f3af020bSTejun Heo { 1583ef28e83SDan Williams /* 1593ef28e83SDan Williams * In the !blk_mq case we are only calling this to kill the 1603ef28e83SDan Williams * q_usage_counter, otherwise this increases the freeze depth 1613ef28e83SDan Williams * and waits for it to return to zero. For this reason there is 1623ef28e83SDan Williams * no blk_unfreeze_queue(), and blk_freeze_queue() is not 1633ef28e83SDan Williams * exported to drivers as the only user for unfreeze is blk_mq. 1643ef28e83SDan Williams */ 1651671d522SMing Lei blk_freeze_queue_start(q); 166f3af020bSTejun Heo blk_mq_freeze_queue_wait(q); 167f3af020bSTejun Heo } 1683ef28e83SDan Williams 1693ef28e83SDan Williams void blk_mq_freeze_queue(struct request_queue *q) 1703ef28e83SDan Williams { 1713ef28e83SDan Williams /* 1723ef28e83SDan Williams * ...just an alias to keep freeze and unfreeze actions balanced 1733ef28e83SDan Williams * in the blk_mq_* namespace 1743ef28e83SDan Williams */ 1753ef28e83SDan Williams blk_freeze_queue(q); 1763ef28e83SDan Williams } 177c761d96bSJens Axboe EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); 178f3af020bSTejun Heo 179aec89dc5SChristoph Hellwig void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) 180320ae51fSJens Axboe { 1817996a8b5SBob Liu mutex_lock(&q->mq_freeze_lock); 182aec89dc5SChristoph Hellwig if (force_atomic) 183aec89dc5SChristoph Hellwig q->q_usage_counter.data->force_atomic = true; 1847996a8b5SBob Liu q->mq_freeze_depth--; 1857996a8b5SBob Liu WARN_ON_ONCE(q->mq_freeze_depth < 0); 1867996a8b5SBob Liu if (!q->mq_freeze_depth) { 187bdd63160SBart Van Assche percpu_ref_resurrect(&q->q_usage_counter); 188320ae51fSJens Axboe wake_up_all(&q->mq_freeze_wq); 189320ae51fSJens Axboe } 1907996a8b5SBob Liu mutex_unlock(&q->mq_freeze_lock); 191add703fdSTejun Heo } 192aec89dc5SChristoph Hellwig 193aec89dc5SChristoph Hellwig void blk_mq_unfreeze_queue(struct request_queue *q) 194aec89dc5SChristoph Hellwig { 195aec89dc5SChristoph Hellwig __blk_mq_unfreeze_queue(q, false); 196aec89dc5SChristoph Hellwig } 197b4c6a028SKeith Busch EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); 198320ae51fSJens Axboe 199852ec809SBart Van Assche /* 200852ec809SBart Van Assche * FIXME: replace the scsi_internal_device_*block_nowait() calls in the 201852ec809SBart Van Assche * mpt3sas driver such that this function can be removed. 
202852ec809SBart Van Assche */
203852ec809SBart Van Assche void blk_mq_quiesce_queue_nowait(struct request_queue *q)
204852ec809SBart Van Assche {
205e70feb8bSMing Lei unsigned long flags;
206e70feb8bSMing Lei
207e70feb8bSMing Lei spin_lock_irqsave(&q->queue_lock, flags);
208e70feb8bSMing Lei if (!q->quiesce_depth++)
2098814ce8aSBart Van Assche blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
210e70feb8bSMing Lei spin_unlock_irqrestore(&q->queue_lock, flags);
211852ec809SBart Van Assche }
212852ec809SBart Van Assche EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
213852ec809SBart Van Assche
2146a83e74dSBart Van Assche /**
2159ef4d020SMing Lei * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
216483239c7SChristoph Hellwig * @set: tag_set to wait on
2179ef4d020SMing Lei *
2189ef4d020SMing Lei * Note: it is the driver's responsibility to make sure that quiesce has
219483239c7SChristoph Hellwig * been started on one or more of the request_queues of the tag_set. This
220483239c7SChristoph Hellwig * function only waits for the quiesce on those request_queues that had
221483239c7SChristoph Hellwig * the quiesce flag set using blk_mq_quiesce_queue_nowait.
2229ef4d020SMing Lei */
223483239c7SChristoph Hellwig void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set)
2249ef4d020SMing Lei {
225483239c7SChristoph Hellwig if (set->flags & BLK_MQ_F_BLOCKING)
226483239c7SChristoph Hellwig synchronize_srcu(set->srcu);
2279ef4d020SMing Lei else
2289ef4d020SMing Lei synchronize_rcu();
2299ef4d020SMing Lei }
2309ef4d020SMing Lei EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
2319ef4d020SMing Lei
2329ef4d020SMing Lei /**
23369e07c4aSMing Lei * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
2346a83e74dSBart Van Assche * @q: request queue.
2356a83e74dSBart Van Assche *
2366a83e74dSBart Van Assche * Note: this function does not prevent the struct request end_io()
23769e07c4aSMing Lei * callback function from being invoked. Once this function returns, no
23869e07c4aSMing Lei * dispatch can happen until the queue is unquiesced via
23969e07c4aSMing Lei * blk_mq_unquiesce_queue().
2406a83e74dSBart Van Assche */
2416a83e74dSBart Van Assche void blk_mq_quiesce_queue(struct request_queue *q)
2426a83e74dSBart Van Assche {
2431d9e9bc6SMing Lei blk_mq_quiesce_queue_nowait(q);
2448537380bSChristoph Hellwig /* nothing to wait for non-mq queues */
2458537380bSChristoph Hellwig if (queue_is_mq(q))
246483239c7SChristoph Hellwig blk_mq_wait_quiesce_done(q->tag_set);
2476a83e74dSBart Van Assche }
2486a83e74dSBart Van Assche EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
2496a83e74dSBart Van Assche
250e4e73913SMing Lei /*
251e4e73913SMing Lei * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
252e4e73913SMing Lei * @q: request queue.
253e4e73913SMing Lei *
254e4e73913SMing Lei * This function restores the queue to the state it was in before the
255e4e73913SMing Lei * quiesce done by blk_mq_quiesce_queue.
256e4e73913SMing Lei */ 257e4e73913SMing Lei void blk_mq_unquiesce_queue(struct request_queue *q) 258e4e73913SMing Lei { 259e70feb8bSMing Lei unsigned long flags; 260e70feb8bSMing Lei bool run_queue = false; 261e70feb8bSMing Lei 262e70feb8bSMing Lei spin_lock_irqsave(&q->queue_lock, flags); 263e70feb8bSMing Lei if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { 264e70feb8bSMing Lei ; 265e70feb8bSMing Lei } else if (!--q->quiesce_depth) { 2668814ce8aSBart Van Assche blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q); 267e70feb8bSMing Lei run_queue = true; 268e70feb8bSMing Lei } 269e70feb8bSMing Lei spin_unlock_irqrestore(&q->queue_lock, flags); 270f4560ffeSMing Lei 2711d9e9bc6SMing Lei /* dispatch requests which are inserted during quiescing */ 272e70feb8bSMing Lei if (run_queue) 2731d9e9bc6SMing Lei blk_mq_run_hw_queues(q, true); 274e4e73913SMing Lei } 275e4e73913SMing Lei EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue); 276e4e73913SMing Lei 277414dd48eSChao Leng void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) 278414dd48eSChao Leng { 279414dd48eSChao Leng struct request_queue *q; 280414dd48eSChao Leng 281414dd48eSChao Leng mutex_lock(&set->tag_list_lock); 282414dd48eSChao Leng list_for_each_entry(q, &set->tag_list, tag_set_list) { 283414dd48eSChao Leng if (!blk_queue_skip_tagset_quiesce(q)) 284414dd48eSChao Leng blk_mq_quiesce_queue_nowait(q); 285414dd48eSChao Leng } 286414dd48eSChao Leng blk_mq_wait_quiesce_done(set); 287414dd48eSChao Leng mutex_unlock(&set->tag_list_lock); 288414dd48eSChao Leng } 289414dd48eSChao Leng EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); 290414dd48eSChao Leng 291414dd48eSChao Leng void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set) 292414dd48eSChao Leng { 293414dd48eSChao Leng struct request_queue *q; 294414dd48eSChao Leng 295414dd48eSChao Leng mutex_lock(&set->tag_list_lock); 296414dd48eSChao Leng list_for_each_entry(q, &set->tag_list, tag_set_list) { 297414dd48eSChao Leng if (!blk_queue_skip_tagset_quiesce(q)) 298414dd48eSChao Leng blk_mq_unquiesce_queue(q); 299414dd48eSChao Leng } 300414dd48eSChao Leng mutex_unlock(&set->tag_list_lock); 301414dd48eSChao Leng } 302414dd48eSChao Leng EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset); 303414dd48eSChao Leng 304aed3ea94SJens Axboe void blk_mq_wake_waiters(struct request_queue *q) 305aed3ea94SJens Axboe { 306aed3ea94SJens Axboe struct blk_mq_hw_ctx *hctx; 3074f481208SMing Lei unsigned long i; 308aed3ea94SJens Axboe 309aed3ea94SJens Axboe queue_for_each_hw_ctx(q, hctx, i) 310aed3ea94SJens Axboe if (blk_mq_hw_queue_mapped(hctx)) 311aed3ea94SJens Axboe blk_mq_tag_wakeup_all(hctx->tags, true); 312aed3ea94SJens Axboe } 313aed3ea94SJens Axboe 31452fdbbccSChristoph Hellwig void blk_rq_init(struct request_queue *q, struct request *rq) 31552fdbbccSChristoph Hellwig { 31652fdbbccSChristoph Hellwig memset(rq, 0, sizeof(*rq)); 31752fdbbccSChristoph Hellwig 31852fdbbccSChristoph Hellwig INIT_LIST_HEAD(&rq->queuelist); 31952fdbbccSChristoph Hellwig rq->q = q; 32052fdbbccSChristoph Hellwig rq->__sector = (sector_t) -1; 32152fdbbccSChristoph Hellwig INIT_HLIST_NODE(&rq->hash); 32252fdbbccSChristoph Hellwig RB_CLEAR_NODE(&rq->rb_node); 32352fdbbccSChristoph Hellwig rq->tag = BLK_MQ_NO_TAG; 32452fdbbccSChristoph Hellwig rq->internal_tag = BLK_MQ_NO_TAG; 32552fdbbccSChristoph Hellwig rq->start_time_ns = ktime_get_ns(); 32652fdbbccSChristoph Hellwig rq->part = NULL; 32752fdbbccSChristoph Hellwig blk_crypto_rq_set_defaults(rq); 32852fdbbccSChristoph Hellwig } 32952fdbbccSChristoph Hellwig EXPORT_SYMBOL(blk_rq_init); 33052fdbbccSChristoph Hellwig 
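/*
 * Illustrative sketch, not part of the upstream file: a common driver-side
 * use of the quiesce interfaces above. blk_mq_quiesce_tagset() stops new
 * dispatches on the request_queues of a tag_set (unless a queue is marked to
 * skip tagset quiesce) and waits for in-flight dispatches to finish; it does
 * not wait for outstanding requests to complete. That lets a driver
 * reconfigure state shared by those queues before resuming I/O with
 * blk_mq_unquiesce_tagset(). The ctrl structure and my_driver_reconfigure()
 * below are hypothetical names used only for illustration:
 *
 *	blk_mq_quiesce_tagset(&ctrl->tag_set);
 *	my_driver_reconfigure(ctrl);
 *	blk_mq_unquiesce_tagset(&ctrl->tag_set);
 */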
3315c17f45eSChengming Zhou /* Set start and alloc time when the allocated request is actually used */ 3325c17f45eSChengming Zhou static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns) 3335c17f45eSChengming Zhou { 3345c17f45eSChengming Zhou if (blk_mq_need_time_stamp(rq)) 3355c17f45eSChengming Zhou rq->start_time_ns = ktime_get_ns(); 3365c17f45eSChengming Zhou else 3375c17f45eSChengming Zhou rq->start_time_ns = 0; 3385c17f45eSChengming Zhou 3395c17f45eSChengming Zhou #ifdef CONFIG_BLK_RQ_ALLOC_TIME 3405c17f45eSChengming Zhou if (blk_queue_rq_alloc_time(rq->q)) 3415c17f45eSChengming Zhou rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns; 3425c17f45eSChengming Zhou else 3435c17f45eSChengming Zhou rq->alloc_time_ns = 0; 3445c17f45eSChengming Zhou #endif 3455c17f45eSChengming Zhou } 3465c17f45eSChengming Zhou 347e4cdf1a1SChristoph Hellwig static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, 3485c17f45eSChengming Zhou struct blk_mq_tags *tags, unsigned int tag) 349320ae51fSJens Axboe { 350605f784eSPavel Begunkov struct blk_mq_ctx *ctx = data->ctx; 351605f784eSPavel Begunkov struct blk_mq_hw_ctx *hctx = data->hctx; 352605f784eSPavel Begunkov struct request_queue *q = data->q; 353e4cdf1a1SChristoph Hellwig struct request *rq = tags->static_rqs[tag]; 354c3a148d2SBart Van Assche 355c7b84d42SJens Axboe rq->q = q; 356c7b84d42SJens Axboe rq->mq_ctx = ctx; 357c7b84d42SJens Axboe rq->mq_hctx = hctx; 358c7b84d42SJens Axboe rq->cmd_flags = data->cmd_flags; 359e4cdf1a1SChristoph Hellwig 36012845906SPavel Begunkov if (data->flags & BLK_MQ_REQ_PM) 36156f8da64SJens Axboe data->rq_flags |= RQF_PM; 36212845906SPavel Begunkov if (blk_queue_io_stat(q)) 36356f8da64SJens Axboe data->rq_flags |= RQF_IO_STAT; 36456f8da64SJens Axboe rq->rq_flags = data->rq_flags; 36512845906SPavel Begunkov 366dd6216bbSChristoph Hellwig if (data->rq_flags & RQF_SCHED_TAGS) { 367c7b84d42SJens Axboe rq->tag = BLK_MQ_NO_TAG; 368c7b84d42SJens Axboe rq->internal_tag = tag; 369dd6216bbSChristoph Hellwig } else { 370dd6216bbSChristoph Hellwig rq->tag = tag; 371dd6216bbSChristoph Hellwig rq->internal_tag = BLK_MQ_NO_TAG; 372320ae51fSJens Axboe } 373c7b84d42SJens Axboe rq->timeout = 0; 374320ae51fSJens Axboe 375af76e555SChristoph Hellwig rq->part = NULL; 376544ccc8dSOmar Sandoval rq->io_start_time_ns = 0; 3773d244306SHou Tao rq->stats_sectors = 0; 378af76e555SChristoph Hellwig rq->nr_phys_segments = 0; 379af76e555SChristoph Hellwig #if defined(CONFIG_BLK_DEV_INTEGRITY) 380af76e555SChristoph Hellwig rq->nr_integrity_segments = 0; 381af76e555SChristoph Hellwig #endif 382af76e555SChristoph Hellwig rq->end_io = NULL; 383af76e555SChristoph Hellwig rq->end_io_data = NULL; 384af76e555SChristoph Hellwig 3854f266f2bSPavel Begunkov blk_crypto_rq_set_defaults(rq); 3864f266f2bSPavel Begunkov INIT_LIST_HEAD(&rq->queuelist); 3874f266f2bSPavel Begunkov /* tag was already set */ 3884f266f2bSPavel Begunkov WRITE_ONCE(rq->deadline, 0); 3890a467d0fSJens Axboe req_ref_set(rq, 1); 3907ea4d8a4SChristoph Hellwig 391dd6216bbSChristoph Hellwig if (rq->rq_flags & RQF_USE_SCHED) { 3927ea4d8a4SChristoph Hellwig struct elevator_queue *e = data->q->elevator; 3937ea4d8a4SChristoph Hellwig 3944f266f2bSPavel Begunkov INIT_HLIST_NODE(&rq->hash); 3954f266f2bSPavel Begunkov RB_CLEAR_NODE(&rq->rb_node); 3964f266f2bSPavel Begunkov 397dd6216bbSChristoph Hellwig if (e->type->ops.prepare_request) 3987ea4d8a4SChristoph Hellwig e->type->ops.prepare_request(rq); 3997ea4d8a4SChristoph Hellwig } 4007ea4d8a4SChristoph Hellwig 
4015dee8577SChristoph Hellwig return rq; 4025dee8577SChristoph Hellwig } 4035dee8577SChristoph Hellwig 404349302daSJens Axboe static inline struct request * 4055c17f45eSChengming Zhou __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data) 406349302daSJens Axboe { 407349302daSJens Axboe unsigned int tag, tag_offset; 408fe6134f6SJens Axboe struct blk_mq_tags *tags; 409349302daSJens Axboe struct request *rq; 410fe6134f6SJens Axboe unsigned long tag_mask; 411349302daSJens Axboe int i, nr = 0; 412349302daSJens Axboe 413fe6134f6SJens Axboe tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); 414fe6134f6SJens Axboe if (unlikely(!tag_mask)) 415349302daSJens Axboe return NULL; 416349302daSJens Axboe 417fe6134f6SJens Axboe tags = blk_mq_tags_from_data(data); 418fe6134f6SJens Axboe for (i = 0; tag_mask; i++) { 419fe6134f6SJens Axboe if (!(tag_mask & (1UL << i))) 420349302daSJens Axboe continue; 421349302daSJens Axboe tag = tag_offset + i; 422a22c00beSJens Axboe prefetch(tags->static_rqs[tag]); 423fe6134f6SJens Axboe tag_mask &= ~(1UL << i); 4245c17f45eSChengming Zhou rq = blk_mq_rq_ctx_init(data, tags, tag); 425013a7f95SJens Axboe rq_list_add(data->cached_rq, rq); 426c5fc7b93SJens Axboe nr++; 427349302daSJens Axboe } 428c5fc7b93SJens Axboe /* caller already holds a reference, add for remainder */ 429c5fc7b93SJens Axboe percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); 430349302daSJens Axboe data->nr_tags -= nr; 431349302daSJens Axboe 432013a7f95SJens Axboe return rq_list_pop(data->cached_rq); 433349302daSJens Axboe } 434349302daSJens Axboe 435b90cfaedSChristoph Hellwig static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) 436d2c0d383SChristoph Hellwig { 437e6e7abffSChristoph Hellwig struct request_queue *q = data->q; 4386f816b4bSTejun Heo u64 alloc_time_ns = 0; 43947c122e3SJens Axboe struct request *rq; 440600c3b0cSChristoph Hellwig unsigned int tag; 441d2c0d383SChristoph Hellwig 4426f816b4bSTejun Heo /* alloc_time includes depth and tag waits */ 4436f816b4bSTejun Heo if (blk_queue_rq_alloc_time(q)) 4446f816b4bSTejun Heo alloc_time_ns = ktime_get_ns(); 4456f816b4bSTejun Heo 446f9afca4dSJens Axboe if (data->cmd_flags & REQ_NOWAIT) 44703a07c92SGoldwyn Rodrigues data->flags |= BLK_MQ_REQ_NOWAIT; 448d2c0d383SChristoph Hellwig 449781dd830SJens Axboe if (q->elevator) { 450dd6216bbSChristoph Hellwig /* 451dd6216bbSChristoph Hellwig * All requests use scheduler tags when an I/O scheduler is 452dd6216bbSChristoph Hellwig * enabled for the queue. 453dd6216bbSChristoph Hellwig */ 454dd6216bbSChristoph Hellwig data->rq_flags |= RQF_SCHED_TAGS; 455781dd830SJens Axboe 456d2c0d383SChristoph Hellwig /* 4578d663f34SLin Feng * Flush/passthrough requests are special and go directly to the 458dd6216bbSChristoph Hellwig * dispatch list. 
459d2c0d383SChristoph Hellwig */
460be4c4278SBart Van Assche if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH &&
461dd6216bbSChristoph Hellwig !blk_op_is_passthrough(data->cmd_flags)) {
462dd6216bbSChristoph Hellwig struct elevator_mq_ops *ops = &q->elevator->type->ops;
463dd6216bbSChristoph Hellwig
464dd6216bbSChristoph Hellwig WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
465dd6216bbSChristoph Hellwig
466dd6216bbSChristoph Hellwig data->rq_flags |= RQF_USE_SCHED;
467dd6216bbSChristoph Hellwig if (ops->limit_depth)
468dd6216bbSChristoph Hellwig ops->limit_depth(data->cmd_flags, data);
469dd6216bbSChristoph Hellwig }
470d2c0d383SChristoph Hellwig }
471d2c0d383SChristoph Hellwig
472bf0beec0SMing Lei retry:
473600c3b0cSChristoph Hellwig data->ctx = blk_mq_get_ctx(q);
474600c3b0cSChristoph Hellwig data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
475dd6216bbSChristoph Hellwig if (!(data->rq_flags & RQF_SCHED_TAGS))
476600c3b0cSChristoph Hellwig blk_mq_tag_busy(data->hctx);
477600c3b0cSChristoph Hellwig
47899e48cd6SJohn Garry if (data->flags & BLK_MQ_REQ_RESERVED)
47999e48cd6SJohn Garry data->rq_flags |= RQF_RESV;
48099e48cd6SJohn Garry
481bf0beec0SMing Lei /*
482349302daSJens Axboe * Try batched alloc if we want more than 1 tag.
483349302daSJens Axboe */
484349302daSJens Axboe if (data->nr_tags > 1) {
4855c17f45eSChengming Zhou rq = __blk_mq_alloc_requests_batch(data);
4865c17f45eSChengming Zhou if (rq) {
4875c17f45eSChengming Zhou blk_mq_rq_time_init(rq, alloc_time_ns);
488349302daSJens Axboe return rq;
4895c17f45eSChengming Zhou }
490349302daSJens Axboe data->nr_tags = 1;
491349302daSJens Axboe }
492349302daSJens Axboe
493349302daSJens Axboe /*
494bf0beec0SMing Lei * Waiting allocations only fail because of an inactive hctx. In that
495bf0beec0SMing Lei * case just retry the hctx assignment and tag allocation as CPU hotplug
496bf0beec0SMing Lei * should have migrated us to an online CPU by now.
497bf0beec0SMing Lei */
498e4cdf1a1SChristoph Hellwig tag = blk_mq_get_tag(data);
499bf0beec0SMing Lei if (tag == BLK_MQ_NO_TAG) {
500bf0beec0SMing Lei if (data->flags & BLK_MQ_REQ_NOWAIT)
501037cebb8SChristoph Hellwig return NULL;
502bf0beec0SMing Lei /*
503b90cfaedSChristoph Hellwig * Give up the CPU and sleep for a short time to
504b90cfaedSChristoph Hellwig * ensure that threads using a realtime scheduling class
505b90cfaedSChristoph Hellwig * are migrated off the CPU, and thus off the hctx that
506b90cfaedSChristoph Hellwig * is going away.
507bf0beec0SMing Lei */ 508bf0beec0SMing Lei msleep(3); 509bf0beec0SMing Lei goto retry; 510bf0beec0SMing Lei } 511b90cfaedSChristoph Hellwig 5125c17f45eSChengming Zhou rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag); 5135c17f45eSChengming Zhou blk_mq_rq_time_init(rq, alloc_time_ns); 5145c17f45eSChengming Zhou return rq; 515d2c0d383SChristoph Hellwig } 516d2c0d383SChristoph Hellwig 5174b6a5d9cSJens Axboe static struct request *blk_mq_rq_cache_fill(struct request_queue *q, 5184b6a5d9cSJens Axboe struct blk_plug *plug, 5194b6a5d9cSJens Axboe blk_opf_t opf, 5209a95e4efSBart Van Assche blk_mq_req_flags_t flags) 521320ae51fSJens Axboe { 522e6e7abffSChristoph Hellwig struct blk_mq_alloc_data data = { 523e6e7abffSChristoph Hellwig .q = q, 524e6e7abffSChristoph Hellwig .flags = flags, 52516458cf3SBart Van Assche .cmd_flags = opf, 5264b6a5d9cSJens Axboe .nr_tags = plug->nr_ios, 5274b6a5d9cSJens Axboe .cached_rq = &plug->cached_rq, 528e6e7abffSChristoph Hellwig }; 529bd166ef1SJens Axboe struct request *rq; 5304b6a5d9cSJens Axboe 5314b6a5d9cSJens Axboe if (blk_queue_enter(q, flags)) 5324b6a5d9cSJens Axboe return NULL; 5334b6a5d9cSJens Axboe 5344b6a5d9cSJens Axboe plug->nr_ios = 1; 5354b6a5d9cSJens Axboe 5364b6a5d9cSJens Axboe rq = __blk_mq_alloc_requests(&data); 5374b6a5d9cSJens Axboe if (unlikely(!rq)) 5384b6a5d9cSJens Axboe blk_queue_exit(q); 5394b6a5d9cSJens Axboe return rq; 5404b6a5d9cSJens Axboe } 5414b6a5d9cSJens Axboe 5424b6a5d9cSJens Axboe static struct request *blk_mq_alloc_cached_request(struct request_queue *q, 5434b6a5d9cSJens Axboe blk_opf_t opf, 5444b6a5d9cSJens Axboe blk_mq_req_flags_t flags) 5454b6a5d9cSJens Axboe { 5464b6a5d9cSJens Axboe struct blk_plug *plug = current->plug; 5474b6a5d9cSJens Axboe struct request *rq; 5484b6a5d9cSJens Axboe 5494b6a5d9cSJens Axboe if (!plug) 5504b6a5d9cSJens Axboe return NULL; 55140467282SJinlong Chen 5524b6a5d9cSJens Axboe if (rq_list_empty(plug->cached_rq)) { 5534b6a5d9cSJens Axboe if (plug->nr_ios == 1) 5544b6a5d9cSJens Axboe return NULL; 5554b6a5d9cSJens Axboe rq = blk_mq_rq_cache_fill(q, plug, opf, flags); 55640467282SJinlong Chen if (!rq) 5574b6a5d9cSJens Axboe return NULL; 55840467282SJinlong Chen } else { 5594b6a5d9cSJens Axboe rq = rq_list_peek(&plug->cached_rq); 5604b6a5d9cSJens Axboe if (!rq || rq->q != q) 5614b6a5d9cSJens Axboe return NULL; 5624b6a5d9cSJens Axboe 5634b6a5d9cSJens Axboe if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) 5644b6a5d9cSJens Axboe return NULL; 5654b6a5d9cSJens Axboe if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) 5664b6a5d9cSJens Axboe return NULL; 5674b6a5d9cSJens Axboe 5684b6a5d9cSJens Axboe plug->cached_rq = rq_list_next(rq); 5695c17f45eSChengming Zhou blk_mq_rq_time_init(rq, 0); 57040467282SJinlong Chen } 57140467282SJinlong Chen 5724b6a5d9cSJens Axboe rq->cmd_flags = opf; 5734b6a5d9cSJens Axboe INIT_LIST_HEAD(&rq->queuelist); 5744b6a5d9cSJens Axboe return rq; 5754b6a5d9cSJens Axboe } 5764b6a5d9cSJens Axboe 5774b6a5d9cSJens Axboe struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, 5784b6a5d9cSJens Axboe blk_mq_req_flags_t flags) 5794b6a5d9cSJens Axboe { 5804b6a5d9cSJens Axboe struct request *rq; 5814b6a5d9cSJens Axboe 5824b6a5d9cSJens Axboe rq = blk_mq_alloc_cached_request(q, opf, flags); 5834b6a5d9cSJens Axboe if (!rq) { 5844b6a5d9cSJens Axboe struct blk_mq_alloc_data data = { 5854b6a5d9cSJens Axboe .q = q, 5864b6a5d9cSJens Axboe .flags = flags, 5874b6a5d9cSJens Axboe .cmd_flags = opf, 5884b6a5d9cSJens Axboe .nr_tags = 1, 5894b6a5d9cSJens Axboe }; 
590a492f075SJoe Lawrence int ret; 591320ae51fSJens Axboe 5923a0a5299SBart Van Assche ret = blk_queue_enter(q, flags); 593a492f075SJoe Lawrence if (ret) 594a492f075SJoe Lawrence return ERR_PTR(ret); 595320ae51fSJens Axboe 596b90cfaedSChristoph Hellwig rq = __blk_mq_alloc_requests(&data); 597bd166ef1SJens Axboe if (!rq) 598a5ea5811SChristoph Hellwig goto out_queue_exit; 5994b6a5d9cSJens Axboe } 6000c4de0f3SChristoph Hellwig rq->__data_len = 0; 6010c4de0f3SChristoph Hellwig rq->__sector = (sector_t) -1; 6020c4de0f3SChristoph Hellwig rq->bio = rq->biotail = NULL; 603320ae51fSJens Axboe return rq; 604a5ea5811SChristoph Hellwig out_queue_exit: 605a5ea5811SChristoph Hellwig blk_queue_exit(q); 606a5ea5811SChristoph Hellwig return ERR_PTR(-EWOULDBLOCK); 607320ae51fSJens Axboe } 6084bb659b1SJens Axboe EXPORT_SYMBOL(blk_mq_alloc_request); 609320ae51fSJens Axboe 610cd6ce148SBart Van Assche struct request *blk_mq_alloc_request_hctx(struct request_queue *q, 61116458cf3SBart Van Assche blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx) 6121f5bd336SMing Lin { 613e6e7abffSChristoph Hellwig struct blk_mq_alloc_data data = { 614e6e7abffSChristoph Hellwig .q = q, 615e6e7abffSChristoph Hellwig .flags = flags, 61616458cf3SBart Van Assche .cmd_flags = opf, 61747c122e3SJens Axboe .nr_tags = 1, 618e6e7abffSChristoph Hellwig }; 619600c3b0cSChristoph Hellwig u64 alloc_time_ns = 0; 620e3c5a78cSJohn Garry struct request *rq; 6216d2809d5SOmar Sandoval unsigned int cpu; 622600c3b0cSChristoph Hellwig unsigned int tag; 6231f5bd336SMing Lin int ret; 6241f5bd336SMing Lin 625600c3b0cSChristoph Hellwig /* alloc_time includes depth and tag waits */ 626600c3b0cSChristoph Hellwig if (blk_queue_rq_alloc_time(q)) 627600c3b0cSChristoph Hellwig alloc_time_ns = ktime_get_ns(); 628600c3b0cSChristoph Hellwig 6291f5bd336SMing Lin /* 6301f5bd336SMing Lin * If the tag allocator sleeps we could get an allocation for a 6311f5bd336SMing Lin * different hardware context. No need to complicate the low level 6321f5bd336SMing Lin * allocator for this for the rare use case of a command tied to 6331f5bd336SMing Lin * a specific queue. 6341f5bd336SMing Lin */ 6356ee858a3SKemeng Shi if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)) || 6366ee858a3SKemeng Shi WARN_ON_ONCE(!(flags & BLK_MQ_REQ_RESERVED))) 6371f5bd336SMing Lin return ERR_PTR(-EINVAL); 6381f5bd336SMing Lin 6391f5bd336SMing Lin if (hctx_idx >= q->nr_hw_queues) 6401f5bd336SMing Lin return ERR_PTR(-EIO); 6411f5bd336SMing Lin 6423a0a5299SBart Van Assche ret = blk_queue_enter(q, flags); 6431f5bd336SMing Lin if (ret) 6441f5bd336SMing Lin return ERR_PTR(ret); 6451f5bd336SMing Lin 646c8712c6aSChristoph Hellwig /* 647c8712c6aSChristoph Hellwig * Check if the hardware context is actually mapped to anything. 648c8712c6aSChristoph Hellwig * If not tell the caller that it should skip this queue. 
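 * (The caller then sees this as an -EXDEV error.)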
649c8712c6aSChristoph Hellwig */ 650a5ea5811SChristoph Hellwig ret = -EXDEV; 6514e5cc99eSMing Lei data.hctx = xa_load(&q->hctx_table, hctx_idx); 652e6e7abffSChristoph Hellwig if (!blk_mq_hw_queue_mapped(data.hctx)) 653a5ea5811SChristoph Hellwig goto out_queue_exit; 654e6e7abffSChristoph Hellwig cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); 65514dc7a18SBart Van Assche if (cpu >= nr_cpu_ids) 65614dc7a18SBart Van Assche goto out_queue_exit; 657e6e7abffSChristoph Hellwig data.ctx = __blk_mq_get_ctx(q, cpu); 6581f5bd336SMing Lin 659dd6216bbSChristoph Hellwig if (q->elevator) 660dd6216bbSChristoph Hellwig data.rq_flags |= RQF_SCHED_TAGS; 661781dd830SJens Axboe else 662dd6216bbSChristoph Hellwig blk_mq_tag_busy(data.hctx); 663600c3b0cSChristoph Hellwig 66499e48cd6SJohn Garry if (flags & BLK_MQ_REQ_RESERVED) 66599e48cd6SJohn Garry data.rq_flags |= RQF_RESV; 66699e48cd6SJohn Garry 667a5ea5811SChristoph Hellwig ret = -EWOULDBLOCK; 668600c3b0cSChristoph Hellwig tag = blk_mq_get_tag(&data); 669600c3b0cSChristoph Hellwig if (tag == BLK_MQ_NO_TAG) 670a5ea5811SChristoph Hellwig goto out_queue_exit; 6715c17f45eSChengming Zhou rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag); 6725c17f45eSChengming Zhou blk_mq_rq_time_init(rq, alloc_time_ns); 673e3c5a78cSJohn Garry rq->__data_len = 0; 674e3c5a78cSJohn Garry rq->__sector = (sector_t) -1; 675e3c5a78cSJohn Garry rq->bio = rq->biotail = NULL; 676e3c5a78cSJohn Garry return rq; 677600c3b0cSChristoph Hellwig 678a5ea5811SChristoph Hellwig out_queue_exit: 679a5ea5811SChristoph Hellwig blk_queue_exit(q); 680a5ea5811SChristoph Hellwig return ERR_PTR(ret); 6811f5bd336SMing Lin } 6821f5bd336SMing Lin EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); 6831f5bd336SMing Lin 684*e5c0ca13SChengming Zhou static void blk_mq_finish_request(struct request *rq) 685*e5c0ca13SChengming Zhou { 686*e5c0ca13SChengming Zhou struct request_queue *q = rq->q; 687*e5c0ca13SChengming Zhou 688*e5c0ca13SChengming Zhou if (rq->rq_flags & RQF_USE_SCHED) { 689*e5c0ca13SChengming Zhou q->elevator->type->ops.finish_request(rq); 690*e5c0ca13SChengming Zhou /* 691*e5c0ca13SChengming Zhou * For postflush request that may need to be 692*e5c0ca13SChengming Zhou * completed twice, we should clear this flag 693*e5c0ca13SChengming Zhou * to avoid double finish_request() on the rq. 
694*e5c0ca13SChengming Zhou */ 695*e5c0ca13SChengming Zhou rq->rq_flags &= ~RQF_USE_SCHED; 696*e5c0ca13SChengming Zhou } 697*e5c0ca13SChengming Zhou } 698*e5c0ca13SChengming Zhou 69912f5b931SKeith Busch static void __blk_mq_free_request(struct request *rq) 70012f5b931SKeith Busch { 70112f5b931SKeith Busch struct request_queue *q = rq->q; 70212f5b931SKeith Busch struct blk_mq_ctx *ctx = rq->mq_ctx; 703ea4f995eSJens Axboe struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 70412f5b931SKeith Busch const int sched_tag = rq->internal_tag; 70512f5b931SKeith Busch 706a892c8d5SSatya Tangirala blk_crypto_free_request(rq); 707986d413bSBart Van Assche blk_pm_mark_last_busy(rq); 708ea4f995eSJens Axboe rq->mq_hctx = NULL; 709ddad5933STian Lan 710ddad5933STian Lan if (rq->rq_flags & RQF_MQ_INFLIGHT) 711ddad5933STian Lan __blk_mq_dec_active_requests(hctx); 712ddad5933STian Lan 71376647368SChristoph Hellwig if (rq->tag != BLK_MQ_NO_TAG) 714cae740a0SJohn Garry blk_mq_put_tag(hctx->tags, ctx, rq->tag); 71576647368SChristoph Hellwig if (sched_tag != BLK_MQ_NO_TAG) 716cae740a0SJohn Garry blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); 71712f5b931SKeith Busch blk_mq_sched_restart(hctx); 71812f5b931SKeith Busch blk_queue_exit(q); 71912f5b931SKeith Busch } 72012f5b931SKeith Busch 7216af54051SChristoph Hellwig void blk_mq_free_request(struct request *rq) 722320ae51fSJens Axboe { 723320ae51fSJens Axboe struct request_queue *q = rq->q; 724320ae51fSJens Axboe 725*e5c0ca13SChengming Zhou blk_mq_finish_request(rq); 7266af54051SChristoph Hellwig 7277beb2f84SJens Axboe if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) 728d152c682SChristoph Hellwig laptop_io_completion(q->disk->bdi); 7297beb2f84SJens Axboe 730a7905043SJosef Bacik rq_qos_done(q, rq); 7310d2602caSJens Axboe 73212f5b931SKeith Busch WRITE_ONCE(rq->state, MQ_RQ_IDLE); 7330a467d0fSJens Axboe if (req_ref_put_and_test(rq)) 73412f5b931SKeith Busch __blk_mq_free_request(rq); 735320ae51fSJens Axboe } 7361a3b595aSJens Axboe EXPORT_SYMBOL_GPL(blk_mq_free_request); 737320ae51fSJens Axboe 73847c122e3SJens Axboe void blk_mq_free_plug_rqs(struct blk_plug *plug) 739320ae51fSJens Axboe { 74047c122e3SJens Axboe struct request *rq; 741fe1f4526SJens Axboe 742c5fc7b93SJens Axboe while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) 74347c122e3SJens Axboe blk_mq_free_request(rq); 74447c122e3SJens Axboe } 745522a7775SOmar Sandoval 74622350ad7SChristoph Hellwig void blk_dump_rq_flags(struct request *rq, char *msg) 74722350ad7SChristoph Hellwig { 74822350ad7SChristoph Hellwig printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, 749f3fa33acSChristoph Hellwig rq->q->disk ? 
rq->q->disk->disk_name : "?", 75016458cf3SBart Van Assche (__force unsigned long long) rq->cmd_flags); 75122350ad7SChristoph Hellwig 75222350ad7SChristoph Hellwig printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", 75322350ad7SChristoph Hellwig (unsigned long long)blk_rq_pos(rq), 75422350ad7SChristoph Hellwig blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); 75522350ad7SChristoph Hellwig printk(KERN_INFO " bio %p, biotail %p, len %u\n", 75622350ad7SChristoph Hellwig rq->bio, rq->biotail, blk_rq_bytes(rq)); 75722350ad7SChristoph Hellwig } 75822350ad7SChristoph Hellwig EXPORT_SYMBOL(blk_dump_rq_flags); 75922350ad7SChristoph Hellwig 7609be3e06fSJens Axboe static void req_bio_endio(struct request *rq, struct bio *bio, 7619be3e06fSJens Axboe unsigned int nbytes, blk_status_t error) 7629be3e06fSJens Axboe { 763478eb72bSPavel Begunkov if (unlikely(error)) { 7649be3e06fSJens Axboe bio->bi_status = error; 765478eb72bSPavel Begunkov } else if (req_op(rq) == REQ_OP_ZONE_APPEND) { 7669be3e06fSJens Axboe /* 7679be3e06fSJens Axboe * Partial zone append completions cannot be supported as the 7689be3e06fSJens Axboe * BIO fragments may end up not being written sequentially. 7699be3e06fSJens Axboe */ 770297db731SPavel Begunkov if (bio->bi_iter.bi_size != nbytes) 7719be3e06fSJens Axboe bio->bi_status = BLK_STS_IOERR; 7729be3e06fSJens Axboe else 7739be3e06fSJens Axboe bio->bi_iter.bi_sector = rq->__sector; 7749be3e06fSJens Axboe } 7759be3e06fSJens Axboe 776478eb72bSPavel Begunkov bio_advance(bio, nbytes); 777478eb72bSPavel Begunkov 778478eb72bSPavel Begunkov if (unlikely(rq->rq_flags & RQF_QUIET)) 779478eb72bSPavel Begunkov bio_set_flag(bio, BIO_QUIET); 7809be3e06fSJens Axboe /* don't actually finish bio if it's part of flush sequence */ 7819be3e06fSJens Axboe if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) 7829be3e06fSJens Axboe bio_endio(bio); 7839be3e06fSJens Axboe } 7849be3e06fSJens Axboe 7859be3e06fSJens Axboe static void blk_account_io_completion(struct request *req, unsigned int bytes) 7869be3e06fSJens Axboe { 7879be3e06fSJens Axboe if (req->part && blk_do_io_stat(req)) { 7889be3e06fSJens Axboe const int sgrp = op_stat_group(req_op(req)); 7899be3e06fSJens Axboe 7909be3e06fSJens Axboe part_stat_lock(); 7919be3e06fSJens Axboe part_stat_add(req->part, sectors[sgrp], bytes >> 9); 7929be3e06fSJens Axboe part_stat_unlock(); 7939be3e06fSJens Axboe } 7949be3e06fSJens Axboe } 7959be3e06fSJens Axboe 7960d7a29a2SChristoph Hellwig static void blk_print_req_error(struct request *req, blk_status_t status) 7970d7a29a2SChristoph Hellwig { 7980d7a29a2SChristoph Hellwig printk_ratelimited(KERN_ERR 7990d7a29a2SChristoph Hellwig "%s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " 8000d7a29a2SChristoph Hellwig "phys_seg %u prio class %u\n", 8010d7a29a2SChristoph Hellwig blk_status_to_str(status), 802f3fa33acSChristoph Hellwig req->q->disk ? req->q->disk->disk_name : "?", 80316458cf3SBart Van Assche blk_rq_pos(req), (__force u32)req_op(req), 80416458cf3SBart Van Assche blk_op_str(req_op(req)), 80516458cf3SBart Van Assche (__force u32)(req->cmd_flags & ~REQ_OP_MASK), 8060d7a29a2SChristoph Hellwig req->nr_phys_segments, 8070d7a29a2SChristoph Hellwig IOPRIO_PRIO_CLASS(req->ioprio)); 8080d7a29a2SChristoph Hellwig } 8090d7a29a2SChristoph Hellwig 8105581a5ddSJens Axboe /* 8115581a5ddSJens Axboe * Fully end IO on a request. Does not support partial completions, or 8125581a5ddSJens Axboe * errors. 
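 * Called from the batched completion path, blk_mq_end_request_batch().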
8135581a5ddSJens Axboe */ 8145581a5ddSJens Axboe static void blk_complete_request(struct request *req) 8155581a5ddSJens Axboe { 8165581a5ddSJens Axboe const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; 8175581a5ddSJens Axboe int total_bytes = blk_rq_bytes(req); 8185581a5ddSJens Axboe struct bio *bio = req->bio; 8195581a5ddSJens Axboe 8205581a5ddSJens Axboe trace_block_rq_complete(req, BLK_STS_OK, total_bytes); 8215581a5ddSJens Axboe 8225581a5ddSJens Axboe if (!bio) 8235581a5ddSJens Axboe return; 8245581a5ddSJens Axboe 8255581a5ddSJens Axboe #ifdef CONFIG_BLK_DEV_INTEGRITY 8265581a5ddSJens Axboe if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ) 8275581a5ddSJens Axboe req->q->integrity.profile->complete_fn(req, total_bytes); 8285581a5ddSJens Axboe #endif 8295581a5ddSJens Axboe 8309cd1e566SEric Biggers /* 8319cd1e566SEric Biggers * Upper layers may call blk_crypto_evict_key() anytime after the last 8329cd1e566SEric Biggers * bio_endio(). Therefore, the keyslot must be released before that. 8339cd1e566SEric Biggers */ 8349cd1e566SEric Biggers blk_crypto_rq_put_keyslot(req); 8359cd1e566SEric Biggers 8365581a5ddSJens Axboe blk_account_io_completion(req, total_bytes); 8375581a5ddSJens Axboe 8385581a5ddSJens Axboe do { 8395581a5ddSJens Axboe struct bio *next = bio->bi_next; 8405581a5ddSJens Axboe 8415581a5ddSJens Axboe /* Completion has already been traced */ 8425581a5ddSJens Axboe bio_clear_flag(bio, BIO_TRACE_COMPLETION); 843a12821d5SPankaj Raghav 844a12821d5SPankaj Raghav if (req_op(req) == REQ_OP_ZONE_APPEND) 845a12821d5SPankaj Raghav bio->bi_iter.bi_sector = req->__sector; 846a12821d5SPankaj Raghav 8475581a5ddSJens Axboe if (!is_flush) 8485581a5ddSJens Axboe bio_endio(bio); 8495581a5ddSJens Axboe bio = next; 8505581a5ddSJens Axboe } while (bio); 8515581a5ddSJens Axboe 8525581a5ddSJens Axboe /* 8535581a5ddSJens Axboe * Reset counters so that the request stacking driver 8545581a5ddSJens Axboe * can find how many bytes remain in the request 8555581a5ddSJens Axboe * later. 8565581a5ddSJens Axboe */ 857ab3e1d3bSJens Axboe if (!req->end_io) { 8585581a5ddSJens Axboe req->bio = NULL; 8595581a5ddSJens Axboe req->__data_len = 0; 8605581a5ddSJens Axboe } 861ab3e1d3bSJens Axboe } 8625581a5ddSJens Axboe 8639be3e06fSJens Axboe /** 8649be3e06fSJens Axboe * blk_update_request - Complete multiple bytes without completing the request 8659be3e06fSJens Axboe * @req: the request being processed 8669be3e06fSJens Axboe * @error: block status code 8679be3e06fSJens Axboe * @nr_bytes: number of bytes to complete for @req 8689be3e06fSJens Axboe * 8699be3e06fSJens Axboe * Description: 8709be3e06fSJens Axboe * Ends I/O on a number of bytes attached to @req, but doesn't complete 8719be3e06fSJens Axboe * the request structure even if @req doesn't have leftover. 8729be3e06fSJens Axboe * If @req has leftover, sets it up for the next range of segments. 8739be3e06fSJens Axboe * 8749be3e06fSJens Axboe * Passing the result of blk_rq_bytes() as @nr_bytes guarantees 8759be3e06fSJens Axboe * %false return from this function. 8769be3e06fSJens Axboe * 8779be3e06fSJens Axboe * Note: 8789be3e06fSJens Axboe * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function 8799be3e06fSJens Axboe * except in the consistency check at the end of this function. 
8809be3e06fSJens Axboe * 8819be3e06fSJens Axboe * Return: 8829be3e06fSJens Axboe * %false - this request doesn't have any more data 8839be3e06fSJens Axboe * %true - this request has more data 8849be3e06fSJens Axboe **/ 8859be3e06fSJens Axboe bool blk_update_request(struct request *req, blk_status_t error, 8869be3e06fSJens Axboe unsigned int nr_bytes) 8879be3e06fSJens Axboe { 8889be3e06fSJens Axboe int total_bytes; 8899be3e06fSJens Axboe 8908a7d267bSChristoph Hellwig trace_block_rq_complete(req, error, nr_bytes); 8919be3e06fSJens Axboe 8929be3e06fSJens Axboe if (!req->bio) 8939be3e06fSJens Axboe return false; 8949be3e06fSJens Axboe 8959be3e06fSJens Axboe #ifdef CONFIG_BLK_DEV_INTEGRITY 8969be3e06fSJens Axboe if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && 8979be3e06fSJens Axboe error == BLK_STS_OK) 8989be3e06fSJens Axboe req->q->integrity.profile->complete_fn(req, nr_bytes); 8999be3e06fSJens Axboe #endif 9009be3e06fSJens Axboe 9019cd1e566SEric Biggers /* 9029cd1e566SEric Biggers * Upper layers may call blk_crypto_evict_key() anytime after the last 9039cd1e566SEric Biggers * bio_endio(). Therefore, the keyslot must be released before that. 9049cd1e566SEric Biggers */ 9059cd1e566SEric Biggers if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req)) 9069cd1e566SEric Biggers __blk_crypto_rq_put_keyslot(req); 9079cd1e566SEric Biggers 9089be3e06fSJens Axboe if (unlikely(error && !blk_rq_is_passthrough(req) && 9093d973a76SChristoph Hellwig !(req->rq_flags & RQF_QUIET)) && 9103d973a76SChristoph Hellwig !test_bit(GD_DEAD, &req->q->disk->state)) { 9119be3e06fSJens Axboe blk_print_req_error(req, error); 912d5869fdcSYang Shi trace_block_rq_error(req, error, nr_bytes); 913d5869fdcSYang Shi } 9149be3e06fSJens Axboe 9159be3e06fSJens Axboe blk_account_io_completion(req, nr_bytes); 9169be3e06fSJens Axboe 9179be3e06fSJens Axboe total_bytes = 0; 9189be3e06fSJens Axboe while (req->bio) { 9199be3e06fSJens Axboe struct bio *bio = req->bio; 9209be3e06fSJens Axboe unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); 9219be3e06fSJens Axboe 9229be3e06fSJens Axboe if (bio_bytes == bio->bi_iter.bi_size) 9239be3e06fSJens Axboe req->bio = bio->bi_next; 9249be3e06fSJens Axboe 9259be3e06fSJens Axboe /* Completion has already been traced */ 9269be3e06fSJens Axboe bio_clear_flag(bio, BIO_TRACE_COMPLETION); 9279be3e06fSJens Axboe req_bio_endio(req, bio, bio_bytes, error); 9289be3e06fSJens Axboe 9299be3e06fSJens Axboe total_bytes += bio_bytes; 9309be3e06fSJens Axboe nr_bytes -= bio_bytes; 9319be3e06fSJens Axboe 9329be3e06fSJens Axboe if (!nr_bytes) 9339be3e06fSJens Axboe break; 9349be3e06fSJens Axboe } 9359be3e06fSJens Axboe 9369be3e06fSJens Axboe /* 9379be3e06fSJens Axboe * completely done 9389be3e06fSJens Axboe */ 9399be3e06fSJens Axboe if (!req->bio) { 9409be3e06fSJens Axboe /* 9419be3e06fSJens Axboe * Reset counters so that the request stacking driver 9429be3e06fSJens Axboe * can find how many bytes remain in the request 9439be3e06fSJens Axboe * later. 
9449be3e06fSJens Axboe */ 9459be3e06fSJens Axboe req->__data_len = 0; 9469be3e06fSJens Axboe return false; 9479be3e06fSJens Axboe } 9489be3e06fSJens Axboe 9499be3e06fSJens Axboe req->__data_len -= total_bytes; 9509be3e06fSJens Axboe 9519be3e06fSJens Axboe /* update sector only for requests with clear definition of sector */ 9529be3e06fSJens Axboe if (!blk_rq_is_passthrough(req)) 9539be3e06fSJens Axboe req->__sector += total_bytes >> 9; 9549be3e06fSJens Axboe 9559be3e06fSJens Axboe /* mixed attributes always follow the first bio */ 9569be3e06fSJens Axboe if (req->rq_flags & RQF_MIXED_MERGE) { 9579be3e06fSJens Axboe req->cmd_flags &= ~REQ_FAILFAST_MASK; 9589be3e06fSJens Axboe req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; 9599be3e06fSJens Axboe } 9609be3e06fSJens Axboe 9619be3e06fSJens Axboe if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { 9629be3e06fSJens Axboe /* 9639be3e06fSJens Axboe * If total number of sectors is less than the first segment 9649be3e06fSJens Axboe * size, something has gone terribly wrong. 9659be3e06fSJens Axboe */ 9669be3e06fSJens Axboe if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 9679be3e06fSJens Axboe blk_dump_rq_flags(req, "request botched"); 9689be3e06fSJens Axboe req->__data_len = blk_rq_cur_bytes(req); 9699be3e06fSJens Axboe } 9709be3e06fSJens Axboe 9719be3e06fSJens Axboe /* recalculate the number of segments */ 9729be3e06fSJens Axboe req->nr_phys_segments = blk_recalc_rq_segments(req); 9739be3e06fSJens Axboe } 9749be3e06fSJens Axboe 9759be3e06fSJens Axboe return true; 9769be3e06fSJens Axboe } 9779be3e06fSJens Axboe EXPORT_SYMBOL_GPL(blk_update_request); 9789be3e06fSJens Axboe 979450b7879SChristoph Hellwig static inline void blk_account_io_done(struct request *req, u64 now) 980450b7879SChristoph Hellwig { 9815a80bd07SHengqi Chen trace_block_io_done(req); 9825a80bd07SHengqi Chen 983450b7879SChristoph Hellwig /* 984450b7879SChristoph Hellwig * Account IO completion. flush_rq isn't accounted as a 985450b7879SChristoph Hellwig * normal IO on queueing nor completion. Accounting the 986450b7879SChristoph Hellwig * containing request is enough. 987450b7879SChristoph Hellwig */ 988450b7879SChristoph Hellwig if (blk_do_io_stat(req) && req->part && 98906965037SChaitanya Kulkarni !(req->rq_flags & RQF_FLUSH_SEQ)) { 99006965037SChaitanya Kulkarni const int sgrp = op_stat_group(req_op(req)); 99106965037SChaitanya Kulkarni 99206965037SChaitanya Kulkarni part_stat_lock(); 99306965037SChaitanya Kulkarni update_io_ticks(req->part, jiffies, true); 99406965037SChaitanya Kulkarni part_stat_inc(req->part, ios[sgrp]); 99506965037SChaitanya Kulkarni part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); 99606965037SChaitanya Kulkarni part_stat_unlock(); 99706965037SChaitanya Kulkarni } 998450b7879SChristoph Hellwig } 999450b7879SChristoph Hellwig 1000e165fb4dSChaitanya Kulkarni static inline void blk_account_io_start(struct request *req) 1001450b7879SChristoph Hellwig { 10025a80bd07SHengqi Chen trace_block_io_start(req); 10035a80bd07SHengqi Chen 1004e165fb4dSChaitanya Kulkarni if (blk_do_io_stat(req)) { 100541fa7222SChristoph Hellwig /* 100641fa7222SChristoph Hellwig * All non-passthrough requests are created from a bio with one 100741fa7222SChristoph Hellwig * exception: when a flush command that is part of a flush sequence 100841fa7222SChristoph Hellwig * generated by the state machine in blk-flush.c is cloned onto the 100941fa7222SChristoph Hellwig * lower device by dm-multipath we can get here without a bio. 
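 * In that case the I/O is accounted to the whole-disk device (part0).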
101041fa7222SChristoph Hellwig */ 1011e165fb4dSChaitanya Kulkarni if (req->bio) 1012e165fb4dSChaitanya Kulkarni req->part = req->bio->bi_bdev; 101341fa7222SChristoph Hellwig else 1014e165fb4dSChaitanya Kulkarni req->part = req->q->disk->part0; 1015450b7879SChristoph Hellwig 1016450b7879SChristoph Hellwig part_stat_lock(); 1017e165fb4dSChaitanya Kulkarni update_io_ticks(req->part, jiffies, false); 1018450b7879SChristoph Hellwig part_stat_unlock(); 1019450b7879SChristoph Hellwig } 1020450b7879SChristoph Hellwig } 1021450b7879SChristoph Hellwig 1022f794f335SJens Axboe static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) 10230d11e6acSMing Lei { 102454bdd67dSKeith Busch if (rq->rq_flags & RQF_STATS) 1025320ae51fSJens Axboe blk_stat_add(rq, now); 1026320ae51fSJens Axboe 1027320ae51fSJens Axboe blk_mq_sched_completed_request(rq, now); 1028320ae51fSJens Axboe blk_account_io_done(rq, now); 10298971a3b7SPavel Begunkov } 1030320ae51fSJens Axboe 1031f794f335SJens Axboe inline void __blk_mq_end_request(struct request *rq, blk_status_t error) 1032f794f335SJens Axboe { 1033f794f335SJens Axboe if (blk_mq_need_time_stamp(rq)) 1034f794f335SJens Axboe __blk_mq_end_request_acct(rq, ktime_get_ns()); 1035320ae51fSJens Axboe 1036*e5c0ca13SChengming Zhou blk_mq_finish_request(rq); 1037*e5c0ca13SChengming Zhou 103891b63639SChristoph Hellwig if (rq->end_io) { 1039a7905043SJosef Bacik rq_qos_done(rq->q, rq); 1040de671d61SJens Axboe if (rq->end_io(rq, error) == RQ_END_IO_FREE) 1041de671d61SJens Axboe blk_mq_free_request(rq); 104291b63639SChristoph Hellwig } else { 1043320ae51fSJens Axboe blk_mq_free_request(rq); 1044320ae51fSJens Axboe } 104591b63639SChristoph Hellwig } 1046c8a446adSChristoph Hellwig EXPORT_SYMBOL(__blk_mq_end_request); 104763151a44SChristoph Hellwig 10482a842acaSChristoph Hellwig void blk_mq_end_request(struct request *rq, blk_status_t error) 104963151a44SChristoph Hellwig { 105063151a44SChristoph Hellwig if (blk_update_request(rq, error, blk_rq_bytes(rq))) 105163151a44SChristoph Hellwig BUG(); 1052c8a446adSChristoph Hellwig __blk_mq_end_request(rq, error); 105363151a44SChristoph Hellwig } 1054c8a446adSChristoph Hellwig EXPORT_SYMBOL(blk_mq_end_request); 1055320ae51fSJens Axboe 1056f794f335SJens Axboe #define TAG_COMP_BATCH 32 1057f794f335SJens Axboe 1058f794f335SJens Axboe static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, 1059f794f335SJens Axboe int *tag_array, int nr_tags) 1060f794f335SJens Axboe { 1061f794f335SJens Axboe struct request_queue *q = hctx->queue; 1062f794f335SJens Axboe 10633b87c6eaSMing Lei /* 10643b87c6eaSMing Lei * All requests should have been marked as RQF_MQ_INFLIGHT, so 10653b87c6eaSMing Lei * update hctx->nr_active in batch 10663b87c6eaSMing Lei */ 10673b87c6eaSMing Lei if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 10683b87c6eaSMing Lei __blk_mq_sub_active_requests(hctx, nr_tags); 10693b87c6eaSMing Lei 1070f794f335SJens Axboe blk_mq_put_tags(hctx->tags, tag_array, nr_tags); 1071f794f335SJens Axboe percpu_ref_put_many(&q->q_usage_counter, nr_tags); 1072f794f335SJens Axboe } 1073f794f335SJens Axboe 1074f794f335SJens Axboe void blk_mq_end_request_batch(struct io_comp_batch *iob) 1075f794f335SJens Axboe { 1076f794f335SJens Axboe int tags[TAG_COMP_BATCH], nr_tags = 0; 107702f7eab0SJens Axboe struct blk_mq_hw_ctx *cur_hctx = NULL; 1078f794f335SJens Axboe struct request *rq; 1079f794f335SJens Axboe u64 now = 0; 1080f794f335SJens Axboe 1081f794f335SJens Axboe if (iob->need_ts) 1082f794f335SJens Axboe now = ktime_get_ns(); 1083f794f335SJens 
Axboe 1084f794f335SJens Axboe while ((rq = rq_list_pop(&iob->req_list)) != NULL) { 1085f794f335SJens Axboe prefetch(rq->bio); 1086f794f335SJens Axboe prefetch(rq->rq_next); 1087f794f335SJens Axboe 10885581a5ddSJens Axboe blk_complete_request(rq); 1089f794f335SJens Axboe if (iob->need_ts) 1090f794f335SJens Axboe __blk_mq_end_request_acct(rq, now); 1091f794f335SJens Axboe 1092*e5c0ca13SChengming Zhou blk_mq_finish_request(rq); 1093*e5c0ca13SChengming Zhou 109498b26a0eSJens Axboe rq_qos_done(rq->q, rq); 109598b26a0eSJens Axboe 1096ab3e1d3bSJens Axboe /* 1097ab3e1d3bSJens Axboe * If end_io handler returns NONE, then it still has 1098ab3e1d3bSJens Axboe * ownership of the request. 1099ab3e1d3bSJens Axboe */ 1100ab3e1d3bSJens Axboe if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) 1101ab3e1d3bSJens Axboe continue; 1102ab3e1d3bSJens Axboe 1103f794f335SJens Axboe WRITE_ONCE(rq->state, MQ_RQ_IDLE); 11040a467d0fSJens Axboe if (!req_ref_put_and_test(rq)) 1105f794f335SJens Axboe continue; 1106f794f335SJens Axboe 1107f794f335SJens Axboe blk_crypto_free_request(rq); 1108f794f335SJens Axboe blk_pm_mark_last_busy(rq); 1109f794f335SJens Axboe 111002f7eab0SJens Axboe if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { 111102f7eab0SJens Axboe if (cur_hctx) 111202f7eab0SJens Axboe blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); 1113f794f335SJens Axboe nr_tags = 0; 111402f7eab0SJens Axboe cur_hctx = rq->mq_hctx; 1115f794f335SJens Axboe } 1116f794f335SJens Axboe tags[nr_tags++] = rq->tag; 1117f794f335SJens Axboe } 1118f794f335SJens Axboe 1119f794f335SJens Axboe if (nr_tags) 112002f7eab0SJens Axboe blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); 1121f794f335SJens Axboe } 1122f794f335SJens Axboe EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); 1123f794f335SJens Axboe 1124f9ab4918SSebastian Andrzej Siewior static void blk_complete_reqs(struct llist_head *list) 1125c3077b5dSChristoph Hellwig { 1126f9ab4918SSebastian Andrzej Siewior struct llist_node *entry = llist_reverse_order(llist_del_all(list)); 1127f9ab4918SSebastian Andrzej Siewior struct request *rq, *next; 1128c3077b5dSChristoph Hellwig 1129f9ab4918SSebastian Andrzej Siewior llist_for_each_entry_safe(rq, next, entry, ipi_list) 1130c3077b5dSChristoph Hellwig rq->q->mq_ops->complete(rq); 1131c3077b5dSChristoph Hellwig } 1132c3077b5dSChristoph Hellwig 1133f9ab4918SSebastian Andrzej Siewior static __latent_entropy void blk_done_softirq(struct softirq_action *h) 1134115243f5SChristoph Hellwig { 1135f9ab4918SSebastian Andrzej Siewior blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); 1136c3077b5dSChristoph Hellwig } 1137c3077b5dSChristoph Hellwig 1138c3077b5dSChristoph Hellwig static int blk_softirq_cpu_dead(unsigned int cpu) 1139c3077b5dSChristoph Hellwig { 1140f9ab4918SSebastian Andrzej Siewior blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); 1141c3077b5dSChristoph Hellwig return 0; 1142c3077b5dSChristoph Hellwig } 1143c3077b5dSChristoph Hellwig 114430a91cb4SChristoph Hellwig static void __blk_mq_complete_request_remote(void *data) 1145320ae51fSJens Axboe { 1146f9ab4918SSebastian Andrzej Siewior __raise_softirq_irqoff(BLOCK_SOFTIRQ); 114736e76539SMing Lei } 114836e76539SMing Lei 114996339526SChristoph Hellwig static inline bool blk_mq_complete_need_ipi(struct request *rq) 115096339526SChristoph Hellwig { 115196339526SChristoph Hellwig int cpu = raw_smp_processor_id(); 115296339526SChristoph Hellwig 115396339526SChristoph Hellwig if (!IS_ENABLED(CONFIG_SMP) || 115496339526SChristoph Hellwig !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) 
115596339526SChristoph Hellwig return false; 115671425189SSebastian Andrzej Siewior /* 115771425189SSebastian Andrzej Siewior * With force threaded interrupts enabled, raising softirq from an SMP 115871425189SSebastian Andrzej Siewior * function call will always result in waking the ksoftirqd thread. 115971425189SSebastian Andrzej Siewior * This is probably worse than completing the request on a different 116071425189SSebastian Andrzej Siewior * cache domain. 116171425189SSebastian Andrzej Siewior */ 116291cc470eSTanner Love if (force_irqthreads()) 116371425189SSebastian Andrzej Siewior return false; 116496339526SChristoph Hellwig 116596339526SChristoph Hellwig /* same CPU or cache domain? Complete locally */ 116696339526SChristoph Hellwig if (cpu == rq->mq_ctx->cpu || 116796339526SChristoph Hellwig (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && 116896339526SChristoph Hellwig cpus_share_cache(cpu, rq->mq_ctx->cpu))) 116996339526SChristoph Hellwig return false; 117096339526SChristoph Hellwig 117196339526SChristoph Hellwig /* don't try to IPI to an offline CPU */ 117296339526SChristoph Hellwig return cpu_online(rq->mq_ctx->cpu); 117396339526SChristoph Hellwig } 117496339526SChristoph Hellwig 1175f9ab4918SSebastian Andrzej Siewior static void blk_mq_complete_send_ipi(struct request *rq) 1176f9ab4918SSebastian Andrzej Siewior { 1177f9ab4918SSebastian Andrzej Siewior struct llist_head *list; 1178f9ab4918SSebastian Andrzej Siewior unsigned int cpu; 1179f9ab4918SSebastian Andrzej Siewior 1180f9ab4918SSebastian Andrzej Siewior cpu = rq->mq_ctx->cpu; 1181f9ab4918SSebastian Andrzej Siewior list = &per_cpu(blk_cpu_done, cpu); 1182f9ab4918SSebastian Andrzej Siewior if (llist_add(&rq->ipi_list, list)) { 1183f9ab4918SSebastian Andrzej Siewior INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); 1184f9ab4918SSebastian Andrzej Siewior smp_call_function_single_async(cpu, &rq->csd); 1185f9ab4918SSebastian Andrzej Siewior } 1186f9ab4918SSebastian Andrzej Siewior } 1187f9ab4918SSebastian Andrzej Siewior 1188f9ab4918SSebastian Andrzej Siewior static void blk_mq_raise_softirq(struct request *rq) 1189f9ab4918SSebastian Andrzej Siewior { 1190f9ab4918SSebastian Andrzej Siewior struct llist_head *list; 1191f9ab4918SSebastian Andrzej Siewior 1192f9ab4918SSebastian Andrzej Siewior preempt_disable(); 1193f9ab4918SSebastian Andrzej Siewior list = this_cpu_ptr(&blk_cpu_done); 1194f9ab4918SSebastian Andrzej Siewior if (llist_add(&rq->ipi_list, list)) 1195f9ab4918SSebastian Andrzej Siewior raise_softirq(BLOCK_SOFTIRQ); 1196f9ab4918SSebastian Andrzej Siewior preempt_enable(); 1197f9ab4918SSebastian Andrzej Siewior } 1198f9ab4918SSebastian Andrzej Siewior 119940d09b53SChristoph Hellwig bool blk_mq_complete_request_remote(struct request *rq) 120040d09b53SChristoph Hellwig { 120140d09b53SChristoph Hellwig WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); 120240d09b53SChristoph Hellwig 12034ab32bf3SJens Axboe /* 1204f168420cSLiu Song * For request which hctx has only one ctx mapping, 1205f168420cSLiu Song * or a polled request, always complete locally, 1206f168420cSLiu Song * it's pointless to redirect the completion. 
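 *
 * In that case the function returns false and the caller finishes the
 * request itself. A hedged usage sketch (not part of this file;
 * my_complete_rq() is a made-up stand-in for the driver's completion
 * work):
 *
 *	if (!blk_mq_complete_request_remote(rq))
 *		my_complete_rq(rq);	// complete inline on this CPU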
12074ab32bf3SJens Axboe */ 120830654614SEd Tsai if ((rq->mq_hctx->nr_ctx == 1 && 120930654614SEd Tsai rq->mq_ctx->cpu == raw_smp_processor_id()) || 1210f168420cSLiu Song rq->cmd_flags & REQ_POLLED) 121140d09b53SChristoph Hellwig return false; 1212320ae51fSJens Axboe 121340d09b53SChristoph Hellwig if (blk_mq_complete_need_ipi(rq)) { 1214f9ab4918SSebastian Andrzej Siewior blk_mq_complete_send_ipi(rq); 1215f9ab4918SSebastian Andrzej Siewior return true; 12163d6efbf6SChristoph Hellwig } 121740d09b53SChristoph Hellwig 1218f9ab4918SSebastian Andrzej Siewior if (rq->q->nr_hw_queues == 1) { 1219f9ab4918SSebastian Andrzej Siewior blk_mq_raise_softirq(rq); 122040d09b53SChristoph Hellwig return true; 1221320ae51fSJens Axboe } 1222f9ab4918SSebastian Andrzej Siewior return false; 1223f9ab4918SSebastian Andrzej Siewior } 122440d09b53SChristoph Hellwig EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); 122540d09b53SChristoph Hellwig 1226320ae51fSJens Axboe /** 122715f73f5bSChristoph Hellwig * blk_mq_complete_request - end I/O on a request 122815f73f5bSChristoph Hellwig * @rq: the request being processed 1229320ae51fSJens Axboe * 123015f73f5bSChristoph Hellwig * Description: 123115f73f5bSChristoph Hellwig * Complete a request by scheduling the ->complete_rq operation. 123215f73f5bSChristoph Hellwig **/ 123315f73f5bSChristoph Hellwig void blk_mq_complete_request(struct request *rq) 1234320ae51fSJens Axboe { 123540d09b53SChristoph Hellwig if (!blk_mq_complete_request_remote(rq)) 123696339526SChristoph Hellwig rq->q->mq_ops->complete(rq); 1237320ae51fSJens Axboe } 123815f73f5bSChristoph Hellwig EXPORT_SYMBOL(blk_mq_complete_request); 123930a91cb4SChristoph Hellwig 124030a91cb4SChristoph Hellwig /** 1241105663f7SAndré Almeida * blk_mq_start_request - Start processing a request 1242105663f7SAndré Almeida * @rq: Pointer to request to be started 1243105663f7SAndré Almeida * 1244105663f7SAndré Almeida * Function used by device drivers to notify the block layer that a request 1245105663f7SAndré Almeida * is going to be processed now, so blk layer can do proper initializations 1246105663f7SAndré Almeida * such as starting the timeout timer. 
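 *
 * A minimal, hedged sketch (not part of this file) of where a driver's
 * ->queue_rq() typically calls this; my_issue_to_hw() and struct my_dev
 * are made-up names:
 *
 *	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					const struct blk_mq_queue_data *bd)
 *	{
 *		struct my_dev *dev = hctx->driver_data;
 *		struct request *rq = bd->rq;
 *
 *		blk_mq_start_request(rq);	// arm timeout, mark in-flight
 *		if (my_issue_to_hw(dev, rq))
 *			return BLK_STS_IOERR;
 *		return BLK_STS_OK;
 *	}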
1247105663f7SAndré Almeida */ 1248e2490073SChristoph Hellwig void blk_mq_start_request(struct request *rq) 1249320ae51fSJens Axboe { 1250320ae51fSJens Axboe struct request_queue *q = rq->q; 1251320ae51fSJens Axboe 1252a54895faSChristoph Hellwig trace_block_rq_issue(rq); 1253320ae51fSJens Axboe 1254cf43e6beSJens Axboe if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { 12554cddeacaSTejun Heo rq->io_start_time_ns = ktime_get_ns(); 12563d244306SHou Tao rq->stats_sectors = blk_rq_sectors(rq); 1257cf43e6beSJens Axboe rq->rq_flags |= RQF_STATS; 1258a7905043SJosef Bacik rq_qos_issue(q, rq); 1259cf43e6beSJens Axboe } 1260cf43e6beSJens Axboe 12611d9bd516STejun Heo WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); 1262538b7534SJens Axboe 1263538b7534SJens Axboe blk_add_timer(rq); 126412f5b931SKeith Busch WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); 126549f5baa5SChristoph Hellwig 126654d4e6abSMax Gurtovoy #ifdef CONFIG_BLK_DEV_INTEGRITY 126754d4e6abSMax Gurtovoy if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) 126854d4e6abSMax Gurtovoy q->integrity.profile->prepare_fn(rq); 126954d4e6abSMax Gurtovoy #endif 12703e08773cSChristoph Hellwig if (rq->bio && rq->bio->bi_opf & REQ_POLLED) 1271f6c80cffSKeith Busch WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); 1272320ae51fSJens Axboe } 1273e2490073SChristoph Hellwig EXPORT_SYMBOL(blk_mq_start_request); 1274320ae51fSJens Axboe 1275a327c341SMing Lei /* 1276a327c341SMing Lei * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple 1277a327c341SMing Lei * queues. This is important for md arrays to benefit from merging 1278a327c341SMing Lei * requests. 1279a327c341SMing Lei */ 1280a327c341SMing Lei static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) 1281a327c341SMing Lei { 1282a327c341SMing Lei if (plug->multiple_queues) 1283a327c341SMing Lei return BLK_MAX_REQUEST_COUNT * 2; 1284a327c341SMing Lei return BLK_MAX_REQUEST_COUNT; 1285a327c341SMing Lei } 1286a327c341SMing Lei 1287a327c341SMing Lei static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) 1288a327c341SMing Lei { 1289a327c341SMing Lei struct request *last = rq_list_peek(&plug->mq_list); 1290a327c341SMing Lei 1291a327c341SMing Lei if (!plug->rq_count) { 1292a327c341SMing Lei trace_block_plug(rq->q); 1293a327c341SMing Lei } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || 1294a327c341SMing Lei (!blk_queue_nomerges(rq->q) && 1295a327c341SMing Lei blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { 1296a327c341SMing Lei blk_mq_flush_plug_list(plug, false); 1297878eb6e4SAl Viro last = NULL; 1298a327c341SMing Lei trace_block_plug(rq->q); 1299a327c341SMing Lei } 1300a327c341SMing Lei 1301a327c341SMing Lei if (!plug->multiple_queues && last && last->q != rq->q) 1302a327c341SMing Lei plug->multiple_queues = true; 1303c6b7a3a2SMing Lei /* 1304c6b7a3a2SMing Lei * Any request allocated from sched tags can't be issued to 1305c6b7a3a2SMing Lei * ->queue_rqs() directly 1306c6b7a3a2SMing Lei */ 1307c6b7a3a2SMing Lei if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) 1308a327c341SMing Lei plug->has_elevator = true; 1309a327c341SMing Lei rq->rq_next = NULL; 1310a327c341SMing Lei rq_list_add(&plug->mq_list, rq); 1311a327c341SMing Lei plug->rq_count++; 1312a327c341SMing Lei } 1313a327c341SMing Lei 13144054cff9SChristoph Hellwig /** 13154054cff9SChristoph Hellwig * blk_execute_rq_nowait - insert a request to I/O scheduler for execution 13164054cff9SChristoph Hellwig * @rq: request to insert 13174054cff9SChristoph Hellwig * @at_head: insert request at head 
or tail of queue 13184054cff9SChristoph Hellwig * 13194054cff9SChristoph Hellwig * Description: 13204054cff9SChristoph Hellwig * Insert a fully prepared request at the back of the I/O scheduler queue 13214054cff9SChristoph Hellwig * for execution. Don't wait for completion. 13224054cff9SChristoph Hellwig * 13234054cff9SChristoph Hellwig * Note: 13244054cff9SChristoph Hellwig * This function will invoke @done directly if the queue is dead. 13254054cff9SChristoph Hellwig */ 1326e2e53086SChristoph Hellwig void blk_execute_rq_nowait(struct request *rq, bool at_head) 13274054cff9SChristoph Hellwig { 1328f0dbe6e8SChristoph Hellwig struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 1329f0dbe6e8SChristoph Hellwig 1330ae948fd6SChristoph Hellwig WARN_ON(irqs_disabled()); 1331ae948fd6SChristoph Hellwig WARN_ON(!blk_rq_is_passthrough(rq)); 13324054cff9SChristoph Hellwig 1333ae948fd6SChristoph Hellwig blk_account_io_start(rq); 1334110fdb44SPankaj Raghav 1335110fdb44SPankaj Raghav /* 1336110fdb44SPankaj Raghav * As plugging can be enabled for passthrough requests on a zoned 1337110fdb44SPankaj Raghav * device, directly accessing the plug instead of using blk_mq_plug() 1338110fdb44SPankaj Raghav * should not have any consequences. 1339110fdb44SPankaj Raghav */ 1340f0dbe6e8SChristoph Hellwig if (current->plug && !at_head) { 1341ae948fd6SChristoph Hellwig blk_add_rq_to_plug(current->plug, rq); 1342f0dbe6e8SChristoph Hellwig return; 1343f0dbe6e8SChristoph Hellwig } 1344f0dbe6e8SChristoph Hellwig 1345710fa378SChristoph Hellwig blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0); 1346f0dbe6e8SChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 13474054cff9SChristoph Hellwig } 13484054cff9SChristoph Hellwig EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 13494054cff9SChristoph Hellwig 135032ac5a9bSChristoph Hellwig struct blk_rq_wait { 135132ac5a9bSChristoph Hellwig struct completion done; 135232ac5a9bSChristoph Hellwig blk_status_t ret; 135332ac5a9bSChristoph Hellwig }; 135432ac5a9bSChristoph Hellwig 1355de671d61SJens Axboe static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret) 135632ac5a9bSChristoph Hellwig { 135732ac5a9bSChristoph Hellwig struct blk_rq_wait *wait = rq->end_io_data; 135832ac5a9bSChristoph Hellwig 135932ac5a9bSChristoph Hellwig wait->ret = ret; 136032ac5a9bSChristoph Hellwig complete(&wait->done); 1361de671d61SJens Axboe return RQ_END_IO_NONE; 136232ac5a9bSChristoph Hellwig } 136332ac5a9bSChristoph Hellwig 1364c6e99ea4SKanchan Joshi bool blk_rq_is_poll(struct request *rq) 13654054cff9SChristoph Hellwig { 13664054cff9SChristoph Hellwig if (!rq->mq_hctx) 13674054cff9SChristoph Hellwig return false; 13684054cff9SChristoph Hellwig if (rq->mq_hctx->type != HCTX_TYPE_POLL) 13694054cff9SChristoph Hellwig return false; 13704054cff9SChristoph Hellwig return true; 13714054cff9SChristoph Hellwig } 1372c6e99ea4SKanchan Joshi EXPORT_SYMBOL_GPL(blk_rq_is_poll); 13734054cff9SChristoph Hellwig 13744054cff9SChristoph Hellwig static void blk_rq_poll_completion(struct request *rq, struct completion *wait) 13754054cff9SChristoph Hellwig { 13764054cff9SChristoph Hellwig do { 1377f6c80cffSKeith Busch blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); 13784054cff9SChristoph Hellwig cond_resched(); 13794054cff9SChristoph Hellwig } while (!completion_done(wait)); 13804054cff9SChristoph Hellwig } 13814054cff9SChristoph Hellwig 13824054cff9SChristoph Hellwig /** 13834054cff9SChristoph Hellwig * blk_execute_rq - insert a request into queue for execution 13844054cff9SChristoph Hellwig * @rq: request to 
insert 13854054cff9SChristoph Hellwig * @at_head: insert request at head or tail of queue 13864054cff9SChristoph Hellwig * 13874054cff9SChristoph Hellwig * Description: 13884054cff9SChristoph Hellwig * Insert a fully prepared request at the back of the I/O scheduler queue 13894054cff9SChristoph Hellwig * for execution and wait for completion. 13904054cff9SChristoph Hellwig * Return: The blk_status_t result provided to blk_mq_end_request(). 13914054cff9SChristoph Hellwig */ 1392b84ba30bSChristoph Hellwig blk_status_t blk_execute_rq(struct request *rq, bool at_head) 13934054cff9SChristoph Hellwig { 1394f0dbe6e8SChristoph Hellwig struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 139532ac5a9bSChristoph Hellwig struct blk_rq_wait wait = { 139632ac5a9bSChristoph Hellwig .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), 139732ac5a9bSChristoph Hellwig }; 13984054cff9SChristoph Hellwig 1399ae948fd6SChristoph Hellwig WARN_ON(irqs_disabled()); 1400ae948fd6SChristoph Hellwig WARN_ON(!blk_rq_is_passthrough(rq)); 1401ae948fd6SChristoph Hellwig 14024054cff9SChristoph Hellwig rq->end_io_data = &wait; 1403ae948fd6SChristoph Hellwig rq->end_io = blk_end_sync_rq; 14044054cff9SChristoph Hellwig 1405ae948fd6SChristoph Hellwig blk_account_io_start(rq); 1406710fa378SChristoph Hellwig blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0); 1407f0dbe6e8SChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 14084054cff9SChristoph Hellwig 1409ae948fd6SChristoph Hellwig if (blk_rq_is_poll(rq)) { 141032ac5a9bSChristoph Hellwig blk_rq_poll_completion(rq, &wait.done); 1411ae948fd6SChristoph Hellwig } else { 1412ae948fd6SChristoph Hellwig /* 1413ae948fd6SChristoph Hellwig * Prevent hang_check timer from firing at us during very long 1414ae948fd6SChristoph Hellwig * I/O 1415ae948fd6SChristoph Hellwig */ 1416ae948fd6SChristoph Hellwig unsigned long hang_check = sysctl_hung_task_timeout_secs; 1417ae948fd6SChristoph Hellwig 1418ae948fd6SChristoph Hellwig if (hang_check) 141932ac5a9bSChristoph Hellwig while (!wait_for_completion_io_timeout(&wait.done, 14204054cff9SChristoph Hellwig hang_check * (HZ/2))) 14214054cff9SChristoph Hellwig ; 14224054cff9SChristoph Hellwig else 142332ac5a9bSChristoph Hellwig wait_for_completion_io(&wait.done); 1424ae948fd6SChristoph Hellwig } 14254054cff9SChristoph Hellwig 142632ac5a9bSChristoph Hellwig return wait.ret; 14274054cff9SChristoph Hellwig } 14284054cff9SChristoph Hellwig EXPORT_SYMBOL(blk_execute_rq); 14294054cff9SChristoph Hellwig 1430ed0791b2SChristoph Hellwig static void __blk_mq_requeue_request(struct request *rq) 1431320ae51fSJens Axboe { 1432320ae51fSJens Axboe struct request_queue *q = rq->q; 1433320ae51fSJens Axboe 1434923218f6SMing Lei blk_mq_put_driver_tag(rq); 1435923218f6SMing Lei 1436a54895faSChristoph Hellwig trace_block_rq_requeue(rq); 1437a7905043SJosef Bacik rq_qos_requeue(q, rq); 143849f5baa5SChristoph Hellwig 143912f5b931SKeith Busch if (blk_mq_request_started(rq)) { 144012f5b931SKeith Busch WRITE_ONCE(rq->state, MQ_RQ_IDLE); 1441da661267SChristoph Hellwig rq->rq_flags &= ~RQF_TIMED_OUT; 1442320ae51fSJens Axboe } 1443e2490073SChristoph Hellwig } 1444320ae51fSJens Axboe 14452b053acaSBart Van Assche void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) 1446ed0791b2SChristoph Hellwig { 1447214a4418SChristoph Hellwig struct request_queue *q = rq->q; 14489a67aa52SChristoph Hellwig unsigned long flags; 1449214a4418SChristoph Hellwig 1450ed0791b2SChristoph Hellwig __blk_mq_requeue_request(rq); 1451ed0791b2SChristoph Hellwig 1452105976f5SMing Lei /* this 
request will be re-inserted to io scheduler queue */ 1453105976f5SMing Lei blk_mq_sched_requeue_request(rq); 1454105976f5SMing Lei 14559a67aa52SChristoph Hellwig spin_lock_irqsave(&q->requeue_lock, flags); 14569a67aa52SChristoph Hellwig list_add_tail(&rq->queuelist, &q->requeue_list); 14579a67aa52SChristoph Hellwig spin_unlock_irqrestore(&q->requeue_lock, flags); 1458214a4418SChristoph Hellwig 1459214a4418SChristoph Hellwig if (kick_requeue_list) 1460214a4418SChristoph Hellwig blk_mq_kick_requeue_list(q); 1461ed0791b2SChristoph Hellwig } 1462ed0791b2SChristoph Hellwig EXPORT_SYMBOL(blk_mq_requeue_request); 1463ed0791b2SChristoph Hellwig 14646fca6a61SChristoph Hellwig static void blk_mq_requeue_work(struct work_struct *work) 14656fca6a61SChristoph Hellwig { 14666fca6a61SChristoph Hellwig struct request_queue *q = 14672849450aSMike Snitzer container_of(work, struct request_queue, requeue_work.work); 14686fca6a61SChristoph Hellwig LIST_HEAD(rq_list); 14699a67aa52SChristoph Hellwig LIST_HEAD(flush_list); 14709a67aa52SChristoph Hellwig struct request *rq; 14716fca6a61SChristoph Hellwig 147218e9781dSJens Axboe spin_lock_irq(&q->requeue_lock); 14736fca6a61SChristoph Hellwig list_splice_init(&q->requeue_list, &rq_list); 14749a67aa52SChristoph Hellwig list_splice_init(&q->flush_list, &flush_list); 147518e9781dSJens Axboe spin_unlock_irq(&q->requeue_lock); 14766fca6a61SChristoph Hellwig 14779a67aa52SChristoph Hellwig while (!list_empty(&rq_list)) { 14789a67aa52SChristoph Hellwig rq = list_entry(rq_list.next, struct request, queuelist); 1479a1e948b8SChristoph Hellwig /* 1480a1e948b8SChristoph Hellwig * If RQF_DONTPREP is set, the request has been started by the 1481a1e948b8SChristoph Hellwig * driver already and might have driver-specific data allocated 1482a1e948b8SChristoph Hellwig * already. Insert it into the hctx dispatch list to avoid 1483a1e948b8SChristoph Hellwig * block layer merges for the request. 
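 *
 * For reference, a hedged sketch (not part of this file) of the driver
 * side that feeds this list: on a transient failure a driver that wants
 * to keep its already-prepared state leaves RQF_DONTPREP set, requeues
 * the request and kicks the list after a short delay (MY_RETRY_MS is a
 * made-up constant):
 *
 *	blk_mq_requeue_request(rq, false);
 *	blk_mq_delay_kick_requeue_list(rq->q, MY_RETRY_MS);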
1484a1e948b8SChristoph Hellwig */ 1485a1e948b8SChristoph Hellwig if (rq->rq_flags & RQF_DONTPREP) { 14866fca6a61SChristoph Hellwig list_del_init(&rq->queuelist); 14872b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, 0); 14889a67aa52SChristoph Hellwig } else { 1489a1e948b8SChristoph Hellwig list_del_init(&rq->queuelist); 1490710fa378SChristoph Hellwig blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD); 14916fca6a61SChristoph Hellwig } 14926fca6a61SChristoph Hellwig } 14936fca6a61SChristoph Hellwig 14949a67aa52SChristoph Hellwig while (!list_empty(&flush_list)) { 14959a67aa52SChristoph Hellwig rq = list_entry(flush_list.next, struct request, queuelist); 14966fca6a61SChristoph Hellwig list_del_init(&rq->queuelist); 1497710fa378SChristoph Hellwig blk_mq_insert_request(rq, 0); 14986fca6a61SChristoph Hellwig } 14996fca6a61SChristoph Hellwig 150052d7f1b5SBart Van Assche blk_mq_run_hw_queues(q, false); 15016fca6a61SChristoph Hellwig } 15026fca6a61SChristoph Hellwig 15036fca6a61SChristoph Hellwig void blk_mq_kick_requeue_list(struct request_queue *q) 15046fca6a61SChristoph Hellwig { 1505ae943d20SBart Van Assche kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); 15066fca6a61SChristoph Hellwig } 15076fca6a61SChristoph Hellwig EXPORT_SYMBOL(blk_mq_kick_requeue_list); 15086fca6a61SChristoph Hellwig 15092849450aSMike Snitzer void blk_mq_delay_kick_requeue_list(struct request_queue *q, 15102849450aSMike Snitzer unsigned long msecs) 15112849450aSMike Snitzer { 1512d4acf365SBart Van Assche kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 15132849450aSMike Snitzer msecs_to_jiffies(msecs)); 15142849450aSMike Snitzer } 15152849450aSMike Snitzer EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); 15162849450aSMike Snitzer 15172dd6532eSJohn Garry static bool blk_mq_rq_inflight(struct request *rq, void *priv) 1518ae879912SJens Axboe { 1519ae879912SJens Axboe /* 15208ab30a33SJohn Garry * If we find a request that isn't idle we know the queue is busy 15218ab30a33SJohn Garry * as it's checked in the iter. 15228ab30a33SJohn Garry * Return false to stop the iteration. 
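 *
 * Hedged usage sketch (not part of this file) of the helper built on top
 * of this check: a caller that must wait for all started requests to
 * finish can poll the queue, e.g.
 *
 *	while (blk_mq_queue_inflight(q))
 *		msleep(5);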
1523ae879912SJens Axboe */ 15248ab30a33SJohn Garry if (blk_mq_request_started(rq)) { 1525ae879912SJens Axboe bool *busy = priv; 1526ae879912SJens Axboe 1527ae879912SJens Axboe *busy = true; 1528ae879912SJens Axboe return false; 1529ae879912SJens Axboe } 1530ae879912SJens Axboe 1531ae879912SJens Axboe return true; 1532ae879912SJens Axboe } 1533ae879912SJens Axboe 15343c94d83cSJens Axboe bool blk_mq_queue_inflight(struct request_queue *q) 1535ae879912SJens Axboe { 1536ae879912SJens Axboe bool busy = false; 1537ae879912SJens Axboe 15383c94d83cSJens Axboe blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy); 1539ae879912SJens Axboe return busy; 1540ae879912SJens Axboe } 15413c94d83cSJens Axboe EXPORT_SYMBOL_GPL(blk_mq_queue_inflight); 1542ae879912SJens Axboe 15439bdb4833SJohn Garry static void blk_mq_rq_timed_out(struct request *req) 1544320ae51fSJens Axboe { 1545da661267SChristoph Hellwig req->rq_flags |= RQF_TIMED_OUT; 1546d1210d5aSChristoph Hellwig if (req->q->mq_ops->timeout) { 1547d1210d5aSChristoph Hellwig enum blk_eh_timer_return ret; 154887ee7b11SJens Axboe 15499bdb4833SJohn Garry ret = req->q->mq_ops->timeout(req); 1550d1210d5aSChristoph Hellwig if (ret == BLK_EH_DONE) 1551d1210d5aSChristoph Hellwig return; 1552d1210d5aSChristoph Hellwig WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); 155387ee7b11SJens Axboe } 1554d1210d5aSChristoph Hellwig 1555d1210d5aSChristoph Hellwig blk_add_timer(req); 155687ee7b11SJens Axboe } 155787ee7b11SJens Axboe 155882c22947SDavid Jeffery struct blk_expired_data { 155982c22947SDavid Jeffery bool has_timedout_rq; 156082c22947SDavid Jeffery unsigned long next; 156182c22947SDavid Jeffery unsigned long timeout_start; 156282c22947SDavid Jeffery }; 156382c22947SDavid Jeffery 156482c22947SDavid Jeffery static bool blk_mq_req_expired(struct request *rq, struct blk_expired_data *expired) 156512f5b931SKeith Busch { 156612f5b931SKeith Busch unsigned long deadline; 156712f5b931SKeith Busch 156812f5b931SKeith Busch if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) 156912f5b931SKeith Busch return false; 1570da661267SChristoph Hellwig if (rq->rq_flags & RQF_TIMED_OUT) 1571da661267SChristoph Hellwig return false; 157212f5b931SKeith Busch 1573079076b3SChristoph Hellwig deadline = READ_ONCE(rq->deadline); 157482c22947SDavid Jeffery if (time_after_eq(expired->timeout_start, deadline)) 157512f5b931SKeith Busch return true; 157612f5b931SKeith Busch 157782c22947SDavid Jeffery if (expired->next == 0) 157882c22947SDavid Jeffery expired->next = deadline; 157982c22947SDavid Jeffery else if (time_after(expired->next, deadline)) 158082c22947SDavid Jeffery expired->next = deadline; 158112f5b931SKeith Busch return false; 158212f5b931SKeith Busch } 158312f5b931SKeith Busch 15842e315dc0SMing Lei void blk_mq_put_rq_ref(struct request *rq) 15852e315dc0SMing Lei { 1586de671d61SJens Axboe if (is_flush_rq(rq)) { 1587de671d61SJens Axboe if (rq->end_io(rq, 0) == RQ_END_IO_FREE) 1588de671d61SJens Axboe blk_mq_free_request(rq); 1589de671d61SJens Axboe } else if (req_ref_put_and_test(rq)) { 15902e315dc0SMing Lei __blk_mq_free_request(rq); 15912e315dc0SMing Lei } 1592de671d61SJens Axboe } 15932e315dc0SMing Lei 15942dd6532eSJohn Garry static bool blk_mq_check_expired(struct request *rq, void *priv) 1595320ae51fSJens Axboe { 159682c22947SDavid Jeffery struct blk_expired_data *expired = priv; 159781481eb4SChristoph Hellwig 159812f5b931SKeith Busch /* 1599c797b40cSMing Lei * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot 1600c797b40cSMing Lei * be reallocated underneath the timeout handler's 
processing, then 1601c797b40cSMing Lei * the expire check is reliable. If the request is not expired, then 1602c797b40cSMing Lei * it was completed and reallocated as a new request after returning 1603c797b40cSMing Lei * from blk_mq_check_expired(). 160412f5b931SKeith Busch */ 160582c22947SDavid Jeffery if (blk_mq_req_expired(rq, expired)) { 160682c22947SDavid Jeffery expired->has_timedout_rq = true; 160782c22947SDavid Jeffery return false; 160882c22947SDavid Jeffery } 160982c22947SDavid Jeffery return true; 161082c22947SDavid Jeffery } 161182c22947SDavid Jeffery 161282c22947SDavid Jeffery static bool blk_mq_handle_expired(struct request *rq, void *priv) 161382c22947SDavid Jeffery { 161482c22947SDavid Jeffery struct blk_expired_data *expired = priv; 161582c22947SDavid Jeffery 161682c22947SDavid Jeffery if (blk_mq_req_expired(rq, expired)) 16179bdb4833SJohn Garry blk_mq_rq_timed_out(rq); 16187baa8572SJens Axboe return true; 16191d9bd516STejun Heo } 16201d9bd516STejun Heo 1621287922ebSChristoph Hellwig static void blk_mq_timeout_work(struct work_struct *work) 162281481eb4SChristoph Hellwig { 1623287922ebSChristoph Hellwig struct request_queue *q = 1624287922ebSChristoph Hellwig container_of(work, struct request_queue, timeout_work); 162582c22947SDavid Jeffery struct blk_expired_data expired = { 162682c22947SDavid Jeffery .timeout_start = jiffies, 162782c22947SDavid Jeffery }; 16281d9bd516STejun Heo struct blk_mq_hw_ctx *hctx; 16294f481208SMing Lei unsigned long i; 1630320ae51fSJens Axboe 163171f79fb3SGabriel Krisman Bertazi /* A deadlock might occur if a request is stuck requiring a 163271f79fb3SGabriel Krisman Bertazi * timeout at the same time a queue freeze is waiting 163371f79fb3SGabriel Krisman Bertazi * completion, since the timeout code would not be able to 163471f79fb3SGabriel Krisman Bertazi * acquire the queue reference here. 163571f79fb3SGabriel Krisman Bertazi * 163671f79fb3SGabriel Krisman Bertazi * That's why we don't use blk_queue_enter here; instead, we use 163771f79fb3SGabriel Krisman Bertazi * percpu_ref_tryget directly, because we need to be able to 163871f79fb3SGabriel Krisman Bertazi * obtain a reference even in the short window between the queue 163971f79fb3SGabriel Krisman Bertazi * starting to freeze, by dropping the first reference in 16401671d522SMing Lei * blk_freeze_queue_start, and the moment the last request is 164171f79fb3SGabriel Krisman Bertazi * consumed, marked by the instant q_usage_counter reaches 164271f79fb3SGabriel Krisman Bertazi * zero. 164371f79fb3SGabriel Krisman Bertazi */ 164471f79fb3SGabriel Krisman Bertazi if (!percpu_ref_tryget(&q->q_usage_counter)) 1645287922ebSChristoph Hellwig return; 1646287922ebSChristoph Hellwig 164782c22947SDavid Jeffery /* check if there is any timed-out request */ 164882c22947SDavid Jeffery blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &expired); 164982c22947SDavid Jeffery if (expired.has_timedout_rq) { 165082c22947SDavid Jeffery /* 165182c22947SDavid Jeffery * Before walking tags, we must ensure any submit started 165282c22947SDavid Jeffery * before the current time has finished. 
Since the submit 165382c22947SDavid Jeffery * uses srcu or rcu, wait for a synchronization point to 165482c22947SDavid Jeffery * ensure all running submits have finished 165582c22947SDavid Jeffery */ 1656483239c7SChristoph Hellwig blk_mq_wait_quiesce_done(q->tag_set); 1657320ae51fSJens Axboe 165882c22947SDavid Jeffery expired.next = 0; 165982c22947SDavid Jeffery blk_mq_queue_tag_busy_iter(q, blk_mq_handle_expired, &expired); 166082c22947SDavid Jeffery } 166182c22947SDavid Jeffery 166282c22947SDavid Jeffery if (expired.next != 0) { 166382c22947SDavid Jeffery mod_timer(&q->timeout, expired.next); 16640d2602caSJens Axboe } else { 1665fcd36c36SBart Van Assche /* 1666fcd36c36SBart Van Assche * Request timeouts are handled as a forward rolling timer. If 1667fcd36c36SBart Van Assche * we end up here it means that no requests are pending and 1668fcd36c36SBart Van Assche * also that no request has been pending for a while. Mark 1669fcd36c36SBart Van Assche * each hctx as idle. 1670fcd36c36SBart Van Assche */ 1671f054b56cSMing Lei queue_for_each_hw_ctx(q, hctx, i) { 1672f054b56cSMing Lei /* the hctx may be unmapped, so check it here */ 1673f054b56cSMing Lei if (blk_mq_hw_queue_mapped(hctx)) 16740d2602caSJens Axboe blk_mq_tag_idle(hctx); 16750d2602caSJens Axboe } 1676320ae51fSJens Axboe } 1677287922ebSChristoph Hellwig blk_queue_exit(q); 1678f054b56cSMing Lei } 1679320ae51fSJens Axboe 168088459642SOmar Sandoval struct flush_busy_ctx_data { 168188459642SOmar Sandoval struct blk_mq_hw_ctx *hctx; 168288459642SOmar Sandoval struct list_head *list; 168388459642SOmar Sandoval }; 168488459642SOmar Sandoval 168588459642SOmar Sandoval static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) 168688459642SOmar Sandoval { 168788459642SOmar Sandoval struct flush_busy_ctx_data *flush_data = data; 168888459642SOmar Sandoval struct blk_mq_hw_ctx *hctx = flush_data->hctx; 168988459642SOmar Sandoval struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; 1690c16d6b5aSMing Lei enum hctx_type type = hctx->type; 169188459642SOmar Sandoval 169288459642SOmar Sandoval spin_lock(&ctx->lock); 1693c16d6b5aSMing Lei list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); 1694e9a99a63SOmar Sandoval sbitmap_clear_bit(sb, bitnr); 169588459642SOmar Sandoval spin_unlock(&ctx->lock); 169688459642SOmar Sandoval return true; 169788459642SOmar Sandoval } 169888459642SOmar Sandoval 1699320ae51fSJens Axboe /* 17001429d7c9SJens Axboe * Process software queues that have been marked busy, splicing them 17011429d7c9SJens Axboe * to the for-dispatch 17021429d7c9SJens Axboe */ 17032c3ad667SJens Axboe void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) 17041429d7c9SJens Axboe { 170588459642SOmar Sandoval struct flush_busy_ctx_data data = { 170688459642SOmar Sandoval .hctx = hctx, 170788459642SOmar Sandoval .list = list, 170888459642SOmar Sandoval }; 17091429d7c9SJens Axboe 171088459642SOmar Sandoval sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); 17111429d7c9SJens Axboe } 17122c3ad667SJens Axboe EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); 17131429d7c9SJens Axboe 1714b347689fSMing Lei struct dispatch_rq_data { 1715b347689fSMing Lei struct blk_mq_hw_ctx *hctx; 1716b347689fSMing Lei struct request *rq; 1717b347689fSMing Lei }; 1718b347689fSMing Lei 1719b347689fSMing Lei static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, 1720b347689fSMing Lei void *data) 1721b347689fSMing Lei { 1722b347689fSMing Lei struct dispatch_rq_data *dispatch_data = data; 1723b347689fSMing Lei 
struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; 1724b347689fSMing Lei struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; 1725c16d6b5aSMing Lei enum hctx_type type = hctx->type; 1726b347689fSMing Lei 1727b347689fSMing Lei spin_lock(&ctx->lock); 1728c16d6b5aSMing Lei if (!list_empty(&ctx->rq_lists[type])) { 1729c16d6b5aSMing Lei dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); 1730b347689fSMing Lei list_del_init(&dispatch_data->rq->queuelist); 1731c16d6b5aSMing Lei if (list_empty(&ctx->rq_lists[type])) 1732b347689fSMing Lei sbitmap_clear_bit(sb, bitnr); 1733b347689fSMing Lei } 1734b347689fSMing Lei spin_unlock(&ctx->lock); 1735b347689fSMing Lei 1736b347689fSMing Lei return !dispatch_data->rq; 1737b347689fSMing Lei } 1738b347689fSMing Lei 1739b347689fSMing Lei struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, 1740b347689fSMing Lei struct blk_mq_ctx *start) 1741b347689fSMing Lei { 1742f31967f0SJens Axboe unsigned off = start ? start->index_hw[hctx->type] : 0; 1743b347689fSMing Lei struct dispatch_rq_data data = { 1744b347689fSMing Lei .hctx = hctx, 1745b347689fSMing Lei .rq = NULL, 1746b347689fSMing Lei }; 1747b347689fSMing Lei 1748b347689fSMing Lei __sbitmap_for_each_set(&hctx->ctx_map, off, 1749b347689fSMing Lei dispatch_rq_from_ctx, &data); 1750b347689fSMing Lei 1751b347689fSMing Lei return data.rq; 1752b347689fSMing Lei } 1753b347689fSMing Lei 1754a808a9d5SJens Axboe static bool __blk_mq_alloc_driver_tag(struct request *rq) 1755703fd1c0SJens Axboe { 1756ae0f1a73SJohn Garry struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; 1757570e9b73SMing Lei unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; 1758570e9b73SMing Lei int tag; 1759570e9b73SMing Lei 1760568f2700SMing Lei blk_mq_tag_busy(rq->mq_hctx); 1761568f2700SMing Lei 1762570e9b73SMing Lei if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { 1763ae0f1a73SJohn Garry bt = &rq->mq_hctx->tags->breserved_tags; 1764570e9b73SMing Lei tag_offset = 0; 176528500850SMing Lei } else { 1766570e9b73SMing Lei if (!hctx_may_queue(rq->mq_hctx, bt)) 1767570e9b73SMing Lei return false; 176828500850SMing Lei } 176928500850SMing Lei 1770570e9b73SMing Lei tag = __sbitmap_queue_get(bt); 1771570e9b73SMing Lei if (tag == BLK_MQ_NO_TAG) 1772570e9b73SMing Lei return false; 1773570e9b73SMing Lei 1774570e9b73SMing Lei rq->tag = tag + tag_offset; 1775570e9b73SMing Lei return true; 1776570e9b73SMing Lei } 1777570e9b73SMing Lei 1778a808a9d5SJens Axboe bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) 1779570e9b73SMing Lei { 1780a808a9d5SJens Axboe if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) 1781568f2700SMing Lei return false; 1782568f2700SMing Lei 178351db1c37SMing Lei if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && 1784568f2700SMing Lei !(rq->rq_flags & RQF_MQ_INFLIGHT)) { 1785568f2700SMing Lei rq->rq_flags |= RQF_MQ_INFLIGHT; 1786bccf5e26SJohn Garry __blk_mq_inc_active_requests(hctx); 1787568f2700SMing Lei } 1788568f2700SMing Lei hctx->tags->rqs[rq->tag] = rq; 1789570e9b73SMing Lei return true; 1790570e9b73SMing Lei } 1791570e9b73SMing Lei 1792eb619fdbSJens Axboe static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, 1793eb619fdbSJens Axboe int flags, void *key) 1794da55f2ccSOmar Sandoval { 1795da55f2ccSOmar Sandoval struct blk_mq_hw_ctx *hctx; 1796da55f2ccSOmar Sandoval 1797da55f2ccSOmar Sandoval hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); 1798da55f2ccSOmar Sandoval 17995815839bSMing Lei 
spin_lock(&hctx->dispatch_wait_lock); 1800e8618575SJens Axboe if (!list_empty(&wait->entry)) { 1801e8618575SJens Axboe struct sbitmap_queue *sbq; 1802e8618575SJens Axboe 1803eb619fdbSJens Axboe list_del_init(&wait->entry); 1804ae0f1a73SJohn Garry sbq = &hctx->tags->bitmap_tags; 1805e8618575SJens Axboe atomic_dec(&sbq->ws_active); 1806e8618575SJens Axboe } 18075815839bSMing Lei spin_unlock(&hctx->dispatch_wait_lock); 18085815839bSMing Lei 1809da55f2ccSOmar Sandoval blk_mq_run_hw_queue(hctx, true); 1810da55f2ccSOmar Sandoval return 1; 1811da55f2ccSOmar Sandoval } 1812da55f2ccSOmar Sandoval 1813f906a6a0SJens Axboe /* 1814f906a6a0SJens Axboe * Mark us waiting for a tag. For shared tags, this involves hooking us into 1815ee3e4de5SBart Van Assche * the tag wakeups. For non-shared tags, we can simply mark us needing a 1816ee3e4de5SBart Van Assche * restart. For both cases, take care to check the condition again after 1817f906a6a0SJens Axboe * marking us as waiting. 1818f906a6a0SJens Axboe */ 18192278d69fSMing Lei static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, 1820eb619fdbSJens Axboe struct request *rq) 1821da55f2ccSOmar Sandoval { 182298b99e94SKemeng Shi struct sbitmap_queue *sbq; 18235815839bSMing Lei struct wait_queue_head *wq; 1824f906a6a0SJens Axboe wait_queue_entry_t *wait; 1825f906a6a0SJens Axboe bool ret; 1826da55f2ccSOmar Sandoval 182747df9ce9SKemeng Shi if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && 182847df9ce9SKemeng Shi !(blk_mq_is_shared_tags(hctx->flags))) { 1829684b7324SYufen Yu blk_mq_sched_mark_restart_hctx(hctx); 1830c27d53fbSBart Van Assche 1831c27d53fbSBart Van Assche /* 1832c27d53fbSBart Van Assche * It's possible that a tag was freed in the window between the 1833c27d53fbSBart Van Assche * allocation failure and adding the hardware queue to the wait 1834c27d53fbSBart Van Assche * queue. 1835c27d53fbSBart Van Assche * 1836c27d53fbSBart Van Assche * Don't clear RESTART here, someone else could have set it. 1837c27d53fbSBart Van Assche * At most this will cost an extra queue run. 1838c27d53fbSBart Van Assche */ 18398ab6bb9eSMing Lei return blk_mq_get_driver_tag(rq); 1840c27d53fbSBart Van Assche } 1841c27d53fbSBart Van Assche 18422278d69fSMing Lei wait = &hctx->dispatch_wait; 1843eb619fdbSJens Axboe if (!list_empty_careful(&wait->entry)) 1844da55f2ccSOmar Sandoval return false; 1845da55f2ccSOmar Sandoval 184698b99e94SKemeng Shi if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) 184798b99e94SKemeng Shi sbq = &hctx->tags->breserved_tags; 184898b99e94SKemeng Shi else 184998b99e94SKemeng Shi sbq = &hctx->tags->bitmap_tags; 1850e8618575SJens Axboe wq = &bt_wait_ptr(sbq, hctx)->wait; 18515815839bSMing Lei 18525815839bSMing Lei spin_lock_irq(&wq->lock); 18535815839bSMing Lei spin_lock(&hctx->dispatch_wait_lock); 1854eb619fdbSJens Axboe if (!list_empty(&wait->entry)) { 18555815839bSMing Lei spin_unlock(&hctx->dispatch_wait_lock); 18565815839bSMing Lei spin_unlock_irq(&wq->lock); 1857eb619fdbSJens Axboe return false; 1858eb619fdbSJens Axboe } 1859eb619fdbSJens Axboe 1860e8618575SJens Axboe atomic_inc(&sbq->ws_active); 18615815839bSMing Lei wait->flags &= ~WQ_FLAG_EXCLUSIVE; 18625815839bSMing Lei __add_wait_queue(wq, wait); 1863da55f2ccSOmar Sandoval 1864da55f2ccSOmar Sandoval /* 1865eb619fdbSJens Axboe * It's possible that a tag was freed in the window between the 1866eb619fdbSJens Axboe * allocation failure and adding the hardware queue to the wait 1867eb619fdbSJens Axboe * queue. 
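 *
 * Illustrative interleaving (a sketch, not taken from any trace) of the
 * window the retry below closes:
 *
 *	dispatch CPU				completion CPU
 *	blk_mq_get_driver_tag() fails
 *						tag freed; waitqueue still
 *						empty, so nobody is woken
 *	__add_wait_queue(wq, wait)
 *	blk_mq_get_driver_tag() retried here, otherwise the hctx could
 *	sleep waiting for a wakeup that already happened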
1868da55f2ccSOmar Sandoval */ 18698ab6bb9eSMing Lei ret = blk_mq_get_driver_tag(rq); 1870f906a6a0SJens Axboe if (!ret) { 18715815839bSMing Lei spin_unlock(&hctx->dispatch_wait_lock); 18725815839bSMing Lei spin_unlock_irq(&wq->lock); 1873eb619fdbSJens Axboe return false; 1874eb619fdbSJens Axboe } 1875eb619fdbSJens Axboe 1876eb619fdbSJens Axboe /* 1877eb619fdbSJens Axboe * We got a tag, remove ourselves from the wait queue to ensure 1878eb619fdbSJens Axboe * someone else gets the wakeup. 1879eb619fdbSJens Axboe */ 1880eb619fdbSJens Axboe list_del_init(&wait->entry); 1881e8618575SJens Axboe atomic_dec(&sbq->ws_active); 18825815839bSMing Lei spin_unlock(&hctx->dispatch_wait_lock); 18835815839bSMing Lei spin_unlock_irq(&wq->lock); 1884c27d53fbSBart Van Assche 1885da55f2ccSOmar Sandoval return true; 1886da55f2ccSOmar Sandoval } 1887da55f2ccSOmar Sandoval 18886e768717SMing Lei #define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8 18896e768717SMing Lei #define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4 18906e768717SMing Lei /* 18916e768717SMing Lei * Update dispatch busy with the Exponential Weighted Moving Average(EWMA): 18926e768717SMing Lei * - EWMA is one simple way to compute running average value 18936e768717SMing Lei * - weight(7/8 and 1/8) is applied so that it can decrease exponentially 18946e768717SMing Lei * - take 4 as factor for avoiding to get too small(0) result, and this 18956e768717SMing Lei * factor doesn't matter because EWMA decreases exponentially 18966e768717SMing Lei */ 18976e768717SMing Lei static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy) 18986e768717SMing Lei { 18996e768717SMing Lei unsigned int ewma; 19006e768717SMing Lei 19016e768717SMing Lei ewma = hctx->dispatch_busy; 19026e768717SMing Lei 19036e768717SMing Lei if (!ewma && !busy) 19046e768717SMing Lei return; 19056e768717SMing Lei 19066e768717SMing Lei ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; 19076e768717SMing Lei if (busy) 19086e768717SMing Lei ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR; 19096e768717SMing Lei ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT; 19106e768717SMing Lei 19116e768717SMing Lei hctx->dispatch_busy = ewma; 19126e768717SMing Lei } 19136e768717SMing Lei 191486ff7c2aSMing Lei #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ 191586ff7c2aSMing Lei 1916c92a4103SJohannes Thumshirn static void blk_mq_handle_dev_resource(struct request *rq, 1917c92a4103SJohannes Thumshirn struct list_head *list) 1918c92a4103SJohannes Thumshirn { 1919c92a4103SJohannes Thumshirn list_add(&rq->queuelist, list); 1920c92a4103SJohannes Thumshirn __blk_mq_requeue_request(rq); 1921c92a4103SJohannes Thumshirn } 1922c92a4103SJohannes Thumshirn 19230512a75bSKeith Busch static void blk_mq_handle_zone_resource(struct request *rq, 19240512a75bSKeith Busch struct list_head *zone_list) 19250512a75bSKeith Busch { 19260512a75bSKeith Busch /* 19270512a75bSKeith Busch * If we end up here it is because we cannot dispatch a request to a 19280512a75bSKeith Busch * specific zone due to LLD level zone-write locking or other zone 19290512a75bSKeith Busch * related resource not being available. In this case, set the request 19300512a75bSKeith Busch * aside in zone_list for retrying it later. 
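 *
 * The status originates on the driver side; a hedged sketch (not part of
 * this file, my_zone_write_locked() is a made-up helper) of a zoned
 * driver's ->queue_rq():
 *
 *	if (req_op(rq) == REQ_OP_WRITE &&
 *	    my_zone_write_locked(dev, blk_rq_pos(rq)))
 *		return BLK_STS_ZONE_RESOURCE;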
19310512a75bSKeith Busch */ 19320512a75bSKeith Busch list_add(&rq->queuelist, zone_list); 19330512a75bSKeith Busch __blk_mq_requeue_request(rq); 19340512a75bSKeith Busch } 19350512a75bSKeith Busch 193675383524SMing Lei enum prep_dispatch { 193775383524SMing Lei PREP_DISPATCH_OK, 193875383524SMing Lei PREP_DISPATCH_NO_TAG, 193975383524SMing Lei PREP_DISPATCH_NO_BUDGET, 194075383524SMing Lei }; 194175383524SMing Lei 194275383524SMing Lei static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq, 194375383524SMing Lei bool need_budget) 1944f04c3df3SJens Axboe { 194575383524SMing Lei struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 19462a5a24aaSMing Lei int budget_token = -1; 1947f04c3df3SJens Axboe 19482a5a24aaSMing Lei if (need_budget) { 19492a5a24aaSMing Lei budget_token = blk_mq_get_dispatch_budget(rq->q); 19502a5a24aaSMing Lei if (budget_token < 0) { 19515fe56de7SJohn Garry blk_mq_put_driver_tag(rq); 195275383524SMing Lei return PREP_DISPATCH_NO_BUDGET; 19535fe56de7SJohn Garry } 19542a5a24aaSMing Lei blk_mq_set_rq_budget_token(rq, budget_token); 19552a5a24aaSMing Lei } 19560bca799bSMing Lei 19578ab6bb9eSMing Lei if (!blk_mq_get_driver_tag(rq)) { 19583c782d67SJens Axboe /* 1959da55f2ccSOmar Sandoval * The initial allocation attempt failed, so we need to 1960eb619fdbSJens Axboe * rerun the hardware queue when a tag is freed. The 1961eb619fdbSJens Axboe * waitqueue takes care of that. If the queue is run 1962eb619fdbSJens Axboe * before we add this entry back on the dispatch list, 1963eb619fdbSJens Axboe * we'll re-run it below. 19643c782d67SJens Axboe */ 19652278d69fSMing Lei if (!blk_mq_mark_tag_wait(hctx, rq)) { 1966f906a6a0SJens Axboe /* 19671fd40b5eSMing Lei * All budgets not got from this function will be put 19681fd40b5eSMing Lei * together during handling partial dispatch 1969f906a6a0SJens Axboe */ 19701fd40b5eSMing Lei if (need_budget) 19712a5a24aaSMing Lei blk_mq_put_dispatch_budget(rq->q, budget_token); 197275383524SMing Lei return PREP_DISPATCH_NO_TAG; 197375383524SMing Lei } 197475383524SMing Lei } 197575383524SMing Lei 197675383524SMing Lei return PREP_DISPATCH_OK; 197775383524SMing Lei } 197875383524SMing Lei 19791fd40b5eSMing Lei /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */ 19801fd40b5eSMing Lei static void blk_mq_release_budgets(struct request_queue *q, 19812a5a24aaSMing Lei struct list_head *list) 19821fd40b5eSMing Lei { 19832a5a24aaSMing Lei struct request *rq; 19841fd40b5eSMing Lei 19852a5a24aaSMing Lei list_for_each_entry(rq, list, queuelist) { 19862a5a24aaSMing Lei int budget_token = blk_mq_get_rq_budget_token(rq); 19872a5a24aaSMing Lei 19882a5a24aaSMing Lei if (budget_token >= 0) 19892a5a24aaSMing Lei blk_mq_put_dispatch_budget(q, budget_token); 19902a5a24aaSMing Lei } 19911fd40b5eSMing Lei } 19921fd40b5eSMing Lei 19931429d7c9SJens Axboe /* 199434c9f547SKemeng Shi * blk_mq_commit_rqs will notify driver using bd->last that there is no 199534c9f547SKemeng Shi * more requests. 
(See comment in struct blk_mq_ops for commit_rqs for 199634c9f547SKemeng Shi * details) 199734c9f547SKemeng Shi * Attention, we should explicitly call this in unusual cases: 199834c9f547SKemeng Shi * 1) did not queue everything initially scheduled to queue 199934c9f547SKemeng Shi * 2) the last attempt to queue a request failed 200034c9f547SKemeng Shi */ 200134c9f547SKemeng Shi static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued, 200234c9f547SKemeng Shi bool from_schedule) 200334c9f547SKemeng Shi { 200434c9f547SKemeng Shi if (hctx->queue->mq_ops->commit_rqs && queued) { 200534c9f547SKemeng Shi trace_block_unplug(hctx->queue, queued, !from_schedule); 200634c9f547SKemeng Shi hctx->queue->mq_ops->commit_rqs(hctx); 200734c9f547SKemeng Shi } 200834c9f547SKemeng Shi } 200934c9f547SKemeng Shi 201034c9f547SKemeng Shi /* 20111429d7c9SJens Axboe * Returns true if we did some work AND can potentially do more. 20121429d7c9SJens Axboe */ 2013445874e8SMing Lei bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, 20141fd40b5eSMing Lei unsigned int nr_budgets) 20151429d7c9SJens Axboe { 201675383524SMing Lei enum prep_dispatch prep; 2017445874e8SMing Lei struct request_queue *q = hctx->queue; 2018f1ce99f7SKemeng Shi struct request *rq; 20194ea58fe4SKemeng Shi int queued; 2020703fd1c0SJens Axboe blk_status_t ret = BLK_STS_OK; 2021703fd1c0SJens Axboe LIST_HEAD(zone_list); 20229586e67bSNaohiro Aota bool needs_resource = false; 20231429d7c9SJens Axboe 20241429d7c9SJens Axboe if (list_empty(list)) 2025f04c3df3SJens Axboe return false; 2026f04c3df3SJens Axboe 2027f04c3df3SJens Axboe /* 2028f04c3df3SJens Axboe * Now process all the entries, sending them to the driver. 2029f04c3df3SJens Axboe */ 20304ea58fe4SKemeng Shi queued = 0; 2031f04c3df3SJens Axboe do { 2032f04c3df3SJens Axboe struct blk_mq_queue_data bd; 2033f04c3df3SJens Axboe 2034f04c3df3SJens Axboe rq = list_first_entry(list, struct request, queuelist); 2035f04c3df3SJens Axboe 2036445874e8SMing Lei WARN_ON_ONCE(hctx != rq->mq_hctx); 20371fd40b5eSMing Lei prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets); 203875383524SMing Lei if (prep != PREP_DISPATCH_OK) 2039bd166ef1SJens Axboe break; 2040de148297SMing Lei 2041f04c3df3SJens Axboe list_del_init(&rq->queuelist); 2042f04c3df3SJens Axboe 2043f04c3df3SJens Axboe bd.rq = rq; 2044f1ce99f7SKemeng Shi bd.last = list_empty(list); 2045f04c3df3SJens Axboe 20461fd40b5eSMing Lei /* 20471fd40b5eSMing Lei * once the request is queued to lld, no need to cover the 20481fd40b5eSMing Lei * budget any more 20491fd40b5eSMing Lei */ 20501fd40b5eSMing Lei if (nr_budgets) 20511fd40b5eSMing Lei nr_budgets--; 2052f04c3df3SJens Axboe ret = q->mq_ops->queue_rq(hctx, &bd); 20537bf13729SMing Lei switch (ret) { 20547bf13729SMing Lei case BLK_STS_OK: 20557bf13729SMing Lei queued++; 2056f04c3df3SJens Axboe break; 20577bf13729SMing Lei case BLK_STS_RESOURCE: 20589586e67bSNaohiro Aota needs_resource = true; 20599586e67bSNaohiro Aota fallthrough; 20607bf13729SMing Lei case BLK_STS_DEV_RESOURCE: 20617bf13729SMing Lei blk_mq_handle_dev_resource(rq, list); 20627bf13729SMing Lei goto out; 20637bf13729SMing Lei case BLK_STS_ZONE_RESOURCE: 20640512a75bSKeith Busch /* 20650512a75bSKeith Busch * Move the request to zone_list and keep going through 20660512a75bSKeith Busch * the dispatch list to find more requests the drive can 20670512a75bSKeith Busch * accept. 
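 *
 * For orientation (a summary of the surrounding switch, not normative
 * documentation), ->queue_rq() return values are handled as:
 *	BLK_STS_OK            - counted as queued, keep dispatching
 *	BLK_STS_RESOURCE      - requeue locally, remember that a resource
 *	                        was missing, stop the loop
 *	BLK_STS_DEV_RESOURCE  - requeue locally, stop the loop
 *	BLK_STS_ZONE_RESOURCE - park on zone_list, keep dispatching
 *	anything else         - fail the request with that status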
20680512a75bSKeith Busch */ 20690512a75bSKeith Busch blk_mq_handle_zone_resource(rq, &zone_list); 20709586e67bSNaohiro Aota needs_resource = true; 20710512a75bSKeith Busch break; 20727bf13729SMing Lei default: 2073e21ee5a6SHannes Reinecke blk_mq_end_request(rq, ret); 2074fc17b653SChristoph Hellwig } 207581380ca1SOmar Sandoval } while (!list_empty(list)); 20767bf13729SMing Lei out: 20770512a75bSKeith Busch if (!list_empty(&zone_list)) 20780512a75bSKeith Busch list_splice_tail_init(&zone_list, list); 20790512a75bSKeith Busch 2080632bfb63Syangerkun /* If we didn't flush the entire list, we could have told the driver 2081632bfb63Syangerkun * there was more coming, but that turned out to be a lie. 2082632bfb63Syangerkun */ 2083e4ef2e05SKemeng Shi if (!list_empty(list) || ret != BLK_STS_OK) 2084e4ef2e05SKemeng Shi blk_mq_commit_rqs(hctx, queued, false); 2085e4ef2e05SKemeng Shi 2086f04c3df3SJens Axboe /* 2087f04c3df3SJens Axboe * Any items that need requeuing? Stuff them into hctx->dispatch, 2088f04c3df3SJens Axboe * that is where we will continue on next queue run. 2089f04c3df3SJens Axboe */ 2090f04c3df3SJens Axboe if (!list_empty(list)) { 209186ff7c2aSMing Lei bool needs_restart; 209275383524SMing Lei /* For non-shared tags, the RESTART check will suffice */ 209375383524SMing Lei bool no_tag = prep == PREP_DISPATCH_NO_TAG && 209447df9ce9SKemeng Shi ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || 209547df9ce9SKemeng Shi blk_mq_is_shared_tags(hctx->flags)); 209686ff7c2aSMing Lei 20972a5a24aaSMing Lei if (nr_budgets) 20982a5a24aaSMing Lei blk_mq_release_budgets(q, list); 2099f04c3df3SJens Axboe 2100f04c3df3SJens Axboe spin_lock(&hctx->lock); 210101e99aecSMing Lei list_splice_tail_init(list, &hctx->dispatch); 2102f04c3df3SJens Axboe spin_unlock(&hctx->lock); 2103f04c3df3SJens Axboe 2104f04c3df3SJens Axboe /* 2105d7d8535fSMing Lei * Order adding requests to hctx->dispatch and checking 2106d7d8535fSMing Lei * SCHED_RESTART flag. The pair of this smp_mb() is the one 2107d7d8535fSMing Lei * in blk_mq_sched_restart(). Avoid restart code path to 2108d7d8535fSMing Lei * miss the new added requests to hctx->dispatch, meantime 2109d7d8535fSMing Lei * SCHED_RESTART is observed here. 2110d7d8535fSMing Lei */ 2111d7d8535fSMing Lei smp_mb(); 2112d7d8535fSMing Lei 2113d7d8535fSMing Lei /* 2114710c785fSBart Van Assche * If SCHED_RESTART was set by the caller of this function and 2115710c785fSBart Van Assche * it is no longer set that means that it was cleared by another 2116710c785fSBart Van Assche * thread and hence that a queue rerun is needed. 2117f04c3df3SJens Axboe * 2118eb619fdbSJens Axboe * If 'no_tag' is set, that means that we failed getting 2119eb619fdbSJens Axboe * a driver tag with an I/O scheduler attached. If our dispatch 2120eb619fdbSJens Axboe * waitqueue is no longer active, ensure that we run the queue 2121eb619fdbSJens Axboe * AFTER adding our entries back to the list. 2122bd166ef1SJens Axboe * 2123710c785fSBart Van Assche * If no I/O scheduler has been configured it is possible that 2124710c785fSBart Van Assche * the hardware queue got stopped and restarted before requests 2125710c785fSBart Van Assche * were pushed back onto the dispatch list. Rerun the queue to 2126710c785fSBart Van Assche * avoid starvation. Notes: 2127710c785fSBart Van Assche * - blk_mq_run_hw_queue() checks whether or not a queue has 2128710c785fSBart Van Assche * been stopped before rerunning a queue. 
2129710c785fSBart Van Assche * - Some but not all block drivers stop a queue before 2130fc17b653SChristoph Hellwig * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq 2131710c785fSBart Van Assche * and dm-rq. 213286ff7c2aSMing Lei * 213386ff7c2aSMing Lei * If driver returns BLK_STS_RESOURCE and SCHED_RESTART 213486ff7c2aSMing Lei * bit is set, run queue after a delay to avoid IO stalls 2135ab3cee37SDouglas Anderson * that could otherwise occur if the queue is idle. We'll do 21369586e67bSNaohiro Aota * similar if we couldn't get budget or couldn't lock a zone 21379586e67bSNaohiro Aota * and SCHED_RESTART is set. 2138bd166ef1SJens Axboe */ 213986ff7c2aSMing Lei needs_restart = blk_mq_sched_needs_restart(hctx); 21409586e67bSNaohiro Aota if (prep == PREP_DISPATCH_NO_BUDGET) 21419586e67bSNaohiro Aota needs_resource = true; 214286ff7c2aSMing Lei if (!needs_restart || 2143eb619fdbSJens Axboe (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) 2144f04c3df3SJens Axboe blk_mq_run_hw_queue(hctx, true); 21456d5e8d21SMiaohe Lin else if (needs_resource) 214686ff7c2aSMing Lei blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); 21471f57f8d4SJens Axboe 21486e768717SMing Lei blk_mq_update_dispatch_busy(hctx, true); 21491f57f8d4SJens Axboe return false; 21504ea58fe4SKemeng Shi } 2151f04c3df3SJens Axboe 21524ea58fe4SKemeng Shi blk_mq_update_dispatch_busy(hctx, false); 21534ea58fe4SKemeng Shi return true; 2154f04c3df3SJens Axboe } 2155f04c3df3SJens Axboe 2156f82ddf19SMing Lei static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) 2157f82ddf19SMing Lei { 2158f82ddf19SMing Lei int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); 2159f82ddf19SMing Lei 2160f82ddf19SMing Lei if (cpu >= nr_cpu_ids) 2161f82ddf19SMing Lei cpu = cpumask_first(hctx->cpumask); 2162f82ddf19SMing Lei return cpu; 2163f82ddf19SMing Lei } 2164f82ddf19SMing Lei 2165506e931fSJens Axboe /* 2166506e931fSJens Axboe * It'd be great if the workqueue API had a way to pass 2167506e931fSJens Axboe * in a mask and had some smarts for more clever placement. 2168506e931fSJens Axboe * For now we just round-robin here, switching for every 2169506e931fSJens Axboe * BLK_MQ_CPU_WORK_BATCH queued items. 2170506e931fSJens Axboe */ 2171506e931fSJens Axboe static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) 2172506e931fSJens Axboe { 21737bed4595SMing Lei bool tried = false; 2174476f8c98SMing Lei int next_cpu = hctx->next_cpu; 21757bed4595SMing Lei 2176b657d7e6SChristoph Hellwig if (hctx->queue->nr_hw_queues == 1) 2177b657d7e6SChristoph Hellwig return WORK_CPU_UNBOUND; 2178506e931fSJens Axboe 2179506e931fSJens Axboe if (--hctx->next_cpu_batch <= 0) { 21807bed4595SMing Lei select_cpu: 2181476f8c98SMing Lei next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, 218220e4d813SChristoph Hellwig cpu_online_mask); 2183506e931fSJens Axboe if (next_cpu >= nr_cpu_ids) 2184f82ddf19SMing Lei next_cpu = blk_mq_first_mapped_cpu(hctx); 2185506e931fSJens Axboe hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; 2186506e931fSJens Axboe } 2187506e931fSJens Axboe 21887bed4595SMing Lei /* 21897bed4595SMing Lei * Do unbound schedule if we can't find a online CPU for this hctx, 21907bed4595SMing Lei * and it should only happen in the path of handling CPU DEAD. 
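 *
 * Worked example (illustrative only): with hctx->cpumask spanning CPUs
 * 2-3 and both online, run_work is queued on CPU 2 for
 * BLK_MQ_CPU_WORK_BATCH consecutive runs, then on CPU 3 for the next
 * batch, and so on; if the selected CPU went offline in between, the
 * selection below retries once and otherwise falls back to
 * WORK_CPU_UNBOUND.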
21917bed4595SMing Lei */ 2192476f8c98SMing Lei if (!cpu_online(next_cpu)) { 21937bed4595SMing Lei if (!tried) { 21947bed4595SMing Lei tried = true; 21957bed4595SMing Lei goto select_cpu; 21967bed4595SMing Lei } 21977bed4595SMing Lei 21987bed4595SMing Lei /* 21997bed4595SMing Lei * Make sure to re-select CPU next time once after CPUs 22007bed4595SMing Lei * in hctx->cpumask become online again. 22017bed4595SMing Lei */ 2202476f8c98SMing Lei hctx->next_cpu = next_cpu; 22037bed4595SMing Lei hctx->next_cpu_batch = 1; 22047bed4595SMing Lei return WORK_CPU_UNBOUND; 22057bed4595SMing Lei } 2206476f8c98SMing Lei 2207476f8c98SMing Lei hctx->next_cpu = next_cpu; 2208476f8c98SMing Lei return next_cpu; 2209b657d7e6SChristoph Hellwig } 2210b657d7e6SChristoph Hellwig 2211105663f7SAndré Almeida /** 2212105663f7SAndré Almeida * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. 2213105663f7SAndré Almeida * @hctx: Pointer to the hardware queue to run. 2214fa94ba8aSMinwoo Im * @msecs: Milliseconds of delay to wait before running the queue. 2215105663f7SAndré Almeida * 2216105663f7SAndré Almeida * Run a hardware queue asynchronously with a delay of @msecs. 2217105663f7SAndré Almeida */ 22187587a5aeSBart Van Assche void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) 22197587a5aeSBart Van Assche { 22201aa8d875SChristoph Hellwig if (unlikely(blk_mq_hctx_stopped(hctx))) 22211aa8d875SChristoph Hellwig return; 22221aa8d875SChristoph Hellwig kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, 22231aa8d875SChristoph Hellwig msecs_to_jiffies(msecs)); 22247587a5aeSBart Van Assche } 22257587a5aeSBart Van Assche EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); 22267587a5aeSBart Van Assche 2227105663f7SAndré Almeida /** 2228105663f7SAndré Almeida * blk_mq_run_hw_queue - Start to run a hardware queue. 2229105663f7SAndré Almeida * @hctx: Pointer to the hardware queue to run. 2230105663f7SAndré Almeida * @async: If we want to run the queue asynchronously. 2231105663f7SAndré Almeida * 2232105663f7SAndré Almeida * Check if the request queue is not in a quiesced state and if there are 2233105663f7SAndré Almeida * pending requests to be sent. If this is true, run the queue to send requests 2234105663f7SAndré Almeida * to hardware. 2235105663f7SAndré Almeida */ 2236626fb735SJohn Garry void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) 22377587a5aeSBart Van Assche { 223824f5a90fSMing Lei bool need_run; 223924f5a90fSMing Lei 224024f5a90fSMing Lei /* 22414d5bba5bSChristoph Hellwig * We can't run the queue inline with interrupts disabled. 22424d5bba5bSChristoph Hellwig */ 22434d5bba5bSChristoph Hellwig WARN_ON_ONCE(!async && in_interrupt()); 22444d5bba5bSChristoph Hellwig 22454d5bba5bSChristoph Hellwig /* 224624f5a90fSMing Lei * When queue is quiesced, we may be switching io scheduler, or 224724f5a90fSMing Lei * updating nr_hw_queues, or other things, and we can't run queue 224824f5a90fSMing Lei * any more, even __blk_mq_hctx_has_pending() can't be called safely. 224924f5a90fSMing Lei * 225024f5a90fSMing Lei * And queue will be rerun in blk_mq_unquiesce_queue() if it is 225124f5a90fSMing Lei * quiesced. 
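 *
 * A hedged sketch (not part of this file) of the quiesce pattern the
 * comment above refers to, as a driver might use it around a
 * reconfiguration step (my_reconfigure_hw() is a made-up name):
 *
 *	blk_mq_quiesce_queue(q);	// wait for in-flight ->queue_rq()
 *	my_reconfigure_hw(dev);
 *	blk_mq_unquiesce_queue(q);	// also reruns the hw queues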
225224f5a90fSMing Lei */ 225341adf531SMing Lei __blk_mq_run_dispatch_ops(hctx->queue, false, 225424f5a90fSMing Lei need_run = !blk_queue_quiesced(hctx->queue) && 22552a904d00SMing Lei blk_mq_hctx_has_pending(hctx)); 225624f5a90fSMing Lei 22571aa8d875SChristoph Hellwig if (!need_run) 22581aa8d875SChristoph Hellwig return; 22591aa8d875SChristoph Hellwig 22601aa8d875SChristoph Hellwig if (async || (hctx->flags & BLK_MQ_F_BLOCKING) || 22611aa8d875SChristoph Hellwig !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { 22621aa8d875SChristoph Hellwig blk_mq_delay_run_hw_queue(hctx, 0); 22631aa8d875SChristoph Hellwig return; 22641aa8d875SChristoph Hellwig } 22651aa8d875SChristoph Hellwig 22664d5bba5bSChristoph Hellwig blk_mq_run_dispatch_ops(hctx->queue, 22674d5bba5bSChristoph Hellwig blk_mq_sched_dispatch_requests(hctx)); 2268320ae51fSJens Axboe } 22695b727272SOmar Sandoval EXPORT_SYMBOL(blk_mq_run_hw_queue); 2270320ae51fSJens Axboe 2271b6e68ee8SJan Kara /* 2272b6e68ee8SJan Kara * Return prefered queue to dispatch from (if any) for non-mq aware IO 2273b6e68ee8SJan Kara * scheduler. 2274b6e68ee8SJan Kara */ 2275b6e68ee8SJan Kara static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) 2276b6e68ee8SJan Kara { 22775d05426eSMing Lei struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); 2278b6e68ee8SJan Kara /* 2279b6e68ee8SJan Kara * If the IO scheduler does not respect hardware queues when 2280b6e68ee8SJan Kara * dispatching, we just don't bother with multiple HW queues and 2281b6e68ee8SJan Kara * dispatch from hctx for the current CPU since running multiple queues 2282b6e68ee8SJan Kara * just causes lock contention inside the scheduler and pointless cache 2283b6e68ee8SJan Kara * bouncing. 2284b6e68ee8SJan Kara */ 228551ab80f0SBart Van Assche struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; 22865d05426eSMing Lei 2287b6e68ee8SJan Kara if (!blk_mq_hctx_stopped(hctx)) 2288b6e68ee8SJan Kara return hctx; 2289b6e68ee8SJan Kara return NULL; 2290b6e68ee8SJan Kara } 2291b6e68ee8SJan Kara 2292105663f7SAndré Almeida /** 229324f7bb88SMauro Carvalho Chehab * blk_mq_run_hw_queues - Run all hardware queues in a request queue. 2294105663f7SAndré Almeida * @q: Pointer to the request queue to run. 2295105663f7SAndré Almeida * @async: If we want to run the queue asynchronously. 2296105663f7SAndré Almeida */ 2297b94ec296SMike Snitzer void blk_mq_run_hw_queues(struct request_queue *q, bool async) 2298320ae51fSJens Axboe { 2299b6e68ee8SJan Kara struct blk_mq_hw_ctx *hctx, *sq_hctx; 23004f481208SMing Lei unsigned long i; 2301320ae51fSJens Axboe 2302b6e68ee8SJan Kara sq_hctx = NULL; 23034d337cebSMing Lei if (blk_queue_sq_sched(q)) 2304b6e68ee8SJan Kara sq_hctx = blk_mq_get_sq_hctx(q); 2305320ae51fSJens Axboe queue_for_each_hw_ctx(q, hctx, i) { 230679f720a7SJens Axboe if (blk_mq_hctx_stopped(hctx)) 2307320ae51fSJens Axboe continue; 2308b6e68ee8SJan Kara /* 2309b6e68ee8SJan Kara * Dispatch from this hctx either if there's no hctx preferred 2310b6e68ee8SJan Kara * by IO scheduler or if it has requests that bypass the 2311b6e68ee8SJan Kara * scheduler. 
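 *
 * Note that sq_hctx is only ever set when blk_queue_sq_sched(q) is true,
 * which a single-queue-minded elevator opts into from its init path; a
 * minimal sketch (an assumption about such schedulers, not copied from
 * one):
 *
 *	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);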
2312b6e68ee8SJan Kara */ 2313b6e68ee8SJan Kara if (!sq_hctx || sq_hctx == hctx || 2314b6e68ee8SJan Kara !list_empty_careful(&hctx->dispatch)) 2315b94ec296SMike Snitzer blk_mq_run_hw_queue(hctx, async); 2316320ae51fSJens Axboe } 2317320ae51fSJens Axboe } 2318b94ec296SMike Snitzer EXPORT_SYMBOL(blk_mq_run_hw_queues); 2319320ae51fSJens Axboe 2320fd001443SBart Van Assche /** 2321b9151e7bSDouglas Anderson * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. 2322b9151e7bSDouglas Anderson * @q: Pointer to the request queue to run. 2323fa94ba8aSMinwoo Im * @msecs: Milliseconds of delay to wait before running the queues. 2324b9151e7bSDouglas Anderson */ 2325b9151e7bSDouglas Anderson void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) 2326b9151e7bSDouglas Anderson { 2327b6e68ee8SJan Kara struct blk_mq_hw_ctx *hctx, *sq_hctx; 23284f481208SMing Lei unsigned long i; 2329b9151e7bSDouglas Anderson 2330b6e68ee8SJan Kara sq_hctx = NULL; 23314d337cebSMing Lei if (blk_queue_sq_sched(q)) 2332b6e68ee8SJan Kara sq_hctx = blk_mq_get_sq_hctx(q); 2333b9151e7bSDouglas Anderson queue_for_each_hw_ctx(q, hctx, i) { 2334b9151e7bSDouglas Anderson if (blk_mq_hctx_stopped(hctx)) 2335b9151e7bSDouglas Anderson continue; 2336b6e68ee8SJan Kara /* 23378f5fea65SDavid Jeffery * If there is already a run_work pending, leave the 23388f5fea65SDavid Jeffery * pending delay untouched. Otherwise, a hctx can stall 23398f5fea65SDavid Jeffery * if another hctx is re-delaying the other's work 23408f5fea65SDavid Jeffery * before the work executes. 23418f5fea65SDavid Jeffery */ 23428f5fea65SDavid Jeffery if (delayed_work_pending(&hctx->run_work)) 23438f5fea65SDavid Jeffery continue; 23448f5fea65SDavid Jeffery /* 2345b6e68ee8SJan Kara * Dispatch from this hctx either if there's no hctx preferred 2346b6e68ee8SJan Kara * by IO scheduler or if it has requests that bypass the 2347b6e68ee8SJan Kara * scheduler. 2348b6e68ee8SJan Kara */ 2349b6e68ee8SJan Kara if (!sq_hctx || sq_hctx == hctx || 2350b6e68ee8SJan Kara !list_empty_careful(&hctx->dispatch)) 2351b9151e7bSDouglas Anderson blk_mq_delay_run_hw_queue(hctx, msecs); 2352b9151e7bSDouglas Anderson } 2353b9151e7bSDouglas Anderson } 2354b9151e7bSDouglas Anderson EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); 2355b9151e7bSDouglas Anderson 235639a70c76SMing Lei /* 235739a70c76SMing Lei * This function is often used for pausing .queue_rq() by driver when 235839a70c76SMing Lei * there isn't enough resource or some conditions aren't satisfied, and 23594d606219SBart Van Assche * BLK_STS_RESOURCE is usually returned. 236039a70c76SMing Lei * 236139a70c76SMing Lei * We do not guarantee that dispatch can be drained or blocked 236239a70c76SMing Lei * after blk_mq_stop_hw_queue() returns. Please use 236339a70c76SMing Lei * blk_mq_quiesce_queue() for that requirement. 236439a70c76SMing Lei */ 2365320ae51fSJens Axboe void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) 2366320ae51fSJens Axboe { 2367641a9ed6SMing Lei cancel_delayed_work(&hctx->run_work); 2368641a9ed6SMing Lei 2369641a9ed6SMing Lei set_bit(BLK_MQ_S_STOPPED, &hctx->state); 2370320ae51fSJens Axboe } 2371320ae51fSJens Axboe EXPORT_SYMBOL(blk_mq_stop_hw_queue); 2372320ae51fSJens Axboe 237339a70c76SMing Lei /* 237439a70c76SMing Lei * This function is often used for pausing .queue_rq() by driver when 237539a70c76SMing Lei * there isn't enough resource or some conditions aren't satisfied, and 23764d606219SBart Van Assche * BLK_STS_RESOURCE is usually returned. 
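 *
 * A minimal driver-side sketch of that pausing pattern (editor's example;
 * the foo_* names are made up):
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		if (!foo_has_free_slot(hctx->driver_data)) {
 *			blk_mq_stop_hw_queue(hctx);
 *			return BLK_STS_RESOURCE;
 *		}
 *		blk_mq_start_request(bd->rq);
 *		foo_issue_to_hw(hctx->driver_data, bd->rq);
 *		return BLK_STS_OK;
 *	}
 *
 * with blk_mq_start_stopped_hw_queues(q, true) called once resources are
 * available again, e.g. from the driver's completion handler.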
237739a70c76SMing Lei * 237839a70c76SMing Lei * We do not guarantee that dispatch can be drained or blocked 237939a70c76SMing Lei * after blk_mq_stop_hw_queues() returns. Please use 238039a70c76SMing Lei * blk_mq_quiesce_queue() for that requirement. 238139a70c76SMing Lei */ 23822719aa21SJens Axboe void blk_mq_stop_hw_queues(struct request_queue *q) 23832719aa21SJens Axboe { 2384641a9ed6SMing Lei struct blk_mq_hw_ctx *hctx; 23854f481208SMing Lei unsigned long i; 2386641a9ed6SMing Lei 2387641a9ed6SMing Lei queue_for_each_hw_ctx(q, hctx, i) 2388641a9ed6SMing Lei blk_mq_stop_hw_queue(hctx); 2389280d45f6SChristoph Hellwig } 2390280d45f6SChristoph Hellwig EXPORT_SYMBOL(blk_mq_stop_hw_queues); 2391280d45f6SChristoph Hellwig 2392320ae51fSJens Axboe void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) 2393320ae51fSJens Axboe { 2394320ae51fSJens Axboe clear_bit(BLK_MQ_S_STOPPED, &hctx->state); 2395e4043dcfSJens Axboe 23960ffbce80SJens Axboe blk_mq_run_hw_queue(hctx, false); 2397320ae51fSJens Axboe } 2398320ae51fSJens Axboe EXPORT_SYMBOL(blk_mq_start_hw_queue); 2399320ae51fSJens Axboe 24002f268556SChristoph Hellwig void blk_mq_start_hw_queues(struct request_queue *q) 24012f268556SChristoph Hellwig { 24022f268556SChristoph Hellwig struct blk_mq_hw_ctx *hctx; 24034f481208SMing Lei unsigned long i; 24042f268556SChristoph Hellwig 24052f268556SChristoph Hellwig queue_for_each_hw_ctx(q, hctx, i) 24062f268556SChristoph Hellwig blk_mq_start_hw_queue(hctx); 24072f268556SChristoph Hellwig } 24082f268556SChristoph Hellwig EXPORT_SYMBOL(blk_mq_start_hw_queues); 24092f268556SChristoph Hellwig 2410ae911c5eSJens Axboe void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) 2411ae911c5eSJens Axboe { 2412ae911c5eSJens Axboe if (!blk_mq_hctx_stopped(hctx)) 2413ae911c5eSJens Axboe return; 2414ae911c5eSJens Axboe 2415ae911c5eSJens Axboe clear_bit(BLK_MQ_S_STOPPED, &hctx->state); 2416ae911c5eSJens Axboe blk_mq_run_hw_queue(hctx, async); 2417ae911c5eSJens Axboe } 2418ae911c5eSJens Axboe EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue); 2419ae911c5eSJens Axboe 24201b4a3258SChristoph Hellwig void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) 2421320ae51fSJens Axboe { 2422320ae51fSJens Axboe struct blk_mq_hw_ctx *hctx; 24234f481208SMing Lei unsigned long i; 2424320ae51fSJens Axboe 2425ae911c5eSJens Axboe queue_for_each_hw_ctx(q, hctx, i) 2426ae911c5eSJens Axboe blk_mq_start_stopped_hw_queue(hctx, async); 2427320ae51fSJens Axboe } 2428320ae51fSJens Axboe EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); 2429320ae51fSJens Axboe 243070f4db63SChristoph Hellwig static void blk_mq_run_work_fn(struct work_struct *work) 2431320ae51fSJens Axboe { 2432c20a1a2cSChristoph Hellwig struct blk_mq_hw_ctx *hctx = 2433c20a1a2cSChristoph Hellwig container_of(work, struct blk_mq_hw_ctx, run_work.work); 2434320ae51fSJens Axboe 24354d5bba5bSChristoph Hellwig blk_mq_run_dispatch_ops(hctx->queue, 24364d5bba5bSChristoph Hellwig blk_mq_sched_dispatch_requests(hctx)); 2437320ae51fSJens Axboe } 2438320ae51fSJens Axboe 2439105663f7SAndré Almeida /** 2440105663f7SAndré Almeida * blk_mq_request_bypass_insert - Insert a request at dispatch list. 2441105663f7SAndré Almeida * @rq: Pointer to request to be inserted. 24422b597613SChristoph Hellwig * @flags: BLK_MQ_INSERT_* 2443105663f7SAndré Almeida * 2444157f377bSJens Axboe * Should only be used carefully, when the caller knows we want to 2445157f377bSJens Axboe * bypass a potential IO scheduler on the target device. 
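 *
 * Within this file it is used, for instance, for passthrough requests,
 * for requeueing requests whose direct issue failed with
 * BLK_STS_RESOURCE, and for flush requests, the latter at the head of
 * the dispatch list:
 *
 *	blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD);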
2446157f377bSJens Axboe */ 2447360f2648SChristoph Hellwig static void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags) 2448157f377bSJens Axboe { 2449ea4f995eSJens Axboe struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 2450157f377bSJens Axboe 2451157f377bSJens Axboe spin_lock(&hctx->lock); 24522b597613SChristoph Hellwig if (flags & BLK_MQ_INSERT_AT_HEAD) 245301e99aecSMing Lei list_add(&rq->queuelist, &hctx->dispatch); 245401e99aecSMing Lei else 2455157f377bSJens Axboe list_add_tail(&rq->queuelist, &hctx->dispatch); 2456157f377bSJens Axboe spin_unlock(&hctx->lock); 2457157f377bSJens Axboe } 2458157f377bSJens Axboe 245905a93117SChristoph Hellwig static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, 246005a93117SChristoph Hellwig struct blk_mq_ctx *ctx, struct list_head *list, 246105a93117SChristoph Hellwig bool run_queue_async) 2462320ae51fSJens Axboe { 24633f0cedc7SMing Lei struct request *rq; 2464c16d6b5aSMing Lei enum hctx_type type = hctx->type; 24653f0cedc7SMing Lei 2466320ae51fSJens Axboe /* 246794aa228cSChristoph Hellwig * Try to issue requests directly if the hw queue isn't busy to save an 246894aa228cSChristoph Hellwig * extra enqueue & dequeue to the sw queue. 246994aa228cSChristoph Hellwig */ 247094aa228cSChristoph Hellwig if (!hctx->dispatch_busy && !run_queue_async) { 247194aa228cSChristoph Hellwig blk_mq_run_dispatch_ops(hctx->queue, 247294aa228cSChristoph Hellwig blk_mq_try_issue_list_directly(hctx, list)); 247394aa228cSChristoph Hellwig if (list_empty(list)) 247494aa228cSChristoph Hellwig goto out; 247594aa228cSChristoph Hellwig } 247694aa228cSChristoph Hellwig 247794aa228cSChristoph Hellwig /* 2478320ae51fSJens Axboe * preemption doesn't flush plug list, so it's possible ctx->cpu is 2479320ae51fSJens Axboe * offline now 2480320ae51fSJens Axboe */ 24813f0cedc7SMing Lei list_for_each_entry(rq, list, queuelist) { 2482e57690feSJens Axboe BUG_ON(rq->mq_ctx != ctx); 2483a54895faSChristoph Hellwig trace_block_rq_insert(rq); 2484320ae51fSJens Axboe } 24853f0cedc7SMing Lei 24863f0cedc7SMing Lei spin_lock(&ctx->lock); 2487c16d6b5aSMing Lei list_splice_tail_init(list, &ctx->rq_lists[type]); 2488cfd0c552SMing Lei blk_mq_hctx_mark_pending(hctx, ctx); 2489320ae51fSJens Axboe spin_unlock(&ctx->lock); 249094aa228cSChristoph Hellwig out: 249194aa228cSChristoph Hellwig blk_mq_run_hw_queue(hctx, run_queue_async); 2492320ae51fSJens Axboe } 2493320ae51fSJens Axboe 2494710fa378SChristoph Hellwig static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) 24952bd215dfSChristoph Hellwig { 24962bd215dfSChristoph Hellwig struct request_queue *q = rq->q; 24972bd215dfSChristoph Hellwig struct blk_mq_ctx *ctx = rq->mq_ctx; 24982bd215dfSChristoph Hellwig struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 24992bd215dfSChristoph Hellwig 250053548d2aSChristoph Hellwig if (blk_rq_is_passthrough(rq)) { 250153548d2aSChristoph Hellwig /* 250253548d2aSChristoph Hellwig * Passthrough request have to be added to hctx->dispatch 250353548d2aSChristoph Hellwig * directly. The device may be in a situation where it can't 250453548d2aSChristoph Hellwig * handle FS request, and always returns BLK_STS_RESOURCE for 250553548d2aSChristoph Hellwig * them, which gets them added to hctx->dispatch. 
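 *
 * (Editor's aside: "passthrough" means requests built directly by a
 * driver or ioctl path rather than from file system bios, e.g.
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 *	... fill in the command payload ...
 *	blk_execute_rq_nowait(rq, false);
 *
 * as used for SG_IO or NVMe passthrough commands.)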
250653548d2aSChristoph Hellwig * 250753548d2aSChristoph Hellwig * If a passthrough request is required to unblock the queues, 250853548d2aSChristoph Hellwig * and it is added to the scheduler queue, there is no chance to 250953548d2aSChristoph Hellwig * dispatch it given we prioritize requests in hctx->dispatch. 251053548d2aSChristoph Hellwig */ 25112b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, flags); 2512be4c4278SBart Van Assche } else if (req_op(rq) == REQ_OP_FLUSH) { 25132bd215dfSChristoph Hellwig /* 25142bd215dfSChristoph Hellwig * Firstly normal IO request is inserted to scheduler queue or 25152bd215dfSChristoph Hellwig * sw queue, meantime we add flush request to dispatch queue( 25162bd215dfSChristoph Hellwig * hctx->dispatch) directly and there is at most one in-flight 25172bd215dfSChristoph Hellwig * flush request for each hw queue, so it doesn't matter to add 25182bd215dfSChristoph Hellwig * flush request to tail or front of the dispatch queue. 25192bd215dfSChristoph Hellwig * 25202bd215dfSChristoph Hellwig * Secondly in case of NCQ, flush request belongs to non-NCQ 25212bd215dfSChristoph Hellwig * command, and queueing it will fail when there is any 25222bd215dfSChristoph Hellwig * in-flight normal IO request(NCQ command). When adding flush 25232bd215dfSChristoph Hellwig * rq to the front of hctx->dispatch, it is easier to introduce 25242bd215dfSChristoph Hellwig * extra time to flush rq's latency because of S_SCHED_RESTART 25252bd215dfSChristoph Hellwig * compared with adding to the tail of dispatch queue, then 25262bd215dfSChristoph Hellwig * chance of flush merge is increased, and less flush requests 25272bd215dfSChristoph Hellwig * will be issued to controller. It is observed that ~10% time 25282bd215dfSChristoph Hellwig * is saved in blktests block/004 on disk attached to AHCI/NCQ 25292bd215dfSChristoph Hellwig * drive when adding flush rq to the front of hctx->dispatch. 25302bd215dfSChristoph Hellwig * 25312bd215dfSChristoph Hellwig * Simply queue flush rq to the front of hctx->dispatch so that 25322bd215dfSChristoph Hellwig * intensive flush workloads can benefit in case of NCQ HW. 
25332bd215dfSChristoph Hellwig */ 25342b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD); 253553548d2aSChristoph Hellwig } else if (q->elevator) { 25362bd215dfSChristoph Hellwig LIST_HEAD(list); 25372bd215dfSChristoph Hellwig 253853548d2aSChristoph Hellwig WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); 253953548d2aSChristoph Hellwig 25402bd215dfSChristoph Hellwig list_add(&rq->queuelist, &list); 254193fffe16SChristoph Hellwig q->elevator->type->ops.insert_requests(hctx, &list, flags); 25422bd215dfSChristoph Hellwig } else { 25434ec5c055SChristoph Hellwig trace_block_rq_insert(rq); 25444ec5c055SChristoph Hellwig 25452bd215dfSChristoph Hellwig spin_lock(&ctx->lock); 2546710fa378SChristoph Hellwig if (flags & BLK_MQ_INSERT_AT_HEAD) 25474ec5c055SChristoph Hellwig list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); 25484ec5c055SChristoph Hellwig else 25494ec5c055SChristoph Hellwig list_add_tail(&rq->queuelist, 25504ec5c055SChristoph Hellwig &ctx->rq_lists[hctx->type]); 2551a88db1e0SChristoph Hellwig blk_mq_hctx_mark_pending(hctx, ctx); 25522bd215dfSChristoph Hellwig spin_unlock(&ctx->lock); 25532bd215dfSChristoph Hellwig } 2554320ae51fSJens Axboe } 2555320ae51fSJens Axboe 255614ccb66bSChristoph Hellwig static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, 255714ccb66bSChristoph Hellwig unsigned int nr_segs) 2558320ae51fSJens Axboe { 255993f221aeSEric Biggers int err; 256093f221aeSEric Biggers 2561f924cddeSChristoph Hellwig if (bio->bi_opf & REQ_RAHEAD) 2562f924cddeSChristoph Hellwig rq->cmd_flags |= REQ_FAILFAST_MASK; 2563f924cddeSChristoph Hellwig 2564f924cddeSChristoph Hellwig rq->__sector = bio->bi_iter.bi_sector; 256514ccb66bSChristoph Hellwig blk_rq_bio_prep(rq, bio, nr_segs); 256693f221aeSEric Biggers 256793f221aeSEric Biggers /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */ 256893f221aeSEric Biggers err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO); 256993f221aeSEric Biggers WARN_ON_ONCE(err); 25704b570521SJens Axboe 2571b5af37abSKonstantin Khlebnikov blk_account_io_start(rq); 2572320ae51fSJens Axboe } 2573320ae51fSJens Axboe 25740f95549cSMike Snitzer static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, 25753e08773cSChristoph Hellwig struct request *rq, bool last) 2576f984df1fSShaohua Li { 2577f984df1fSShaohua Li struct request_queue *q = rq->q; 2578f984df1fSShaohua Li struct blk_mq_queue_data bd = { 2579f984df1fSShaohua Li .rq = rq, 2580be94f058SJens Axboe .last = last, 2581f984df1fSShaohua Li }; 2582f06345adSJens Axboe blk_status_t ret; 25830f95549cSMike Snitzer 25840f95549cSMike Snitzer /* 25850f95549cSMike Snitzer * For OK queue, we are done. For error, caller may kill it. 25860f95549cSMike Snitzer * Any other error (busy), just add it to our list as we 25870f95549cSMike Snitzer * previously would have done. 
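 *
 * Editor's summary of the switch below:
 *
 *	BLK_STS_OK		accepted by the driver; hctx marked not busy
 *	BLK_STS_RESOURCE,
 *	BLK_STS_DEV_RESOURCE	un-prepared via __blk_mq_requeue_request()
 *				and left for the caller to re-insert; hctx
 *				marked busy so later dispatch backs off
 *	anything else		returned to the caller, which normally fails
 *				the request with blk_mq_end_request()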
25880f95549cSMike Snitzer */ 25890f95549cSMike Snitzer ret = q->mq_ops->queue_rq(hctx, &bd); 25900f95549cSMike Snitzer switch (ret) { 25910f95549cSMike Snitzer case BLK_STS_OK: 25926ce3dd6eSMing Lei blk_mq_update_dispatch_busy(hctx, false); 25930f95549cSMike Snitzer break; 25940f95549cSMike Snitzer case BLK_STS_RESOURCE: 259586ff7c2aSMing Lei case BLK_STS_DEV_RESOURCE: 25966ce3dd6eSMing Lei blk_mq_update_dispatch_busy(hctx, true); 25970f95549cSMike Snitzer __blk_mq_requeue_request(rq); 25980f95549cSMike Snitzer break; 25990f95549cSMike Snitzer default: 26006ce3dd6eSMing Lei blk_mq_update_dispatch_busy(hctx, false); 26010f95549cSMike Snitzer break; 26020f95549cSMike Snitzer } 26030f95549cSMike Snitzer 26040f95549cSMike Snitzer return ret; 26050f95549cSMike Snitzer } 26060f95549cSMike Snitzer 26072b71b877SChristoph Hellwig static bool blk_mq_get_budget_and_tag(struct request *rq) 26080f95549cSMike Snitzer { 26092a5a24aaSMing Lei int budget_token; 2610d964f04aSMing Lei 26112b71b877SChristoph Hellwig budget_token = blk_mq_get_dispatch_budget(rq->q); 26122a5a24aaSMing Lei if (budget_token < 0) 26132b71b877SChristoph Hellwig return false; 26142a5a24aaSMing Lei blk_mq_set_rq_budget_token(rq, budget_token); 26158ab6bb9eSMing Lei if (!blk_mq_get_driver_tag(rq)) { 26162b71b877SChristoph Hellwig blk_mq_put_dispatch_budget(rq->q, budget_token); 26172b71b877SChristoph Hellwig return false; 261888022d72SMing Lei } 26192b71b877SChristoph Hellwig return true; 26207f556a44SJianchao Wang } 2621fd9c40f6SBart Van Assche 2622105663f7SAndré Almeida /** 2623105663f7SAndré Almeida * blk_mq_try_issue_directly - Try to send a request directly to device driver. 2624105663f7SAndré Almeida * @hctx: Pointer of the associated hardware queue. 2625105663f7SAndré Almeida * @rq: Pointer to request to be sent. 2626105663f7SAndré Almeida * 2627105663f7SAndré Almeida * If the device has enough resources to accept a new request now, send the 2628105663f7SAndré Almeida * request directly to device driver. Else, insert at hctx->dispatch queue, so 2629105663f7SAndré Almeida * we can try send it another time in the future. Requests inserted at this 2630105663f7SAndré Almeida * queue have higher priority. 
2631105663f7SAndré Almeida */ 2632fd9c40f6SBart Van Assche static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, 26333e08773cSChristoph Hellwig struct request *rq) 2634fd9c40f6SBart Van Assche { 2635e1f44ac0SChristoph Hellwig blk_status_t ret; 2636fd9c40f6SBart Van Assche 2637e1f44ac0SChristoph Hellwig if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { 2638710fa378SChristoph Hellwig blk_mq_insert_request(rq, 0); 2639e1f44ac0SChristoph Hellwig return; 2640e1f44ac0SChristoph Hellwig } 2641e1f44ac0SChristoph Hellwig 2642dd6216bbSChristoph Hellwig if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { 2643710fa378SChristoph Hellwig blk_mq_insert_request(rq, 0); 2644f0dbe6e8SChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 2645e1f44ac0SChristoph Hellwig return; 2646e1f44ac0SChristoph Hellwig } 2647e1f44ac0SChristoph Hellwig 2648e1f44ac0SChristoph Hellwig ret = __blk_mq_issue_directly(hctx, rq, true); 2649e1f44ac0SChristoph Hellwig switch (ret) { 2650e1f44ac0SChristoph Hellwig case BLK_STS_OK: 2651e1f44ac0SChristoph Hellwig break; 2652e1f44ac0SChristoph Hellwig case BLK_STS_RESOURCE: 2653e1f44ac0SChristoph Hellwig case BLK_STS_DEV_RESOURCE: 26542b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, 0); 26552394395cSChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 2656e1f44ac0SChristoph Hellwig break; 2657e1f44ac0SChristoph Hellwig default: 26587f556a44SJianchao Wang blk_mq_end_request(rq, ret); 2659e1f44ac0SChristoph Hellwig break; 2660e1f44ac0SChristoph Hellwig } 26617f556a44SJianchao Wang } 26627f556a44SJianchao Wang 266306c8c691SChristoph Hellwig static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) 2664fd9c40f6SBart Van Assche { 2665e1f44ac0SChristoph Hellwig struct blk_mq_hw_ctx *hctx = rq->mq_hctx; 2666e1f44ac0SChristoph Hellwig 2667e1f44ac0SChristoph Hellwig if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { 2668710fa378SChristoph Hellwig blk_mq_insert_request(rq, 0); 2669e1f44ac0SChristoph Hellwig return BLK_STS_OK; 2670e1f44ac0SChristoph Hellwig } 2671e1f44ac0SChristoph Hellwig 2672e1f44ac0SChristoph Hellwig if (!blk_mq_get_budget_and_tag(rq)) 2673e1f44ac0SChristoph Hellwig return BLK_STS_RESOURCE; 2674e1f44ac0SChristoph Hellwig return __blk_mq_issue_directly(hctx, rq, last); 26755eb6126eSChristoph Hellwig } 26765eb6126eSChristoph Hellwig 26773e368fb0SKemeng Shi static void blk_mq_plug_issue_direct(struct blk_plug *plug) 2678b84c5b50SChristoph Hellwig { 2679b84c5b50SChristoph Hellwig struct blk_mq_hw_ctx *hctx = NULL; 2680b84c5b50SChristoph Hellwig struct request *rq; 2681b84c5b50SChristoph Hellwig int queued = 0; 26820d617a83SKemeng Shi blk_status_t ret = BLK_STS_OK; 2683b84c5b50SChristoph Hellwig 2684b84c5b50SChristoph Hellwig while ((rq = rq_list_pop(&plug->mq_list))) { 2685b84c5b50SChristoph Hellwig bool last = rq_list_empty(plug->mq_list); 2686b84c5b50SChristoph Hellwig 2687b84c5b50SChristoph Hellwig if (hctx != rq->mq_hctx) { 268834c9f547SKemeng Shi if (hctx) { 268934c9f547SKemeng Shi blk_mq_commit_rqs(hctx, queued, false); 269034c9f547SKemeng Shi queued = 0; 269134c9f547SKemeng Shi } 2692b84c5b50SChristoph Hellwig hctx = rq->mq_hctx; 2693b84c5b50SChristoph Hellwig } 2694b84c5b50SChristoph Hellwig 2695b84c5b50SChristoph Hellwig ret = blk_mq_request_issue_directly(rq, last); 2696b84c5b50SChristoph Hellwig switch (ret) { 2697b84c5b50SChristoph Hellwig case BLK_STS_OK: 2698b84c5b50SChristoph Hellwig queued++; 2699b84c5b50SChristoph Hellwig break; 2700b84c5b50SChristoph Hellwig case 
BLK_STS_RESOURCE: 2701b84c5b50SChristoph Hellwig case BLK_STS_DEV_RESOURCE: 27022b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, 0); 27032394395cSChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 27040d617a83SKemeng Shi goto out; 2705b84c5b50SChristoph Hellwig default: 2706b84c5b50SChristoph Hellwig blk_mq_end_request(rq, ret); 2707b84c5b50SChristoph Hellwig break; 2708b84c5b50SChristoph Hellwig } 2709b84c5b50SChristoph Hellwig } 2710b84c5b50SChristoph Hellwig 27110d617a83SKemeng Shi out: 27120d617a83SKemeng Shi if (ret != BLK_STS_OK) 271334c9f547SKemeng Shi blk_mq_commit_rqs(hctx, queued, false); 2714b84c5b50SChristoph Hellwig } 2715b84c5b50SChristoph Hellwig 2716518579a9SKeith Busch static void __blk_mq_flush_plug_list(struct request_queue *q, 2717518579a9SKeith Busch struct blk_plug *plug) 2718518579a9SKeith Busch { 2719518579a9SKeith Busch if (blk_queue_quiesced(q)) 2720518579a9SKeith Busch return; 2721518579a9SKeith Busch q->mq_ops->queue_rqs(&plug->mq_list); 2722518579a9SKeith Busch } 2723518579a9SKeith Busch 272426fed4acSJens Axboe static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) 272526fed4acSJens Axboe { 272626fed4acSJens Axboe struct blk_mq_hw_ctx *this_hctx = NULL; 272726fed4acSJens Axboe struct blk_mq_ctx *this_ctx = NULL; 272826fed4acSJens Axboe struct request *requeue_list = NULL; 272934e0a279SJan Kara struct request **requeue_lastp = &requeue_list; 273026fed4acSJens Axboe unsigned int depth = 0; 2731d97217e7SMing Lei bool is_passthrough = false; 273226fed4acSJens Axboe LIST_HEAD(list); 273326fed4acSJens Axboe 273426fed4acSJens Axboe do { 273526fed4acSJens Axboe struct request *rq = rq_list_pop(&plug->mq_list); 273626fed4acSJens Axboe 273726fed4acSJens Axboe if (!this_hctx) { 273826fed4acSJens Axboe this_hctx = rq->mq_hctx; 273926fed4acSJens Axboe this_ctx = rq->mq_ctx; 2740d97217e7SMing Lei is_passthrough = blk_rq_is_passthrough(rq); 2741d97217e7SMing Lei } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || 2742d97217e7SMing Lei is_passthrough != blk_rq_is_passthrough(rq)) { 274334e0a279SJan Kara rq_list_add_tail(&requeue_lastp, rq); 274426fed4acSJens Axboe continue; 274526fed4acSJens Axboe } 274634e0a279SJan Kara list_add(&rq->queuelist, &list); 274726fed4acSJens Axboe depth++; 274826fed4acSJens Axboe } while (!rq_list_empty(plug->mq_list)); 274926fed4acSJens Axboe 275026fed4acSJens Axboe plug->mq_list = requeue_list; 275126fed4acSJens Axboe trace_block_unplug(this_hctx->queue, depth, !from_sched); 275205a93117SChristoph Hellwig 275305a93117SChristoph Hellwig percpu_ref_get(&this_hctx->queue->q_usage_counter); 2754d97217e7SMing Lei /* passthrough requests should never be issued to the I/O scheduler */ 27552293cae7SMing Lei if (is_passthrough) { 27562293cae7SMing Lei spin_lock(&this_hctx->lock); 27572293cae7SMing Lei list_splice_tail_init(&list, &this_hctx->dispatch); 27582293cae7SMing Lei spin_unlock(&this_hctx->lock); 27592293cae7SMing Lei blk_mq_run_hw_queue(this_hctx, from_sched); 27602293cae7SMing Lei } else if (this_hctx->queue->elevator) { 276105a93117SChristoph Hellwig this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, 276293fffe16SChristoph Hellwig &list, 0); 276305a93117SChristoph Hellwig blk_mq_run_hw_queue(this_hctx, from_sched); 276405a93117SChristoph Hellwig } else { 276505a93117SChristoph Hellwig blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched); 276605a93117SChristoph Hellwig } 276705a93117SChristoph Hellwig percpu_ref_put(&this_hctx->queue->q_usage_counter); 276826fed4acSJens 
Axboe } 276926fed4acSJens Axboe 2770b84c5b50SChristoph Hellwig void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) 2771b84c5b50SChristoph Hellwig { 27723c67d44dSJens Axboe struct request *rq; 2773b84c5b50SChristoph Hellwig 277470904263SRoss Lagerwall /* 277570904263SRoss Lagerwall * We may have been called recursively midway through handling 277670904263SRoss Lagerwall * plug->mq_list via a schedule() in the driver's queue_rq() callback. 277770904263SRoss Lagerwall * To avoid mq_list changing under our feet, clear rq_count early and 277870904263SRoss Lagerwall * bail out specifically if rq_count is 0 rather than checking 277970904263SRoss Lagerwall * whether the mq_list is empty. 278070904263SRoss Lagerwall */ 278170904263SRoss Lagerwall if (plug->rq_count == 0) 2782b84c5b50SChristoph Hellwig return; 2783b84c5b50SChristoph Hellwig plug->rq_count = 0; 2784b84c5b50SChristoph Hellwig 2785b84c5b50SChristoph Hellwig if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { 27863c67d44dSJens Axboe struct request_queue *q; 27873c67d44dSJens Axboe 27883c67d44dSJens Axboe rq = rq_list_peek(&plug->mq_list); 27893c67d44dSJens Axboe q = rq->q; 27903c67d44dSJens Axboe 27913c67d44dSJens Axboe /* 27923c67d44dSJens Axboe * Peek first request and see if we have a ->queue_rqs() hook. 27933c67d44dSJens Axboe * If we do, we can dispatch the whole plug list in one go. We 27943c67d44dSJens Axboe * already know at this point that all requests belong to the 27953c67d44dSJens Axboe * same queue, caller must ensure that's the case. 27963c67d44dSJens Axboe * 27973c67d44dSJens Axboe * Since we pass off the full list to the driver at this point, 27983c67d44dSJens Axboe * we do not increment the active request count for the queue. 27993c67d44dSJens Axboe * Bypass shared tags for now because of that. 
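 *
 * A rough sketch of the driver side (editor's example; the foo_* names
 * are made up):
 *
 *	static void foo_queue_rqs(struct request **rqlist)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = rq_list_pop(rqlist)))
 *			foo_submit(rq);
 *		foo_ring_doorbell_once();
 *	}
 *
 * Requests the driver cannot take right away are left on (or put back
 * on) the list, and the code below then falls back to issuing them one
 * by one.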
28003c67d44dSJens Axboe */ 28013c67d44dSJens Axboe if (q->mq_ops->queue_rqs && 28023c67d44dSJens Axboe !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { 28033c67d44dSJens Axboe blk_mq_run_dispatch_ops(q, 2804518579a9SKeith Busch __blk_mq_flush_plug_list(q, plug)); 28053c67d44dSJens Axboe if (rq_list_empty(plug->mq_list)) 28063c67d44dSJens Axboe return; 28073c67d44dSJens Axboe } 280873f3760eSMing Lei 280973f3760eSMing Lei blk_mq_run_dispatch_ops(q, 28103e368fb0SKemeng Shi blk_mq_plug_issue_direct(plug)); 2811b84c5b50SChristoph Hellwig if (rq_list_empty(plug->mq_list)) 2812b84c5b50SChristoph Hellwig return; 2813b84c5b50SChristoph Hellwig } 2814b84c5b50SChristoph Hellwig 2815b84c5b50SChristoph Hellwig do { 281626fed4acSJens Axboe blk_mq_dispatch_plug_list(plug, from_schedule); 2817b84c5b50SChristoph Hellwig } while (!rq_list_empty(plug->mq_list)); 2818b84c5b50SChristoph Hellwig } 2819b84c5b50SChristoph Hellwig 282094aa228cSChristoph Hellwig static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, 28216ce3dd6eSMing Lei struct list_head *list) 28226ce3dd6eSMing Lei { 2823536167d4SKeith Busch int queued = 0; 2824984ce0a7SKemeng Shi blk_status_t ret = BLK_STS_OK; 2825536167d4SKeith Busch 28266ce3dd6eSMing Lei while (!list_empty(list)) { 28276ce3dd6eSMing Lei struct request *rq = list_first_entry(list, struct request, 28286ce3dd6eSMing Lei queuelist); 28296ce3dd6eSMing Lei 28306ce3dd6eSMing Lei list_del_init(&rq->queuelist); 2831fd9c40f6SBart Van Assche ret = blk_mq_request_issue_directly(rq, list_empty(list)); 283227e8b2bbSKemeng Shi switch (ret) { 283327e8b2bbSKemeng Shi case BLK_STS_OK: 283427e8b2bbSKemeng Shi queued++; 283527e8b2bbSKemeng Shi break; 283627e8b2bbSKemeng Shi case BLK_STS_RESOURCE: 283727e8b2bbSKemeng Shi case BLK_STS_DEV_RESOURCE: 28382b597613SChristoph Hellwig blk_mq_request_bypass_insert(rq, 0); 28392394395cSChristoph Hellwig if (list_empty(list)) 28402394395cSChristoph Hellwig blk_mq_run_hw_queue(hctx, false); 284127e8b2bbSKemeng Shi goto out; 284227e8b2bbSKemeng Shi default: 284327e8b2bbSKemeng Shi blk_mq_end_request(rq, ret); 2844fd9c40f6SBart Van Assche break; 2845fd9c40f6SBart Van Assche } 28466ce3dd6eSMing Lei } 2847d666ba98SJens Axboe 284827e8b2bbSKemeng Shi out: 2849984ce0a7SKemeng Shi if (ret != BLK_STS_OK) 2850984ce0a7SKemeng Shi blk_mq_commit_rqs(hctx, queued, false); 28516ce3dd6eSMing Lei } 28526ce3dd6eSMing Lei 2853b131f201SMing Lei static bool blk_mq_attempt_bio_merge(struct request_queue *q, 28540c5bcc92SChristoph Hellwig struct bio *bio, unsigned int nr_segs) 2855900e0807SJens Axboe { 2856900e0807SJens Axboe if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { 28570c5bcc92SChristoph Hellwig if (blk_attempt_plug_merge(q, bio, nr_segs)) 2858900e0807SJens Axboe return true; 2859900e0807SJens Axboe if (blk_mq_sched_bio_merge(q, bio, nr_segs)) 2860900e0807SJens Axboe return true; 2861900e0807SJens Axboe } 2862900e0807SJens Axboe return false; 2863900e0807SJens Axboe } 2864900e0807SJens Axboe 286571539717SJens Axboe static struct request *blk_mq_get_new_requests(struct request_queue *q, 286671539717SJens Axboe struct blk_plug *plug, 28670a5aa8d1SShin'ichiro Kawasaki struct bio *bio, 28680a5aa8d1SShin'ichiro Kawasaki unsigned int nsegs) 286971539717SJens Axboe { 287071539717SJens Axboe struct blk_mq_alloc_data data = { 287171539717SJens Axboe .q = q, 287271539717SJens Axboe .nr_tags = 1, 28739d497e29SMing Lei .cmd_flags = bio->bi_opf, 287471539717SJens Axboe }; 287571539717SJens Axboe struct request *rq; 287671539717SJens Axboe 28775b13bc8aSChristoph 
Hellwig if (unlikely(bio_queue_enter(bio))) 2878b637108aSMing Lei return NULL; 2879900e0807SJens Axboe 28800a5aa8d1SShin'ichiro Kawasaki if (blk_mq_attempt_bio_merge(q, bio, nsegs)) 28810a5aa8d1SShin'ichiro Kawasaki goto queue_exit; 28820a5aa8d1SShin'ichiro Kawasaki 28830a5aa8d1SShin'ichiro Kawasaki rq_qos_throttle(q, bio); 28840a5aa8d1SShin'ichiro Kawasaki 288571539717SJens Axboe if (plug) { 288671539717SJens Axboe data.nr_tags = plug->nr_ios; 288771539717SJens Axboe plug->nr_ios = 1; 288871539717SJens Axboe data.cached_rq = &plug->cached_rq; 288971539717SJens Axboe } 289071539717SJens Axboe 289171539717SJens Axboe rq = __blk_mq_alloc_requests(&data); 2892373b5416SJens Axboe if (rq) 289371539717SJens Axboe return rq; 289471539717SJens Axboe rq_qos_cleanup(q, bio); 289571539717SJens Axboe if (bio->bi_opf & REQ_NOWAIT) 289671539717SJens Axboe bio_wouldblock_error(bio); 28970a5aa8d1SShin'ichiro Kawasaki queue_exit: 28985b13bc8aSChristoph Hellwig blk_queue_exit(q); 289971539717SJens Axboe return NULL; 290071539717SJens Axboe } 290171539717SJens Axboe 29025b13bc8aSChristoph Hellwig static inline struct request *blk_mq_get_cached_request(struct request_queue *q, 29030a5aa8d1SShin'ichiro Kawasaki struct blk_plug *plug, struct bio **bio, unsigned int nsegs) 290471539717SJens Axboe { 290571539717SJens Axboe struct request *rq; 290677465647SPavel Begunkov enum hctx_type type, hctx_type; 2907b637108aSMing Lei 29085b13bc8aSChristoph Hellwig if (!plug) 29095b13bc8aSChristoph Hellwig return NULL; 291081ea1222SMing Lei rq = rq_list_peek(&plug->cached_rq); 291181ea1222SMing Lei if (!rq || rq->q != q) 291281ea1222SMing Lei return NULL; 29135b13bc8aSChristoph Hellwig 29140a5aa8d1SShin'ichiro Kawasaki if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) { 29150a5aa8d1SShin'ichiro Kawasaki *bio = NULL; 2916900e0807SJens Axboe return NULL; 29170a5aa8d1SShin'ichiro Kawasaki } 29180a5aa8d1SShin'ichiro Kawasaki 291977465647SPavel Begunkov type = blk_mq_get_hctx_type((*bio)->bi_opf); 292077465647SPavel Begunkov hctx_type = rq->mq_hctx->type; 292177465647SPavel Begunkov if (type != hctx_type && 292277465647SPavel Begunkov !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT)) 29230a5aa8d1SShin'ichiro Kawasaki return NULL; 29240a5aa8d1SShin'ichiro Kawasaki if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) 29255b13bc8aSChristoph Hellwig return NULL; 29265b13bc8aSChristoph Hellwig 29272645672fSJens Axboe /* 29282645672fSJens Axboe * If any qos ->throttle() end up blocking, we will have flushed the 29292645672fSJens Axboe * plug and hence killed the cached_rq list as well. Pop this entry 29302645672fSJens Axboe * before we throttle. 29312645672fSJens Axboe */ 293271539717SJens Axboe plug->cached_rq = rq_list_next(rq); 29332645672fSJens Axboe rq_qos_throttle(q, *bio); 29342645672fSJens Axboe 29355c17f45eSChengming Zhou blk_mq_rq_time_init(rq, 0); 29362645672fSJens Axboe rq->cmd_flags = (*bio)->bi_opf; 293771539717SJens Axboe INIT_LIST_HEAD(&rq->queuelist); 293871539717SJens Axboe return rq; 293971539717SJens Axboe } 294071539717SJens Axboe 294182b74cacSJan Kara static void bio_set_ioprio(struct bio *bio) 294282b74cacSJan Kara { 2943a78418e6SJan Kara /* Nobody set ioprio so far? 
Initialize it based on task's nice value */ 2944a78418e6SJan Kara if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) 2945a78418e6SJan Kara bio->bi_ioprio = get_current_ioprio(); 294682b74cacSJan Kara blkcg_set_ioprio(bio); 294782b74cacSJan Kara } 294882b74cacSJan Kara 2949105663f7SAndré Almeida /** 2950c62b37d9SChristoph Hellwig * blk_mq_submit_bio - Create and send a request to block device. 2951105663f7SAndré Almeida * @bio: Bio pointer. 2952105663f7SAndré Almeida * 2953105663f7SAndré Almeida * Builds up a request structure from @q and @bio and send to the device. The 2954105663f7SAndré Almeida * request may not be queued directly to hardware if: 2955105663f7SAndré Almeida * * This request can be merged with another one 2956105663f7SAndré Almeida * * We want to place request at plug queue for possible future merging 2957105663f7SAndré Almeida * * There is an IO scheduler active at this queue 2958105663f7SAndré Almeida * 2959105663f7SAndré Almeida * It will not queue the request if there is an error with the bio, or at the 2960105663f7SAndré Almeida * request creation. 2961105663f7SAndré Almeida */ 29623e08773cSChristoph Hellwig void blk_mq_submit_bio(struct bio *bio) 296307068d5bSJens Axboe { 2964ed6cddefSPavel Begunkov struct request_queue *q = bdev_get_queue(bio->bi_bdev); 29656deacb3bSChristoph Hellwig struct blk_plug *plug = blk_mq_plug(bio); 2966ef295ecfSChristoph Hellwig const int is_sync = op_is_sync(bio->bi_opf); 2967f0dbe6e8SChristoph Hellwig struct blk_mq_hw_ctx *hctx; 296807068d5bSJens Axboe struct request *rq; 2969abd45c15SJens Axboe unsigned int nr_segs = 1; 2970a892c8d5SSatya Tangirala blk_status_t ret; 297107068d5bSJens Axboe 297251d798cdSChristoph Hellwig bio = blk_queue_bounce(bio, q); 2973613b1488SJens Axboe if (bio_may_exceed_limits(bio, &q->limits)) { 2974c55ddd90SChristoph Hellwig bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); 2975613b1488SJens Axboe if (!bio) 2976613b1488SJens Axboe return; 2977613b1488SJens Axboe } 2978f36ea50cSWen Xiong 2979e23947bdSDmitry Monakhov if (!bio_integrity_prep(bio)) 2980900e0807SJens Axboe return; 298187760e5eSJens Axboe 29829c6227e0SJan Kara bio_set_ioprio(bio); 29839c6227e0SJan Kara 29840a5aa8d1SShin'ichiro Kawasaki rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs); 29859d497e29SMing Lei if (!rq) { 29860a5aa8d1SShin'ichiro Kawasaki if (!bio) 29870a5aa8d1SShin'ichiro Kawasaki return; 29880a5aa8d1SShin'ichiro Kawasaki rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); 298971539717SJens Axboe if (unlikely(!rq)) 2990900e0807SJens Axboe return; 29915b13bc8aSChristoph Hellwig } 299287760e5eSJens Axboe 2993e8a676d6SChristoph Hellwig trace_block_getrq(bio); 2994d6f1dda2SXiaoguang Wang 2995c1c80384SJosef Bacik rq_qos_track(q, rq, bio); 299607068d5bSJens Axboe 299714ccb66bSChristoph Hellwig blk_mq_bio_to_request(rq, bio, nr_segs); 2998923218f6SMing Lei 29999cd1e566SEric Biggers ret = blk_crypto_rq_get_keyslot(rq); 3000a892c8d5SSatya Tangirala if (ret != BLK_STS_OK) { 3001a892c8d5SSatya Tangirala bio->bi_status = ret; 3002a892c8d5SSatya Tangirala bio_endio(bio); 3003a892c8d5SSatya Tangirala blk_mq_free_request(rq); 30043e08773cSChristoph Hellwig return; 3005a892c8d5SSatya Tangirala } 3006a892c8d5SSatya Tangirala 3007360f2648SChristoph Hellwig if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) 3008d92ca9d8SChristoph Hellwig return; 3009d92ca9d8SChristoph Hellwig 3010f0dbe6e8SChristoph Hellwig if (plug) { 3011ce5b009cSJens Axboe blk_add_rq_to_plug(plug, rq); 3012f0dbe6e8SChristoph Hellwig return; 3013f0dbe6e8SChristoph 
Hellwig } 3014f0dbe6e8SChristoph Hellwig 3015f0dbe6e8SChristoph Hellwig hctx = rq->mq_hctx; 3016dd6216bbSChristoph Hellwig if ((rq->rq_flags & RQF_USE_SCHED) || 3017f0dbe6e8SChristoph Hellwig (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { 3018710fa378SChristoph Hellwig blk_mq_insert_request(rq, 0); 3019f0dbe6e8SChristoph Hellwig blk_mq_run_hw_queue(hctx, true); 3020f0dbe6e8SChristoph Hellwig } else { 3021f0dbe6e8SChristoph Hellwig blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq)); 3022f0dbe6e8SChristoph Hellwig } 3023ab42f35dSMing Lei } 3024320ae51fSJens Axboe 3025248c7933SChristoph Hellwig #ifdef CONFIG_BLK_MQ_STACKING 302606c8c691SChristoph Hellwig /** 3027a5efda3cSChristoph Hellwig * blk_insert_cloned_request - Helper for stacking drivers to submit a request 3028a5efda3cSChristoph Hellwig * @rq: the request being queued 302906c8c691SChristoph Hellwig */ 303028db4711SChristoph Hellwig blk_status_t blk_insert_cloned_request(struct request *rq) 303106c8c691SChristoph Hellwig { 303228db4711SChristoph Hellwig struct request_queue *q = rq->q; 303306c8c691SChristoph Hellwig unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); 303449d24398SUday Shankar unsigned int max_segments = blk_rq_get_max_segments(rq); 3035a5efda3cSChristoph Hellwig blk_status_t ret; 303606c8c691SChristoph Hellwig 303706c8c691SChristoph Hellwig if (blk_rq_sectors(rq) > max_sectors) { 303806c8c691SChristoph Hellwig /* 303906c8c691SChristoph Hellwig * SCSI device does not have a good way to return if 304006c8c691SChristoph Hellwig * Write Same/Zero is actually supported. If a device rejects 304106c8c691SChristoph Hellwig * a non-read/write command (discard, write same,etc.) the 304206c8c691SChristoph Hellwig * low-level device driver will set the relevant queue limit to 304306c8c691SChristoph Hellwig * 0 to prevent blk-lib from issuing more of the offending 304406c8c691SChristoph Hellwig * operations. Commands queued prior to the queue limit being 304506c8c691SChristoph Hellwig * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O 304606c8c691SChristoph Hellwig * errors being propagated to upper layers. 304706c8c691SChristoph Hellwig */ 304806c8c691SChristoph Hellwig if (max_sectors == 0) 304906c8c691SChristoph Hellwig return BLK_STS_NOTSUPP; 305006c8c691SChristoph Hellwig 305106c8c691SChristoph Hellwig printk(KERN_ERR "%s: over max size limit. (%u > %u)\n", 305206c8c691SChristoph Hellwig __func__, blk_rq_sectors(rq), max_sectors); 305306c8c691SChristoph Hellwig return BLK_STS_IOERR; 305406c8c691SChristoph Hellwig } 305506c8c691SChristoph Hellwig 305606c8c691SChristoph Hellwig /* 305706c8c691SChristoph Hellwig * The queue settings related to segment counting may differ from the 305806c8c691SChristoph Hellwig * original queue. 305906c8c691SChristoph Hellwig */ 306006c8c691SChristoph Hellwig rq->nr_phys_segments = blk_recalc_rq_segments(rq); 306149d24398SUday Shankar if (rq->nr_phys_segments > max_segments) { 306249d24398SUday Shankar printk(KERN_ERR "%s: over max segments limit. 
(%u > %u)\n", 306349d24398SUday Shankar __func__, rq->nr_phys_segments, max_segments); 306406c8c691SChristoph Hellwig return BLK_STS_IOERR; 306506c8c691SChristoph Hellwig } 306606c8c691SChristoph Hellwig 306728db4711SChristoph Hellwig if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) 306806c8c691SChristoph Hellwig return BLK_STS_IOERR; 306906c8c691SChristoph Hellwig 30705b8562f0SEric Biggers ret = blk_crypto_rq_get_keyslot(rq); 30715b8562f0SEric Biggers if (ret != BLK_STS_OK) 30725b8562f0SEric Biggers return ret; 307306c8c691SChristoph Hellwig 307406c8c691SChristoph Hellwig blk_account_io_start(rq); 307506c8c691SChristoph Hellwig 307606c8c691SChristoph Hellwig /* 307706c8c691SChristoph Hellwig * Since we have a scheduler attached on the top device, 307806c8c691SChristoph Hellwig * bypass a potential scheduler on the bottom device for 307906c8c691SChristoph Hellwig * insert. 308006c8c691SChristoph Hellwig */ 308128db4711SChristoph Hellwig blk_mq_run_dispatch_ops(q, 30824cafe86cSMing Lei ret = blk_mq_request_issue_directly(rq, true)); 3083592ee119SYu Kuai if (ret) 3084592ee119SYu Kuai blk_account_io_done(rq, ktime_get_ns()); 30854cafe86cSMing Lei return ret; 308606c8c691SChristoph Hellwig } 308706c8c691SChristoph Hellwig EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 308806c8c691SChristoph Hellwig 308906c8c691SChristoph Hellwig /** 309006c8c691SChristoph Hellwig * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 309106c8c691SChristoph Hellwig * @rq: the clone request to be cleaned up 309206c8c691SChristoph Hellwig * 309306c8c691SChristoph Hellwig * Description: 309406c8c691SChristoph Hellwig * Free all bios in @rq for a cloned request. 309506c8c691SChristoph Hellwig */ 309606c8c691SChristoph Hellwig void blk_rq_unprep_clone(struct request *rq) 309706c8c691SChristoph Hellwig { 309806c8c691SChristoph Hellwig struct bio *bio; 309906c8c691SChristoph Hellwig 310006c8c691SChristoph Hellwig while ((bio = rq->bio) != NULL) { 310106c8c691SChristoph Hellwig rq->bio = bio->bi_next; 310206c8c691SChristoph Hellwig 310306c8c691SChristoph Hellwig bio_put(bio); 310406c8c691SChristoph Hellwig } 310506c8c691SChristoph Hellwig } 310606c8c691SChristoph Hellwig EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 310706c8c691SChristoph Hellwig 310806c8c691SChristoph Hellwig /** 310906c8c691SChristoph Hellwig * blk_rq_prep_clone - Helper function to setup clone request 311006c8c691SChristoph Hellwig * @rq: the request to be setup 311106c8c691SChristoph Hellwig * @rq_src: original request to be cloned 311206c8c691SChristoph Hellwig * @bs: bio_set that bios for clone are allocated from 311306c8c691SChristoph Hellwig * @gfp_mask: memory allocation mask for bio 311406c8c691SChristoph Hellwig * @bio_ctr: setup function to be called for each clone bio. 311506c8c691SChristoph Hellwig * Returns %0 for success, non %0 for failure. 311606c8c691SChristoph Hellwig * @data: private data to be passed to @bio_ctr 311706c8c691SChristoph Hellwig * 311806c8c691SChristoph Hellwig * Description: 311906c8c691SChristoph Hellwig * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 312006c8c691SChristoph Hellwig * Also, pages which the original bios are pointing to are not copied 312106c8c691SChristoph Hellwig * and the cloned bios just point same pages. 312206c8c691SChristoph Hellwig * So cloned bios must be completed before original bios, which means 312306c8c691SChristoph Hellwig * the caller must complete @rq before @rq_src. 
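 *
 * Typical request-based stacking usage (editor's sketch, loosely
 * following the dm-rq pattern; error handling trimmed):
 *
 *	clone = blk_mq_alloc_request(lower_q, rq->cmd_flags | REQ_NOMERGE,
 *				     BLK_MQ_REQ_NOWAIT);
 *	if (!IS_ERR(clone) &&
 *	    !blk_rq_prep_clone(clone, rq, NULL, GFP_ATOMIC, NULL, NULL))
 *		ret = blk_insert_cloned_request(clone);
 *
 * with blk_rq_unprep_clone() plus blk_mq_free_request() undoing a clone
 * that could not be dispatched.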
312406c8c691SChristoph Hellwig */ 312506c8c691SChristoph Hellwig int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 312606c8c691SChristoph Hellwig struct bio_set *bs, gfp_t gfp_mask, 312706c8c691SChristoph Hellwig int (*bio_ctr)(struct bio *, struct bio *, void *), 312806c8c691SChristoph Hellwig void *data) 312906c8c691SChristoph Hellwig { 313006c8c691SChristoph Hellwig struct bio *bio, *bio_src; 313106c8c691SChristoph Hellwig 313206c8c691SChristoph Hellwig if (!bs) 313306c8c691SChristoph Hellwig bs = &fs_bio_set; 313406c8c691SChristoph Hellwig 313506c8c691SChristoph Hellwig __rq_for_each_bio(bio_src, rq_src) { 3136abfc426dSChristoph Hellwig bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, 3137abfc426dSChristoph Hellwig bs); 313806c8c691SChristoph Hellwig if (!bio) 313906c8c691SChristoph Hellwig goto free_and_out; 314006c8c691SChristoph Hellwig 314106c8c691SChristoph Hellwig if (bio_ctr && bio_ctr(bio, bio_src, data)) 314206c8c691SChristoph Hellwig goto free_and_out; 314306c8c691SChristoph Hellwig 314406c8c691SChristoph Hellwig if (rq->bio) { 314506c8c691SChristoph Hellwig rq->biotail->bi_next = bio; 314606c8c691SChristoph Hellwig rq->biotail = bio; 314706c8c691SChristoph Hellwig } else { 314806c8c691SChristoph Hellwig rq->bio = rq->biotail = bio; 314906c8c691SChristoph Hellwig } 315006c8c691SChristoph Hellwig bio = NULL; 315106c8c691SChristoph Hellwig } 315206c8c691SChristoph Hellwig 315306c8c691SChristoph Hellwig /* Copy attributes of the original request to the clone request. */ 315406c8c691SChristoph Hellwig rq->__sector = blk_rq_pos(rq_src); 315506c8c691SChristoph Hellwig rq->__data_len = blk_rq_bytes(rq_src); 315606c8c691SChristoph Hellwig if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { 315706c8c691SChristoph Hellwig rq->rq_flags |= RQF_SPECIAL_PAYLOAD; 315806c8c691SChristoph Hellwig rq->special_vec = rq_src->special_vec; 315906c8c691SChristoph Hellwig } 316006c8c691SChristoph Hellwig rq->nr_phys_segments = rq_src->nr_phys_segments; 316106c8c691SChristoph Hellwig rq->ioprio = rq_src->ioprio; 316206c8c691SChristoph Hellwig 316306c8c691SChristoph Hellwig if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) 316406c8c691SChristoph Hellwig goto free_and_out; 316506c8c691SChristoph Hellwig 316606c8c691SChristoph Hellwig return 0; 316706c8c691SChristoph Hellwig 316806c8c691SChristoph Hellwig free_and_out: 316906c8c691SChristoph Hellwig if (bio) 317006c8c691SChristoph Hellwig bio_put(bio); 317106c8c691SChristoph Hellwig blk_rq_unprep_clone(rq); 317206c8c691SChristoph Hellwig 317306c8c691SChristoph Hellwig return -ENOMEM; 317406c8c691SChristoph Hellwig } 317506c8c691SChristoph Hellwig EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 3176248c7933SChristoph Hellwig #endif /* CONFIG_BLK_MQ_STACKING */ 317706c8c691SChristoph Hellwig 3178f2b8f3ceSChristoph Hellwig /* 3179f2b8f3ceSChristoph Hellwig * Steal bios from a request and add them to a bio list. 3180f2b8f3ceSChristoph Hellwig * The request must not have been partially completed before. 
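 *
 * (Editor's note: the best-known user is multipath failover, e.g. NVMe's
 * nvme_failover_req(), which roughly does
 *
 *	blk_steal_bios(&head->requeue_list, rq);
 *	blk_mq_end_request(rq, BLK_STS_OK);
 *
 * and then resubmits the stolen bios on another path.)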
3181f2b8f3ceSChristoph Hellwig */ 3182f2b8f3ceSChristoph Hellwig void blk_steal_bios(struct bio_list *list, struct request *rq) 3183f2b8f3ceSChristoph Hellwig { 3184f2b8f3ceSChristoph Hellwig if (rq->bio) { 3185f2b8f3ceSChristoph Hellwig if (list->tail) 3186f2b8f3ceSChristoph Hellwig list->tail->bi_next = rq->bio; 3187f2b8f3ceSChristoph Hellwig else 3188f2b8f3ceSChristoph Hellwig list->head = rq->bio; 3189f2b8f3ceSChristoph Hellwig list->tail = rq->biotail; 3190f2b8f3ceSChristoph Hellwig 3191f2b8f3ceSChristoph Hellwig rq->bio = NULL; 3192f2b8f3ceSChristoph Hellwig rq->biotail = NULL; 3193f2b8f3ceSChristoph Hellwig } 3194f2b8f3ceSChristoph Hellwig 3195f2b8f3ceSChristoph Hellwig rq->__data_len = 0; 3196f2b8f3ceSChristoph Hellwig } 3197f2b8f3ceSChristoph Hellwig EXPORT_SYMBOL_GPL(blk_steal_bios); 3198f2b8f3ceSChristoph Hellwig 3199bd63141dSMing Lei static size_t order_to_size(unsigned int order) 3200bd63141dSMing Lei { 3201bd63141dSMing Lei return (size_t)PAGE_SIZE << order; 3202bd63141dSMing Lei } 3203bd63141dSMing Lei 3204bd63141dSMing Lei /* called before freeing request pool in @tags */ 3205f32e4eafSJohn Garry static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags, 3206f32e4eafSJohn Garry struct blk_mq_tags *tags) 3207bd63141dSMing Lei { 3208bd63141dSMing Lei struct page *page; 3209bd63141dSMing Lei unsigned long flags; 3210bd63141dSMing Lei 321176dd2980SYu Kuai /* 321276dd2980SYu Kuai * There is no need to clear mapping if driver tags is not initialized 321376dd2980SYu Kuai * or the mapping belongs to the driver tags. 321476dd2980SYu Kuai */ 321576dd2980SYu Kuai if (!drv_tags || drv_tags == tags) 32164f245d5bSJohn Garry return; 32174f245d5bSJohn Garry 3218bd63141dSMing Lei list_for_each_entry(page, &tags->page_list, lru) { 3219bd63141dSMing Lei unsigned long start = (unsigned long)page_address(page); 3220bd63141dSMing Lei unsigned long end = start + order_to_size(page->private); 3221bd63141dSMing Lei int i; 3222bd63141dSMing Lei 3223f32e4eafSJohn Garry for (i = 0; i < drv_tags->nr_tags; i++) { 3224bd63141dSMing Lei struct request *rq = drv_tags->rqs[i]; 3225bd63141dSMing Lei unsigned long rq_addr = (unsigned long)rq; 3226bd63141dSMing Lei 3227bd63141dSMing Lei if (rq_addr >= start && rq_addr < end) { 32280a467d0fSJens Axboe WARN_ON_ONCE(req_ref_read(rq) != 0); 3229bd63141dSMing Lei cmpxchg(&drv_tags->rqs[i], rq, NULL); 3230bd63141dSMing Lei } 3231bd63141dSMing Lei } 3232bd63141dSMing Lei } 3233bd63141dSMing Lei 3234bd63141dSMing Lei /* 3235bd63141dSMing Lei * Wait until all pending iteration is done. 3236bd63141dSMing Lei * 3237bd63141dSMing Lei * Request reference is cleared and it is guaranteed to be observed 3238bd63141dSMing Lei * after the ->lock is released. 
3239bd63141dSMing Lei */ 3240bd63141dSMing Lei spin_lock_irqsave(&drv_tags->lock, flags); 3241bd63141dSMing Lei spin_unlock_irqrestore(&drv_tags->lock, flags); 3242bd63141dSMing Lei } 3243bd63141dSMing Lei 3244cc71a6f4SJens Axboe void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, 32452c3ad667SJens Axboe unsigned int hctx_idx) 3246320ae51fSJens Axboe { 3247f32e4eafSJohn Garry struct blk_mq_tags *drv_tags; 3248320ae51fSJens Axboe struct page *page; 3249320ae51fSJens Axboe 3250e02657eaSMing Lei if (list_empty(&tags->page_list)) 3251e02657eaSMing Lei return; 3252e02657eaSMing Lei 3253079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) 3254079a2e3eSJohn Garry drv_tags = set->shared_tags; 3255e155b0c2SJohn Garry else 3256f32e4eafSJohn Garry drv_tags = set->tags[hctx_idx]; 3257f32e4eafSJohn Garry 325865de57bbSJohn Garry if (tags->static_rqs && set->ops->exit_request) { 3259e9b267d9SChristoph Hellwig int i; 3260e9b267d9SChristoph Hellwig 326124d2f903SChristoph Hellwig for (i = 0; i < tags->nr_tags; i++) { 32622af8cbe3SJens Axboe struct request *rq = tags->static_rqs[i]; 32632af8cbe3SJens Axboe 32642af8cbe3SJens Axboe if (!rq) 3265e9b267d9SChristoph Hellwig continue; 3266d6296d39SChristoph Hellwig set->ops->exit_request(set, rq, hctx_idx); 32672af8cbe3SJens Axboe tags->static_rqs[i] = NULL; 3268e9b267d9SChristoph Hellwig } 3269e9b267d9SChristoph Hellwig } 3270e9b267d9SChristoph Hellwig 3271f32e4eafSJohn Garry blk_mq_clear_rq_mapping(drv_tags, tags); 3272bd63141dSMing Lei 327324d2f903SChristoph Hellwig while (!list_empty(&tags->page_list)) { 327424d2f903SChristoph Hellwig page = list_first_entry(&tags->page_list, struct page, lru); 32756753471cSDave Hansen list_del_init(&page->lru); 3276f75782e4SCatalin Marinas /* 3277f75782e4SCatalin Marinas * Remove kmemleak object previously allocated in 3278273938bfSRaul E Rangel * blk_mq_alloc_rqs(). 
3279f75782e4SCatalin Marinas */ 3280f75782e4SCatalin Marinas kmemleak_free(page_address(page)); 3281320ae51fSJens Axboe __free_pages(page, page->private); 3282320ae51fSJens Axboe } 3283cc71a6f4SJens Axboe } 3284320ae51fSJens Axboe 3285e155b0c2SJohn Garry void blk_mq_free_rq_map(struct blk_mq_tags *tags) 3286cc71a6f4SJens Axboe { 328724d2f903SChristoph Hellwig kfree(tags->rqs); 3288cc71a6f4SJens Axboe tags->rqs = NULL; 32892af8cbe3SJens Axboe kfree(tags->static_rqs); 32902af8cbe3SJens Axboe tags->static_rqs = NULL; 3291320ae51fSJens Axboe 3292e155b0c2SJohn Garry blk_mq_free_tags(tags); 3293320ae51fSJens Axboe } 3294320ae51fSJens Axboe 32954d805131SMing Lei static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set, 32964d805131SMing Lei unsigned int hctx_idx) 32974d805131SMing Lei { 32984d805131SMing Lei int i; 32994d805131SMing Lei 33004d805131SMing Lei for (i = 0; i < set->nr_maps; i++) { 33014d805131SMing Lei unsigned int start = set->map[i].queue_offset; 33024d805131SMing Lei unsigned int end = start + set->map[i].nr_queues; 33034d805131SMing Lei 33044d805131SMing Lei if (hctx_idx >= start && hctx_idx < end) 33054d805131SMing Lei break; 33064d805131SMing Lei } 33074d805131SMing Lei 33084d805131SMing Lei if (i >= set->nr_maps) 33094d805131SMing Lei i = HCTX_TYPE_DEFAULT; 33104d805131SMing Lei 33114d805131SMing Lei return i; 33124d805131SMing Lei } 33134d805131SMing Lei 33144d805131SMing Lei static int blk_mq_get_hctx_node(struct blk_mq_tag_set *set, 33154d805131SMing Lei unsigned int hctx_idx) 33164d805131SMing Lei { 33174d805131SMing Lei enum hctx_type type = hctx_idx_to_type(set, hctx_idx); 33184d805131SMing Lei 33194d805131SMing Lei return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); 33204d805131SMing Lei } 33214d805131SMing Lei 332263064be1SJohn Garry static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, 3323cc71a6f4SJens Axboe unsigned int hctx_idx, 3324cc71a6f4SJens Axboe unsigned int nr_tags, 3325e155b0c2SJohn Garry unsigned int reserved_tags) 3326320ae51fSJens Axboe { 33274d805131SMing Lei int node = blk_mq_get_hctx_node(set, hctx_idx); 332824d2f903SChristoph Hellwig struct blk_mq_tags *tags; 3329320ae51fSJens Axboe 333059f082e4SShaohua Li if (node == NUMA_NO_NODE) 333159f082e4SShaohua Li node = set->numa_node; 333259f082e4SShaohua Li 3333e155b0c2SJohn Garry tags = blk_mq_init_tags(nr_tags, reserved_tags, node, 3334e155b0c2SJohn Garry BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); 333524d2f903SChristoph Hellwig if (!tags) 333624d2f903SChristoph Hellwig return NULL; 3337320ae51fSJens Axboe 3338590b5b7dSKees Cook tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), 333936e1f3d1SGabriel Krisman Bertazi GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, 334059f082e4SShaohua Li node); 33417edfd681SJinlong Chen if (!tags->rqs) 33427edfd681SJinlong Chen goto err_free_tags; 3343320ae51fSJens Axboe 3344590b5b7dSKees Cook tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), 33452af8cbe3SJens Axboe GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, 334659f082e4SShaohua Li node); 33477edfd681SJinlong Chen if (!tags->static_rqs) 33487edfd681SJinlong Chen goto err_free_rqs; 33492af8cbe3SJens Axboe 3350cc71a6f4SJens Axboe return tags; 33517edfd681SJinlong Chen 33527edfd681SJinlong Chen err_free_rqs: 33537edfd681SJinlong Chen kfree(tags->rqs); 33547edfd681SJinlong Chen err_free_tags: 33557edfd681SJinlong Chen blk_mq_free_tags(tags); 33567edfd681SJinlong Chen return NULL; 3357cc71a6f4SJens Axboe } 3358cc71a6f4SJens Axboe 33591d9bd516STejun Heo static int 
blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, 33601d9bd516STejun Heo unsigned int hctx_idx, int node) 33611d9bd516STejun Heo { 33621d9bd516STejun Heo int ret; 33631d9bd516STejun Heo 33641d9bd516STejun Heo if (set->ops->init_request) { 33651d9bd516STejun Heo ret = set->ops->init_request(set, rq, hctx_idx, node); 33661d9bd516STejun Heo if (ret) 33671d9bd516STejun Heo return ret; 33681d9bd516STejun Heo } 33691d9bd516STejun Heo 337012f5b931SKeith Busch WRITE_ONCE(rq->state, MQ_RQ_IDLE); 33711d9bd516STejun Heo return 0; 33721d9bd516STejun Heo } 33731d9bd516STejun Heo 337463064be1SJohn Garry static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, 337563064be1SJohn Garry struct blk_mq_tags *tags, 3376cc71a6f4SJens Axboe unsigned int hctx_idx, unsigned int depth) 3377cc71a6f4SJens Axboe { 3378cc71a6f4SJens Axboe unsigned int i, j, entries_per_page, max_order = 4; 33794d805131SMing Lei int node = blk_mq_get_hctx_node(set, hctx_idx); 3380cc71a6f4SJens Axboe size_t rq_size, left; 338159f082e4SShaohua Li 338259f082e4SShaohua Li if (node == NUMA_NO_NODE) 338359f082e4SShaohua Li node = set->numa_node; 3384cc71a6f4SJens Axboe 3385cc71a6f4SJens Axboe INIT_LIST_HEAD(&tags->page_list); 3386cc71a6f4SJens Axboe 3387320ae51fSJens Axboe /* 3388320ae51fSJens Axboe * rq_size is the size of the request plus driver payload, rounded 3389320ae51fSJens Axboe * to the cacheline size 3390320ae51fSJens Axboe */ 339124d2f903SChristoph Hellwig rq_size = round_up(sizeof(struct request) + set->cmd_size, 3392320ae51fSJens Axboe cache_line_size()); 3393cc71a6f4SJens Axboe left = rq_size * depth; 3394320ae51fSJens Axboe 3395cc71a6f4SJens Axboe for (i = 0; i < depth; ) { 3396320ae51fSJens Axboe int this_order = max_order; 3397320ae51fSJens Axboe struct page *page; 3398320ae51fSJens Axboe int to_do; 3399320ae51fSJens Axboe void *p; 3400320ae51fSJens Axboe 3401b3a834b1SBartlomiej Zolnierkiewicz while (this_order && left < order_to_size(this_order - 1)) 3402320ae51fSJens Axboe this_order--; 3403320ae51fSJens Axboe 3404320ae51fSJens Axboe do { 340559f082e4SShaohua Li page = alloc_pages_node(node, 340636e1f3d1SGabriel Krisman Bertazi GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, 340724d2f903SChristoph Hellwig this_order); 3408320ae51fSJens Axboe if (page) 3409320ae51fSJens Axboe break; 3410320ae51fSJens Axboe if (!this_order--) 3411320ae51fSJens Axboe break; 3412320ae51fSJens Axboe if (order_to_size(this_order) < rq_size) 3413320ae51fSJens Axboe break; 3414320ae51fSJens Axboe } while (1); 3415320ae51fSJens Axboe 3416320ae51fSJens Axboe if (!page) 341724d2f903SChristoph Hellwig goto fail; 3418320ae51fSJens Axboe 3419320ae51fSJens Axboe page->private = this_order; 342024d2f903SChristoph Hellwig list_add_tail(&page->lru, &tags->page_list); 3421320ae51fSJens Axboe 3422320ae51fSJens Axboe p = page_address(page); 3423f75782e4SCatalin Marinas /* 3424f75782e4SCatalin Marinas * Allow kmemleak to scan these pages as they contain pointers 3425f75782e4SCatalin Marinas * to additional allocations like via ops->init_request(). 
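 *
 * Editor's illustration (the foo_* names are made up): the per-request
 * payload sits right behind struct request, sized by set->cmd_size, and
 * a driver's ->init_request() may hang further allocations off it:
 *
 *	static int foo_init_request(struct blk_mq_tag_set *set,
 *				    struct request *rq,
 *				    unsigned int hctx_idx,
 *				    unsigned int node)
 *	{
 *		struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);
 *
 *		cmd->sense = kzalloc_node(FOO_SENSE_LEN, GFP_KERNEL, node);
 *		return cmd->sense ? 0 : -ENOMEM;
 *	}
 *
 * It is exactly such side allocations that kmemleak needs to scan for
 * here.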
3426f75782e4SCatalin Marinas */ 342736e1f3d1SGabriel Krisman Bertazi kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); 3428320ae51fSJens Axboe entries_per_page = order_to_size(this_order) / rq_size; 3429cc71a6f4SJens Axboe to_do = min(entries_per_page, depth - i); 3430320ae51fSJens Axboe left -= to_do * rq_size; 3431320ae51fSJens Axboe for (j = 0; j < to_do; j++) { 34322af8cbe3SJens Axboe struct request *rq = p; 34332af8cbe3SJens Axboe 34342af8cbe3SJens Axboe tags->static_rqs[i] = rq; 34351d9bd516STejun Heo if (blk_mq_init_request(set, rq, hctx_idx, node)) { 34362af8cbe3SJens Axboe tags->static_rqs[i] = NULL; 343724d2f903SChristoph Hellwig goto fail; 3438e9b267d9SChristoph Hellwig } 3439e9b267d9SChristoph Hellwig 3440320ae51fSJens Axboe p += rq_size; 3441320ae51fSJens Axboe i++; 3442320ae51fSJens Axboe } 3443320ae51fSJens Axboe } 3444cc71a6f4SJens Axboe return 0; 3445320ae51fSJens Axboe 344624d2f903SChristoph Hellwig fail: 3447cc71a6f4SJens Axboe blk_mq_free_rqs(set, tags, hctx_idx); 3448cc71a6f4SJens Axboe return -ENOMEM; 3449320ae51fSJens Axboe } 3450320ae51fSJens Axboe 3451bf0beec0SMing Lei struct rq_iter_data { 3452bf0beec0SMing Lei struct blk_mq_hw_ctx *hctx; 3453bf0beec0SMing Lei bool has_rq; 3454bf0beec0SMing Lei }; 3455bf0beec0SMing Lei 34562dd6532eSJohn Garry static bool blk_mq_has_request(struct request *rq, void *data) 3457bf0beec0SMing Lei { 3458bf0beec0SMing Lei struct rq_iter_data *iter_data = data; 3459bf0beec0SMing Lei 3460bf0beec0SMing Lei if (rq->mq_hctx != iter_data->hctx) 3461bf0beec0SMing Lei return true; 3462bf0beec0SMing Lei iter_data->has_rq = true; 3463bf0beec0SMing Lei return false; 3464bf0beec0SMing Lei } 3465bf0beec0SMing Lei 3466bf0beec0SMing Lei static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) 3467bf0beec0SMing Lei { 3468bf0beec0SMing Lei struct blk_mq_tags *tags = hctx->sched_tags ? 3469bf0beec0SMing Lei hctx->sched_tags : hctx->tags; 3470bf0beec0SMing Lei struct rq_iter_data data = { 3471bf0beec0SMing Lei .hctx = hctx, 3472bf0beec0SMing Lei }; 3473bf0beec0SMing Lei 3474bf0beec0SMing Lei blk_mq_all_tag_iter(tags, blk_mq_has_request, &data); 3475bf0beec0SMing Lei return data.has_rq; 3476bf0beec0SMing Lei } 3477bf0beec0SMing Lei 3478bf0beec0SMing Lei static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu, 3479bf0beec0SMing Lei struct blk_mq_hw_ctx *hctx) 3480bf0beec0SMing Lei { 34819b51d9d8SYury Norov if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) 3482bf0beec0SMing Lei return false; 3483bf0beec0SMing Lei if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) 3484bf0beec0SMing Lei return false; 3485bf0beec0SMing Lei return true; 3486bf0beec0SMing Lei } 3487bf0beec0SMing Lei 3488bf0beec0SMing Lei static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) 3489bf0beec0SMing Lei { 3490bf0beec0SMing Lei struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, 3491bf0beec0SMing Lei struct blk_mq_hw_ctx, cpuhp_online); 3492bf0beec0SMing Lei 3493bf0beec0SMing Lei if (!cpumask_test_cpu(cpu, hctx->cpumask) || 3494bf0beec0SMing Lei !blk_mq_last_cpu_in_hctx(cpu, hctx)) 3495bf0beec0SMing Lei return 0; 3496bf0beec0SMing Lei 3497bf0beec0SMing Lei /* 3498bf0beec0SMing Lei * Prevent new request from being allocated on the current hctx. 3499bf0beec0SMing Lei * 3500bf0beec0SMing Lei * The smp_mb__after_atomic() Pairs with the implied barrier in 3501bf0beec0SMing Lei * test_and_set_bit_lock in sbitmap_get(). Ensures the inactive flag is 3502bf0beec0SMing Lei * seen once we return from the tag allocator. 
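 *
 * Simplified sketch of the allocator-side pairing (not the exact
 * upstream code path, just the ordering this relies on):
 *
 *	tag = sbitmap_get(...);   // implies a barrier (test_and_set_bit_lock)
 *	if (test_bit(BLK_MQ_S_INACTIVE, &hctx->state)) {
 *		// hctx went inactive: put the tag back and retry on an
 *		// online hardware queue instead
 *	}
 *
 * Either the allocator observes BLK_MQ_S_INACTIVE and backs off, or this
 * handler observes the newly allocated request and waits for it below.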
3503bf0beec0SMing Lei */ 3504bf0beec0SMing Lei set_bit(BLK_MQ_S_INACTIVE, &hctx->state); 3505bf0beec0SMing Lei smp_mb__after_atomic(); 3506bf0beec0SMing Lei 3507bf0beec0SMing Lei /* 3508bf0beec0SMing Lei * Try to grab a reference to the queue and wait for any outstanding 3509bf0beec0SMing Lei * requests. If we could not grab a reference the queue has been 3510bf0beec0SMing Lei * frozen and there are no requests. 3511bf0beec0SMing Lei */ 3512bf0beec0SMing Lei if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { 3513bf0beec0SMing Lei while (blk_mq_hctx_has_requests(hctx)) 3514bf0beec0SMing Lei msleep(5); 3515bf0beec0SMing Lei percpu_ref_put(&hctx->queue->q_usage_counter); 3516bf0beec0SMing Lei } 3517bf0beec0SMing Lei 3518bf0beec0SMing Lei return 0; 3519bf0beec0SMing Lei } 3520bf0beec0SMing Lei 3521bf0beec0SMing Lei static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node) 3522bf0beec0SMing Lei { 3523bf0beec0SMing Lei struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, 3524bf0beec0SMing Lei struct blk_mq_hw_ctx, cpuhp_online); 3525bf0beec0SMing Lei 3526bf0beec0SMing Lei if (cpumask_test_cpu(cpu, hctx->cpumask)) 3527bf0beec0SMing Lei clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); 3528bf0beec0SMing Lei return 0; 3529bf0beec0SMing Lei } 3530bf0beec0SMing Lei 3531e57690feSJens Axboe /* 3532e57690feSJens Axboe * 'cpu' is going away. splice any existing rq_list entries from this 3533e57690feSJens Axboe * software queue to the hw queue dispatch list, and ensure that it 3534e57690feSJens Axboe * gets run. 3535e57690feSJens Axboe */ 35369467f859SThomas Gleixner static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) 3537484b4061SJens Axboe { 35389467f859SThomas Gleixner struct blk_mq_hw_ctx *hctx; 3539484b4061SJens Axboe struct blk_mq_ctx *ctx; 3540484b4061SJens Axboe LIST_HEAD(tmp); 3541c16d6b5aSMing Lei enum hctx_type type; 3542484b4061SJens Axboe 35439467f859SThomas Gleixner hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); 3544bf0beec0SMing Lei if (!cpumask_test_cpu(cpu, hctx->cpumask)) 3545bf0beec0SMing Lei return 0; 3546bf0beec0SMing Lei 3547e57690feSJens Axboe ctx = __blk_mq_get_ctx(hctx->queue, cpu); 3548c16d6b5aSMing Lei type = hctx->type; 3549484b4061SJens Axboe 3550484b4061SJens Axboe spin_lock(&ctx->lock); 3551c16d6b5aSMing Lei if (!list_empty(&ctx->rq_lists[type])) { 3552c16d6b5aSMing Lei list_splice_init(&ctx->rq_lists[type], &tmp); 3553484b4061SJens Axboe blk_mq_hctx_clear_pending(hctx, ctx); 3554484b4061SJens Axboe } 3555484b4061SJens Axboe spin_unlock(&ctx->lock); 3556484b4061SJens Axboe 3557484b4061SJens Axboe if (list_empty(&tmp)) 35589467f859SThomas Gleixner return 0; 3559484b4061SJens Axboe 3560e57690feSJens Axboe spin_lock(&hctx->lock); 3561e57690feSJens Axboe list_splice_tail_init(&tmp, &hctx->dispatch); 3562e57690feSJens Axboe spin_unlock(&hctx->lock); 3563484b4061SJens Axboe 3564484b4061SJens Axboe blk_mq_run_hw_queue(hctx, true); 35659467f859SThomas Gleixner return 0; 3566484b4061SJens Axboe } 3567484b4061SJens Axboe 35689467f859SThomas Gleixner static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) 3569484b4061SJens Axboe { 3570bf0beec0SMing Lei if (!(hctx->flags & BLK_MQ_F_STACKING)) 3571bf0beec0SMing Lei cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, 3572bf0beec0SMing Lei &hctx->cpuhp_online); 35739467f859SThomas Gleixner cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, 35749467f859SThomas Gleixner &hctx->cpuhp_dead); 3575484b4061SJens Axboe } 3576484b4061SJens Axboe 3577364b6181SMing Lei /* 
3578364b6181SMing Lei * Before freeing hw queue, clearing the flush request reference in 3579364b6181SMing Lei * tags->rqs[] for avoiding potential UAF. 3580364b6181SMing Lei */ 3581364b6181SMing Lei static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags, 3582364b6181SMing Lei unsigned int queue_depth, struct request *flush_rq) 3583364b6181SMing Lei { 3584364b6181SMing Lei int i; 3585364b6181SMing Lei unsigned long flags; 3586364b6181SMing Lei 3587364b6181SMing Lei /* The hw queue may not be mapped yet */ 3588364b6181SMing Lei if (!tags) 3589364b6181SMing Lei return; 3590364b6181SMing Lei 35910a467d0fSJens Axboe WARN_ON_ONCE(req_ref_read(flush_rq) != 0); 3592364b6181SMing Lei 3593364b6181SMing Lei for (i = 0; i < queue_depth; i++) 3594364b6181SMing Lei cmpxchg(&tags->rqs[i], flush_rq, NULL); 3595364b6181SMing Lei 3596364b6181SMing Lei /* 3597364b6181SMing Lei * Wait until all pending iteration is done. 3598364b6181SMing Lei * 3599364b6181SMing Lei * Request reference is cleared and it is guaranteed to be observed 3600364b6181SMing Lei * after the ->lock is released. 3601364b6181SMing Lei */ 3602364b6181SMing Lei spin_lock_irqsave(&tags->lock, flags); 3603364b6181SMing Lei spin_unlock_irqrestore(&tags->lock, flags); 3604364b6181SMing Lei } 3605364b6181SMing Lei 3606c3b4afcaSMing Lei /* hctx->ctxs will be freed in queue's release handler */ 360708e98fc6SMing Lei static void blk_mq_exit_hctx(struct request_queue *q, 360808e98fc6SMing Lei struct blk_mq_tag_set *set, 360908e98fc6SMing Lei struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) 361008e98fc6SMing Lei { 3611364b6181SMing Lei struct request *flush_rq = hctx->fq->flush_rq; 3612364b6181SMing Lei 36138ab0b7dcSMing Lei if (blk_mq_hw_queue_mapped(hctx)) 361408e98fc6SMing Lei blk_mq_tag_idle(hctx); 361508e98fc6SMing Lei 36166cfeadbfSMing Lei if (blk_queue_init_done(q)) 3617364b6181SMing Lei blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], 3618364b6181SMing Lei set->queue_depth, flush_rq); 3619f70ced09SMing Lei if (set->ops->exit_request) 3620364b6181SMing Lei set->ops->exit_request(set, flush_rq, hctx_idx); 3621f70ced09SMing Lei 362208e98fc6SMing Lei if (set->ops->exit_hctx) 362308e98fc6SMing Lei set->ops->exit_hctx(hctx, hctx_idx); 362408e98fc6SMing Lei 36259467f859SThomas Gleixner blk_mq_remove_cpuhp(hctx); 36262f8f1336SMing Lei 36274e5cc99eSMing Lei xa_erase(&q->hctx_table, hctx_idx); 36284e5cc99eSMing Lei 36292f8f1336SMing Lei spin_lock(&q->unused_hctx_lock); 36302f8f1336SMing Lei list_add(&hctx->hctx_list, &q->unused_hctx_list); 36312f8f1336SMing Lei spin_unlock(&q->unused_hctx_lock); 363208e98fc6SMing Lei } 363308e98fc6SMing Lei 3634624dbe47SMing Lei static void blk_mq_exit_hw_queues(struct request_queue *q, 3635624dbe47SMing Lei struct blk_mq_tag_set *set, int nr_queue) 3636624dbe47SMing Lei { 3637624dbe47SMing Lei struct blk_mq_hw_ctx *hctx; 36384f481208SMing Lei unsigned long i; 3639624dbe47SMing Lei 3640624dbe47SMing Lei queue_for_each_hw_ctx(q, hctx, i) { 3641624dbe47SMing Lei if (i == nr_queue) 3642624dbe47SMing Lei break; 364308e98fc6SMing Lei blk_mq_exit_hctx(q, set, hctx, i); 3644624dbe47SMing Lei } 3645624dbe47SMing Lei } 3646624dbe47SMing Lei 364708e98fc6SMing Lei static int blk_mq_init_hctx(struct request_queue *q, 364808e98fc6SMing Lei struct blk_mq_tag_set *set, 364908e98fc6SMing Lei struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) 3650320ae51fSJens Axboe { 36517c6c5b7cSMing Lei hctx->queue_num = hctx_idx; 3652320ae51fSJens Axboe 3653bf0beec0SMing Lei if (!(hctx->flags & BLK_MQ_F_STACKING)) 3654bf0beec0SMing 
Lei cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, 3655bf0beec0SMing Lei &hctx->cpuhp_online); 36567c6c5b7cSMing Lei cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); 36577c6c5b7cSMing Lei 36587c6c5b7cSMing Lei hctx->tags = set->tags[hctx_idx]; 36597c6c5b7cSMing Lei 36607c6c5b7cSMing Lei if (set->ops->init_hctx && 36617c6c5b7cSMing Lei set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) 36627c6c5b7cSMing Lei goto unregister_cpu_notifier; 36637c6c5b7cSMing Lei 36647c6c5b7cSMing Lei if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, 36657c6c5b7cSMing Lei hctx->numa_node)) 36667c6c5b7cSMing Lei goto exit_hctx; 36674e5cc99eSMing Lei 36684e5cc99eSMing Lei if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) 36694e5cc99eSMing Lei goto exit_flush_rq; 36704e5cc99eSMing Lei 36717c6c5b7cSMing Lei return 0; 36727c6c5b7cSMing Lei 36734e5cc99eSMing Lei exit_flush_rq: 36744e5cc99eSMing Lei if (set->ops->exit_request) 36754e5cc99eSMing Lei set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); 36767c6c5b7cSMing Lei exit_hctx: 36777c6c5b7cSMing Lei if (set->ops->exit_hctx) 36787c6c5b7cSMing Lei set->ops->exit_hctx(hctx, hctx_idx); 36797c6c5b7cSMing Lei unregister_cpu_notifier: 36807c6c5b7cSMing Lei blk_mq_remove_cpuhp(hctx); 36817c6c5b7cSMing Lei return -1; 36827c6c5b7cSMing Lei } 36837c6c5b7cSMing Lei 36847c6c5b7cSMing Lei static struct blk_mq_hw_ctx * 36857c6c5b7cSMing Lei blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, 36867c6c5b7cSMing Lei int node) 36877c6c5b7cSMing Lei { 36887c6c5b7cSMing Lei struct blk_mq_hw_ctx *hctx; 36897c6c5b7cSMing Lei gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY; 36907c6c5b7cSMing Lei 3691704b914fSMing Lei hctx = kzalloc_node(sizeof(struct blk_mq_hw_ctx), gfp, node); 36927c6c5b7cSMing Lei if (!hctx) 36937c6c5b7cSMing Lei goto fail_alloc_hctx; 36947c6c5b7cSMing Lei 36957c6c5b7cSMing Lei if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) 36967c6c5b7cSMing Lei goto free_hctx; 36977c6c5b7cSMing Lei 36987c6c5b7cSMing Lei atomic_set(&hctx->nr_active, 0); 3699320ae51fSJens Axboe if (node == NUMA_NO_NODE) 37007c6c5b7cSMing Lei node = set->numa_node; 37017c6c5b7cSMing Lei hctx->numa_node = node; 3702320ae51fSJens Axboe 37039f993737SJens Axboe INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); 3704320ae51fSJens Axboe spin_lock_init(&hctx->lock); 3705320ae51fSJens Axboe INIT_LIST_HEAD(&hctx->dispatch); 3706320ae51fSJens Axboe hctx->queue = q; 370751db1c37SMing Lei hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; 3708320ae51fSJens Axboe 37092f8f1336SMing Lei INIT_LIST_HEAD(&hctx->hctx_list); 37102f8f1336SMing Lei 3711320ae51fSJens Axboe /* 3712a68aafa5SJens Axboe * Allocate space for all possible cpus to avoid allocation at 3713320ae51fSJens Axboe * runtime 3714320ae51fSJens Axboe */ 3715d904bfa7SJohannes Thumshirn hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), 37167c6c5b7cSMing Lei gfp, node); 3717320ae51fSJens Axboe if (!hctx->ctxs) 37187c6c5b7cSMing Lei goto free_cpumask; 3719320ae51fSJens Axboe 37205b202853SJianchao Wang if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), 3721c548e62bSMing Lei gfp, node, false, false)) 372208e98fc6SMing Lei goto free_ctxs; 3723320ae51fSJens Axboe hctx->nr_ctx = 0; 3724320ae51fSJens Axboe 37255815839bSMing Lei spin_lock_init(&hctx->dispatch_wait_lock); 3726eb619fdbSJens Axboe init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); 3727eb619fdbSJens Axboe INIT_LIST_HEAD(&hctx->dispatch_wait.entry); 3728eb619fdbSJens 
Axboe 3729754a1572SGuoqing Jiang hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); 3730f70ced09SMing Lei if (!hctx->fq) 37317c6c5b7cSMing Lei goto free_bitmap; 3732f70ced09SMing Lei 37337c6c5b7cSMing Lei blk_mq_hctx_kobj_init(hctx); 37346a83e74dSBart Van Assche 37357c6c5b7cSMing Lei return hctx; 373608e98fc6SMing Lei 373708e98fc6SMing Lei free_bitmap: 373888459642SOmar Sandoval sbitmap_free(&hctx->ctx_map); 373908e98fc6SMing Lei free_ctxs: 374008e98fc6SMing Lei kfree(hctx->ctxs); 37417c6c5b7cSMing Lei free_cpumask: 37427c6c5b7cSMing Lei free_cpumask_var(hctx->cpumask); 37437c6c5b7cSMing Lei free_hctx: 37447c6c5b7cSMing Lei kfree(hctx); 37457c6c5b7cSMing Lei fail_alloc_hctx: 37467c6c5b7cSMing Lei return NULL; 374708e98fc6SMing Lei } 374808e98fc6SMing Lei 3749320ae51fSJens Axboe static void blk_mq_init_cpu_queues(struct request_queue *q, 3750320ae51fSJens Axboe unsigned int nr_hw_queues) 3751320ae51fSJens Axboe { 3752b3c661b1SJens Axboe struct blk_mq_tag_set *set = q->tag_set; 3753b3c661b1SJens Axboe unsigned int i, j; 3754320ae51fSJens Axboe 3755320ae51fSJens Axboe for_each_possible_cpu(i) { 3756320ae51fSJens Axboe struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); 3757320ae51fSJens Axboe struct blk_mq_hw_ctx *hctx; 3758c16d6b5aSMing Lei int k; 3759320ae51fSJens Axboe 3760320ae51fSJens Axboe __ctx->cpu = i; 3761320ae51fSJens Axboe spin_lock_init(&__ctx->lock); 3762c16d6b5aSMing Lei for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++) 3763c16d6b5aSMing Lei INIT_LIST_HEAD(&__ctx->rq_lists[k]); 3764c16d6b5aSMing Lei 3765320ae51fSJens Axboe __ctx->queue = q; 3766320ae51fSJens Axboe 3767320ae51fSJens Axboe /* 3768320ae51fSJens Axboe * Set local node, IFF we have more than one hw queue. If 3769320ae51fSJens Axboe * not, we remain on the home node of the device 3770320ae51fSJens Axboe */ 3771b3c661b1SJens Axboe for (j = 0; j < set->nr_maps; j++) { 3772b3c661b1SJens Axboe hctx = blk_mq_map_queue_type(q, j, i); 3773320ae51fSJens Axboe if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) 3774576e85c5SXianting Tian hctx->numa_node = cpu_to_node(i); 3775320ae51fSJens Axboe } 3776320ae51fSJens Axboe } 3777b3c661b1SJens Axboe } 3778320ae51fSJens Axboe 377963064be1SJohn Garry struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, 378063064be1SJohn Garry unsigned int hctx_idx, 378163064be1SJohn Garry unsigned int depth) 378263064be1SJohn Garry { 378363064be1SJohn Garry struct blk_mq_tags *tags; 378463064be1SJohn Garry int ret; 378563064be1SJohn Garry 3786e155b0c2SJohn Garry tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); 378763064be1SJohn Garry if (!tags) 378863064be1SJohn Garry return NULL; 378963064be1SJohn Garry 379063064be1SJohn Garry ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth); 379163064be1SJohn Garry if (ret) { 3792e155b0c2SJohn Garry blk_mq_free_rq_map(tags); 379363064be1SJohn Garry return NULL; 379463064be1SJohn Garry } 379563064be1SJohn Garry 379663064be1SJohn Garry return tags; 379763064be1SJohn Garry } 379863064be1SJohn Garry 379963064be1SJohn Garry static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, 380003b63b02SWeiping Zhang int hctx_idx) 3801cc71a6f4SJens Axboe { 3802079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) { 3803079a2e3eSJohn Garry set->tags[hctx_idx] = set->shared_tags; 3804cc71a6f4SJens Axboe 3805cc71a6f4SJens Axboe return true; 3806cc71a6f4SJens Axboe } 3807cc71a6f4SJens Axboe 380863064be1SJohn Garry set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, 3809cc71a6f4SJens 
Axboe set->queue_depth); 3810cc71a6f4SJens Axboe 381163064be1SJohn Garry return set->tags[hctx_idx]; 3812cc71a6f4SJens Axboe } 3813cc71a6f4SJens Axboe 3814645db34eSJohn Garry void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, 3815645db34eSJohn Garry struct blk_mq_tags *tags, 3816cc71a6f4SJens Axboe unsigned int hctx_idx) 3817cc71a6f4SJens Axboe { 3818645db34eSJohn Garry if (tags) { 3819645db34eSJohn Garry blk_mq_free_rqs(set, tags, hctx_idx); 3820e155b0c2SJohn Garry blk_mq_free_rq_map(tags); 3821cc71a6f4SJens Axboe } 3822bd166ef1SJens Axboe } 3823cc71a6f4SJens Axboe 3824e155b0c2SJohn Garry static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, 3825e155b0c2SJohn Garry unsigned int hctx_idx) 3826e155b0c2SJohn Garry { 3827079a2e3eSJohn Garry if (!blk_mq_is_shared_tags(set->flags)) 3828e155b0c2SJohn Garry blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); 3829e155b0c2SJohn Garry 3830e155b0c2SJohn Garry set->tags[hctx_idx] = NULL; 3831e155b0c2SJohn Garry } 3832e155b0c2SJohn Garry 38334b855ad3SChristoph Hellwig static void blk_mq_map_swqueue(struct request_queue *q) 3834320ae51fSJens Axboe { 38354f481208SMing Lei unsigned int j, hctx_idx; 38364f481208SMing Lei unsigned long i; 3837320ae51fSJens Axboe struct blk_mq_hw_ctx *hctx; 3838320ae51fSJens Axboe struct blk_mq_ctx *ctx; 38392a34c087SMing Lei struct blk_mq_tag_set *set = q->tag_set; 3840320ae51fSJens Axboe 3841320ae51fSJens Axboe queue_for_each_hw_ctx(q, hctx, i) { 3842e4043dcfSJens Axboe cpumask_clear(hctx->cpumask); 3843320ae51fSJens Axboe hctx->nr_ctx = 0; 3844d416c92cShuhai hctx->dispatch_from = NULL; 3845320ae51fSJens Axboe } 3846320ae51fSJens Axboe 3847320ae51fSJens Axboe /* 38484b855ad3SChristoph Hellwig * Map software to hardware queues. 38494412efecSMing Lei * 38504412efecSMing Lei * If the cpu isn't present, the cpu is mapped to first hctx. 3851320ae51fSJens Axboe */ 385220e4d813SChristoph Hellwig for_each_possible_cpu(i) { 3853fd689871SMing Lei 3854fd689871SMing Lei ctx = per_cpu_ptr(q->queue_ctx, i); 3855fd689871SMing Lei for (j = 0; j < set->nr_maps; j++) { 3856fd689871SMing Lei if (!set->map[j].nr_queues) { 3857fd689871SMing Lei ctx->hctxs[j] = blk_mq_map_queue_type(q, 3858fd689871SMing Lei HCTX_TYPE_DEFAULT, i); 3859fd689871SMing Lei continue; 3860fd689871SMing Lei } 3861fd689871SMing Lei hctx_idx = set->map[j].mq_map[i]; 38624412efecSMing Lei /* unmapped hw queue can be remapped after CPU topo changed */ 38634412efecSMing Lei if (!set->tags[hctx_idx] && 386463064be1SJohn Garry !__blk_mq_alloc_map_and_rqs(set, hctx_idx)) { 38654412efecSMing Lei /* 38664412efecSMing Lei * If tags initialization fail for some hctx, 38674412efecSMing Lei * that hctx won't be brought online. In this 38684412efecSMing Lei * case, remap the current ctx to hctx[0] which 38694412efecSMing Lei * is guaranteed to always have tags allocated 38704412efecSMing Lei */ 3871fd689871SMing Lei set->map[j].mq_map[i] = 0; 3872bb94aea1SJianchao Wang } 3873e5edd5f2SMing Lei 3874b3c661b1SJens Axboe hctx = blk_mq_map_queue_type(q, j, i); 38758ccdf4a3SJianchao Wang ctx->hctxs[j] = hctx; 3876b3c661b1SJens Axboe /* 3877b3c661b1SJens Axboe * If the CPU is already set in the mask, then we've 3878b3c661b1SJens Axboe * mapped this one already. This can happen if 3879b3c661b1SJens Axboe * devices share queues across queue maps. 
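 *
 * For instance, a driver with two maps (HCTX_TYPE_DEFAULT and
 * HCTX_TYPE_READ, say) whose mq_map[] tables point at the same hardware
 * queues reaches this check with the bit already set on the second pass
 * of the j-loop, so each ctx is counted into hctx->nr_ctx only once.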
3880b3c661b1SJens Axboe */ 3881b3c661b1SJens Axboe if (cpumask_test_cpu(i, hctx->cpumask)) 3882b3c661b1SJens Axboe continue; 3883b3c661b1SJens Axboe 3884e4043dcfSJens Axboe cpumask_set_cpu(i, hctx->cpumask); 3885b3c661b1SJens Axboe hctx->type = j; 3886f31967f0SJens Axboe ctx->index_hw[hctx->type] = hctx->nr_ctx; 3887320ae51fSJens Axboe hctx->ctxs[hctx->nr_ctx++] = ctx; 3888f31967f0SJens Axboe 3889f31967f0SJens Axboe /* 3890f31967f0SJens Axboe * If the nr_ctx type overflows, we have exceeded the 3891f31967f0SJens Axboe * amount of sw queues we can support. 3892f31967f0SJens Axboe */ 3893f31967f0SJens Axboe BUG_ON(!hctx->nr_ctx); 3894320ae51fSJens Axboe } 3895bb94aea1SJianchao Wang 3896bb94aea1SJianchao Wang for (; j < HCTX_MAX_TYPES; j++) 3897bb94aea1SJianchao Wang ctx->hctxs[j] = blk_mq_map_queue_type(q, 3898bb94aea1SJianchao Wang HCTX_TYPE_DEFAULT, i); 3899b3c661b1SJens Axboe } 3900506e931fSJens Axboe 3901506e931fSJens Axboe queue_for_each_hw_ctx(q, hctx, i) { 39024412efecSMing Lei /* 39034412efecSMing Lei * If no software queues are mapped to this hardware queue, 39044412efecSMing Lei * disable it and free the request entries. 39054412efecSMing Lei */ 39064412efecSMing Lei if (!hctx->nr_ctx) { 39074412efecSMing Lei /* Never unmap queue 0. We need it as a 39084412efecSMing Lei * fallback in case of a new remap fails 39094412efecSMing Lei * allocation 39104412efecSMing Lei */ 3911e155b0c2SJohn Garry if (i) 3912e155b0c2SJohn Garry __blk_mq_free_map_and_rqs(set, i); 39134412efecSMing Lei 39144412efecSMing Lei hctx->tags = NULL; 39154412efecSMing Lei continue; 39164412efecSMing Lei } 3917484b4061SJens Axboe 39182a34c087SMing Lei hctx->tags = set->tags[i]; 39192a34c087SMing Lei WARN_ON(!hctx->tags); 39202a34c087SMing Lei 3921484b4061SJens Axboe /* 3922889fa31fSChong Yuan * Set the map size to the number of mapped software queues. 3923889fa31fSChong Yuan * This is more accurate and more efficient than looping 3924889fa31fSChong Yuan * over all possibly mapped software queues. 3925889fa31fSChong Yuan */ 392688459642SOmar Sandoval sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); 3927889fa31fSChong Yuan 3928889fa31fSChong Yuan /* 3929484b4061SJens Axboe * Initialize batch roundrobin counts 3930484b4061SJens Axboe */ 3931f82ddf19SMing Lei hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); 3932506e931fSJens Axboe hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; 3933506e931fSJens Axboe } 3934320ae51fSJens Axboe } 3935320ae51fSJens Axboe 39368e8320c9SJens Axboe /* 39378e8320c9SJens Axboe * Caller needs to ensure that we're either frozen/quiesced, or that 39388e8320c9SJens Axboe * the queue isn't live yet. 
39398e8320c9SJens Axboe */ 39402404e607SJeff Moyer static void queue_set_hctx_shared(struct request_queue *q, bool shared) 39410d2602caSJens Axboe { 39420d2602caSJens Axboe struct blk_mq_hw_ctx *hctx; 39434f481208SMing Lei unsigned long i; 39440d2602caSJens Axboe 39450d2602caSJens Axboe queue_for_each_hw_ctx(q, hctx, i) { 3946454bb677SYu Kuai if (shared) { 394751db1c37SMing Lei hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; 3948454bb677SYu Kuai } else { 3949454bb677SYu Kuai blk_mq_tag_idle(hctx); 395051db1c37SMing Lei hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; 39510d2602caSJens Axboe } 39522404e607SJeff Moyer } 3953454bb677SYu Kuai } 39542404e607SJeff Moyer 3955655ac300SHannes Reinecke static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, 39568e8320c9SJens Axboe bool shared) 39572404e607SJeff Moyer { 39582404e607SJeff Moyer struct request_queue *q; 39592404e607SJeff Moyer 3960705cda97SBart Van Assche lockdep_assert_held(&set->tag_list_lock); 3961705cda97SBart Van Assche 39622404e607SJeff Moyer list_for_each_entry(q, &set->tag_list, tag_set_list) { 39632404e607SJeff Moyer blk_mq_freeze_queue(q); 39642404e607SJeff Moyer queue_set_hctx_shared(q, shared); 39650d2602caSJens Axboe blk_mq_unfreeze_queue(q); 39660d2602caSJens Axboe } 39670d2602caSJens Axboe } 39680d2602caSJens Axboe 39690d2602caSJens Axboe static void blk_mq_del_queue_tag_set(struct request_queue *q) 39700d2602caSJens Axboe { 39710d2602caSJens Axboe struct blk_mq_tag_set *set = q->tag_set; 39720d2602caSJens Axboe 39730d2602caSJens Axboe mutex_lock(&set->tag_list_lock); 397408c875cbSDaniel Wagner list_del(&q->tag_set_list); 39752404e607SJeff Moyer if (list_is_singular(&set->tag_list)) { 39762404e607SJeff Moyer /* just transitioned to unshared */ 397751db1c37SMing Lei set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; 39782404e607SJeff Moyer /* update existing queue */ 3979655ac300SHannes Reinecke blk_mq_update_tag_set_shared(set, false); 39802404e607SJeff Moyer } 39810d2602caSJens Axboe mutex_unlock(&set->tag_list_lock); 3982a347c7adSRoman Pen INIT_LIST_HEAD(&q->tag_set_list); 39830d2602caSJens Axboe } 39840d2602caSJens Axboe 39850d2602caSJens Axboe static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, 39860d2602caSJens Axboe struct request_queue *q) 39870d2602caSJens Axboe { 39880d2602caSJens Axboe mutex_lock(&set->tag_list_lock); 39892404e607SJeff Moyer 3990ff821d27SJens Axboe /* 3991ff821d27SJens Axboe * Check to see if we're transitioning to shared (from 1 to 2 queues). 
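 *
 * A concrete instance: NVMe uses one tag_set per controller and one
 * request_queue per namespace, so attaching a second namespace is
 * exactly the 1 -> 2 transition handled here and every queue on the
 * tag_list is switched to shared-tag accounting.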
3992ff821d27SJens Axboe */ 3993ff821d27SJens Axboe if (!list_empty(&set->tag_list) && 399451db1c37SMing Lei !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { 399551db1c37SMing Lei set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; 39962404e607SJeff Moyer /* update existing queue */ 3997655ac300SHannes Reinecke blk_mq_update_tag_set_shared(set, true); 39982404e607SJeff Moyer } 399951db1c37SMing Lei if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 40002404e607SJeff Moyer queue_set_hctx_shared(q, true); 400108c875cbSDaniel Wagner list_add_tail(&q->tag_set_list, &set->tag_list); 40022404e607SJeff Moyer 40030d2602caSJens Axboe mutex_unlock(&set->tag_list_lock); 40040d2602caSJens Axboe } 40050d2602caSJens Axboe 40061db4909eSMing Lei /* All allocations will be freed in release handler of q->mq_kobj */ 40071db4909eSMing Lei static int blk_mq_alloc_ctxs(struct request_queue *q) 40081db4909eSMing Lei { 40091db4909eSMing Lei struct blk_mq_ctxs *ctxs; 40101db4909eSMing Lei int cpu; 40111db4909eSMing Lei 40121db4909eSMing Lei ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL); 40131db4909eSMing Lei if (!ctxs) 40141db4909eSMing Lei return -ENOMEM; 40151db4909eSMing Lei 40161db4909eSMing Lei ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); 40171db4909eSMing Lei if (!ctxs->queue_ctx) 40181db4909eSMing Lei goto fail; 40191db4909eSMing Lei 40201db4909eSMing Lei for_each_possible_cpu(cpu) { 40211db4909eSMing Lei struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); 40221db4909eSMing Lei ctx->ctxs = ctxs; 40231db4909eSMing Lei } 40241db4909eSMing Lei 40251db4909eSMing Lei q->mq_kobj = &ctxs->kobj; 40261db4909eSMing Lei q->queue_ctx = ctxs->queue_ctx; 40271db4909eSMing Lei 40281db4909eSMing Lei return 0; 40291db4909eSMing Lei fail: 40301db4909eSMing Lei kfree(ctxs); 40311db4909eSMing Lei return -ENOMEM; 40321db4909eSMing Lei } 40331db4909eSMing Lei 4034e09aae7eSMing Lei /* 4035e09aae7eSMing Lei * It is the actual release handler for mq, but we do it from 4036e09aae7eSMing Lei * request queue's release handler for avoiding use-after-free 4037e09aae7eSMing Lei * and headache because q->mq_kobj shouldn't have been introduced, 4038e09aae7eSMing Lei * but we can't group ctx/kctx kobj without it. 4039e09aae7eSMing Lei */ 4040e09aae7eSMing Lei void blk_mq_release(struct request_queue *q) 4041e09aae7eSMing Lei { 40422f8f1336SMing Lei struct blk_mq_hw_ctx *hctx, *next; 40434f481208SMing Lei unsigned long i; 4044e09aae7eSMing Lei 40452f8f1336SMing Lei queue_for_each_hw_ctx(q, hctx, i) 40462f8f1336SMing Lei WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); 40472f8f1336SMing Lei 40482f8f1336SMing Lei /* all hctx are in .unused_hctx_list now */ 40492f8f1336SMing Lei list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { 40502f8f1336SMing Lei list_del_init(&hctx->hctx_list); 40516c8b232eSMing Lei kobject_put(&hctx->kobj); 4052c3b4afcaSMing Lei } 4053e09aae7eSMing Lei 40544e5cc99eSMing Lei xa_destroy(&q->hctx_table); 4055e09aae7eSMing Lei 40567ea5fe31SMing Lei /* 40577ea5fe31SMing Lei * release .mq_kobj and sw queue's kobject now because 40587ea5fe31SMing Lei * both share lifetime with request queue. 
40597ea5fe31SMing Lei */ 40607ea5fe31SMing Lei blk_mq_sysfs_deinit(q); 4061e09aae7eSMing Lei } 4062e09aae7eSMing Lei 40635ec780a6SChristoph Hellwig static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, 40642f227bb9SChristoph Hellwig void *queuedata) 4065320ae51fSJens Axboe { 406626a9750aSChristoph Hellwig struct request_queue *q; 406726a9750aSChristoph Hellwig int ret; 4068b62c21b7SMike Snitzer 406980bd4a7aSChristoph Hellwig q = blk_alloc_queue(set->numa_node); 407026a9750aSChristoph Hellwig if (!q) 4071b62c21b7SMike Snitzer return ERR_PTR(-ENOMEM); 407226a9750aSChristoph Hellwig q->queuedata = queuedata; 407326a9750aSChristoph Hellwig ret = blk_mq_init_allocated_queue(set, q); 407426a9750aSChristoph Hellwig if (ret) { 40756f8191fdSChristoph Hellwig blk_put_queue(q); 407626a9750aSChristoph Hellwig return ERR_PTR(ret); 407726a9750aSChristoph Hellwig } 4078b62c21b7SMike Snitzer return q; 4079b62c21b7SMike Snitzer } 40802f227bb9SChristoph Hellwig 40812f227bb9SChristoph Hellwig struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) 40822f227bb9SChristoph Hellwig { 40832f227bb9SChristoph Hellwig return blk_mq_init_queue_data(set, NULL); 40842f227bb9SChristoph Hellwig } 4085b62c21b7SMike Snitzer EXPORT_SYMBOL(blk_mq_init_queue); 4086b62c21b7SMike Snitzer 40876f8191fdSChristoph Hellwig /** 40886f8191fdSChristoph Hellwig * blk_mq_destroy_queue - shutdown a request queue 40896f8191fdSChristoph Hellwig * @q: request queue to shutdown 40906f8191fdSChristoph Hellwig * 409181ea42b9SBart Van Assche * This shuts down a request queue allocated by blk_mq_init_queue(). All future 409281ea42b9SBart Van Assche * requests will be failed with -ENODEV. The caller is responsible for dropping 409381ea42b9SBart Van Assche * the reference from blk_mq_init_queue() by calling blk_put_queue(). 
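 *
 * A typical teardown for a driver that paired blk_mq_init_queue() with
 * its own tag set might look like (illustrative sketch only; error
 * handling and driver-private cleanup omitted, my_tag_set is a
 * hypothetical driver field):
 *
 *	blk_mq_destroy_queue(q);
 *	blk_put_queue(q);
 *	blk_mq_free_tag_set(&my_tag_set);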
40946f8191fdSChristoph Hellwig * 40956f8191fdSChristoph Hellwig * Context: can sleep 40966f8191fdSChristoph Hellwig */ 40976f8191fdSChristoph Hellwig void blk_mq_destroy_queue(struct request_queue *q) 40986f8191fdSChristoph Hellwig { 40996f8191fdSChristoph Hellwig WARN_ON_ONCE(!queue_is_mq(q)); 41006f8191fdSChristoph Hellwig WARN_ON_ONCE(blk_queue_registered(q)); 41016f8191fdSChristoph Hellwig 41026f8191fdSChristoph Hellwig might_sleep(); 41036f8191fdSChristoph Hellwig 41046f8191fdSChristoph Hellwig blk_queue_flag_set(QUEUE_FLAG_DYING, q); 41056f8191fdSChristoph Hellwig blk_queue_start_drain(q); 410656c1ee92SJinlong Chen blk_mq_freeze_queue_wait(q); 41076f8191fdSChristoph Hellwig 41086f8191fdSChristoph Hellwig blk_sync_queue(q); 41096f8191fdSChristoph Hellwig blk_mq_cancel_work_sync(q); 41106f8191fdSChristoph Hellwig blk_mq_exit_queue(q); 41116f8191fdSChristoph Hellwig } 41126f8191fdSChristoph Hellwig EXPORT_SYMBOL(blk_mq_destroy_queue); 41136f8191fdSChristoph Hellwig 41144dcc4874SChristoph Hellwig struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, 41154dcc4874SChristoph Hellwig struct lock_class_key *lkclass) 41169316a9edSJens Axboe { 41179316a9edSJens Axboe struct request_queue *q; 4118b461dfc4SChristoph Hellwig struct gendisk *disk; 41199316a9edSJens Axboe 4120b461dfc4SChristoph Hellwig q = blk_mq_init_queue_data(set, queuedata); 4121b461dfc4SChristoph Hellwig if (IS_ERR(q)) 4122b461dfc4SChristoph Hellwig return ERR_CAST(q); 41239316a9edSJens Axboe 41244a1fa41dSChristoph Hellwig disk = __alloc_disk_node(q, set->numa_node, lkclass); 4125b461dfc4SChristoph Hellwig if (!disk) { 41260a3e5cc7SChristoph Hellwig blk_mq_destroy_queue(q); 41272b3f056fSChristoph Hellwig blk_put_queue(q); 4128b461dfc4SChristoph Hellwig return ERR_PTR(-ENOMEM); 41299316a9edSJens Axboe } 41306f8191fdSChristoph Hellwig set_bit(GD_OWNS_QUEUE, &disk->state); 4131b461dfc4SChristoph Hellwig return disk; 41329316a9edSJens Axboe } 4133b461dfc4SChristoph Hellwig EXPORT_SYMBOL(__blk_mq_alloc_disk); 41349316a9edSJens Axboe 41356f8191fdSChristoph Hellwig struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, 41366f8191fdSChristoph Hellwig struct lock_class_key *lkclass) 41376f8191fdSChristoph Hellwig { 413822c17e27SChristoph Hellwig struct gendisk *disk; 413922c17e27SChristoph Hellwig 41406f8191fdSChristoph Hellwig if (!blk_get_queue(q)) 41416f8191fdSChristoph Hellwig return NULL; 414222c17e27SChristoph Hellwig disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); 414322c17e27SChristoph Hellwig if (!disk) 414422c17e27SChristoph Hellwig blk_put_queue(q); 414522c17e27SChristoph Hellwig return disk; 41466f8191fdSChristoph Hellwig } 41476f8191fdSChristoph Hellwig EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); 41486f8191fdSChristoph Hellwig 414934d11ffaSJianchao Wang static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( 415034d11ffaSJianchao Wang struct blk_mq_tag_set *set, struct request_queue *q, 415134d11ffaSJianchao Wang int hctx_idx, int node) 415234d11ffaSJianchao Wang { 41532f8f1336SMing Lei struct blk_mq_hw_ctx *hctx = NULL, *tmp; 415434d11ffaSJianchao Wang 41552f8f1336SMing Lei /* reuse dead hctx first */ 41562f8f1336SMing Lei spin_lock(&q->unused_hctx_lock); 41572f8f1336SMing Lei list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { 41582f8f1336SMing Lei if (tmp->numa_node == node) { 41592f8f1336SMing Lei hctx = tmp; 41602f8f1336SMing Lei break; 41612f8f1336SMing Lei } 41622f8f1336SMing Lei } 41632f8f1336SMing Lei if (hctx) 41642f8f1336SMing Lei 
list_del_init(&hctx->hctx_list); 41652f8f1336SMing Lei spin_unlock(&q->unused_hctx_lock); 41662f8f1336SMing Lei 41672f8f1336SMing Lei if (!hctx) 41687c6c5b7cSMing Lei hctx = blk_mq_alloc_hctx(q, set, node); 416934d11ffaSJianchao Wang if (!hctx) 41707c6c5b7cSMing Lei goto fail; 417134d11ffaSJianchao Wang 41727c6c5b7cSMing Lei if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) 41737c6c5b7cSMing Lei goto free_hctx; 417434d11ffaSJianchao Wang 417534d11ffaSJianchao Wang return hctx; 41767c6c5b7cSMing Lei 41777c6c5b7cSMing Lei free_hctx: 41787c6c5b7cSMing Lei kobject_put(&hctx->kobj); 41797c6c5b7cSMing Lei fail: 41807c6c5b7cSMing Lei return NULL; 418134d11ffaSJianchao Wang } 418234d11ffaSJianchao Wang 4183868f2f0bSKeith Busch static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, 4184b62c21b7SMike Snitzer struct request_queue *q) 4185b62c21b7SMike Snitzer { 41864e5cc99eSMing Lei struct blk_mq_hw_ctx *hctx; 41874e5cc99eSMing Lei unsigned long i, j; 4188ac0d6b92SBart Van Assche 4189fb350e0aSMing Lei /* protect against switching io scheduler */ 4190fb350e0aSMing Lei mutex_lock(&q->sysfs_lock); 419124d2f903SChristoph Hellwig for (i = 0; i < set->nr_hw_queues; i++) { 4192306f13eeSMing Lei int old_node; 41934d805131SMing Lei int node = blk_mq_get_hctx_node(set, i); 41944e5cc99eSMing Lei struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); 4195868f2f0bSKeith Busch 4196306f13eeSMing Lei if (old_hctx) { 4197306f13eeSMing Lei old_node = old_hctx->numa_node; 4198306f13eeSMing Lei blk_mq_exit_hctx(q, set, old_hctx, i); 4199306f13eeSMing Lei } 4200320ae51fSJens Axboe 42014e5cc99eSMing Lei if (!blk_mq_alloc_and_init_hctx(set, q, i, node)) { 4202306f13eeSMing Lei if (!old_hctx) 4203868f2f0bSKeith Busch break; 4204306f13eeSMing Lei pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n", 4205306f13eeSMing Lei node, old_node); 42064e5cc99eSMing Lei hctx = blk_mq_alloc_and_init_hctx(set, q, i, old_node); 42074e5cc99eSMing Lei WARN_ON_ONCE(!hctx); 4208868f2f0bSKeith Busch } 4209320ae51fSJens Axboe } 4210e01ad46dSJianchao Wang /* 4211e01ad46dSJianchao Wang * Increasing nr_hw_queues fails. Free the newly allocated 4212e01ad46dSJianchao Wang * hctxs and keep the previous q->nr_hw_queues. 
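 *
 * Worked example: growing from 4 to 8 hardware queues but failing to
 * allocate hctx 6 leaves i == 6 != set->nr_hw_queues, so j starts at the
 * old q->nr_hw_queues (4) and the loop below tears down the freshly
 * created hctxs 4 and 5 while q->nr_hw_queues stays 4.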
4213e01ad46dSJianchao Wang */ 4214e01ad46dSJianchao Wang if (i != set->nr_hw_queues) { 4215e01ad46dSJianchao Wang j = q->nr_hw_queues; 4216e01ad46dSJianchao Wang } else { 4217e01ad46dSJianchao Wang j = i; 4218e01ad46dSJianchao Wang q->nr_hw_queues = set->nr_hw_queues; 4219e01ad46dSJianchao Wang } 422034d11ffaSJianchao Wang 42214e5cc99eSMing Lei xa_for_each_start(&q->hctx_table, j, hctx, j) 4222868f2f0bSKeith Busch blk_mq_exit_hctx(q, set, hctx, j); 4223fb350e0aSMing Lei mutex_unlock(&q->sysfs_lock); 4224868f2f0bSKeith Busch } 4225868f2f0bSKeith Busch 422642ee3061SMing Lei static void blk_mq_update_poll_flag(struct request_queue *q) 422742ee3061SMing Lei { 422842ee3061SMing Lei struct blk_mq_tag_set *set = q->tag_set; 422942ee3061SMing Lei 423042ee3061SMing Lei if (set->nr_maps > HCTX_TYPE_POLL && 423142ee3061SMing Lei set->map[HCTX_TYPE_POLL].nr_queues) 423242ee3061SMing Lei blk_queue_flag_set(QUEUE_FLAG_POLL, q); 423342ee3061SMing Lei else 423442ee3061SMing Lei blk_queue_flag_clear(QUEUE_FLAG_POLL, q); 423542ee3061SMing Lei } 423642ee3061SMing Lei 423726a9750aSChristoph Hellwig int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, 423826a9750aSChristoph Hellwig struct request_queue *q) 4239868f2f0bSKeith Busch { 424066841672SMing Lei /* mark the queue as mq asap */ 424166841672SMing Lei q->mq_ops = set->ops; 424266841672SMing Lei 42431db4909eSMing Lei if (blk_mq_alloc_ctxs(q)) 424454bdd67dSKeith Busch goto err_exit; 4245868f2f0bSKeith Busch 4246737f98cfSMing Lei /* init q->mq_kobj and sw queues' kobjects */ 4247737f98cfSMing Lei blk_mq_sysfs_init(q); 4248737f98cfSMing Lei 42492f8f1336SMing Lei INIT_LIST_HEAD(&q->unused_hctx_list); 42502f8f1336SMing Lei spin_lock_init(&q->unused_hctx_lock); 42512f8f1336SMing Lei 42524e5cc99eSMing Lei xa_init(&q->hctx_table); 42534e5cc99eSMing Lei 4254868f2f0bSKeith Busch blk_mq_realloc_hw_ctxs(set, q); 4255868f2f0bSKeith Busch if (!q->nr_hw_queues) 4256868f2f0bSKeith Busch goto err_hctxs; 4257320ae51fSJens Axboe 4258287922ebSChristoph Hellwig INIT_WORK(&q->timeout_work, blk_mq_timeout_work); 4259e56f698bSMing Lei blk_queue_rq_timeout(q, set->timeout ? 
set->timeout : 30 * HZ); 4260320ae51fSJens Axboe 4261a8908939SJens Axboe q->tag_set = set; 4262320ae51fSJens Axboe 426394eddfbeSJens Axboe q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 426442ee3061SMing Lei blk_mq_update_poll_flag(q); 4265320ae51fSJens Axboe 42662849450aSMike Snitzer INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); 42679a67aa52SChristoph Hellwig INIT_LIST_HEAD(&q->flush_list); 42686fca6a61SChristoph Hellwig INIT_LIST_HEAD(&q->requeue_list); 42696fca6a61SChristoph Hellwig spin_lock_init(&q->requeue_lock); 42706fca6a61SChristoph Hellwig 4271eba71768SJens Axboe q->nr_requests = set->queue_depth; 4272eba71768SJens Axboe 427324d2f903SChristoph Hellwig blk_mq_init_cpu_queues(q, set->nr_hw_queues); 42740d2602caSJens Axboe blk_mq_add_queue_tag_set(set, q); 42754b855ad3SChristoph Hellwig blk_mq_map_swqueue(q); 427626a9750aSChristoph Hellwig return 0; 427718741986SChristoph Hellwig 4278320ae51fSJens Axboe err_hctxs: 4279943f45b9SChen Jun blk_mq_release(q); 4280c7de5726SMing Lin err_exit: 4281c7de5726SMing Lin q->mq_ops = NULL; 428226a9750aSChristoph Hellwig return -ENOMEM; 4283320ae51fSJens Axboe } 4284b62c21b7SMike Snitzer EXPORT_SYMBOL(blk_mq_init_allocated_queue); 4285320ae51fSJens Axboe 4286c7e2d94bSMing Lei /* tags can _not_ be used after returning from blk_mq_exit_queue */ 4287c7e2d94bSMing Lei void blk_mq_exit_queue(struct request_queue *q) 4288320ae51fSJens Axboe { 4289624dbe47SMing Lei struct blk_mq_tag_set *set = q->tag_set; 4290320ae51fSJens Axboe 4291630ef623SBart Van Assche /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ 4292624dbe47SMing Lei blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); 4293630ef623SBart Van Assche /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ 4294630ef623SBart Van Assche blk_mq_del_queue_tag_set(q); 4295320ae51fSJens Axboe } 4296320ae51fSJens Axboe 4297a5164405SJens Axboe static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) 4298a5164405SJens Axboe { 4299a5164405SJens Axboe int i; 4300a5164405SJens Axboe 4301079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) { 4302079a2e3eSJohn Garry set->shared_tags = blk_mq_alloc_map_and_rqs(set, 4303e155b0c2SJohn Garry BLK_MQ_NO_HCTX_IDX, 4304e155b0c2SJohn Garry set->queue_depth); 4305079a2e3eSJohn Garry if (!set->shared_tags) 4306e155b0c2SJohn Garry return -ENOMEM; 4307e155b0c2SJohn Garry } 4308e155b0c2SJohn Garry 43098229cca8SXianting Tian for (i = 0; i < set->nr_hw_queues; i++) { 431063064be1SJohn Garry if (!__blk_mq_alloc_map_and_rqs(set, i)) 4311a5164405SJens Axboe goto out_unwind; 43128229cca8SXianting Tian cond_resched(); 43138229cca8SXianting Tian } 4314a5164405SJens Axboe 4315a5164405SJens Axboe return 0; 4316a5164405SJens Axboe 4317a5164405SJens Axboe out_unwind: 4318a5164405SJens Axboe while (--i >= 0) 4319e155b0c2SJohn Garry __blk_mq_free_map_and_rqs(set, i); 4320e155b0c2SJohn Garry 4321079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) { 4322079a2e3eSJohn Garry blk_mq_free_map_and_rqs(set, set->shared_tags, 4323e155b0c2SJohn Garry BLK_MQ_NO_HCTX_IDX); 4324645db34eSJohn Garry } 4325a5164405SJens Axboe 4326a5164405SJens Axboe return -ENOMEM; 4327a5164405SJens Axboe } 4328a5164405SJens Axboe 4329a5164405SJens Axboe /* 4330a5164405SJens Axboe * Allocate the request maps associated with this tag_set. Note that this 4331a5164405SJens Axboe * may reduce the depth asked for, if memory is tight. set->queue_depth 4332a5164405SJens Axboe * will be updated to reflect the allocated depth. 
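 *
 * For example, a driver asking for queue_depth = 1024 under memory
 * pressure may end up with 512 or 256: each failed attempt halves
 * set->queue_depth until an allocation succeeds, or -ENOMEM is returned
 * once the depth falls below set->reserved_tags + BLK_MQ_TAG_MIN.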
4333a5164405SJens Axboe */ 433463064be1SJohn Garry static int blk_mq_alloc_set_map_and_rqs(struct blk_mq_tag_set *set) 4335a5164405SJens Axboe { 4336a5164405SJens Axboe unsigned int depth; 4337a5164405SJens Axboe int err; 4338a5164405SJens Axboe 4339a5164405SJens Axboe depth = set->queue_depth; 4340a5164405SJens Axboe do { 4341a5164405SJens Axboe err = __blk_mq_alloc_rq_maps(set); 4342a5164405SJens Axboe if (!err) 4343a5164405SJens Axboe break; 4344a5164405SJens Axboe 4345a5164405SJens Axboe set->queue_depth >>= 1; 4346a5164405SJens Axboe if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { 4347a5164405SJens Axboe err = -ENOMEM; 4348a5164405SJens Axboe break; 4349a5164405SJens Axboe } 4350a5164405SJens Axboe } while (set->queue_depth); 4351a5164405SJens Axboe 4352a5164405SJens Axboe if (!set->queue_depth || err) { 4353a5164405SJens Axboe pr_err("blk-mq: failed to allocate request map\n"); 4354a5164405SJens Axboe return -ENOMEM; 4355a5164405SJens Axboe } 4356a5164405SJens Axboe 4357a5164405SJens Axboe if (depth != set->queue_depth) 4358a5164405SJens Axboe pr_info("blk-mq: reduced tag depth (%u -> %u)\n", 4359a5164405SJens Axboe depth, set->queue_depth); 4360a5164405SJens Axboe 4361a5164405SJens Axboe return 0; 4362a5164405SJens Axboe } 4363a5164405SJens Axboe 4364a4e1d0b7SBart Van Assche static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) 4365ebe8bddbSOmar Sandoval { 43666e66b493SBart Van Assche /* 43676e66b493SBart Van Assche * blk_mq_map_queues() and multiple .map_queues() implementations 43686e66b493SBart Van Assche * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the 43696e66b493SBart Van Assche * number of hardware queues. 43706e66b493SBart Van Assche */ 43716e66b493SBart Van Assche if (set->nr_maps == 1) 43726e66b493SBart Van Assche set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; 43736e66b493SBart Van Assche 437459388702SMing Lei if (set->ops->map_queues && !is_kdump_kernel()) { 4375b3c661b1SJens Axboe int i; 4376b3c661b1SJens Axboe 43777d4901a9SMing Lei /* 43787d4901a9SMing Lei * transport .map_queues is usually done in the following 43797d4901a9SMing Lei * way: 43807d4901a9SMing Lei * 43817d4901a9SMing Lei * for (queue = 0; queue < set->nr_hw_queues; queue++) { 43827d4901a9SMing Lei * mask = get_cpu_mask(queue) 43837d4901a9SMing Lei * for_each_cpu(cpu, mask) 4384b3c661b1SJens Axboe * set->map[x].mq_map[cpu] = queue; 43857d4901a9SMing Lei * } 43867d4901a9SMing Lei * 43877d4901a9SMing Lei * When we need to remap, the table has to be cleared for 43887d4901a9SMing Lei * killing stale mapping since one CPU may not be mapped 43897d4901a9SMing Lei * to any hw queue. 
43907d4901a9SMing Lei */ 4391b3c661b1SJens Axboe for (i = 0; i < set->nr_maps; i++) 4392b3c661b1SJens Axboe blk_mq_clear_mq_map(&set->map[i]); 43937d4901a9SMing Lei 4394a4e1d0b7SBart Van Assche set->ops->map_queues(set); 4395b3c661b1SJens Axboe } else { 4396b3c661b1SJens Axboe BUG_ON(set->nr_maps > 1); 4397a4e1d0b7SBart Van Assche blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); 4398ebe8bddbSOmar Sandoval } 4399b3c661b1SJens Axboe } 4400ebe8bddbSOmar Sandoval 4401f7e76dbcSBart Van Assche static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, 4402ee9d5521SChristoph Hellwig int new_nr_hw_queues) 4403f7e76dbcSBart Van Assche { 4404f7e76dbcSBart Van Assche struct blk_mq_tags **new_tags; 4405f7e76dbcSBart Van Assche 4406ee9d5521SChristoph Hellwig if (set->nr_hw_queues >= new_nr_hw_queues) 4407d4b2e0d4SShin'ichiro Kawasaki goto done; 4408f7e76dbcSBart Van Assche 4409f7e76dbcSBart Van Assche new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), 4410f7e76dbcSBart Van Assche GFP_KERNEL, set->numa_node); 4411f7e76dbcSBart Van Assche if (!new_tags) 4412f7e76dbcSBart Van Assche return -ENOMEM; 4413f7e76dbcSBart Van Assche 4414f7e76dbcSBart Van Assche if (set->tags) 4415ee9d5521SChristoph Hellwig memcpy(new_tags, set->tags, set->nr_hw_queues * 4416f7e76dbcSBart Van Assche sizeof(*set->tags)); 4417f7e76dbcSBart Van Assche kfree(set->tags); 4418f7e76dbcSBart Van Assche set->tags = new_tags; 4419d4b2e0d4SShin'ichiro Kawasaki done: 4420f7e76dbcSBart Van Assche set->nr_hw_queues = new_nr_hw_queues; 4421f7e76dbcSBart Van Assche return 0; 4422f7e76dbcSBart Van Assche } 4423f7e76dbcSBart Van Assche 4424a4391c64SJens Axboe /* 4425a4391c64SJens Axboe * Alloc a tag set to be associated with one or more request queues. 4426a4391c64SJens Axboe * May fail with EINVAL for various error conditions. May adjust the 4427c018c84fSMinwoo Im * requested depth down, if it's too large. In that case, the set 4428a4391c64SJens Axboe * value will be stored in set->queue_depth. 
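 *
 * Minimal caller-side sketch (the ops structure and the field values are
 * made up for illustration; real drivers typically also set cmd_size,
 * flags and a device-specific numa_node):
 *
 *	struct blk_mq_tag_set *set = &my_dev->tag_set;
 *
 *	memset(set, 0, sizeof(*set));
 *	set->ops = &my_mq_ops;
 *	set->nr_hw_queues = 4;
 *	set->queue_depth = 128;
 *	set->numa_node = NUMA_NO_NODE;
 *	ret = blk_mq_alloc_tag_set(set);
 *	if (ret)
 *		return ret;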
4429a4391c64SJens Axboe */ 443024d2f903SChristoph Hellwig int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) 443124d2f903SChristoph Hellwig { 4432b3c661b1SJens Axboe int i, ret; 4433da695ba2SChristoph Hellwig 4434205fb5f5SBart Van Assche BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS); 4435205fb5f5SBart Van Assche 443624d2f903SChristoph Hellwig if (!set->nr_hw_queues) 443724d2f903SChristoph Hellwig return -EINVAL; 4438a4391c64SJens Axboe if (!set->queue_depth) 443924d2f903SChristoph Hellwig return -EINVAL; 444024d2f903SChristoph Hellwig if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) 444124d2f903SChristoph Hellwig return -EINVAL; 444224d2f903SChristoph Hellwig 44437d7e0f90SChristoph Hellwig if (!set->ops->queue_rq) 444424d2f903SChristoph Hellwig return -EINVAL; 444524d2f903SChristoph Hellwig 4446de148297SMing Lei if (!set->ops->get_budget ^ !set->ops->put_budget) 4447de148297SMing Lei return -EINVAL; 4448de148297SMing Lei 4449a4391c64SJens Axboe if (set->queue_depth > BLK_MQ_MAX_DEPTH) { 4450a4391c64SJens Axboe pr_info("blk-mq: reduced tag depth to %u\n", 4451a4391c64SJens Axboe BLK_MQ_MAX_DEPTH); 4452a4391c64SJens Axboe set->queue_depth = BLK_MQ_MAX_DEPTH; 4453a4391c64SJens Axboe } 445424d2f903SChristoph Hellwig 4455b3c661b1SJens Axboe if (!set->nr_maps) 4456b3c661b1SJens Axboe set->nr_maps = 1; 4457b3c661b1SJens Axboe else if (set->nr_maps > HCTX_MAX_TYPES) 4458b3c661b1SJens Axboe return -EINVAL; 4459b3c661b1SJens Axboe 44606637fadfSShaohua Li /* 44616637fadfSShaohua Li * If a crashdump is active, then we are potentially in a very 44626637fadfSShaohua Li * memory constrained environment. Limit us to 1 queue and 44636637fadfSShaohua Li * 64 tags to prevent using too much memory. 44646637fadfSShaohua Li */ 44656637fadfSShaohua Li if (is_kdump_kernel()) { 44666637fadfSShaohua Li set->nr_hw_queues = 1; 446759388702SMing Lei set->nr_maps = 1; 44686637fadfSShaohua Li set->queue_depth = min(64U, set->queue_depth); 44696637fadfSShaohua Li } 4470868f2f0bSKeith Busch /* 4471392546aeSJens Axboe * There is no use for more h/w queues than cpus if we just have 4472392546aeSJens Axboe * a single map 4473868f2f0bSKeith Busch */ 4474392546aeSJens Axboe if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) 4475868f2f0bSKeith Busch set->nr_hw_queues = nr_cpu_ids; 44766637fadfSShaohua Li 447780bd4a7aSChristoph Hellwig if (set->flags & BLK_MQ_F_BLOCKING) { 447880bd4a7aSChristoph Hellwig set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); 447980bd4a7aSChristoph Hellwig if (!set->srcu) 4480a5164405SJens Axboe return -ENOMEM; 448180bd4a7aSChristoph Hellwig ret = init_srcu_struct(set->srcu); 448280bd4a7aSChristoph Hellwig if (ret) 448380bd4a7aSChristoph Hellwig goto out_free_srcu; 448480bd4a7aSChristoph Hellwig } 448524d2f903SChristoph Hellwig 4486da695ba2SChristoph Hellwig ret = -ENOMEM; 44875ee20298SChristoph Hellwig set->tags = kcalloc_node(set->nr_hw_queues, 44885ee20298SChristoph Hellwig sizeof(struct blk_mq_tags *), GFP_KERNEL, 44895ee20298SChristoph Hellwig set->numa_node); 44905ee20298SChristoph Hellwig if (!set->tags) 449180bd4a7aSChristoph Hellwig goto out_cleanup_srcu; 449224d2f903SChristoph Hellwig 4493b3c661b1SJens Axboe for (i = 0; i < set->nr_maps; i++) { 4494b3c661b1SJens Axboe set->map[i].mq_map = kcalloc_node(nr_cpu_ids, 449507b35eb5SMing Lei sizeof(set->map[i].mq_map[0]), 4496da695ba2SChristoph Hellwig GFP_KERNEL, set->numa_node); 4497b3c661b1SJens Axboe if (!set->map[i].mq_map) 4498b3c661b1SJens Axboe goto out_free_mq_map; 449959388702SMing Lei 
set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues; 4500b3c661b1SJens Axboe } 4501bdd17e75SChristoph Hellwig 4502a4e1d0b7SBart Van Assche blk_mq_update_queue_map(set); 4503da695ba2SChristoph Hellwig 450463064be1SJohn Garry ret = blk_mq_alloc_set_map_and_rqs(set); 4505da695ba2SChristoph Hellwig if (ret) 4506bdd17e75SChristoph Hellwig goto out_free_mq_map; 450724d2f903SChristoph Hellwig 45080d2602caSJens Axboe mutex_init(&set->tag_list_lock); 45090d2602caSJens Axboe INIT_LIST_HEAD(&set->tag_list); 45100d2602caSJens Axboe 451124d2f903SChristoph Hellwig return 0; 4512bdd17e75SChristoph Hellwig 4513bdd17e75SChristoph Hellwig out_free_mq_map: 4514b3c661b1SJens Axboe for (i = 0; i < set->nr_maps; i++) { 4515b3c661b1SJens Axboe kfree(set->map[i].mq_map); 4516b3c661b1SJens Axboe set->map[i].mq_map = NULL; 4517b3c661b1SJens Axboe } 45185676e7b6SRobert Elliott kfree(set->tags); 45195676e7b6SRobert Elliott set->tags = NULL; 452080bd4a7aSChristoph Hellwig out_cleanup_srcu: 452180bd4a7aSChristoph Hellwig if (set->flags & BLK_MQ_F_BLOCKING) 452280bd4a7aSChristoph Hellwig cleanup_srcu_struct(set->srcu); 452380bd4a7aSChristoph Hellwig out_free_srcu: 452480bd4a7aSChristoph Hellwig if (set->flags & BLK_MQ_F_BLOCKING) 452580bd4a7aSChristoph Hellwig kfree(set->srcu); 4526da695ba2SChristoph Hellwig return ret; 452724d2f903SChristoph Hellwig } 452824d2f903SChristoph Hellwig EXPORT_SYMBOL(blk_mq_alloc_tag_set); 452924d2f903SChristoph Hellwig 4530cdb14e0fSChristoph Hellwig /* allocate and initialize a tagset for a simple single-queue device */ 4531cdb14e0fSChristoph Hellwig int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, 4532cdb14e0fSChristoph Hellwig const struct blk_mq_ops *ops, unsigned int queue_depth, 4533cdb14e0fSChristoph Hellwig unsigned int set_flags) 4534cdb14e0fSChristoph Hellwig { 4535cdb14e0fSChristoph Hellwig memset(set, 0, sizeof(*set)); 4536cdb14e0fSChristoph Hellwig set->ops = ops; 4537cdb14e0fSChristoph Hellwig set->nr_hw_queues = 1; 4538cdb14e0fSChristoph Hellwig set->nr_maps = 1; 4539cdb14e0fSChristoph Hellwig set->queue_depth = queue_depth; 4540cdb14e0fSChristoph Hellwig set->numa_node = NUMA_NO_NODE; 4541cdb14e0fSChristoph Hellwig set->flags = set_flags; 4542cdb14e0fSChristoph Hellwig return blk_mq_alloc_tag_set(set); 4543cdb14e0fSChristoph Hellwig } 4544cdb14e0fSChristoph Hellwig EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set); 4545cdb14e0fSChristoph Hellwig 454624d2f903SChristoph Hellwig void blk_mq_free_tag_set(struct blk_mq_tag_set *set) 454724d2f903SChristoph Hellwig { 4548b3c661b1SJens Axboe int i, j; 454924d2f903SChristoph Hellwig 4550f7e76dbcSBart Van Assche for (i = 0; i < set->nr_hw_queues; i++) 4551e155b0c2SJohn Garry __blk_mq_free_map_and_rqs(set, i); 4552484b4061SJens Axboe 4553079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) { 4554079a2e3eSJohn Garry blk_mq_free_map_and_rqs(set, set->shared_tags, 4555e155b0c2SJohn Garry BLK_MQ_NO_HCTX_IDX); 4556e155b0c2SJohn Garry } 455732bc15afSJohn Garry 4558b3c661b1SJens Axboe for (j = 0; j < set->nr_maps; j++) { 4559b3c661b1SJens Axboe kfree(set->map[j].mq_map); 4560b3c661b1SJens Axboe set->map[j].mq_map = NULL; 4561b3c661b1SJens Axboe } 4562bdd17e75SChristoph Hellwig 4563981bd189SMing Lei kfree(set->tags); 45645676e7b6SRobert Elliott set->tags = NULL; 456580bd4a7aSChristoph Hellwig if (set->flags & BLK_MQ_F_BLOCKING) { 456680bd4a7aSChristoph Hellwig cleanup_srcu_struct(set->srcu); 456780bd4a7aSChristoph Hellwig kfree(set->srcu); 456880bd4a7aSChristoph Hellwig } 456924d2f903SChristoph Hellwig } 
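/*
 * Illustrative pairing of blk_mq_alloc_sq_tag_set() above with the
 * teardown path (names are made up; a real driver also allocates a disk
 * and sets queue limits in between):
 *
 *	err = blk_mq_alloc_sq_tag_set(&my_dev->tag_set, &my_mq_ops, 64,
 *				      BLK_MQ_F_SHOULD_MERGE);
 *	if (err)
 *		return err;
 *	...
 *	blk_mq_free_tag_set(&my_dev->tag_set);	// on teardown
 */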
457024d2f903SChristoph Hellwig EXPORT_SYMBOL(blk_mq_free_tag_set); 457124d2f903SChristoph Hellwig 4572e3a2b3f9SJens Axboe int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) 4573e3a2b3f9SJens Axboe { 4574e3a2b3f9SJens Axboe struct blk_mq_tag_set *set = q->tag_set; 4575e3a2b3f9SJens Axboe struct blk_mq_hw_ctx *hctx; 45764f481208SMing Lei int ret; 45774f481208SMing Lei unsigned long i; 4578e3a2b3f9SJens Axboe 4579bd166ef1SJens Axboe if (!set) 4580e3a2b3f9SJens Axboe return -EINVAL; 4581e3a2b3f9SJens Axboe 4582e5fa8140SAleksei Zakharov if (q->nr_requests == nr) 4583e5fa8140SAleksei Zakharov return 0; 4584e5fa8140SAleksei Zakharov 458570f36b60SJens Axboe blk_mq_freeze_queue(q); 458624f5a90fSMing Lei blk_mq_quiesce_queue(q); 458770f36b60SJens Axboe 4588e3a2b3f9SJens Axboe ret = 0; 4589e3a2b3f9SJens Axboe queue_for_each_hw_ctx(q, hctx, i) { 4590e9137d4bSKeith Busch if (!hctx->tags) 4591e9137d4bSKeith Busch continue; 4592bd166ef1SJens Axboe /* 4593bd166ef1SJens Axboe * If we're using an MQ scheduler, just update the scheduler 4594bd166ef1SJens Axboe * queue depth. This is similar to what the old code would do. 4595bd166ef1SJens Axboe */ 4596f6adcef5SJohn Garry if (hctx->sched_tags) { 459770f36b60SJens Axboe ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, 459870f36b60SJens Axboe nr, true); 4599f6adcef5SJohn Garry } else { 4600f6adcef5SJohn Garry ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, 4601f6adcef5SJohn Garry false); 460270f36b60SJens Axboe } 4603e3a2b3f9SJens Axboe if (ret) 4604e3a2b3f9SJens Axboe break; 460577f1e0a5SJens Axboe if (q->elevator && q->elevator->type->ops.depth_updated) 460677f1e0a5SJens Axboe q->elevator->type->ops.depth_updated(hctx); 4607e3a2b3f9SJens Axboe } 4608d97e594cSJohn Garry if (!ret) { 4609e3a2b3f9SJens Axboe q->nr_requests = nr; 4610079a2e3eSJohn Garry if (blk_mq_is_shared_tags(set->flags)) { 46118fa04464SJohn Garry if (q->elevator) 4612079a2e3eSJohn Garry blk_mq_tag_update_sched_shared_tags(q); 46138fa04464SJohn Garry else 4614079a2e3eSJohn Garry blk_mq_tag_resize_shared_tags(set, nr); 46158fa04464SJohn Garry } 4616d97e594cSJohn Garry } 4617e3a2b3f9SJens Axboe 461824f5a90fSMing Lei blk_mq_unquiesce_queue(q); 461970f36b60SJens Axboe blk_mq_unfreeze_queue(q); 462070f36b60SJens Axboe 4621e3a2b3f9SJens Axboe return ret; 4622e3a2b3f9SJens Axboe } 4623e3a2b3f9SJens Axboe 4624d48ece20SJianchao Wang /* 4625d48ece20SJianchao Wang * request_queue and elevator_type pair. 4626d48ece20SJianchao Wang * It is just used by __blk_mq_update_nr_hw_queues to cache 4627d48ece20SJianchao Wang * the elevator_type associated with a request_queue. 
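 *
 * For example, while nr_hw_queues is being changed a queue that was
 * running BFQ is temporarily switched to 'none'; the (queue, type) pair
 * stashed here lets blk_mq_elv_switch_back() restore BFQ once the new
 * hardware queues are in place.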
4628d48ece20SJianchao Wang */ 4629d48ece20SJianchao Wang struct blk_mq_qe_pair { 4630d48ece20SJianchao Wang struct list_head node; 4631d48ece20SJianchao Wang struct request_queue *q; 4632d48ece20SJianchao Wang struct elevator_type *type; 4633d48ece20SJianchao Wang }; 4634d48ece20SJianchao Wang 4635d48ece20SJianchao Wang /* 4636d48ece20SJianchao Wang * Cache the elevator_type in qe pair list and switch the 4637d48ece20SJianchao Wang * io scheduler to 'none' 4638d48ece20SJianchao Wang */ 4639d48ece20SJianchao Wang static bool blk_mq_elv_switch_none(struct list_head *head, 4640d48ece20SJianchao Wang struct request_queue *q) 4641d48ece20SJianchao Wang { 4642d48ece20SJianchao Wang struct blk_mq_qe_pair *qe; 4643d48ece20SJianchao Wang 4644d48ece20SJianchao Wang qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); 4645d48ece20SJianchao Wang if (!qe) 4646d48ece20SJianchao Wang return false; 4647d48ece20SJianchao Wang 46485fd7a84aSMing Lei /* q->elevator needs protection from ->sysfs_lock */ 46495fd7a84aSMing Lei mutex_lock(&q->sysfs_lock); 46505fd7a84aSMing Lei 465124516565SMing Lei /* the check has to be done with holding sysfs_lock */ 465224516565SMing Lei if (!q->elevator) { 465324516565SMing Lei kfree(qe); 465424516565SMing Lei goto unlock; 465524516565SMing Lei } 465624516565SMing Lei 4657d48ece20SJianchao Wang INIT_LIST_HEAD(&qe->node); 4658d48ece20SJianchao Wang qe->q = q; 4659d48ece20SJianchao Wang qe->type = q->elevator->type; 4660dd6f7f17SChristoph Hellwig /* keep a reference to the elevator module as we'll switch back */ 4661dd6f7f17SChristoph Hellwig __elevator_get(qe->type); 4662d48ece20SJianchao Wang list_add(&qe->node, head); 466364b36075SChristoph Hellwig elevator_disable(q); 466424516565SMing Lei unlock: 4665d48ece20SJianchao Wang mutex_unlock(&q->sysfs_lock); 4666d48ece20SJianchao Wang 4667d48ece20SJianchao Wang return true; 4668d48ece20SJianchao Wang } 4669d48ece20SJianchao Wang 46704a3b666eSJakob Koschel static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head, 46714a3b666eSJakob Koschel struct request_queue *q) 46724a3b666eSJakob Koschel { 46734a3b666eSJakob Koschel struct blk_mq_qe_pair *qe; 46744a3b666eSJakob Koschel 46754a3b666eSJakob Koschel list_for_each_entry(qe, head, node) 46764a3b666eSJakob Koschel if (qe->q == q) 46774a3b666eSJakob Koschel return qe; 46784a3b666eSJakob Koschel 46794a3b666eSJakob Koschel return NULL; 46804a3b666eSJakob Koschel } 46814a3b666eSJakob Koschel 4682d48ece20SJianchao Wang static void blk_mq_elv_switch_back(struct list_head *head, 4683d48ece20SJianchao Wang struct request_queue *q) 4684d48ece20SJianchao Wang { 4685d48ece20SJianchao Wang struct blk_mq_qe_pair *qe; 46864a3b666eSJakob Koschel struct elevator_type *t; 4687d48ece20SJianchao Wang 46884a3b666eSJakob Koschel qe = blk_lookup_qe_pair(head, q); 46894a3b666eSJakob Koschel if (!qe) 4690d48ece20SJianchao Wang return; 46914a3b666eSJakob Koschel t = qe->type; 4692d48ece20SJianchao Wang list_del(&qe->node); 4693d48ece20SJianchao Wang kfree(qe); 4694d48ece20SJianchao Wang 4695d48ece20SJianchao Wang mutex_lock(&q->sysfs_lock); 46968237c01fSKeith Busch elevator_switch(q, t); 46978ed40ee3SJinlong Chen /* drop the reference acquired in blk_mq_elv_switch_none */ 46988ed40ee3SJinlong Chen elevator_put(t); 4699d48ece20SJianchao Wang mutex_unlock(&q->sysfs_lock); 4700d48ece20SJianchao Wang } 4701d48ece20SJianchao Wang 4702e4dc2b32SKeith Busch static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, 4703e4dc2b32SKeith Busch int nr_hw_queues) 4704868f2f0bSKeith 
Busch { 4705868f2f0bSKeith Busch struct request_queue *q; 4706d48ece20SJianchao Wang LIST_HEAD(head); 4707e01ad46dSJianchao Wang int prev_nr_hw_queues; 4708868f2f0bSKeith Busch 4709705cda97SBart Van Assche lockdep_assert_held(&set->tag_list_lock); 4710705cda97SBart Van Assche 4711392546aeSJens Axboe if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) 4712868f2f0bSKeith Busch nr_hw_queues = nr_cpu_ids; 4713fe35ec58SWeiping Zhang if (nr_hw_queues < 1) 4714fe35ec58SWeiping Zhang return; 4715fe35ec58SWeiping Zhang if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) 4716868f2f0bSKeith Busch return; 4717868f2f0bSKeith Busch 4718868f2f0bSKeith Busch list_for_each_entry(q, &set->tag_list, tag_set_list) 4719868f2f0bSKeith Busch blk_mq_freeze_queue(q); 4720d48ece20SJianchao Wang /* 4721d48ece20SJianchao Wang * Switch IO scheduler to 'none', cleaning up the data associated 4722d48ece20SJianchao Wang * with the previous scheduler. We will switch back once we are done 4723d48ece20SJianchao Wang * updating the new sw to hw queue mappings. 4724d48ece20SJianchao Wang */ 4725d48ece20SJianchao Wang list_for_each_entry(q, &set->tag_list, tag_set_list) 4726d48ece20SJianchao Wang if (!blk_mq_elv_switch_none(&head, q)) 4727d48ece20SJianchao Wang goto switch_back; 4728868f2f0bSKeith Busch 4729477e19deSJianchao Wang list_for_each_entry(q, &set->tag_list, tag_set_list) { 4730477e19deSJianchao Wang blk_mq_debugfs_unregister_hctxs(q); 4731eaa870f9SChristoph Hellwig blk_mq_sysfs_unregister_hctxs(q); 4732477e19deSJianchao Wang } 4733477e19deSJianchao Wang 4734a2584e43SWeiping Zhang prev_nr_hw_queues = set->nr_hw_queues; 4735ee9d5521SChristoph Hellwig if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) 4736f7e76dbcSBart Van Assche goto reregister; 4737f7e76dbcSBart Van Assche 4738e01ad46dSJianchao Wang fallback: 4739aa880ad6SWeiping Zhang blk_mq_update_queue_map(set); 4740868f2f0bSKeith Busch list_for_each_entry(q, &set->tag_list, tag_set_list) { 4741868f2f0bSKeith Busch blk_mq_realloc_hw_ctxs(set, q); 474242ee3061SMing Lei blk_mq_update_poll_flag(q); 4743e01ad46dSJianchao Wang if (q->nr_hw_queues != set->nr_hw_queues) { 4744a846a8e6SYe Bin int i = prev_nr_hw_queues; 4745a846a8e6SYe Bin 4746e01ad46dSJianchao Wang pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", 4747e01ad46dSJianchao Wang nr_hw_queues, prev_nr_hw_queues); 4748a846a8e6SYe Bin for (; i < set->nr_hw_queues; i++) 4749a846a8e6SYe Bin __blk_mq_free_map_and_rqs(set, i); 4750a846a8e6SYe Bin 4751e01ad46dSJianchao Wang set->nr_hw_queues = prev_nr_hw_queues; 47527d76f856SDongli Zhang blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); 4753e01ad46dSJianchao Wang goto fallback; 4754e01ad46dSJianchao Wang } 4755477e19deSJianchao Wang blk_mq_map_swqueue(q); 4756477e19deSJianchao Wang } 4757477e19deSJianchao Wang 4758f7e76dbcSBart Van Assche reregister: 4759477e19deSJianchao Wang list_for_each_entry(q, &set->tag_list, tag_set_list) { 4760eaa870f9SChristoph Hellwig blk_mq_sysfs_register_hctxs(q); 4761477e19deSJianchao Wang blk_mq_debugfs_register_hctxs(q); 4762868f2f0bSKeith Busch } 4763868f2f0bSKeith Busch 4764d48ece20SJianchao Wang switch_back: 4765d48ece20SJianchao Wang list_for_each_entry(q, &set->tag_list, tag_set_list) 4766d48ece20SJianchao Wang blk_mq_elv_switch_back(&head, q); 4767d48ece20SJianchao Wang 4768868f2f0bSKeith Busch list_for_each_entry(q, &set->tag_list, tag_set_list) 4769868f2f0bSKeith Busch blk_mq_unfreeze_queue(q); 4770868f2f0bSKeith Busch } 4771e4dc2b32SKeith Busch 4772e4dc2b32SKeith Busch void 
blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) 4773e4dc2b32SKeith Busch { 4774e4dc2b32SKeith Busch mutex_lock(&set->tag_list_lock); 4775e4dc2b32SKeith Busch __blk_mq_update_nr_hw_queues(set, nr_hw_queues); 4776e4dc2b32SKeith Busch mutex_unlock(&set->tag_list_lock); 4777e4dc2b32SKeith Busch } 4778868f2f0bSKeith Busch EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); 4779868f2f0bSKeith Busch 4780f6c80cffSKeith Busch static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, 4781f6c80cffSKeith Busch struct io_comp_batch *iob, unsigned int flags) 4782bbd7bb70SJens Axboe { 4783c6699d6fSChristoph Hellwig long state = get_current_state(); 4784bbd7bb70SJens Axboe int ret; 4785bbd7bb70SJens Axboe 4786c6699d6fSChristoph Hellwig do { 47875a72e899SJens Axboe ret = q->mq_ops->poll(hctx, iob); 4788bbd7bb70SJens Axboe if (ret > 0) { 4789849a3700SJens Axboe __set_current_state(TASK_RUNNING); 479085f4d4b6SJens Axboe return ret; 4791bbd7bb70SJens Axboe } 4792bbd7bb70SJens Axboe 4793bbd7bb70SJens Axboe if (signal_pending_state(state, current)) 4794849a3700SJens Axboe __set_current_state(TASK_RUNNING); 4795b03fbd4fSPeter Zijlstra if (task_is_running(current)) 479685f4d4b6SJens Axboe return 1; 4797c6699d6fSChristoph Hellwig 4798ef99b2d3SChristoph Hellwig if (ret < 0 || (flags & BLK_POLL_ONESHOT)) 4799bbd7bb70SJens Axboe break; 4800bbd7bb70SJens Axboe cpu_relax(); 4801aa61bec3SJens Axboe } while (!need_resched()); 4802bbd7bb70SJens Axboe 480367b4110fSNitesh Shetty __set_current_state(TASK_RUNNING); 480485f4d4b6SJens Axboe return 0; 4805bbd7bb70SJens Axboe } 4806bbd7bb70SJens Axboe 4807f6c80cffSKeith Busch int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, 4808f6c80cffSKeith Busch struct io_comp_batch *iob, unsigned int flags) 4809f6c80cffSKeith Busch { 4810f6c80cffSKeith Busch struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); 4811f6c80cffSKeith Busch 4812f6c80cffSKeith Busch return blk_hctx_poll(q, hctx, iob, flags); 4813f6c80cffSKeith Busch } 4814f6c80cffSKeith Busch 4815f6c80cffSKeith Busch int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, 4816f6c80cffSKeith Busch unsigned int poll_flags) 4817f6c80cffSKeith Busch { 4818f6c80cffSKeith Busch struct request_queue *q = rq->q; 4819f6c80cffSKeith Busch int ret; 4820f6c80cffSKeith Busch 4821f6c80cffSKeith Busch if (!blk_rq_is_poll(rq)) 4822f6c80cffSKeith Busch return 0; 4823f6c80cffSKeith Busch if (!percpu_ref_tryget(&q->q_usage_counter)) 4824f6c80cffSKeith Busch return 0; 4825f6c80cffSKeith Busch 4826f6c80cffSKeith Busch ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); 4827f6c80cffSKeith Busch blk_queue_exit(q); 4828f6c80cffSKeith Busch 4829f6c80cffSKeith Busch return ret; 4830f6c80cffSKeith Busch } 4831f6c80cffSKeith Busch EXPORT_SYMBOL_GPL(blk_rq_poll); 4832f6c80cffSKeith Busch 48339cf2bab6SJens Axboe unsigned int blk_mq_rq_cpu(struct request *rq) 48349cf2bab6SJens Axboe { 48359cf2bab6SJens Axboe return rq->mq_ctx->cpu; 48369cf2bab6SJens Axboe } 48379cf2bab6SJens Axboe EXPORT_SYMBOL(blk_mq_rq_cpu); 48389cf2bab6SJens Axboe 48392a19b28fSMing Lei void blk_mq_cancel_work_sync(struct request_queue *q) 48402a19b28fSMing Lei { 48412a19b28fSMing Lei struct blk_mq_hw_ctx *hctx; 48424f481208SMing Lei unsigned long i; 48432a19b28fSMing Lei 48442a19b28fSMing Lei cancel_delayed_work_sync(&q->requeue_work); 48452a19b28fSMing Lei 48462a19b28fSMing Lei queue_for_each_hw_ctx(q, hctx, i) 48472a19b28fSMing Lei cancel_delayed_work_sync(&hctx->run_work); 48482a19b28fSMing Lei } 48492a19b28fSMing Lei 
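/*
 * Illustrative sketch only, not part of blk-mq: one way a caller that issued
 * a polled passthrough request might drive blk_rq_poll() above.  The
 * "sketch_done" flag is an assumption of this example -- it stands in for
 * whatever completion marker the caller's own ->end_io handler sets and is
 * not a blk-mq field.
 */
static void sketch_wait_polled(struct request *rq, bool *sketch_done)
{
	while (!READ_ONCE(*sketch_done)) {
		/*
		 * blk_rq_poll() returns > 0 when the driver reaped at least
		 * one completion, and 0 for non-polled requests, so back off
		 * briefly whenever nothing was found.
		 */
		if (blk_rq_poll(rq, NULL, 0) <= 0)
			cond_resched();
	}
}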
4850320ae51fSJens Axboe static int __init blk_mq_init(void) 4851320ae51fSJens Axboe { 4852c3077b5dSChristoph Hellwig int i; 4853c3077b5dSChristoph Hellwig 4854c3077b5dSChristoph Hellwig for_each_possible_cpu(i) 4855f9ab4918SSebastian Andrzej Siewior init_llist_head(&per_cpu(blk_cpu_done, i)); 4856c3077b5dSChristoph Hellwig open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); 4857c3077b5dSChristoph Hellwig 4858c3077b5dSChristoph Hellwig cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, 4859c3077b5dSChristoph Hellwig "block/softirq:dead", NULL, 4860c3077b5dSChristoph Hellwig blk_softirq_cpu_dead); 48619467f859SThomas Gleixner cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, 48629467f859SThomas Gleixner blk_mq_hctx_notify_dead); 4863bf0beec0SMing Lei cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online", 4864bf0beec0SMing Lei blk_mq_hctx_notify_online, 4865bf0beec0SMing Lei blk_mq_hctx_notify_offline); 4866320ae51fSJens Axboe return 0; 4867320ae51fSJens Axboe } 4868320ae51fSJens Axboe subsys_initcall(blk_mq_init); 4869
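/*
 * Illustrative sketch only, not part of blk-mq: the minimal driver-side use
 * of the tag-set helpers defined above.  All sketch_* names are hypothetical,
 * and a real driver would also create a gendisk/request_queue against the set
 * (e.g. with blk_mq_alloc_disk()) between allocation and freeing.
 */
static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	/* A real driver would start the hardware command here instead. */
	blk_mq_start_request(bd->rq);
	blk_mq_end_request(bd->rq, BLK_STS_OK);
	return BLK_STS_OK;
}

static const struct blk_mq_ops sketch_mq_ops = {
	.queue_rq	= sketch_queue_rq,
};

static int sketch_tag_set_lifecycle(struct blk_mq_tag_set *set)
{
	int ret;

	/* One map, one hardware queue, 128 requests deep, no extra flags. */
	ret = blk_mq_alloc_sq_tag_set(set, &sketch_mq_ops, 128, 0);
	if (ret)
		return ret;

	/*
	 * Purely to show the call shape: drivers such as NVMe call
	 * blk_mq_update_nr_hw_queues() from their reset path when the number
	 * of usable hardware queues changes; 4 here is an arbitrary example.
	 */
	blk_mq_update_nr_hw_queues(set, 4);

	blk_mq_free_tag_set(set);
	return 0;
}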