/* SPDX-License-Identifier: GPL-2.0 */
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

#include "blk-stat.h"
#include "blk-mq-tag.h"

struct blk_mq_tag_set;

struct blk_mq_ctxs {
	struct kobject kobj;
	struct blk_mq_ctx __percpu	*queue_ctx;
};

/**
 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
 */
struct blk_mq_ctx {
	struct {
		spinlock_t		lock;
		struct list_head	rq_lists[HCTX_MAX_TYPES];
	} ____cacheline_aligned_in_smp;

	unsigned int		cpu;
	unsigned short		index_hw[HCTX_MAX_TYPES];
	struct blk_mq_hw_ctx	*hctxs[HCTX_MAX_TYPES];

	/* incremented at dispatch time */
	unsigned long		rq_dispatched[2];
	unsigned long		rq_merged;

	/* incremented at completion time */
	unsigned long		____cacheline_aligned_in_smp rq_completed[2];

	struct request_queue	*queue;
	struct blk_mq_ctxs	*ctxs;
	struct kobject		kobj;
} ____cacheline_aligned_in_smp;

void blk_mq_submit_bio(struct bio *bio);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
			     unsigned int);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
				bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
					struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);

/*
 * Internal helpers for allocating/freeing the request map
 */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
		     unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
				unsigned int hctx_idx, unsigned int depth);
void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
			     struct blk_mq_tags *tags,
			     unsigned int hctx_idx);
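/*
 * Illustrative pairing of the helpers above (a sketch, not a code path taken
 * from this file): a tags map and its statically allocated requests created
 * for hardware queue index hctx_idx with blk_mq_alloc_map_and_rqs() are torn
 * down again with blk_mq_free_map_and_rqs() on the same index:
 *
 *	tags = blk_mq_alloc_map_and_rqs(set, hctx_idx, depth);
 *	if (!tags)
 *		return -ENOMEM;
 *	...
 *	blk_mq_free_map_and_rqs(set, tags, hctx_idx);
 */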
/*
 * Internal helpers for request insertion into sw queues
 */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
				bool at_head);
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
				  bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
				struct list_head *list);

/* Used by blk_insert_cloned_request() to issue request directly */
blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
				    struct list_head *list);

/*
 * CPU -> queue mappings
 */
extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);

/*
 * blk_mq_map_queue_type() - map (hctx_type, cpu) to hardware queue
 * @q: request queue
 * @type: the hctx type index
 * @cpu: CPU
 */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
							  enum hctx_type type,
							  unsigned int cpu)
{
	return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
}

/*
 * blk_mq_map_queue() - map (cmd_flags, type) to hardware queue
 * @q: request queue
 * @flags: request command flags
 * @ctx: software queue cpu ctx
 */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
						     unsigned int flags,
						     struct blk_mq_ctx *ctx)
{
	enum hctx_type type = HCTX_TYPE_DEFAULT;

	/*
	 * The caller ensures that polling is enabled on the queue if
	 * REQ_POLLED is set.
	 */
	if (flags & REQ_POLLED)
		type = HCTX_TYPE_POLL;
	else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
		type = HCTX_TYPE_READ;

	return ctx->hctxs[type];
}
/*
 * sysfs helpers
 */
extern void blk_mq_sysfs_init(struct request_queue *q);
extern void blk_mq_sysfs_deinit(struct request_queue *q);
extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_free_plug_rqs(struct blk_plug *plug);

void blk_mq_release(struct request_queue *q);

static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
						  unsigned int cpu)
{
	return per_cpu_ptr(q->queue_ctx, cpu);
}

/*
 * This assumes per-cpu software queues; they could be per-node as well, for
 * instance. For now this is hardcoded as-is. Note that we don't care about
 * preemption, since we know the ctx's are persistent. This does mean that we
 * can't rely on ctx always matching the currently running CPU.
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, raw_smp_processor_id());
}
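/*
 * Illustrative submission-side lookup (a sketch of how blk_mq_get_ctx() and
 * blk_mq_map_queue() above fit together, not a verbatim kernel code path):
 * the software queue is picked for the submitting CPU, then mapped to a
 * hardware queue based on the command flags of the bio being issued.
 *
 *	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 *	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 */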
struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	blk_mq_req_flags_t flags;
	unsigned int shallow_depth;
	unsigned int cmd_flags;

	/* allocate multiple requests/tags in one go */
	unsigned int nr_tags;
	struct request **cached_rq;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};

static inline bool blk_mq_is_shared_tags(unsigned int flags)
{
	return flags & BLK_MQ_F_TAG_HCTX_SHARED;
}

static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
	if (data->q->elevator)
		return data->hctx->sched_tags;

	return data->hctx->tags;
}

static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
}

static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
{
	return hctx->nr_ctx && hctx->tags;
}

unsigned int blk_mq_in_flight(struct request_queue *q,
			      struct block_device *part);
void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
			 unsigned int inflight[2]);

static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
					      int budget_token)
{
	if (q->mq_ops->put_budget)
		q->mq_ops->put_budget(q, budget_token);
}

static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
{
	if (q->mq_ops->get_budget)
		return q->mq_ops->get_budget(q);
	return 0;
}

static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
{
	if (token < 0)
		return;

	if (rq->q->mq_ops->set_rq_budget_token)
		rq->q->mq_ops->set_rq_budget_token(rq, token);
}

static inline int blk_mq_get_rq_budget_token(struct request *rq)
{
	if (rq->q->mq_ops->get_rq_budget_token)
		return rq->q->mq_ops->get_rq_budget_token(rq);
	return -1;
}
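/*
 * Illustrative dispatch-side budget handling (a sketch, assuming a driver
 * that implements the budget callbacks, e.g. SCSI; not a verbatim code
 * path): a budget token is acquired before a request is dispatched, stored
 * in the request, and returned to the driver if the request cannot be
 * issued after all.
 *
 *	int budget_token = blk_mq_get_dispatch_budget(q);
 *
 *	if (budget_token < 0)
 *		return;			device busy, retry later
 *	blk_mq_set_rq_budget_token(rq, budget_token);
 *	...
 *	if (dispatch failed)
 *		blk_mq_put_dispatch_budget(q, budget_token);
 */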
static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		atomic_inc(&hctx->queue->nr_active_requests_shared_tags);
	else
		atomic_inc(&hctx->nr_active);
}

static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		atomic_dec(&hctx->queue->nr_active_requests_shared_tags);
	else
		atomic_dec(&hctx->nr_active);
}

static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		return atomic_read(&hctx->queue->nr_active_requests_shared_tags);
	return atomic_read(&hctx->nr_active);
}

static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
					   struct request *rq)
{
	blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
	rq->tag = BLK_MQ_NO_TAG;

	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
		__blk_mq_dec_active_requests(hctx);
	}
}

static inline void blk_mq_put_driver_tag(struct request *rq)
{
	if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
		return;

	__blk_mq_put_driver_tag(rq->mq_hctx, rq);
}

bool blk_mq_get_driver_tag(struct request *rq);
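/*
 * Note on the tag lifecycle (descriptive, based on how blk-mq uses these
 * helpers elsewhere): with an I/O scheduler attached, a request holds a
 * scheduler tag (rq->internal_tag) from allocation onwards, while the driver
 * tag (rq->tag) is only acquired via blk_mq_get_driver_tag() when the
 * request is about to be dispatched to the driver, and is released again
 * through blk_mq_put_driver_tag() on completion or requeue.
 */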
static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
{
	int cpu;

	for_each_possible_cpu(cpu)
		qmap->mq_map[cpu] = 0;
}

/*
 * blk_mq_plug() - Get caller context plug
 * @q: request queue
 * @bio: the bio being submitted by the caller context
 *
 * Plugging, by design, may delay the insertion of BIOs into the elevator in
 * order to increase BIO merging opportunities. This, however, can cause the
 * BIO insertion order to differ from the order in which submit_bio() is
 * executed when multiple contexts concurrently issue BIOs to a device, even
 * if these contexts are synchronized to tightly control the BIO issuing
 * order. While this is not a problem with regular block devices, this
 * ordering change can cause write BIO failures with zoned block devices as
 * these require sequential write patterns to zones. Prevent this from
 * happening by ignoring the plug state of a BIO issuing context if the
 * target request queue is for a zoned block device and the BIO to plug is a
 * write operation.
 *
 * Return current->plug if the bio can be plugged and NULL otherwise
 */
static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
					   struct bio *bio)
{
	/*
	 * For regular block devices or read operations, use the context plug,
	 * which may be NULL if blk_start_plug() was not executed.
	 */
	if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
		return current->plug;

	/* Zoned block device write operation case: do not plug the BIO */
	return NULL;
}

/* Free all requests on the list */
static inline void blk_mq_free_requests(struct list_head *list)
{
	while (!list_empty(list)) {
		struct request *rq = list_entry_rq(list->next);

		list_del_init(&rq->queuelist);
		blk_mq_free_request(rq);
	}
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
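/*
 * Worked example of the fair-share limit computed below (illustrative
 * numbers, not taken from this file): with a tag depth of 256 and 8 active
 * users, each hardware queue may have up to
 * max((256 + 8 - 1) / 8, 4) = 32 requests in flight; with a depth of 16
 * shared by 8 users, (16 + 8 - 1) / 8 = 2, so the minimum of 4 tags applies.
 */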
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->sb.depth == 1)
		return true;

	if (blk_mq_is_shared_tags(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			return true;
	} else {
		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return true;
	}

	users = atomic_read(&hctx->tags->active_queues);

	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return __blk_mq_active_requests(hctx) < depth;
}

#endif