// SPDX-License-Identifier: GPL-2.0
/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/delay.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"

/*
 * Recalculate the wakeup batch when the number of active users of a shared
 * tag map changes.
 */
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
		unsigned int users)
{
	if (!users)
		return;

	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
			users);
	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
			users);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before trying to allocate a driver tag, so that even
 * if the first allocation attempt fails, the other shared-tag users can
 * still reserve budget for it.
 */
void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	unsigned int users;
	unsigned long flags;
	struct blk_mq_tags *tags = hctx->tags;

	/*
	 * Calling test_bit() prior to test_and_set_bit() is intentional:
	 * it avoids dirtying the cacheline if the queue is already active.
	 */
	if (blk_mq_is_shared_tags(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			return;
	} else {
		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return;
	}

	spin_lock_irqsave(&tags->lock, flags);
	users = tags->active_queues + 1;
	WRITE_ONCE(tags->active_queues, users);
	blk_mq_update_wake_batch(tags, users);
	spin_unlock_irqrestore(&tags->lock, flags);
}

/*
 * Wake up all waiters that may be sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
	sbitmap_queue_wake_all(&tags->bitmap_tags);
	if (include_reserve)
		sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;
	unsigned int users;

	if (blk_mq_is_shared_tags(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
					&q->queue_flags))
			return;
	} else {
		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return;
	}

	spin_lock_irq(&tags->lock);
	users = tags->active_queues - 1;
	WRITE_ONCE(tags->active_queues, users);
	blk_mq_update_wake_batch(tags, users);
	spin_unlock_irq(&tags->lock);

	blk_mq_tag_wakeup_all(tags, false);
}
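
/*
 * For illustration: callers do not normally invoke __blk_mq_tag_busy() and
 * __blk_mq_tag_idle() directly. A minimal sketch of the wrappers (assumed
 * to live in block/blk-mq.h; check that header for the authoritative
 * definitions) shows that the accounting only applies to shared tag maps:
 *
 *	static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 *	{
 *		if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
 *			__blk_mq_tag_busy(hctx);
 *	}
 *
 *	static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 *	{
 *		if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
 *			__blk_mq_tag_idle(hctx);
 *	}
 */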

static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
			    struct sbitmap_queue *bt)
{
	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
	    !hctx_may_queue(data->hctx, bt))
		return BLK_MQ_NO_TAG;

	if (data->shallow_depth)
		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
	else
		return __sbitmap_queue_get(bt);
}

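/*
 * Worked example of the fairness rule enforced via hctx_may_queue() above
 * (a simplified sketch; see hctx_may_queue() in block/blk-mq.h for the
 * real logic, which this only approximates): each queue marked active on a
 * shared tag map is limited to roughly its fair share of the depth, i.e.
 * about max(depth / active_queues, 4) tags. With a 128-tag map shared by
 * 4 active queues, each queue may consume about 32 tags before
 * __blk_mq_get_tag() starts failing the hctx_may_queue() check.
 */
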
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
			      unsigned int *offset)
{
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct sbitmap_queue *bt = &tags->bitmap_tags;
	unsigned long ret;

	if (data->shallow_depth || data->flags & BLK_MQ_REQ_RESERVED ||
	    data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		return 0;
	ret = __sbitmap_queue_get_batch(bt, nr_tags, offset);
	*offset += tags->nr_reserved_tags;
	return ret;
}

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct sbitmap_queue *bt;
	struct sbq_wait_state *ws;
	DEFINE_SBQ_WAIT(wait);
	unsigned int tag_offset;
	int tag;

	if (data->flags & BLK_MQ_REQ_RESERVED) {
		if (unlikely(!tags->nr_reserved_tags)) {
			WARN_ON_ONCE(1);
			return BLK_MQ_NO_TAG;
		}
		bt = &tags->breserved_tags;
		tag_offset = 0;
	} else {
		bt = &tags->bitmap_tags;
		tag_offset = tags->nr_reserved_tags;
	}

	tag = __blk_mq_get_tag(data, bt);
	if (tag != BLK_MQ_NO_TAG)
		goto found_tag;

	if (data->flags & BLK_MQ_REQ_NOWAIT)
		return BLK_MQ_NO_TAG;

	ws = bt_wait_ptr(bt, data->hctx);
	do {
		struct sbitmap_queue *bt_prev;

		/*
		 * We're out of tags on this hardware queue, kick any
		 * pending IO submits before going to sleep waiting for
		 * some to complete.
		 */
		blk_mq_run_hw_queue(data->hctx, false);

		/*
		 * Retry tag allocation after running the hardware queue,
		 * as running the queue may also have found completions.
		 */
		tag = __blk_mq_get_tag(data, bt);
		if (tag != BLK_MQ_NO_TAG)
			break;

		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);

		tag = __blk_mq_get_tag(data, bt);
		if (tag != BLK_MQ_NO_TAG)
			break;

		bt_prev = bt;
		io_schedule();

		sbitmap_finish_wait(bt, ws, &wait);

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
					      data->ctx);
		tags = blk_mq_tags_from_data(data);
		if (data->flags & BLK_MQ_REQ_RESERVED)
			bt = &tags->breserved_tags;
		else
			bt = &tags->bitmap_tags;

		/*
		 * If the destination hw queue changed, issue a fake wakeup
		 * on the previous queue to compensate for the missed wakeup,
		 * so other allocations on the previous queue won't be starved.
		 */
		if (bt != bt_prev)
			sbitmap_queue_wake_up(bt_prev, 1);

		ws = bt_wait_ptr(bt, data->hctx);
	} while (1);

	sbitmap_finish_wait(bt, ws, &wait);

found_tag:
	/*
	 * Give up this allocation if the hctx is inactive. The caller will
	 * retry on an active hctx.
	 */
	if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
		blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
		return BLK_MQ_NO_TAG;
	}
	return tag + tag_offset;
}

void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
		    unsigned int tag)
{
	if (!blk_mq_tag_is_reserved(tags, tag)) {
		const int real_tag = tag - tags->nr_reserved_tags;

		BUG_ON(real_tag >= tags->nr_tags);
		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
	} else {
		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
	}
}

void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
{
	sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
				  tag_array, nr_tags);
}

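/*
 * Illustrative sketch (not a real caller in this file): every successful
 * blk_mq_get_tag()/blk_mq_get_tags() must eventually be balanced by
 * blk_mq_put_tag()/blk_mq_put_tags(). The tag values passed back are the
 * ones returned by the allocation side, i.e. already offset past the
 * reserved range, matching the tag_offset/*offset adjustments above:
 *
 *	tag = blk_mq_get_tag(data);
 *	if (tag != BLK_MQ_NO_TAG) {
 *		...
 *		blk_mq_put_tag(tags, data->ctx, tag);
 *	}
 */
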
struct bt_iter_data {
	struct blk_mq_hw_ctx *hctx;
	struct request_queue *q;
	busy_tag_iter_fn *fn;
	void *data;
	bool reserved;
};

static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
		unsigned int bitnr)
{
	struct request *rq;
	unsigned long flags;

	spin_lock_irqsave(&tags->lock, flags);
	rq = tags->rqs[bitnr];
	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
		rq = NULL;
	spin_unlock_irqrestore(&tags->lock, flags);
	return rq;
}

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_iter_data *iter_data = data;
	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
	struct request_queue *q = iter_data->q;
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_tags *tags;
	struct request *rq;
	bool ret = true;

	if (blk_mq_is_shared_tags(set->flags))
		tags = set->shared_tags;
	else
		tags = hctx->tags;

	if (!iter_data->reserved)
		bitnr += tags->nr_reserved_tags;
	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	rq = blk_mq_find_and_get_req(tags, bitnr);
	if (!rq)
		return true;

	if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
		ret = iter_data->fn(rq, iter_data->data);
	blk_mq_put_rq_ref(rq);
	return ret;
}

/**
 * bt_for_each - iterate over the requests associated with a hardware queue
 * @hctx:	Hardware queue to examine.
 * @q:		Request queue to examine.
 * @bt:		sbitmap to examine. This is either the breserved_tags member
 *		or the bitmap_tags member of struct blk_mq_tags.
 * @fn:		Pointer to the function that will be called for each request
 *		associated with @hctx that has been assigned a driver tag.
 *		@fn will be called as follows: @fn(rq, @data) where rq is a
 *		pointer to a request. Return true to continue iterating
 *		tags, false to stop.
 * @data:	Will be passed as second argument to @fn.
 * @reserved:	Indicates whether @bt is the breserved_tags member or the
 *		bitmap_tags member of struct blk_mq_tags.
 */
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
			struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
			void *data, bool reserved)
{
	struct bt_iter_data iter_data = {
		.hctx = hctx,
		.fn = fn,
		.data = data,
		.reserved = reserved,
		.q = q,
	};

	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
	struct blk_mq_tags *tags;
	busy_tag_iter_fn *fn;
	void *data;
	unsigned int flags;
};

#define BT_TAG_ITER_RESERVED		(1 << 0)
#define BT_TAG_ITER_STARTED		(1 << 1)
#define BT_TAG_ITER_STATIC_RQS		(1 << 2)

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_tags_iter_data *iter_data = data;
	struct blk_mq_tags *tags = iter_data->tags;
	struct request *rq;
	bool ret = true;
	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);

	if (!(iter_data->flags & BT_TAG_ITER_RESERVED))
		bitnr += tags->nr_reserved_tags;

	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	if (iter_static_rqs)
		rq = tags->static_rqs[bitnr];
	else
		rq = blk_mq_find_and_get_req(tags, bitnr);
	if (!rq)
		return true;

	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
	    blk_mq_request_started(rq))
		ret = iter_data->fn(rq, iter_data->data);
	if (!iter_static_rqs)
		blk_mq_put_rq_ref(rq);
	return ret;
}

/**
 * bt_tags_for_each - iterate over the requests in a tag map
 * @tags:	Tag map to iterate over.
 * @bt:		sbitmap to examine. This is either the breserved_tags member
 *		or the bitmap_tags member of struct blk_mq_tags.
 * @fn:		Pointer to the function that will be called for each started
 *		request. @fn will be called as follows: @fn(rq, @data) where
 *		rq is a pointer to a request. Return true to continue
 *		iterating tags, false to stop.
 * @data:	Will be passed as second argument to @fn.
 * @flags:	BT_TAG_ITER_*
 */
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
			     busy_tag_iter_fn *fn, void *data, unsigned int flags)
{
	struct bt_tags_iter_data iter_data = {
		.tags = tags,
		.fn = fn,
		.data = data,
		.flags = flags,
	};

	if (tags->rqs)
		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv, unsigned int flags)
{
	WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);

	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
				 flags | BT_TAG_ITER_RESERVED);
	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
}

/**
 * blk_mq_all_tag_iter - iterate over all requests in a tag map
 * @tags:	Tag map to iterate over.
 * @fn:		Pointer to the function that will be called for each
 *		request. @fn will be called as follows: @fn(rq, @priv) where
 *		rq is a pointer to a request. Return true to continue
 *		iterating tags, false to stop.
 * @priv:	Will be passed as second argument to @fn.
 *
 * Caller has to pass the tag map from which requests are allocated.
 */
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
		void *priv)
{
	__blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
}

/**
 * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
 * @tagset:	Tag set to iterate over.
 * @fn:		Pointer to the function that will be called for each started
 *		request. @fn will be called as follows: @fn(rq, @priv) where
 *		rq is a pointer to a request. Return true to continue
 *		iterating tags, false to stop.
 * @priv:	Will be passed as second argument to @fn.
 *
 * We grab one request reference before calling @fn and release it after
 * @fn returns.
 */
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv)
{
	unsigned int flags = tagset->flags;
	int i, nr_tags;

	nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;

	for (i = 0; i < nr_tags; i++) {
		if (tagset->tags && tagset->tags[i])
			__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
					      BT_TAG_ITER_STARTED);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data)
{
	unsigned *count = data;

	if (blk_mq_request_completed(rq))
		(*count)++;
	return true;
}

/**
 * blk_mq_tagset_wait_completed_request - Wait until all scheduled request
 * completions have finished.
 * @tagset:	Tag set to drain completed requests from.
 *
 * Note: This function has to be run after all IO queues are shut down.
 */
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
{
	while (true) {
		unsigned count = 0;

		blk_mq_tagset_busy_iter(tagset,
				blk_mq_tagset_count_completed_rqs, &count);
		if (!count)
			break;
		msleep(5);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
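
/*
 * Typical caller pattern (a sketch, not taken from this file): a driver
 * tearing down its queues first cancels outstanding requests via
 * blk_mq_tagset_busy_iter() and then waits for any in-flight completion
 * handling to drain:
 *
 *	blk_mq_tagset_busy_iter(set, cancel_fn, NULL);
 *	blk_mq_tagset_wait_completed_request(set);
 *
 * where cancel_fn is a driver-provided busy_tag_iter_fn (hypothetical
 * name) that completes or aborts each started request.
 */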

/**
 * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
 * @q:		Request queue to examine.
 * @fn:		Pointer to the function that will be called for each request
 *		on @q. @fn will be called as follows: @fn(rq, @priv) where rq
 *		is a pointer to a request. Return true to continue iterating
 *		tags, false to stop.
 * @priv:	Will be passed as second argument to @fn.
 *
 * Note: if @q->tag_set is shared with other request queues then @fn will be
 * called for all requests on all queues that share that tag set and not only
 * for requests associated with @q.
 */
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
		void *priv)
{
	/*
	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
	 * while the queue is frozen. So we can use q_usage_counter to avoid
	 * racing with it.
	 */
	if (!percpu_ref_tryget(&q->q_usage_counter))
		return;

	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
		struct blk_mq_tags *tags = q->tag_set->shared_tags;
		struct sbitmap_queue *bresv = &tags->breserved_tags;
		struct sbitmap_queue *btags = &tags->bitmap_tags;

		if (tags->nr_reserved_tags)
			bt_for_each(NULL, q, bresv, fn, priv, true);
		bt_for_each(NULL, q, btags, fn, priv, false);
	} else {
		struct blk_mq_hw_ctx *hctx;
		unsigned long i;

		queue_for_each_hw_ctx(q, hctx, i) {
			struct blk_mq_tags *tags = hctx->tags;
			struct sbitmap_queue *bresv = &tags->breserved_tags;
			struct sbitmap_queue *btags = &tags->bitmap_tags;

			/*
			 * If no software queues are currently mapped to this
			 * hardware queue, there's nothing to check.
			 */
			if (!blk_mq_hw_queue_mapped(hctx))
				continue;

			if (tags->nr_reserved_tags)
				bt_for_each(hctx, q, bresv, fn, priv, true);
			bt_for_each(hctx, q, btags, fn, priv, false);
		}
	}
	blk_queue_exit(q);
}
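
/*
 * Illustrative sketch (assumed usage, mirroring the pattern of
 * blk_mq_tagset_count_completed_rqs() above): counting the requests that
 * currently hold a driver tag on a queue with a busy_tag_iter_fn callback.
 *
 *	static bool count_busy(struct request *rq, void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		(*count)++;
 *		return true;	// keep iterating
 *	}
 *
 *	unsigned int count = 0;
 *	blk_mq_queue_tag_busy_iter(q, count_busy, &count);
 */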

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
				       node);
}

int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
			struct sbitmap_queue *breserved_tags,
			unsigned int queue_depth, unsigned int reserved,
			int node, int alloc_policy)
{
	unsigned int depth = queue_depth - reserved;
	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

	if (bt_alloc(bitmap_tags, depth, round_robin, node))
		return -ENOMEM;
	if (bt_alloc(breserved_tags, reserved, round_robin, node))
		goto free_bitmap_tags;

	return 0;

free_bitmap_tags:
	sbitmap_queue_free(bitmap_tags);
	return -ENOMEM;
}

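/*
 * Worked example: with queue_depth = 64 and reserved = 2, the normal
 * bitmap covers 62 tags and the reserved bitmap covers 2. Reserved
 * allocations in blk_mq_get_tag() use tag_offset = 0, while normal
 * allocations add tag_offset = nr_reserved_tags, so tags 0-1 are the
 * reserved ones and tags 2-63 come from the normal bitmap.
 */
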
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
{
	struct blk_mq_tags *tags;

	if (total_tags > BLK_MQ_TAG_MAX) {
		pr_err("blk-mq: tag depth too large\n");
		return NULL;
	}

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;
	spin_lock_init(&tags->lock);

	if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags,
				total_tags, reserved_tags, node,
				alloc_policy) < 0) {
		kfree(tags);
		return NULL;
	}
	return tags;
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	sbitmap_queue_free(&tags->bitmap_tags);
	sbitmap_queue_free(&tags->breserved_tags);
	kfree(tags);
}

int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
			    struct blk_mq_tags **tagsptr, unsigned int tdepth,
			    bool can_grow)
{
	struct blk_mq_tags *tags = *tagsptr;

	if (tdepth <= tags->nr_reserved_tags)
		return -EINVAL;

	/*
	 * If we are allowed to grow beyond the original size, allocate
	 * a new set of tags before freeing the old one.
	 */
	if (tdepth > tags->nr_tags) {
		struct blk_mq_tag_set *set = hctx->queue->tag_set;
		struct blk_mq_tags *new;

		if (!can_grow)
			return -EINVAL;

		/*
		 * We need some sort of upper limit; set it high enough that
		 * no valid use case should require more.
		 */
		if (tdepth > MAX_SCHED_RQ)
			return -EINVAL;

		/*
		 * Only the sbitmap needs resizing since we allocated the max
		 * initially.
		 */
		if (blk_mq_is_shared_tags(set->flags))
			return 0;

		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
		if (!new)
			return -ENOMEM;

		blk_mq_free_map_and_rqs(set, *tagsptr, hctx->queue_num);
		*tagsptr = new;
	} else {
		/*
		 * We don't need to (and can't) update reserved tags here;
		 * they remain static and should never need resizing.
		 */
		sbitmap_queue_resize(&tags->bitmap_tags,
				     tdepth - tags->nr_reserved_tags);
	}

	return 0;
}

void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size)
{
	struct blk_mq_tags *tags = set->shared_tags;

	sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags);
}

void blk_mq_tag_update_sched_shared_tags(struct request_queue *q)
{
	sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags,
			     q->nr_requests - q->tag_set->reserved_tags);
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function that returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
	return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
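
/*
 * For illustration: the value can be decoded with the helpers declared in
 * include/linux/blk-mq.h, blk_mq_unique_tag_to_hwq() and
 * blk_mq_unique_tag_to_tag(), e.g. in a driver's command setup path
 * (a sketch, not taken from this file):
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq = blk_mq_unique_tag_to_hwq(unique);	// upper bits
 *	u16 tag = blk_mq_unique_tag_to_tag(unique);	// lower bits
 */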