// SPDX-License-Identifier: GPL-2.0

#include "blk-rq-qos.h"

/*
 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
	unsigned int cur = atomic_read(v);

	for (;;) {
		unsigned int old;

		if (cur >= below)
			return false;
		old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			break;
		cur = old;
	}

	return true;
}

bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
	return atomic_inc_below(&rq_wait->inflight, limit);
}

void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->cleanup)
			rqos->ops->cleanup(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->issue)
			rqos->ops->issue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->requeue)
			rqos->ops->requeue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->throttle)
			rqos->ops->throttle(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->track)
			rqos->ops->track(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

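/*
 * Walk the rq_qos chain and let every policy that implements ->merge
 * account for @bio being merged into the existing request @rq.
 */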
void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->merge)
			rqos->ops->merge(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->done_bio)
			rqos->ops->done_bio(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
{
	do {
		if (rqos->ops->queue_depth_changed)
			rqos->ops->queue_depth_changed(rqos);
		rqos = rqos->next;
	} while (rqos);
}

/*
 * Return true if we can't increase the depth further by scaling.
 */
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
	unsigned int depth;
	bool ret = false;

	/*
	 * For QD=1 devices, this is a special case. It's important for those
	 * to have one request ready when one completes, so force a depth of
	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
	 * since the device can't have more than that in flight. If we're
	 * scaling down, then keep a setting of 1/1/1.
	 */
	if (rqd->queue_depth == 1) {
		if (rqd->scale_step > 0)
			rqd->max_depth = 1;
		else {
			rqd->max_depth = 2;
			ret = true;
		}
	} else {
		/*
		 * scale_step == 0 is our default state. If we have suffered
		 * latency spikes, step will be > 0, and we shrink the
		 * allowed write depths. If step is < 0, we're only doing
		 * writes, and we allow a temporarily higher depth to
		 * increase performance.
		 */
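		/*
		 * Worked example (illustrative numbers): starting from a
		 * depth of 64, scale_step == 2 gives 1 + ((64 - 1) >> 2) = 16,
		 * while a negative step shifts the depth up instead, capped
		 * at 3/4 of the device queue depth.
		 */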
		depth = min_t(unsigned int, rqd->default_depth,
			      rqd->queue_depth);
		if (rqd->scale_step > 0)
			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
		else if (rqd->scale_step < 0) {
			unsigned int maxd = 3 * rqd->queue_depth / 4;

			depth = 1 + ((depth - 1) << -rqd->scale_step);
			if (depth > maxd) {
				depth = maxd;
				ret = true;
			}
		}

		rqd->max_depth = depth;
	}

	return ret;
}

/* Returns true on success and false if scaling up wasn't possible */
bool rq_depth_scale_up(struct rq_depth *rqd)
{
	/*
	 * Hit max in previous round, stop here
	 */
	if (rqd->scaled_max)
		return false;

	rqd->scale_step--;

	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
	return true;
}

/*
 * Scale the queue depth down. If 'hard_throttle' is set, do it quicker,
 * since we had a latency violation. Returns true on success and false if
 * scaling down wasn't possible.
 */
bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
	/*
	 * Stop scaling down when we've hit the limit. This also prevents
	 * ->scale_step from going to crazy values, if the device can't
	 * keep up.
	 */
	if (rqd->max_depth == 1)
		return false;

	if (rqd->scale_step < 0 && hard_throttle)
		rqd->scale_step = 0;
	else
		rqd->scale_step++;

	rqd->scaled_max = false;
	rq_depth_calc_max_depth(rqd);
	return true;
}

struct rq_qos_wait_data {
	struct wait_queue_entry wq;
	struct task_struct *task;
	struct rq_wait *rqw;
	acquire_inflight_cb_t *cb;
	void *private_data;
	bool got_token;
};

static int rq_qos_wake_function(struct wait_queue_entry *curr,
				unsigned int mode, int wake_flags, void *key)
{
	struct rq_qos_wait_data *data = container_of(curr,
						     struct rq_qos_wait_data,
						     wq);

	/*
	 * If we fail to get a budget, return -1 to interrupt the wake up loop
	 * in __wake_up_common.
	 */
	if (!data->cb(data->rqw, data->private_data))
		return -1;

	data->got_token = true;
	smp_wmb();
	list_del_init(&curr->entry);
	wake_up_process(data->task);
	return 1;
}

/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: increment the rqw->inflight counter if we can
 * @cleanup_cb: the callback to clean up in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available. acquire_inflight_cb should
 * increment rqw->inflight if it can and return true; if it returns false
 * instead, we sleep until the room becomes available.
 *
 * cleanup_cb handles the case where we race with a waker and need to adjust
 * the inflight count accordingly.
 */
24884f60324SJosef Bacik */ 24984f60324SJosef Bacik void rq_qos_wait(struct rq_wait *rqw, void *private_data, 25084f60324SJosef Bacik acquire_inflight_cb_t *acquire_inflight_cb, 25184f60324SJosef Bacik cleanup_cb_t *cleanup_cb) 25284f60324SJosef Bacik { 25384f60324SJosef Bacik struct rq_qos_wait_data data = { 25484f60324SJosef Bacik .wq = { 25584f60324SJosef Bacik .func = rq_qos_wake_function, 25684f60324SJosef Bacik .entry = LIST_HEAD_INIT(data.wq.entry), 25784f60324SJosef Bacik }, 25884f60324SJosef Bacik .task = current, 25984f60324SJosef Bacik .rqw = rqw, 26084f60324SJosef Bacik .cb = acquire_inflight_cb, 26184f60324SJosef Bacik .private_data = private_data, 26284f60324SJosef Bacik }; 26384f60324SJosef Bacik bool has_sleeper; 26484f60324SJosef Bacik 26584f60324SJosef Bacik has_sleeper = wq_has_sleeper(&rqw->wait); 26684f60324SJosef Bacik if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) 26784f60324SJosef Bacik return; 26884f60324SJosef Bacik 269*11c7aa0dSJan Kara has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, 270*11c7aa0dSJan Kara TASK_UNINTERRUPTIBLE); 27184f60324SJosef Bacik do { 272ac38297fSJosef Bacik /* The memory barrier in set_task_state saves us here. */ 27384f60324SJosef Bacik if (data.got_token) 27484f60324SJosef Bacik break; 27584f60324SJosef Bacik if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { 27684f60324SJosef Bacik finish_wait(&rqw->wait, &data.wq); 27784f60324SJosef Bacik 27884f60324SJosef Bacik /* 27984f60324SJosef Bacik * We raced with wbt_wake_function() getting a token, 28084f60324SJosef Bacik * which means we now have two. Put our local token 28184f60324SJosef Bacik * and wake anyone else potentially waiting for one. 28284f60324SJosef Bacik */ 283ac38297fSJosef Bacik smp_rmb(); 28484f60324SJosef Bacik if (data.got_token) 28584f60324SJosef Bacik cleanup_cb(rqw, private_data); 28684f60324SJosef Bacik break; 28784f60324SJosef Bacik } 28884f60324SJosef Bacik io_schedule(); 28964e7ea87SJosef Bacik has_sleeper = true; 290d14a9b38SJosef Bacik set_current_state(TASK_UNINTERRUPTIBLE); 29184f60324SJosef Bacik } while (1); 29284f60324SJosef Bacik finish_wait(&rqw->wait, &data.wq); 29384f60324SJosef Bacik } 29484f60324SJosef Bacik 295a7905043SJosef Bacik void rq_qos_exit(struct request_queue *q) 296a7905043SJosef Bacik { 297cc56694fSMing Lei blk_mq_debugfs_unregister_queue_rqos(q); 298cc56694fSMing Lei 299a7905043SJosef Bacik while (q->rq_qos) { 300a7905043SJosef Bacik struct rq_qos *rqos = q->rq_qos; 301a7905043SJosef Bacik q->rq_qos = rqos->next; 302a7905043SJosef Bacik rqos->ops->exit(rqos); 303a7905043SJosef Bacik } 304a7905043SJosef Bacik } 305