#include "blk-rq-qos.h"

/*
 * Increment 'v' if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
	unsigned int cur = atomic_read(v);

	for (;;) {
		unsigned int old;

		if (cur >= below)
			return false;
		old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			break;
		cur = old;
	}

	return true;
}

bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
	return atomic_inc_below(&rq_wait->inflight, limit);
}
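/*
 * Example: with v == 31 and below == 32, atomic_inc_below() advances
 * v to 32 and returns true; the next call sees cur >= below and
 * returns false without touching v. Concurrent callers retry through
 * the cmpxchg loop, so v can never be incremented past 'below'.
 */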
void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->cleanup)
			rqos->ops->cleanup(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->issue)
			rqos->ops->issue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->requeue)
			rqos->ops->requeue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->throttle)
			rqos->ops->throttle(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->track)
			rqos->ops->track(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->done_bio)
			rqos->ops->done_bio(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

/*
 * Return true if we can't increase the depth further by scaling.
 */
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
	unsigned int depth;
	bool ret = false;

	/*
	 * For QD=1 devices, this is a special case. It's important for those
	 * to have one request ready when one completes, so force a depth of
	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
	 * since the device can't have more than that in flight. If we're
	 * scaling down, then keep a setting of 1/1/1.
	 */
	if (rqd->queue_depth == 1) {
		if (rqd->scale_step > 0)
			rqd->max_depth = 1;
		else {
			rqd->max_depth = 2;
			ret = true;
		}
	} else {
		/*
		 * scale_step == 0 is our default state. If we have suffered
		 * latency spikes, step will be > 0, and we shrink the
		 * allowed write depths. If step is < 0, we're only doing
		 * writes, and we allow a temporarily higher depth to
		 * increase performance.
		 */
		depth = min_t(unsigned int, rqd->default_depth,
			      rqd->queue_depth);
		if (rqd->scale_step > 0)
			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
		else if (rqd->scale_step < 0) {
			unsigned int maxd = 3 * rqd->queue_depth / 4;

			depth = 1 + ((depth - 1) << -rqd->scale_step);
			if (depth > maxd) {
				depth = maxd;
				ret = true;
			}
		}

		rqd->max_depth = depth;
	}

	return ret;
}
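/*
 * Worked example of the scaling arithmetic above, assuming a device
 * with queue_depth == 64 and default_depth == 64:
 *
 *   scale_step  0: depth = 64
 *   scale_step  1: depth = 1 + ((64 - 1) >> 1) = 32
 *   scale_step  2: depth = 1 + ((64 - 1) >> 2) = 16
 *   scale_step -1: depth = 1 + ((64 - 1) << 1) = 127, clamped to
 *                  maxd = 3 * 64 / 4 = 48, and the function returns
 *                  true to signal that we can't scale up any further.
 */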
void rq_depth_scale_up(struct rq_depth *rqd)
{
	/*
	 * Hit max in previous round, stop here
	 */
	if (rqd->scaled_max)
		return;

	rqd->scale_step--;

	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
}

/*
 * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
 * had a latency violation.
 */
void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
	/*
	 * Stop scaling down when we've hit the limit. This also prevents
	 * ->scale_step from going to crazy values, if the device can't
	 * keep up.
	 */
	if (rqd->max_depth == 1)
		return;

	if (rqd->scale_step < 0 && hard_throttle)
		rqd->scale_step = 0;
	else
		rqd->scale_step++;

	rqd->scaled_max = false;
	rq_depth_calc_max_depth(rqd);
}

struct rq_qos_wait_data {
	struct wait_queue_entry wq;
	struct task_struct *task;
	struct rq_wait *rqw;
	acquire_inflight_cb_t *cb;
	void *private_data;
	bool got_token;
};

static int rq_qos_wake_function(struct wait_queue_entry *curr,
				unsigned int mode, int wake_flags, void *key)
{
	struct rq_qos_wait_data *data = container_of(curr,
						     struct rq_qos_wait_data,
						     wq);

	/*
	 * If we fail to get a budget, return -1 to interrupt the wake up loop
	 * in __wake_up_common.
	 */
	if (!data->cb(data->rqw, data->private_data))
		return -1;

	data->got_token = true;
	list_del_init(&curr->entry);
	wake_up_process(data->task);
	return 1;
}
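/*
 * The handoff between rq_qos_wake_function() and a sleeper in
 * rq_qos_wait() below works roughly like this:
 *
 *   sleeper                               waker
 *   -------                               -----
 *   prepare_to_wait_exclusive()
 *   io_schedule()
 *                                         rq_qos_wake_function()
 *                                           data->cb() grabs a token
 *                                           data->got_token = true
 *                                           wake_up_process()
 *   sees data.got_token, breaks out
 *   finish_wait()
 *
 * If the sleeper acquires a token on its own after being woken but
 * before checking got_token, it ends up holding two; that race is
 * what cleanup_cb() handles below.
 */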
/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available. The acquire_inflight_cb should
 * inc the rqw->inflight if we have the ability to do so, or return false if
 * not, and then we will sleep until room becomes available.
 *
 * cleanup_cb is for the case where we race with a waker and need to clean up
 * the inflight count accordingly.
 */
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		 acquire_inflight_cb_t *acquire_inflight_cb,
		 cleanup_cb_t *cleanup_cb)
{
	struct rq_qos_wait_data data = {
		.wq = {
			.func = rq_qos_wake_function,
			.entry = LIST_HEAD_INIT(data.wq.entry),
		},
		.task = current,
		.rqw = rqw,
		.cb = acquire_inflight_cb,
		.private_data = private_data,
	};
	bool has_sleeper;

	has_sleeper = wq_has_sleeper(&rqw->wait);
	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
		return;

	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
	do {
		if (data.got_token)
			break;
		if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);

			/*
			 * We raced with rq_qos_wake_function() getting a
			 * token, which means we now have two. Put our local
			 * token and wake anyone else potentially waiting for
			 * one.
			 */
			if (data.got_token)
				cleanup_cb(rqw, private_data);
			break;
		}
		io_schedule();
		has_sleeper = false;
	} while (1);
	finish_wait(&rqw->wait, &data.wq);
}

void rq_qos_exit(struct request_queue *q)
{
	while (q->rq_qos) {
		struct rq_qos *rqos = q->rq_qos;
		q->rq_qos = rqos->next;
		rqos->ops->exit(rqos);
	}
}
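/*
 * Illustrative sketch of how a policy wires up rq_qos_wait(). The
 * example_* names and the limit of 32 are hypothetical, not part of
 * this file or the rq_qos API; blk-wbt.c is the in-tree user of this
 * pattern for writeback throttling.
 */
static bool __maybe_unused example_acquire(struct rq_wait *rqw,
					   void *private_data)
{
	/* Take one inflight slot iff fewer than 32 are in flight. */
	return rq_wait_inc_below(rqw, 32);
}

static void __maybe_unused example_cleanup(struct rq_wait *rqw,
					   void *private_data)
{
	/* Put back the extra token taken in the wakeup race. */
	atomic_dec(&rqw->inflight);
	wake_up(&rqw->wait);
}

static void __maybe_unused example_throttle(struct rq_wait *rqw,
					    struct bio *bio)
{
	/* Sleeps until a slot is free or a waker hands us a token. */
	rq_qos_wait(rqw, bio, example_acquire, example_cleanup);
}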