1 #include "blk-rq-qos.h" 2 3 /* 4 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, 5 * false if 'v' + 1 would be bigger than 'below'. 6 */ 7 static bool atomic_inc_below(atomic_t *v, unsigned int below) 8 { 9 unsigned int cur = atomic_read(v); 10 11 for (;;) { 12 unsigned int old; 13 14 if (cur >= below) 15 return false; 16 old = atomic_cmpxchg(v, cur, cur + 1); 17 if (old == cur) 18 break; 19 cur = old; 20 } 21 22 return true; 23 } 24 25 bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) 26 { 27 return atomic_inc_below(&rq_wait->inflight, limit); 28 } 29 30 void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) 31 { 32 do { 33 if (rqos->ops->cleanup) 34 rqos->ops->cleanup(rqos, bio); 35 rqos = rqos->next; 36 } while (rqos); 37 } 38 39 void __rq_qos_done(struct rq_qos *rqos, struct request *rq) 40 { 41 do { 42 if (rqos->ops->done) 43 rqos->ops->done(rqos, rq); 44 rqos = rqos->next; 45 } while (rqos); 46 } 47 48 void __rq_qos_issue(struct rq_qos *rqos, struct request *rq) 49 { 50 do { 51 if (rqos->ops->issue) 52 rqos->ops->issue(rqos, rq); 53 rqos = rqos->next; 54 } while (rqos); 55 } 56 57 void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) 58 { 59 do { 60 if (rqos->ops->requeue) 61 rqos->ops->requeue(rqos, rq); 62 rqos = rqos->next; 63 } while (rqos); 64 } 65 66 void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) 67 { 68 do { 69 if (rqos->ops->throttle) 70 rqos->ops->throttle(rqos, bio); 71 rqos = rqos->next; 72 } while (rqos); 73 } 74 75 void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) 76 { 77 do { 78 if (rqos->ops->track) 79 rqos->ops->track(rqos, rq, bio); 80 rqos = rqos->next; 81 } while (rqos); 82 } 83 84 void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) 85 { 86 do { 87 if (rqos->ops->done_bio) 88 rqos->ops->done_bio(rqos, bio); 89 rqos = rqos->next; 90 } while (rqos); 91 } 92 93 /* 94 * Return true, if we can't increase the depth further by scaling 95 */ 96 bool rq_depth_calc_max_depth(struct rq_depth *rqd) 97 { 98 unsigned int depth; 99 bool ret = false; 100 101 /* 102 * For QD=1 devices, this is a special case. It's important for those 103 * to have one request ready when one completes, so force a depth of 104 * 2 for those devices. On the backend, it'll be a depth of 1 anyway, 105 * since the device can't have more than that in flight. If we're 106 * scaling down, then keep a setting of 1/1/1. 107 */ 108 if (rqd->queue_depth == 1) { 109 if (rqd->scale_step > 0) 110 rqd->max_depth = 1; 111 else { 112 rqd->max_depth = 2; 113 ret = true; 114 } 115 } else { 116 /* 117 * scale_step == 0 is our default state. If we have suffered 118 * latency spikes, step will be > 0, and we shrink the 119 * allowed write depths. If step is < 0, we're only doing 120 * writes, and we allow a temporarily higher depth to 121 * increase performance. 
122 */ 123 depth = min_t(unsigned int, rqd->default_depth, 124 rqd->queue_depth); 125 if (rqd->scale_step > 0) 126 depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); 127 else if (rqd->scale_step < 0) { 128 unsigned int maxd = 3 * rqd->queue_depth / 4; 129 130 depth = 1 + ((depth - 1) << -rqd->scale_step); 131 if (depth > maxd) { 132 depth = maxd; 133 ret = true; 134 } 135 } 136 137 rqd->max_depth = depth; 138 } 139 140 return ret; 141 } 142 143 void rq_depth_scale_up(struct rq_depth *rqd) 144 { 145 /* 146 * Hit max in previous round, stop here 147 */ 148 if (rqd->scaled_max) 149 return; 150 151 rqd->scale_step--; 152 153 rqd->scaled_max = rq_depth_calc_max_depth(rqd); 154 } 155 156 /* 157 * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we 158 * had a latency violation. 159 */ 160 void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) 161 { 162 /* 163 * Stop scaling down when we've hit the limit. This also prevents 164 * ->scale_step from going to crazy values, if the device can't 165 * keep up. 166 */ 167 if (rqd->max_depth == 1) 168 return; 169 170 if (rqd->scale_step < 0 && hard_throttle) 171 rqd->scale_step = 0; 172 else 173 rqd->scale_step++; 174 175 rqd->scaled_max = false; 176 rq_depth_calc_max_depth(rqd); 177 } 178 179 struct rq_qos_wait_data { 180 struct wait_queue_entry wq; 181 struct task_struct *task; 182 struct rq_wait *rqw; 183 acquire_inflight_cb_t *cb; 184 void *private_data; 185 bool got_token; 186 }; 187 188 static int rq_qos_wake_function(struct wait_queue_entry *curr, 189 unsigned int mode, int wake_flags, void *key) 190 { 191 struct rq_qos_wait_data *data = container_of(curr, 192 struct rq_qos_wait_data, 193 wq); 194 195 /* 196 * If we fail to get a budget, return -1 to interrupt the wake up loop 197 * in __wake_up_common. 198 */ 199 if (!data->cb(data->rqw, data->private_data)) 200 return -1; 201 202 data->got_token = true; 203 list_del_init(&curr->entry); 204 wake_up_process(data->task); 205 return 1; 206 } 207 208 /** 209 * rq_qos_wait - throttle on a rqw if we need to 210 * @private_data - caller provided specific data 211 * @acquire_inflight_cb - inc the rqw->inflight counter if we can 212 * @cleanup_cb - the callback to cleanup in case we race with a waker 213 * 214 * This provides a uniform place for the rq_qos users to do their throttling. 215 * Since you can end up with a lot of things sleeping at once, this manages the 216 * waking up based on the resources available. The acquire_inflight_cb should 217 * inc the rqw->inflight if we have the ability to do so, or return false if not 218 * and then we will sleep until the room becomes available. 219 * 220 * cleanup_cb is in case that we race with a waker and need to cleanup the 221 * inflight count accordingly. 
222 */ 223 void rq_qos_wait(struct rq_wait *rqw, void *private_data, 224 acquire_inflight_cb_t *acquire_inflight_cb, 225 cleanup_cb_t *cleanup_cb) 226 { 227 struct rq_qos_wait_data data = { 228 .wq = { 229 .func = rq_qos_wake_function, 230 .entry = LIST_HEAD_INIT(data.wq.entry), 231 }, 232 .task = current, 233 .rqw = rqw, 234 .cb = acquire_inflight_cb, 235 .private_data = private_data, 236 }; 237 bool has_sleeper; 238 239 has_sleeper = wq_has_sleeper(&rqw->wait); 240 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) 241 return; 242 243 prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); 244 do { 245 if (data.got_token) 246 break; 247 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { 248 finish_wait(&rqw->wait, &data.wq); 249 250 /* 251 * We raced with wbt_wake_function() getting a token, 252 * which means we now have two. Put our local token 253 * and wake anyone else potentially waiting for one. 254 */ 255 if (data.got_token) 256 cleanup_cb(rqw, private_data); 257 break; 258 } 259 io_schedule(); 260 has_sleeper = false; 261 } while (1); 262 finish_wait(&rqw->wait, &data.wq); 263 } 264 265 void rq_qos_exit(struct request_queue *q) 266 { 267 blk_mq_debugfs_unregister_queue_rqos(q); 268 269 while (q->rq_qos) { 270 struct rq_qos *rqos = q->rq_qos; 271 q->rq_qos = rqos->next; 272 rqos->ops->exit(rqos); 273 } 274 } 275
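
/*
 * Illustrative sketch (not part of the original file): how an rq_qos policy
 * might drive rq_qos_wait() from its ->throttle hook, using rq_wait_inc_below()
 * to claim an inflight slot. The struct and function names below (example_rqos,
 * example_acquire_inflight, example_cleanup, example_throttle) and the
 * max_inflight limit are hypothetical; the in-tree users of this pattern are
 * blk-wbt and blk-iolatency.
 */
struct example_rqos {
	struct rq_qos rqos;
	struct rq_wait rqw;
	unsigned int max_inflight;	/* hypothetical per-policy depth limit */
};

/* Grab an inflight slot only if we are still below the allowed depth. */
static bool example_acquire_inflight(struct rq_wait *rqw, void *private_data)
{
	struct example_rqos *ex = private_data;

	return rq_wait_inc_below(rqw, ex->max_inflight);
}

/*
 * We raced with rq_qos_wake_function() and ended up with an extra token:
 * put it back and wake the next exclusive waiter.
 */
static void example_cleanup(struct rq_wait *rqw, void *private_data)
{
	atomic_dec(&rqw->inflight);
	wake_up(&rqw->wait);
}

/* ->throttle hook: sleeps until example_acquire_inflight() succeeds for us. */
static void example_throttle(struct rq_qos *rqos, struct bio *bio)
{
	struct example_rqos *ex = container_of(rqos, struct example_rqos, rqos);

	rq_qos_wait(&ex->rqw, ex, example_acquire_inflight, example_cleanup);
}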