13dcf60bcSChristoph Hellwig // SPDX-License-Identifier: GPL-2.0 2e34cbd30SJens Axboe /* 3e34cbd30SJens Axboe * buffered writeback throttling. loosely based on CoDel. We can't drop 4e34cbd30SJens Axboe * packets for IO scheduling, so the logic is something like this: 5e34cbd30SJens Axboe * 6e34cbd30SJens Axboe * - Monitor latencies in a defined window of time. 7e34cbd30SJens Axboe * - If the minimum latency in the above window exceeds some target, increment 8e34cbd30SJens Axboe * scaling step and scale down queue depth by a factor of 2x. The monitoring 9e34cbd30SJens Axboe * window is then shrunk to 100 / sqrt(scaling step + 1). 10e34cbd30SJens Axboe * - For any window where we don't have solid data on what the latencies 11e34cbd30SJens Axboe * look like, retain status quo. 12e34cbd30SJens Axboe * - If latencies look good, decrement scaling step. 13e34cbd30SJens Axboe * - If we're only doing writes, allow the scaling step to go negative. This 14e34cbd30SJens Axboe * will temporarily boost write performance, snapping back to a stable 15e34cbd30SJens Axboe * scaling step of 0 if reads show up or the heavy writers finish. Unlike 16e34cbd30SJens Axboe * positive scaling steps where we shrink the monitoring window, a negative 17e34cbd30SJens Axboe * scaling step retains the default step==0 window size. 18e34cbd30SJens Axboe * 19e34cbd30SJens Axboe * Copyright (C) 2016 Jens Axboe 20e34cbd30SJens Axboe * 21e34cbd30SJens Axboe */ 22e34cbd30SJens Axboe #include <linux/kernel.h> 23e34cbd30SJens Axboe #include <linux/blk_types.h> 24e34cbd30SJens Axboe #include <linux/slab.h> 25e34cbd30SJens Axboe #include <linux/backing-dev.h> 26e34cbd30SJens Axboe #include <linux/swap.h> 27e34cbd30SJens Axboe 28e34cbd30SJens Axboe #include "blk-wbt.h" 29a7905043SJosef Bacik #include "blk-rq-qos.h" 30e34cbd30SJens Axboe 31e34cbd30SJens Axboe #define CREATE_TRACE_POINTS 32e34cbd30SJens Axboe #include <trace/events/wbt.h> 33e34cbd30SJens Axboe 34a8a45941SOmar Sandoval static inline void wbt_clear_state(struct request *rq) 35934031a1SOmar Sandoval { 36544ccc8dSOmar Sandoval rq->wbt_flags = 0; 37934031a1SOmar Sandoval } 38934031a1SOmar Sandoval 39a8a45941SOmar Sandoval static inline enum wbt_flags wbt_flags(struct request *rq) 40934031a1SOmar Sandoval { 41544ccc8dSOmar Sandoval return rq->wbt_flags; 42934031a1SOmar Sandoval } 43934031a1SOmar Sandoval 44a8a45941SOmar Sandoval static inline bool wbt_is_tracked(struct request *rq) 45934031a1SOmar Sandoval { 46544ccc8dSOmar Sandoval return rq->wbt_flags & WBT_TRACKED; 47934031a1SOmar Sandoval } 48934031a1SOmar Sandoval 49a8a45941SOmar Sandoval static inline bool wbt_is_read(struct request *rq) 50934031a1SOmar Sandoval { 51544ccc8dSOmar Sandoval return rq->wbt_flags & WBT_READ; 52934031a1SOmar Sandoval } 53934031a1SOmar Sandoval 54e34cbd30SJens Axboe enum { 55e34cbd30SJens Axboe /* 56e34cbd30SJens Axboe * Default setting, we'll scale up (to 75% of QD max) or down (min 1) 57e34cbd30SJens Axboe * from here depending on device stats 58e34cbd30SJens Axboe */ 59e34cbd30SJens Axboe RWB_DEF_DEPTH = 16, 60e34cbd30SJens Axboe 61e34cbd30SJens Axboe /* 62e34cbd30SJens Axboe * 100msec window 63e34cbd30SJens Axboe */ 64e34cbd30SJens Axboe RWB_WINDOW_NSEC = 100 * 1000 * 1000ULL, 65e34cbd30SJens Axboe 66e34cbd30SJens Axboe /* 67e34cbd30SJens Axboe * Disregard stats, if we don't meet this minimum 68e34cbd30SJens Axboe */ 69e34cbd30SJens Axboe RWB_MIN_WRITE_SAMPLES = 3, 70e34cbd30SJens Axboe 71e34cbd30SJens Axboe /* 72e34cbd30SJens Axboe * If we have this number of consecutive windows with not enough 73e34cbd30SJens Axboe * information to scale up or down, scale up. 74e34cbd30SJens Axboe */ 75e34cbd30SJens Axboe RWB_UNKNOWN_BUMP = 5, 76e34cbd30SJens Axboe }; 77e34cbd30SJens Axboe 78e34cbd30SJens Axboe static inline bool rwb_enabled(struct rq_wb *rwb) 79e34cbd30SJens Axboe { 80e34cbd30SJens Axboe return rwb && rwb->wb_normal != 0; 81e34cbd30SJens Axboe } 82e34cbd30SJens Axboe 83e34cbd30SJens Axboe static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) 84e34cbd30SJens Axboe { 85e34cbd30SJens Axboe if (rwb_enabled(rwb)) { 86e34cbd30SJens Axboe const unsigned long cur = jiffies; 87e34cbd30SJens Axboe 88e34cbd30SJens Axboe if (cur != *var) 89e34cbd30SJens Axboe *var = cur; 90e34cbd30SJens Axboe } 91e34cbd30SJens Axboe } 92e34cbd30SJens Axboe 93e34cbd30SJens Axboe /* 94e34cbd30SJens Axboe * If a task was rate throttled in balance_dirty_pages() within the last 95e34cbd30SJens Axboe * second or so, use that to indicate a higher cleaning rate. 96e34cbd30SJens Axboe */ 97e34cbd30SJens Axboe static bool wb_recent_wait(struct rq_wb *rwb) 98e34cbd30SJens Axboe { 99a7905043SJosef Bacik struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb; 100e34cbd30SJens Axboe 101e34cbd30SJens Axboe return time_before(jiffies, wb->dirty_sleep + HZ); 102e34cbd30SJens Axboe } 103e34cbd30SJens Axboe 1048bea6090SJens Axboe static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, 1058bea6090SJens Axboe enum wbt_flags wb_acct) 106e34cbd30SJens Axboe { 1078bea6090SJens Axboe if (wb_acct & WBT_KSWAPD) 1088bea6090SJens Axboe return &rwb->rq_wait[WBT_RWQ_KSWAPD]; 109782f5697SJens Axboe else if (wb_acct & WBT_DISCARD) 110782f5697SJens Axboe return &rwb->rq_wait[WBT_RWQ_DISCARD]; 1118bea6090SJens Axboe 1128bea6090SJens Axboe return &rwb->rq_wait[WBT_RWQ_BG]; 113e34cbd30SJens Axboe } 114e34cbd30SJens Axboe 115e34cbd30SJens Axboe static void rwb_wake_all(struct rq_wb *rwb) 116e34cbd30SJens Axboe { 117e34cbd30SJens Axboe int i; 118e34cbd30SJens Axboe 119e34cbd30SJens Axboe for (i = 0; i < WBT_NUM_RWQ; i++) { 120e34cbd30SJens Axboe struct rq_wait *rqw = &rwb->rq_wait[i]; 121e34cbd30SJens Axboe 122b7882093SJens Axboe if (wq_has_sleeper(&rqw->wait)) 123e34cbd30SJens Axboe wake_up_all(&rqw->wait); 124e34cbd30SJens Axboe } 125e34cbd30SJens Axboe } 126e34cbd30SJens Axboe 127061a5427SJens Axboe static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, 128061a5427SJens Axboe enum wbt_flags wb_acct) 129e34cbd30SJens Axboe { 130e34cbd30SJens Axboe int inflight, limit; 131e34cbd30SJens Axboe 132e34cbd30SJens Axboe inflight = atomic_dec_return(&rqw->inflight); 133e34cbd30SJens Axboe 134e34cbd30SJens Axboe /* 135e34cbd30SJens Axboe * wbt got disabled with IO in flight. Wake up any potential 136e34cbd30SJens Axboe * waiters, we don't have to do more than that. 137e34cbd30SJens Axboe */ 138e34cbd30SJens Axboe if (unlikely(!rwb_enabled(rwb))) { 139e34cbd30SJens Axboe rwb_wake_all(rwb); 140e34cbd30SJens Axboe return; 141e34cbd30SJens Axboe } 142e34cbd30SJens Axboe 143e34cbd30SJens Axboe /* 144782f5697SJens Axboe * For discards, our limit is always the background. For writes, if 145782f5697SJens Axboe * the device does write back caching, drop further down before we 146782f5697SJens Axboe * wake people up. 147e34cbd30SJens Axboe */ 148782f5697SJens Axboe if (wb_acct & WBT_DISCARD) 149782f5697SJens Axboe limit = rwb->wb_background; 150782f5697SJens Axboe else if (rwb->wc && !wb_recent_wait(rwb)) 151e34cbd30SJens Axboe limit = 0; 152e34cbd30SJens Axboe else 153e34cbd30SJens Axboe limit = rwb->wb_normal; 154e34cbd30SJens Axboe 155e34cbd30SJens Axboe /* 156e34cbd30SJens Axboe * Don't wake anyone up if we are above the normal limit. 157e34cbd30SJens Axboe */ 158e34cbd30SJens Axboe if (inflight && inflight >= limit) 159e34cbd30SJens Axboe return; 160e34cbd30SJens Axboe 161b7882093SJens Axboe if (wq_has_sleeper(&rqw->wait)) { 162e34cbd30SJens Axboe int diff = limit - inflight; 163e34cbd30SJens Axboe 164e34cbd30SJens Axboe if (!inflight || diff >= rwb->wb_background / 2) 16538cfb5a4SJens Axboe wake_up_all(&rqw->wait); 166e34cbd30SJens Axboe } 167e34cbd30SJens Axboe } 168e34cbd30SJens Axboe 169061a5427SJens Axboe static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) 170061a5427SJens Axboe { 171061a5427SJens Axboe struct rq_wb *rwb = RQWB(rqos); 172061a5427SJens Axboe struct rq_wait *rqw; 173061a5427SJens Axboe 174061a5427SJens Axboe if (!(wb_acct & WBT_TRACKED)) 175061a5427SJens Axboe return; 176061a5427SJens Axboe 177061a5427SJens Axboe rqw = get_rq_wait(rwb, wb_acct); 178061a5427SJens Axboe wbt_rqw_done(rwb, rqw, wb_acct); 179061a5427SJens Axboe } 180061a5427SJens Axboe 181e34cbd30SJens Axboe /* 182e34cbd30SJens Axboe * Called on completion of a request. Note that it's also called when 183e34cbd30SJens Axboe * a request is merged, when the request gets freed. 184e34cbd30SJens Axboe */ 185a7905043SJosef Bacik static void wbt_done(struct rq_qos *rqos, struct request *rq) 186e34cbd30SJens Axboe { 187a7905043SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 188e34cbd30SJens Axboe 189a8a45941SOmar Sandoval if (!wbt_is_tracked(rq)) { 190a8a45941SOmar Sandoval if (rwb->sync_cookie == rq) { 191e34cbd30SJens Axboe rwb->sync_issue = 0; 192e34cbd30SJens Axboe rwb->sync_cookie = NULL; 193e34cbd30SJens Axboe } 194e34cbd30SJens Axboe 195a8a45941SOmar Sandoval if (wbt_is_read(rq)) 196e34cbd30SJens Axboe wb_timestamp(rwb, &rwb->last_comp); 197e34cbd30SJens Axboe } else { 198a8a45941SOmar Sandoval WARN_ON_ONCE(rq == rwb->sync_cookie); 199a7905043SJosef Bacik __wbt_done(rqos, wbt_flags(rq)); 200e34cbd30SJens Axboe } 201a8a45941SOmar Sandoval wbt_clear_state(rq); 202e34cbd30SJens Axboe } 203e34cbd30SJens Axboe 2044121d385SArnd Bergmann static inline bool stat_sample_valid(struct blk_rq_stat *stat) 205e34cbd30SJens Axboe { 206e34cbd30SJens Axboe /* 207e34cbd30SJens Axboe * We need at least one read sample, and a minimum of 208e34cbd30SJens Axboe * RWB_MIN_WRITE_SAMPLES. We require some write samples to know 209e34cbd30SJens Axboe * that it's writes impacting us, and not just some sole read on 210e34cbd30SJens Axboe * a device that is in a lower power state. 211e34cbd30SJens Axboe */ 212fa2e39cbSOmar Sandoval return (stat[READ].nr_samples >= 1 && 213fa2e39cbSOmar Sandoval stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES); 214e34cbd30SJens Axboe } 215e34cbd30SJens Axboe 216e34cbd30SJens Axboe static u64 rwb_sync_issue_lat(struct rq_wb *rwb) 217e34cbd30SJens Axboe { 2186aa7de05SMark Rutland u64 now, issue = READ_ONCE(rwb->sync_issue); 219e34cbd30SJens Axboe 220e34cbd30SJens Axboe if (!issue || !rwb->sync_cookie) 221e34cbd30SJens Axboe return 0; 222e34cbd30SJens Axboe 223e34cbd30SJens Axboe now = ktime_to_ns(ktime_get()); 224e34cbd30SJens Axboe return now - issue; 225e34cbd30SJens Axboe } 226e34cbd30SJens Axboe 227e34cbd30SJens Axboe enum { 228e34cbd30SJens Axboe LAT_OK = 1, 229e34cbd30SJens Axboe LAT_UNKNOWN, 230e34cbd30SJens Axboe LAT_UNKNOWN_WRITES, 231e34cbd30SJens Axboe LAT_EXCEEDED, 232e34cbd30SJens Axboe }; 233e34cbd30SJens Axboe 23434dbad5dSOmar Sandoval static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) 235e34cbd30SJens Axboe { 236a7905043SJosef Bacik struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; 237a7905043SJosef Bacik struct rq_depth *rqd = &rwb->rq_depth; 238e34cbd30SJens Axboe u64 thislat; 239e34cbd30SJens Axboe 240e34cbd30SJens Axboe /* 241e34cbd30SJens Axboe * If our stored sync issue exceeds the window size, or it 242e34cbd30SJens Axboe * exceeds our min target AND we haven't logged any entries, 243e34cbd30SJens Axboe * flag the latency as exceeded. wbt works off completion latencies, 244e34cbd30SJens Axboe * but for a flooded device, a single sync IO can take a long time 245e34cbd30SJens Axboe * to complete after being issued. If this time exceeds our 246e34cbd30SJens Axboe * monitoring window AND we didn't see any other completions in that 247e34cbd30SJens Axboe * window, then count that sync IO as a violation of the latency. 248e34cbd30SJens Axboe */ 249e34cbd30SJens Axboe thislat = rwb_sync_issue_lat(rwb); 250e34cbd30SJens Axboe if (thislat > rwb->cur_win_nsec || 251fa2e39cbSOmar Sandoval (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) { 252d8a0cbfdSJens Axboe trace_wbt_lat(bdi, thislat); 253e34cbd30SJens Axboe return LAT_EXCEEDED; 254e34cbd30SJens Axboe } 255e34cbd30SJens Axboe 256e34cbd30SJens Axboe /* 257e34cbd30SJens Axboe * No read/write mix, if stat isn't valid 258e34cbd30SJens Axboe */ 259e34cbd30SJens Axboe if (!stat_sample_valid(stat)) { 260e34cbd30SJens Axboe /* 261e34cbd30SJens Axboe * If we had writes in this stat window and the window is 262e34cbd30SJens Axboe * current, we're only doing writes. If a task recently 263e34cbd30SJens Axboe * waited or still has writes in flights, consider us doing 264e34cbd30SJens Axboe * just writes as well. 265e34cbd30SJens Axboe */ 26634dbad5dSOmar Sandoval if (stat[WRITE].nr_samples || wb_recent_wait(rwb) || 26734dbad5dSOmar Sandoval wbt_inflight(rwb)) 268e34cbd30SJens Axboe return LAT_UNKNOWN_WRITES; 269e34cbd30SJens Axboe return LAT_UNKNOWN; 270e34cbd30SJens Axboe } 271e34cbd30SJens Axboe 272e34cbd30SJens Axboe /* 273e34cbd30SJens Axboe * If the 'min' latency exceeds our target, step down. 274e34cbd30SJens Axboe */ 275fa2e39cbSOmar Sandoval if (stat[READ].min > rwb->min_lat_nsec) { 276fa2e39cbSOmar Sandoval trace_wbt_lat(bdi, stat[READ].min); 277d8a0cbfdSJens Axboe trace_wbt_stat(bdi, stat); 278e34cbd30SJens Axboe return LAT_EXCEEDED; 279e34cbd30SJens Axboe } 280e34cbd30SJens Axboe 281a7905043SJosef Bacik if (rqd->scale_step) 282d8a0cbfdSJens Axboe trace_wbt_stat(bdi, stat); 283e34cbd30SJens Axboe 284e34cbd30SJens Axboe return LAT_OK; 285e34cbd30SJens Axboe } 286e34cbd30SJens Axboe 287e34cbd30SJens Axboe static void rwb_trace_step(struct rq_wb *rwb, const char *msg) 288e34cbd30SJens Axboe { 289a7905043SJosef Bacik struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; 290a7905043SJosef Bacik struct rq_depth *rqd = &rwb->rq_depth; 291d8a0cbfdSJens Axboe 292a7905043SJosef Bacik trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, 293a7905043SJosef Bacik rwb->wb_background, rwb->wb_normal, rqd->max_depth); 294a7905043SJosef Bacik } 295a7905043SJosef Bacik 296a7905043SJosef Bacik static void calc_wb_limits(struct rq_wb *rwb) 297a7905043SJosef Bacik { 298a7905043SJosef Bacik if (rwb->min_lat_nsec == 0) { 299a7905043SJosef Bacik rwb->wb_normal = rwb->wb_background = 0; 300a7905043SJosef Bacik } else if (rwb->rq_depth.max_depth <= 2) { 301a7905043SJosef Bacik rwb->wb_normal = rwb->rq_depth.max_depth; 302a7905043SJosef Bacik rwb->wb_background = 1; 303a7905043SJosef Bacik } else { 304a7905043SJosef Bacik rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; 305a7905043SJosef Bacik rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; 306a7905043SJosef Bacik } 307e34cbd30SJens Axboe } 308e34cbd30SJens Axboe 309e34cbd30SJens Axboe static void scale_up(struct rq_wb *rwb) 310e34cbd30SJens Axboe { 311a7905043SJosef Bacik rq_depth_scale_up(&rwb->rq_depth); 312a7905043SJosef Bacik calc_wb_limits(rwb); 313e34cbd30SJens Axboe rwb->unknown_cnt = 0; 3145e65a203SJosef Bacik rwb_wake_all(rwb); 315a7905043SJosef Bacik rwb_trace_step(rwb, "scale up"); 316e34cbd30SJens Axboe } 317e34cbd30SJens Axboe 318e34cbd30SJens Axboe static void scale_down(struct rq_wb *rwb, bool hard_throttle) 319e34cbd30SJens Axboe { 320a7905043SJosef Bacik rq_depth_scale_down(&rwb->rq_depth, hard_throttle); 321e34cbd30SJens Axboe calc_wb_limits(rwb); 322a7905043SJosef Bacik rwb->unknown_cnt = 0; 323a7905043SJosef Bacik rwb_trace_step(rwb, "scale down"); 324e34cbd30SJens Axboe } 325e34cbd30SJens Axboe 326e34cbd30SJens Axboe static void rwb_arm_timer(struct rq_wb *rwb) 327e34cbd30SJens Axboe { 328a7905043SJosef Bacik struct rq_depth *rqd = &rwb->rq_depth; 329a7905043SJosef Bacik 330a7905043SJosef Bacik if (rqd->scale_step > 0) { 331e34cbd30SJens Axboe /* 332e34cbd30SJens Axboe * We should speed this up, using some variant of a fast 333e34cbd30SJens Axboe * integer inverse square root calculation. Since we only do 334e34cbd30SJens Axboe * this for every window expiration, it's not a huge deal, 335e34cbd30SJens Axboe * though. 336e34cbd30SJens Axboe */ 337e34cbd30SJens Axboe rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, 338a7905043SJosef Bacik int_sqrt((rqd->scale_step + 1) << 8)); 339e34cbd30SJens Axboe } else { 340e34cbd30SJens Axboe /* 341e34cbd30SJens Axboe * For step < 0, we don't want to increase/decrease the 342e34cbd30SJens Axboe * window size. 343e34cbd30SJens Axboe */ 344e34cbd30SJens Axboe rwb->cur_win_nsec = rwb->win_nsec; 345e34cbd30SJens Axboe } 346e34cbd30SJens Axboe 34734dbad5dSOmar Sandoval blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec); 348e34cbd30SJens Axboe } 349e34cbd30SJens Axboe 35034dbad5dSOmar Sandoval static void wb_timer_fn(struct blk_stat_callback *cb) 351e34cbd30SJens Axboe { 35234dbad5dSOmar Sandoval struct rq_wb *rwb = cb->data; 353a7905043SJosef Bacik struct rq_depth *rqd = &rwb->rq_depth; 354e34cbd30SJens Axboe unsigned int inflight = wbt_inflight(rwb); 355e34cbd30SJens Axboe int status; 356e34cbd30SJens Axboe 35734dbad5dSOmar Sandoval status = latency_exceeded(rwb, cb->stat); 358e34cbd30SJens Axboe 359a7905043SJosef Bacik trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step, 360d8a0cbfdSJens Axboe inflight); 361e34cbd30SJens Axboe 362e34cbd30SJens Axboe /* 363e34cbd30SJens Axboe * If we exceeded the latency target, step down. If we did not, 364e34cbd30SJens Axboe * step one level up. If we don't know enough to say either exceeded 365e34cbd30SJens Axboe * or ok, then don't do anything. 366e34cbd30SJens Axboe */ 367e34cbd30SJens Axboe switch (status) { 368e34cbd30SJens Axboe case LAT_EXCEEDED: 369e34cbd30SJens Axboe scale_down(rwb, true); 370e34cbd30SJens Axboe break; 371e34cbd30SJens Axboe case LAT_OK: 372e34cbd30SJens Axboe scale_up(rwb); 373e34cbd30SJens Axboe break; 374e34cbd30SJens Axboe case LAT_UNKNOWN_WRITES: 375e34cbd30SJens Axboe /* 376e34cbd30SJens Axboe * We started a the center step, but don't have a valid 377e34cbd30SJens Axboe * read/write sample, but we do have writes going on. 378e34cbd30SJens Axboe * Allow step to go negative, to increase write perf. 379e34cbd30SJens Axboe */ 380e34cbd30SJens Axboe scale_up(rwb); 381e34cbd30SJens Axboe break; 382e34cbd30SJens Axboe case LAT_UNKNOWN: 383e34cbd30SJens Axboe if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP) 384e34cbd30SJens Axboe break; 385e34cbd30SJens Axboe /* 386e34cbd30SJens Axboe * We get here when previously scaled reduced depth, and we 387e34cbd30SJens Axboe * currently don't have a valid read/write sample. For that 388e34cbd30SJens Axboe * case, slowly return to center state (step == 0). 389e34cbd30SJens Axboe */ 390a7905043SJosef Bacik if (rqd->scale_step > 0) 391e34cbd30SJens Axboe scale_up(rwb); 392a7905043SJosef Bacik else if (rqd->scale_step < 0) 393e34cbd30SJens Axboe scale_down(rwb, false); 394e34cbd30SJens Axboe break; 395e34cbd30SJens Axboe default: 396e34cbd30SJens Axboe break; 397e34cbd30SJens Axboe } 398e34cbd30SJens Axboe 399e34cbd30SJens Axboe /* 400e34cbd30SJens Axboe * Re-arm timer, if we have IO in flight 401e34cbd30SJens Axboe */ 402a7905043SJosef Bacik if (rqd->scale_step || inflight) 403e34cbd30SJens Axboe rwb_arm_timer(rwb); 404e34cbd30SJens Axboe } 405e34cbd30SJens Axboe 406a7905043SJosef Bacik static void __wbt_update_limits(struct rq_wb *rwb) 407e34cbd30SJens Axboe { 408a7905043SJosef Bacik struct rq_depth *rqd = &rwb->rq_depth; 409a7905043SJosef Bacik 410a7905043SJosef Bacik rqd->scale_step = 0; 411a7905043SJosef Bacik rqd->scaled_max = false; 412a7905043SJosef Bacik 413a7905043SJosef Bacik rq_depth_calc_max_depth(rqd); 414e34cbd30SJens Axboe calc_wb_limits(rwb); 415e34cbd30SJens Axboe 416e34cbd30SJens Axboe rwb_wake_all(rwb); 417e34cbd30SJens Axboe } 418e34cbd30SJens Axboe 419a7905043SJosef Bacik void wbt_update_limits(struct request_queue *q) 420a7905043SJosef Bacik { 421a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 422a7905043SJosef Bacik if (!rqos) 423a7905043SJosef Bacik return; 424a7905043SJosef Bacik __wbt_update_limits(RQWB(rqos)); 425a7905043SJosef Bacik } 426a7905043SJosef Bacik 427a7905043SJosef Bacik u64 wbt_get_min_lat(struct request_queue *q) 428a7905043SJosef Bacik { 429a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 430a7905043SJosef Bacik if (!rqos) 431a7905043SJosef Bacik return 0; 432a7905043SJosef Bacik return RQWB(rqos)->min_lat_nsec; 433a7905043SJosef Bacik } 434a7905043SJosef Bacik 435a7905043SJosef Bacik void wbt_set_min_lat(struct request_queue *q, u64 val) 436a7905043SJosef Bacik { 437a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 438a7905043SJosef Bacik if (!rqos) 439a7905043SJosef Bacik return; 440a7905043SJosef Bacik RQWB(rqos)->min_lat_nsec = val; 441a7905043SJosef Bacik RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; 442a7905043SJosef Bacik __wbt_update_limits(RQWB(rqos)); 443a7905043SJosef Bacik } 444a7905043SJosef Bacik 445a7905043SJosef Bacik 446e34cbd30SJens Axboe static bool close_io(struct rq_wb *rwb) 447e34cbd30SJens Axboe { 448e34cbd30SJens Axboe const unsigned long now = jiffies; 449e34cbd30SJens Axboe 450e34cbd30SJens Axboe return time_before(now, rwb->last_issue + HZ / 10) || 451e34cbd30SJens Axboe time_before(now, rwb->last_comp + HZ / 10); 452e34cbd30SJens Axboe } 453e34cbd30SJens Axboe 454e34cbd30SJens Axboe #define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO) 455e34cbd30SJens Axboe 456e34cbd30SJens Axboe static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) 457e34cbd30SJens Axboe { 458e34cbd30SJens Axboe unsigned int limit; 459e34cbd30SJens Axboe 460ffa358dcSJens Axboe /* 461ffa358dcSJens Axboe * If we got disabled, just return UINT_MAX. This ensures that 462ffa358dcSJens Axboe * we'll properly inc a new IO, and dec+wakeup at the end. 463ffa358dcSJens Axboe */ 464ffa358dcSJens Axboe if (!rwb_enabled(rwb)) 465ffa358dcSJens Axboe return UINT_MAX; 466ffa358dcSJens Axboe 467782f5697SJens Axboe if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD) 468782f5697SJens Axboe return rwb->wb_background; 469782f5697SJens Axboe 470e34cbd30SJens Axboe /* 471e34cbd30SJens Axboe * At this point we know it's a buffered write. If this is 4723dfbdc44Sweiping zhang * kswapd trying to free memory, or REQ_SYNC is set, then 473e34cbd30SJens Axboe * it's WB_SYNC_ALL writeback, and we'll use the max limit for 474e34cbd30SJens Axboe * that. If the write is marked as a background write, then use 475e34cbd30SJens Axboe * the idle limit, or go to normal if we haven't had competing 476e34cbd30SJens Axboe * IO for a bit. 477e34cbd30SJens Axboe */ 478e34cbd30SJens Axboe if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) 479a7905043SJosef Bacik limit = rwb->rq_depth.max_depth; 480e34cbd30SJens Axboe else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { 481e34cbd30SJens Axboe /* 482e34cbd30SJens Axboe * If less than 100ms since we completed unrelated IO, 483e34cbd30SJens Axboe * limit us to half the depth for background writeback. 484e34cbd30SJens Axboe */ 485e34cbd30SJens Axboe limit = rwb->wb_background; 486e34cbd30SJens Axboe } else 487e34cbd30SJens Axboe limit = rwb->wb_normal; 488e34cbd30SJens Axboe 489e34cbd30SJens Axboe return limit; 490e34cbd30SJens Axboe } 491e34cbd30SJens Axboe 49238cfb5a4SJens Axboe struct wbt_wait_data { 49338cfb5a4SJens Axboe struct rq_wb *rwb; 494b6c7b58fSJosef Bacik enum wbt_flags wb_acct; 49538cfb5a4SJens Axboe unsigned long rw; 49638cfb5a4SJens Axboe }; 49738cfb5a4SJens Axboe 498b6c7b58fSJosef Bacik static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data) 49938cfb5a4SJens Axboe { 500b6c7b58fSJosef Bacik struct wbt_wait_data *data = private_data; 501b6c7b58fSJosef Bacik return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw)); 502b6c7b58fSJosef Bacik } 50338cfb5a4SJens Axboe 504b6c7b58fSJosef Bacik static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data) 505b6c7b58fSJosef Bacik { 506b6c7b58fSJosef Bacik struct wbt_wait_data *data = private_data; 507b6c7b58fSJosef Bacik wbt_rqw_done(data->rwb, rqw, data->wb_acct); 50838cfb5a4SJens Axboe } 50938cfb5a4SJens Axboe 510e34cbd30SJens Axboe /* 511e34cbd30SJens Axboe * Block if we will exceed our limit, or if we are currently waiting for 512e34cbd30SJens Axboe * the timer to kick off queuing again. 513e34cbd30SJens Axboe */ 5148bea6090SJens Axboe static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, 515d5337560SChristoph Hellwig unsigned long rw) 516e34cbd30SJens Axboe { 5178bea6090SJens Axboe struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); 51838cfb5a4SJens Axboe struct wbt_wait_data data = { 51938cfb5a4SJens Axboe .rwb = rwb, 520b6c7b58fSJosef Bacik .wb_acct = wb_acct, 52138cfb5a4SJens Axboe .rw = rw, 52238cfb5a4SJens Axboe }; 523e34cbd30SJens Axboe 524b6c7b58fSJosef Bacik rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb); 525e34cbd30SJens Axboe } 526e34cbd30SJens Axboe 527e34cbd30SJens Axboe static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) 528e34cbd30SJens Axboe { 529782f5697SJens Axboe switch (bio_op(bio)) { 530782f5697SJens Axboe case REQ_OP_WRITE: 531e34cbd30SJens Axboe /* 532e34cbd30SJens Axboe * Don't throttle WRITE_ODIRECT 533e34cbd30SJens Axboe */ 534782f5697SJens Axboe if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == 535782f5697SJens Axboe (REQ_SYNC | REQ_IDLE)) 536e34cbd30SJens Axboe return false; 537782f5697SJens Axboe /* fallthrough */ 538782f5697SJens Axboe case REQ_OP_DISCARD: 539e34cbd30SJens Axboe return true; 540782f5697SJens Axboe default: 541782f5697SJens Axboe return false; 542782f5697SJens Axboe } 543e34cbd30SJens Axboe } 544e34cbd30SJens Axboe 545c1c80384SJosef Bacik static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio) 546c1c80384SJosef Bacik { 547c1c80384SJosef Bacik enum wbt_flags flags = 0; 548c1c80384SJosef Bacik 549c125311dSJens Axboe if (!rwb_enabled(rwb)) 550c125311dSJens Axboe return 0; 551c125311dSJens Axboe 552c1c80384SJosef Bacik if (bio_op(bio) == REQ_OP_READ) { 553c1c80384SJosef Bacik flags = WBT_READ; 554c1c80384SJosef Bacik } else if (wbt_should_throttle(rwb, bio)) { 555c1c80384SJosef Bacik if (current_is_kswapd()) 556c1c80384SJosef Bacik flags |= WBT_KSWAPD; 557c1c80384SJosef Bacik if (bio_op(bio) == REQ_OP_DISCARD) 558c1c80384SJosef Bacik flags |= WBT_DISCARD; 559c1c80384SJosef Bacik flags |= WBT_TRACKED; 560c1c80384SJosef Bacik } 561c1c80384SJosef Bacik return flags; 562c1c80384SJosef Bacik } 563c1c80384SJosef Bacik 564c1c80384SJosef Bacik static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio) 565c1c80384SJosef Bacik { 566c1c80384SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 567c1c80384SJosef Bacik enum wbt_flags flags = bio_to_wbt_flags(rwb, bio); 568c1c80384SJosef Bacik __wbt_done(rqos, flags); 569c1c80384SJosef Bacik } 570c1c80384SJosef Bacik 571e34cbd30SJens Axboe /* 572e34cbd30SJens Axboe * Returns true if the IO request should be accounted, false if not. 573e34cbd30SJens Axboe * May sleep, if we have exceeded the writeback limits. Caller can pass 574e34cbd30SJens Axboe * in an irq held spinlock, if it holds one when calling this function. 575e34cbd30SJens Axboe * If we do sleep, we'll release and re-grab it. 576e34cbd30SJens Axboe */ 577d5337560SChristoph Hellwig static void wbt_wait(struct rq_qos *rqos, struct bio *bio) 578e34cbd30SJens Axboe { 579a7905043SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 580c1c80384SJosef Bacik enum wbt_flags flags; 581e34cbd30SJens Axboe 582c1c80384SJosef Bacik flags = bio_to_wbt_flags(rwb, bio); 583df60f6e8SMing Lei if (!(flags & WBT_TRACKED)) { 584c1c80384SJosef Bacik if (flags & WBT_READ) 585e34cbd30SJens Axboe wb_timestamp(rwb, &rwb->last_issue); 586c1c80384SJosef Bacik return; 587e34cbd30SJens Axboe } 588e34cbd30SJens Axboe 589d5337560SChristoph Hellwig __wbt_wait(rwb, flags, bio->bi_opf); 590e34cbd30SJens Axboe 59134dbad5dSOmar Sandoval if (!blk_stat_is_active(rwb->cb)) 592e34cbd30SJens Axboe rwb_arm_timer(rwb); 593c1c80384SJosef Bacik } 594e34cbd30SJens Axboe 595c1c80384SJosef Bacik static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) 596c1c80384SJosef Bacik { 597c1c80384SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 598c1c80384SJosef Bacik rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); 599e34cbd30SJens Axboe } 600e34cbd30SJens Axboe 601c83f536aSBart Van Assche static void wbt_issue(struct rq_qos *rqos, struct request *rq) 602e34cbd30SJens Axboe { 603a7905043SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 604a7905043SJosef Bacik 605e34cbd30SJens Axboe if (!rwb_enabled(rwb)) 606e34cbd30SJens Axboe return; 607e34cbd30SJens Axboe 608e34cbd30SJens Axboe /* 609a8a45941SOmar Sandoval * Track sync issue, in case it takes a long time to complete. Allows us 610a8a45941SOmar Sandoval * to react quicker, if a sync IO takes a long time to complete. Note 611a8a45941SOmar Sandoval * that this is just a hint. The request can go away when it completes, 612a8a45941SOmar Sandoval * so it's important we never dereference it. We only use the address to 613a8a45941SOmar Sandoval * compare with, which is why we store the sync_issue time locally. 614e34cbd30SJens Axboe */ 615a8a45941SOmar Sandoval if (wbt_is_read(rq) && !rwb->sync_issue) { 616a8a45941SOmar Sandoval rwb->sync_cookie = rq; 617544ccc8dSOmar Sandoval rwb->sync_issue = rq->io_start_time_ns; 618e34cbd30SJens Axboe } 619e34cbd30SJens Axboe } 620e34cbd30SJens Axboe 621c83f536aSBart Van Assche static void wbt_requeue(struct rq_qos *rqos, struct request *rq) 622e34cbd30SJens Axboe { 623a7905043SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 624e34cbd30SJens Axboe if (!rwb_enabled(rwb)) 625e34cbd30SJens Axboe return; 626a8a45941SOmar Sandoval if (rq == rwb->sync_cookie) { 627e34cbd30SJens Axboe rwb->sync_issue = 0; 628e34cbd30SJens Axboe rwb->sync_cookie = NULL; 629e34cbd30SJens Axboe } 630e34cbd30SJens Axboe } 631e34cbd30SJens Axboe 632a7905043SJosef Bacik void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) 633e34cbd30SJens Axboe { 634a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 635a7905043SJosef Bacik if (rqos) 636a7905043SJosef Bacik RQWB(rqos)->wc = write_cache_on; 637e34cbd30SJens Axboe } 638e34cbd30SJens Axboe 639fa224eedSJens Axboe /* 6408330cdb0SJan Kara * Enable wbt if defaults are configured that way 6418330cdb0SJan Kara */ 6428330cdb0SJan Kara void wbt_enable_default(struct request_queue *q) 6438330cdb0SJan Kara { 644a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 6458330cdb0SJan Kara /* Throttling already enabled? */ 646a7905043SJosef Bacik if (rqos) 6478330cdb0SJan Kara return; 6488330cdb0SJan Kara 6498330cdb0SJan Kara /* Queue not registered? Maybe shutting down... */ 65058c898baSMing Lei if (!blk_queue_registered(q)) 6518330cdb0SJan Kara return; 6528330cdb0SJan Kara 653344e9ffcSJens Axboe if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ)) 6548330cdb0SJan Kara wbt_init(q); 6558330cdb0SJan Kara } 6568330cdb0SJan Kara EXPORT_SYMBOL_GPL(wbt_enable_default); 6578330cdb0SJan Kara 65880e091d1SJens Axboe u64 wbt_default_latency_nsec(struct request_queue *q) 65980e091d1SJens Axboe { 66080e091d1SJens Axboe /* 66180e091d1SJens Axboe * We default to 2msec for non-rotational storage, and 75msec 66280e091d1SJens Axboe * for rotational storage. 66380e091d1SJens Axboe */ 66480e091d1SJens Axboe if (blk_queue_nonrot(q)) 66580e091d1SJens Axboe return 2000000ULL; 66680e091d1SJens Axboe else 66780e091d1SJens Axboe return 75000000ULL; 66880e091d1SJens Axboe } 66980e091d1SJens Axboe 67099c749a4SJens Axboe static int wbt_data_dir(const struct request *rq) 67199c749a4SJens Axboe { 6725235553dSJens Axboe const int op = req_op(rq); 6735235553dSJens Axboe 6745235553dSJens Axboe if (op == REQ_OP_READ) 6755235553dSJens Axboe return READ; 676825843b0SJens Axboe else if (op_is_write(op)) 6775235553dSJens Axboe return WRITE; 6785235553dSJens Axboe 6795235553dSJens Axboe /* don't account */ 6805235553dSJens Axboe return -1; 68199c749a4SJens Axboe } 68299c749a4SJens Axboe 6839677a3e0STejun Heo static void wbt_queue_depth_changed(struct rq_qos *rqos) 6849677a3e0STejun Heo { 6859677a3e0STejun Heo RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q); 6869677a3e0STejun Heo __wbt_update_limits(RQWB(rqos)); 6879677a3e0STejun Heo } 6889677a3e0STejun Heo 689a7905043SJosef Bacik static void wbt_exit(struct rq_qos *rqos) 690a7905043SJosef Bacik { 691a7905043SJosef Bacik struct rq_wb *rwb = RQWB(rqos); 692a7905043SJosef Bacik struct request_queue *q = rqos->q; 693a7905043SJosef Bacik 694a7905043SJosef Bacik blk_stat_remove_callback(q, rwb->cb); 695a7905043SJosef Bacik blk_stat_free_callback(rwb->cb); 696a7905043SJosef Bacik kfree(rwb); 697a7905043SJosef Bacik } 698a7905043SJosef Bacik 699a7905043SJosef Bacik /* 700a7905043SJosef Bacik * Disable wbt, if enabled by default. 701a7905043SJosef Bacik */ 702a7905043SJosef Bacik void wbt_disable_default(struct request_queue *q) 703a7905043SJosef Bacik { 704a7905043SJosef Bacik struct rq_qos *rqos = wbt_rq_qos(q); 705a7905043SJosef Bacik struct rq_wb *rwb; 706a7905043SJosef Bacik if (!rqos) 707a7905043SJosef Bacik return; 708a7905043SJosef Bacik rwb = RQWB(rqos); 709544fbd16SMing Lei if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { 710544fbd16SMing Lei blk_stat_deactivate(rwb->cb); 711a7905043SJosef Bacik rwb->wb_normal = 0; 712a7905043SJosef Bacik } 713544fbd16SMing Lei } 714e815f404SJens Axboe EXPORT_SYMBOL_GPL(wbt_disable_default); 715a7905043SJosef Bacik 716d19afebcSMing Lei #ifdef CONFIG_BLK_DEBUG_FS 717d19afebcSMing Lei static int wbt_curr_win_nsec_show(void *data, struct seq_file *m) 718d19afebcSMing Lei { 719d19afebcSMing Lei struct rq_qos *rqos = data; 720d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 721d19afebcSMing Lei 722d19afebcSMing Lei seq_printf(m, "%llu\n", rwb->cur_win_nsec); 723d19afebcSMing Lei return 0; 724d19afebcSMing Lei } 725d19afebcSMing Lei 726d19afebcSMing Lei static int wbt_enabled_show(void *data, struct seq_file *m) 727d19afebcSMing Lei { 728d19afebcSMing Lei struct rq_qos *rqos = data; 729d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 730d19afebcSMing Lei 731d19afebcSMing Lei seq_printf(m, "%d\n", rwb->enable_state); 732d19afebcSMing Lei return 0; 733d19afebcSMing Lei } 734d19afebcSMing Lei 735d19afebcSMing Lei static int wbt_id_show(void *data, struct seq_file *m) 736d19afebcSMing Lei { 737d19afebcSMing Lei struct rq_qos *rqos = data; 738d19afebcSMing Lei 739d19afebcSMing Lei seq_printf(m, "%u\n", rqos->id); 740d19afebcSMing Lei return 0; 741d19afebcSMing Lei } 742d19afebcSMing Lei 743d19afebcSMing Lei static int wbt_inflight_show(void *data, struct seq_file *m) 744d19afebcSMing Lei { 745d19afebcSMing Lei struct rq_qos *rqos = data; 746d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 747d19afebcSMing Lei int i; 748d19afebcSMing Lei 749d19afebcSMing Lei for (i = 0; i < WBT_NUM_RWQ; i++) 750d19afebcSMing Lei seq_printf(m, "%d: inflight %d\n", i, 751d19afebcSMing Lei atomic_read(&rwb->rq_wait[i].inflight)); 752d19afebcSMing Lei return 0; 753d19afebcSMing Lei } 754d19afebcSMing Lei 755d19afebcSMing Lei static int wbt_min_lat_nsec_show(void *data, struct seq_file *m) 756d19afebcSMing Lei { 757d19afebcSMing Lei struct rq_qos *rqos = data; 758d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 759d19afebcSMing Lei 760d19afebcSMing Lei seq_printf(m, "%lu\n", rwb->min_lat_nsec); 761d19afebcSMing Lei return 0; 762d19afebcSMing Lei } 763d19afebcSMing Lei 764d19afebcSMing Lei static int wbt_unknown_cnt_show(void *data, struct seq_file *m) 765d19afebcSMing Lei { 766d19afebcSMing Lei struct rq_qos *rqos = data; 767d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 768d19afebcSMing Lei 769d19afebcSMing Lei seq_printf(m, "%u\n", rwb->unknown_cnt); 770d19afebcSMing Lei return 0; 771d19afebcSMing Lei } 772d19afebcSMing Lei 773d19afebcSMing Lei static int wbt_normal_show(void *data, struct seq_file *m) 774d19afebcSMing Lei { 775d19afebcSMing Lei struct rq_qos *rqos = data; 776d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 777d19afebcSMing Lei 778d19afebcSMing Lei seq_printf(m, "%u\n", rwb->wb_normal); 779d19afebcSMing Lei return 0; 780d19afebcSMing Lei } 781d19afebcSMing Lei 782d19afebcSMing Lei static int wbt_background_show(void *data, struct seq_file *m) 783d19afebcSMing Lei { 784d19afebcSMing Lei struct rq_qos *rqos = data; 785d19afebcSMing Lei struct rq_wb *rwb = RQWB(rqos); 786d19afebcSMing Lei 787d19afebcSMing Lei seq_printf(m, "%u\n", rwb->wb_background); 788d19afebcSMing Lei return 0; 789d19afebcSMing Lei } 790d19afebcSMing Lei 791d19afebcSMing Lei static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = { 792d19afebcSMing Lei {"curr_win_nsec", 0400, wbt_curr_win_nsec_show}, 793d19afebcSMing Lei {"enabled", 0400, wbt_enabled_show}, 794d19afebcSMing Lei {"id", 0400, wbt_id_show}, 795d19afebcSMing Lei {"inflight", 0400, wbt_inflight_show}, 796d19afebcSMing Lei {"min_lat_nsec", 0400, wbt_min_lat_nsec_show}, 797d19afebcSMing Lei {"unknown_cnt", 0400, wbt_unknown_cnt_show}, 798d19afebcSMing Lei {"wb_normal", 0400, wbt_normal_show}, 799d19afebcSMing Lei {"wb_background", 0400, wbt_background_show}, 800d19afebcSMing Lei {}, 801d19afebcSMing Lei }; 802d19afebcSMing Lei #endif 803d19afebcSMing Lei 804a7905043SJosef Bacik static struct rq_qos_ops wbt_rqos_ops = { 805a7905043SJosef Bacik .throttle = wbt_wait, 806a7905043SJosef Bacik .issue = wbt_issue, 807c1c80384SJosef Bacik .track = wbt_track, 808a7905043SJosef Bacik .requeue = wbt_requeue, 809a7905043SJosef Bacik .done = wbt_done, 810c1c80384SJosef Bacik .cleanup = wbt_cleanup, 8119677a3e0STejun Heo .queue_depth_changed = wbt_queue_depth_changed, 812a7905043SJosef Bacik .exit = wbt_exit, 813d19afebcSMing Lei #ifdef CONFIG_BLK_DEBUG_FS 814d19afebcSMing Lei .debugfs_attrs = wbt_debugfs_attrs, 815d19afebcSMing Lei #endif 816a7905043SJosef Bacik }; 817a7905043SJosef Bacik 8188054b89fSJens Axboe int wbt_init(struct request_queue *q) 819e34cbd30SJens Axboe { 820e34cbd30SJens Axboe struct rq_wb *rwb; 821e34cbd30SJens Axboe int i; 822e34cbd30SJens Axboe 823e34cbd30SJens Axboe rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); 824e34cbd30SJens Axboe if (!rwb) 825e34cbd30SJens Axboe return -ENOMEM; 826e34cbd30SJens Axboe 82799c749a4SJens Axboe rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); 82834dbad5dSOmar Sandoval if (!rwb->cb) { 82934dbad5dSOmar Sandoval kfree(rwb); 83034dbad5dSOmar Sandoval return -ENOMEM; 83134dbad5dSOmar Sandoval } 83234dbad5dSOmar Sandoval 833a7905043SJosef Bacik for (i = 0; i < WBT_NUM_RWQ; i++) 834a7905043SJosef Bacik rq_wait_init(&rwb->rq_wait[i]); 835e34cbd30SJens Axboe 836a7905043SJosef Bacik rwb->rqos.id = RQ_QOS_WBT; 837a7905043SJosef Bacik rwb->rqos.ops = &wbt_rqos_ops; 838a7905043SJosef Bacik rwb->rqos.q = q; 839e34cbd30SJens Axboe rwb->last_comp = rwb->last_issue = jiffies; 840e34cbd30SJens Axboe rwb->win_nsec = RWB_WINDOW_NSEC; 841d62118b6SJens Axboe rwb->enable_state = WBT_STATE_ON_DEFAULT; 842a7905043SJosef Bacik rwb->wc = 1; 843a7905043SJosef Bacik rwb->rq_depth.default_depth = RWB_DEF_DEPTH; 844a7905043SJosef Bacik __wbt_update_limits(rwb); 845e34cbd30SJens Axboe 846e34cbd30SJens Axboe /* 84734dbad5dSOmar Sandoval * Assign rwb and add the stats callback. 848e34cbd30SJens Axboe */ 849a7905043SJosef Bacik rq_qos_add(q, &rwb->rqos); 85034dbad5dSOmar Sandoval blk_stat_add_callback(q, rwb->cb); 851e34cbd30SJens Axboe 85280e091d1SJens Axboe rwb->min_lat_nsec = wbt_default_latency_nsec(q); 853e34cbd30SJens Axboe 8549677a3e0STejun Heo wbt_queue_depth_changed(&rwb->rqos); 855a7905043SJosef Bacik wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); 856e34cbd30SJens Axboe 857e34cbd30SJens Axboe return 0; 858e34cbd30SJens Axboe } 859