// SPDX-License-Identifier: GPL-2.0
/*
 * Buffered writeback throttling, loosely based on CoDel. We can't drop
 * packets for IO scheduling, so the logic is something like this:
 *
 * - Monitor latencies in a defined window of time.
 * - If the minimum latency in the above window exceeds some target, increment
 *   scaling step and scale down queue depth by a factor of 2x. The monitoring
 *   window is then shrunk to 100 / sqrt(scaling step + 1).
 * - For any window where we don't have solid data on what the latencies
 *   look like, retain status quo.
 * - If latencies look good, decrement scaling step.
 * - If we're only doing writes, allow the scaling step to go negative. This
 *   will temporarily boost write performance, snapping back to a stable
 *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
 *   positive scaling steps where we shrink the monitoring window, a negative
 *   scaling step retains the default step==0 window size.
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>

#include "blk-wbt.h"
#include "blk-rq-qos.h"

#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>

static inline void wbt_clear_state(struct request *rq)
{
	rq->wbt_flags = 0;
}

static inline enum wbt_flags wbt_flags(struct request *rq)
{
	return rq->wbt_flags;
}

static inline bool wbt_is_tracked(struct request *rq)
{
	return rq->wbt_flags & WBT_TRACKED;
}

static inline bool wbt_is_read(struct request *rq)
{
	return rq->wbt_flags & WBT_READ;
}

enum {
	/*
	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
	 * from here depending on device stats
	 */
	RWB_DEF_DEPTH	= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats if we don't meet this minimum
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows without enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};
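
/*
 * Worked example of the scaling described above (illustrative numbers,
 * assuming the device queue depth doesn't clamp us below RWB_DEF_DEPTH):
 *
 *	scale_step 0: depth 16, window 100msec
 *	scale_step 1: depth  8, window ~71msec  (100 / sqrt(2))
 *	scale_step 2: depth  4, window ~58msec  (100 / sqrt(3))
 *	scale_step 3: depth  2, window  50msec  (100 / sqrt(4))
 *
 * Negative steps instead grow the depth while keeping the default
 * 100msec window.
 */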

static inline bool rwb_enabled(struct rq_wb *rwb)
{
	return rwb && rwb->wb_normal != 0;
}

static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
{
	if (rwb_enabled(rwb)) {
		const unsigned long cur = jiffies;

		if (cur != *var)
			*var = cur;
	}
}

/*
 * If a task was rate throttled in balance_dirty_pages() within the last
 * second or so, use that to indicate a higher cleaning rate.
 */
static bool wb_recent_wait(struct rq_wb *rwb)
{
	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;

	return time_before(jiffies, wb->dirty_sleep + HZ);
}

static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	if (wb_acct & WBT_KSWAPD)
		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
	else if (wb_acct & WBT_DISCARD)
		return &rwb->rq_wait[WBT_RWQ_DISCARD];

	return &rwb->rq_wait[WBT_RWQ_BG];
}

static void rwb_wake_all(struct rq_wb *rwb)
{
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++) {
		struct rq_wait *rqw = &rwb->rq_wait[i];

		if (wq_has_sleeper(&rqw->wait))
			wake_up_all(&rqw->wait);
	}
}

static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * wbt got disabled with IO in flight. Wake up any potential
	 * waiters, we don't have to do more than that.
	 */
	if (unlikely(!rwb_enabled(rwb))) {
		rwb_wake_all(rwb);
		return;
	}

	/*
	 * For discards, our limit is always the background. For writes, if
	 * the device does write back caching, drop further down before we
	 * wake people up.
	 */
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

	/*
	 * Don't wake anyone up if we are above the normal limit.
	 */
	if (inflight && inflight >= limit)
		return;

	if (wq_has_sleeper(&rqw->wait)) {
		int diff = limit - inflight;

		if (!inflight || diff >= rwb->wb_background / 2)
			wake_up_all(&rqw->wait);
	}
}
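
/*
 * Note on the diff check above: wakeups are batched. A sleeper is only
 * woken once the queue has drained completely, or once at least half a
 * background depth worth of slots (wb_background / 2) has opened up,
 * rather than on every single completion.
 */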

static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct rq_wait *rqw;

	if (!(wb_acct & WBT_TRACKED))
		return;

	rqw = get_rq_wait(rwb, wb_acct);
	wbt_rqw_done(rwb, rqw, wb_acct);
}

/*
 * Called on completion of a request. Note that it's also called when
 * a request is merged, at the point where the request is freed.
 */
static void wbt_done(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!wbt_is_tracked(rq)) {
		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	} else {
		WARN_ON_ONCE(rq == rwb->sync_cookie);
		__wbt_done(rqos, wbt_flags(rq));
	}
	wbt_clear_state(rq);
}

static inline bool stat_sample_valid(struct blk_rq_stat *stat)
{
	/*
	 * We need at least one read sample, and a minimum of
	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
	 * that it's writes impacting us, and not just some sole read on
	 * a device that is in a lower power state.
	 */
	return (stat[READ].nr_samples >= 1 &&
		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
}

static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
	u64 now, issue = READ_ONCE(rwb->sync_issue);

	if (!issue || !rwb->sync_cookie)
		return 0;

	now = ktime_to_ns(ktime_get());
	return now - issue;
}

enum {
	LAT_OK = 1,
	LAT_UNKNOWN,
	LAT_UNKNOWN_WRITES,
	LAT_EXCEEDED,
};

static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;
	u64 thislat;

	/*
	 * If our stored sync issue exceeds the window size, or it
	 * exceeds our min target AND we haven't logged any entries,
	 * flag the latency as exceeded. wbt works off completion latencies,
	 * but for a flooded device, a single sync IO can take a long time
	 * to complete after being issued. If this time exceeds our
	 * monitoring window AND we didn't see any other completions in that
	 * window, then count that sync IO as a violation of the latency.
	 */
	thislat = rwb_sync_issue_lat(rwb);
	if (thislat > rwb->cur_win_nsec ||
	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
		trace_wbt_lat(bdi, thislat);
		return LAT_EXCEEDED;
	}

	/*
	 * We can't tell the read/write mix apart if the stats aren't valid.
	 */
	if (!stat_sample_valid(stat)) {
		/*
		 * If we had writes in this stat window and the window is
		 * current, we're only doing writes. If a task recently
		 * waited or still has writes in flight, consider us doing
		 * just writes as well.
		 */
		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
		    wbt_inflight(rwb))
			return LAT_UNKNOWN_WRITES;
		return LAT_UNKNOWN;
	}

	/*
	 * If the 'min' latency exceeds our target, step down.
	 */
	if (stat[READ].min > rwb->min_lat_nsec) {
		trace_wbt_lat(bdi, stat[READ].min);
		trace_wbt_stat(bdi, stat);
		return LAT_EXCEEDED;
	}

	if (rqd->scale_step)
		trace_wbt_stat(bdi, stat);

	return LAT_OK;
}
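
/*
 * In short: LAT_EXCEEDED if the read 'min' (or a stalled sync issue)
 * blew past the target, LAT_UNKNOWN_WRITES if the window looks
 * write-only, LAT_UNKNOWN if we simply lack samples, LAT_OK otherwise.
 * wb_timer_fn() below maps these onto scale up/down decisions.
 */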

static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;

	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
}

static void calc_wb_limits(struct rq_wb *rwb)
{
	if (rwb->min_lat_nsec == 0) {
		rwb->wb_normal = rwb->wb_background = 0;
	} else if (rwb->rq_depth.max_depth <= 2) {
		rwb->wb_normal = rwb->rq_depth.max_depth;
		rwb->wb_background = 1;
	} else {
		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
	}
}
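
/*
 * Example: with max_depth == 16, wb_normal = (16 + 1) / 2 = 8 and
 * wb_background = (16 + 3) / 4 = 4. Normal buffered writeback may use
 * half of the scaled depth, background writeback a quarter of it.
 */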

static void scale_up(struct rq_wb *rwb)
{
	if (!rq_depth_scale_up(&rwb->rq_depth))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_wake_all(rwb);
	rwb_trace_step(rwb, "scale up");
}

static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_trace_step(rwb, "scale down");
}

static void rwb_arm_timer(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	if (rqd->scale_step > 0) {
		/*
		 * We should speed this up, using some variant of a fast
		 * integer inverse square root calculation. Since we only do
		 * this for every window expiration, it's not a huge deal,
		 * though.
		 */
		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
					int_sqrt((rqd->scale_step + 1) << 8));
	} else {
		/*
		 * For step < 0, we don't want to increase/decrease the
		 * window size.
		 */
		rwb->cur_win_nsec = rwb->win_nsec;
	}

	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
}
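
/*
 * The shifts above are plain fixed point math: (win << 4) divided by
 * int_sqrt((step + 1) << 8) is win * 16 / (sqrt(step + 1) * 16), i.e.
 * win / sqrt(step + 1). E.g. for scale_step == 3 and a 100msec window:
 * 1600 / int_sqrt(1024) = 1600 / 32 = 50msec.
 */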

static void wb_timer_fn(struct blk_stat_callback *cb)
{
	struct rq_wb *rwb = cb->data;
	struct rq_depth *rqd = &rwb->rq_depth;
	unsigned int inflight = wbt_inflight(rwb);
	int status;

	status = latency_exceeded(rwb, cb->stat);

	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
			inflight);

	/*
	 * If we exceeded the latency target, step down. If we did not,
	 * step one level up. If we don't know enough to say either exceeded
	 * or ok, then don't do anything.
	 */
	switch (status) {
	case LAT_EXCEEDED:
		scale_down(rwb, true);
		break;
	case LAT_OK:
		scale_up(rwb);
		break;
	case LAT_UNKNOWN_WRITES:
		/*
		 * We started at the center step, and while we don't have
		 * a valid read/write sample, we do have writes going on.
		 * Allow step to go negative, to increase write perf.
		 */
		scale_up(rwb);
		break;
	case LAT_UNKNOWN:
		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
			break;
		/*
		 * We get here when we previously scaled the depth, and we
		 * currently don't have a valid read/write sample. For that
		 * case, slowly return to center state (step == 0).
		 */
		if (rqd->scale_step > 0)
			scale_up(rwb);
		else if (rqd->scale_step < 0)
			scale_down(rwb, false);
		break;
	default:
		break;
	}

	/*
	 * Re-arm timer, if we have IO in flight
	 */
	if (rqd->scale_step || inflight)
		rwb_arm_timer(rwb);
}

static void __wbt_update_limits(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	rqd->scale_step = 0;
	rqd->scaled_max = false;

	rq_depth_calc_max_depth(rqd);
	calc_wb_limits(rwb);

	rwb_wake_all(rwb);
}

void wbt_update_limits(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	if (!rqos)
		return;
	__wbt_update_limits(RQWB(rqos));
}

u64 wbt_get_min_lat(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	if (!rqos)
		return 0;
	return RQWB(rqos)->min_lat_nsec;
}

void wbt_set_min_lat(struct request_queue *q, u64 val)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	if (!rqos)
		return;
	RQWB(rqos)->min_lat_nsec = val;
	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
	__wbt_update_limits(RQWB(rqos));
}
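
/*
 * wbt_get_min_lat() and wbt_set_min_lat() back the sysfs attribute
 * /sys/block/<disk>/queue/wbt_lat_usec (exposed in usecs, stored here
 * in nsecs). Writing it marks the state WBT_STATE_ON_MANUAL, so
 * wbt_disable_default() further down won't override a user-chosen value.
 */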

static bool close_io(struct rq_wb *rwb)
{
	const unsigned long now = jiffies;

	return time_before(now, rwb->last_issue + HZ / 10) ||
		time_before(now, rwb->last_comp + HZ / 10);
}

#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)

static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
	unsigned int limit;

	/*
	 * If we got disabled, just return UINT_MAX. This ensures that
	 * we'll properly inc a new IO, and dec+wakeup at the end.
	 */
	if (!rwb_enabled(rwb))
		return UINT_MAX;

	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
		return rwb->wb_background;

	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then
	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
	 * that. If the write is marked as a background write, then use
	 * the idle limit, or go to normal if we haven't had competing
	 * IO for a bit.
	 */
	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
		limit = rwb->rq_depth.max_depth;
	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
		/*
		 * If less than 100ms since we completed unrelated IO,
		 * limit us to half the depth for background writeback.
		 */
		limit = rwb->wb_background;
	} else
		limit = rwb->wb_normal;

	return limit;
}

struct wbt_wait_data {
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;
	unsigned long rw;
};

static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;

	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
}

static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;

	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}
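
/*
 * The two callbacks above implement the rq_qos_wait() contract:
 * wbt_inflight_cb() is the admission test (take an inflight slot if
 * we're below the current limit), re-run whenever a waiter is woken,
 * and wbt_cleanup_cb() hands a slot back, e.g. when a waiter raced
 * with the wakeup path and ended up holding two tokens.
 */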

/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       unsigned long rw)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.rw = rw,
	};

	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}

static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE:
		/*
		 * Don't throttle WRITE_ODIRECT
		 */
		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
		    (REQ_SYNC | REQ_IDLE))
			return false;
		/* fallthrough */
	case REQ_OP_DISCARD:
		return true;
	default:
		return false;
	}
}

static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
{
	enum wbt_flags flags = 0;

	if (!rwb_enabled(rwb))
		return 0;

	if (bio_op(bio) == REQ_OP_READ) {
		flags = WBT_READ;
	} else if (wbt_should_throttle(rwb, bio)) {
		if (current_is_kswapd())
			flags |= WBT_KSWAPD;
		if (bio_op(bio) == REQ_OP_DISCARD)
			flags |= WBT_DISCARD;
		flags |= WBT_TRACKED;
	}
	return flags;
}

static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags = bio_to_wbt_flags(rwb, bio);

	__wbt_done(rqos, flags);
}

/*
 * May sleep, if we have exceeded the writeback limits. Only tracked
 * (throttled) writes and discards wait here; untracked reads are just
 * timestamped for the close_io() heuristic.
 */
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags;

	flags = bio_to_wbt_flags(rwb, bio);
	if (!(flags & WBT_TRACKED)) {
		if (flags & WBT_READ)
			wb_timestamp(rwb, &rwb->last_issue);
		return;
	}

	__wbt_wait(rwb, flags, bio->bi_opf);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);
}

static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);

	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
}

static void wbt_issue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!rwb_enabled(rwb))
		return;

	/*
	 * Track sync issue, in case it takes a long time to complete. Allows
	 * us to react more quickly to a flooded device. Note that this is
	 * just a hint. The request can go away when it completes, so it's
	 * important we never dereference it. We only use the address to
	 * compare with, which is why we store the sync_issue time locally.
	 */
	if (wbt_is_read(rq) && !rwb->sync_issue) {
		rwb->sync_cookie = rq;
		rwb->sync_issue = rq->io_start_time_ns;
	}
}

static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!rwb_enabled(rwb))
		return;
	if (rq == rwb->sync_cookie) {
		rwb->sync_issue = 0;
		rwb->sync_cookie = NULL;
	}
}

void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	if (rqos)
		RQWB(rqos)->wc = write_cache_on;
}

/*
 * Enable wbt if defaults are configured that way
 */
void wbt_enable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	/* Throttling already enabled? */
	if (rqos)
		return;

	/* Queue not registered? Maybe shutting down... */
	if (!blk_queue_registered(q))
		return;

	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
		wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);

u64 wbt_default_latency_nsec(struct request_queue *q)
{
	/*
	 * We default to 2msec for non-rotational storage, and 75msec
	 * for rotational storage.
	 */
	if (blk_queue_nonrot(q))
		return 2000000ULL;
	else
		return 75000000ULL;
}

static int wbt_data_dir(const struct request *rq)
{
	const int op = req_op(rq);

	if (op == REQ_OP_READ)
		return READ;
	else if (op_is_write(op))
		return WRITE;

	/* don't account */
	return -1;
}

static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
	__wbt_update_limits(RQWB(rqos));
}

static void wbt_exit(struct rq_qos *rqos)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct request_queue *q = rqos->q;

	blk_stat_remove_callback(q, rwb->cb);
	blk_stat_free_callback(rwb->cb);
	kfree(rwb);
}

/*
 * Disable wbt, if enabled by default.
 */
void wbt_disable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	struct rq_wb *rwb;

	if (!rqos)
		return;
	rwb = RQWB(rqos);
	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
		blk_stat_deactivate(rwb->cb);
		rwb->wb_normal = 0;
	}
}
EXPORT_SYMBOL_GPL(wbt_disable_default);

#ifdef CONFIG_BLK_DEBUG_FS
static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
	return 0;
}

static int wbt_enabled_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%d\n", rwb->enable_state);
	return 0;
}

static int wbt_id_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;

	seq_printf(m, "%u\n", rqos->id);
	return 0;
}

static int wbt_inflight_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++)
		seq_printf(m, "%d: inflight %d\n", i,
			   atomic_read(&rwb->rq_wait[i].inflight));
	return 0;
}

static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	/* min_lat_nsec is a u64, don't rely on it fitting in a long */
	seq_printf(m, "%llu\n", rwb->min_lat_nsec);
	return 0;
}

static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->unknown_cnt);
	return 0;
}

static int wbt_normal_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_normal);
	return 0;
}

static int wbt_background_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_background);
	return 0;
}

static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
	{"enabled", 0400, wbt_enabled_show},
	{"id", 0400, wbt_id_show},
	{"inflight", 0400, wbt_inflight_show},
	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
	{"wb_normal", 0400, wbt_normal_show},
	{"wb_background", 0400, wbt_background_show},
	{},
};
#endif

static struct rq_qos_ops wbt_rqos_ops = {
	.throttle = wbt_wait,
	.issue = wbt_issue,
	.track = wbt_track,
	.requeue = wbt_requeue,
	.done = wbt_done,
	.cleanup = wbt_cleanup,
	.queue_depth_changed = wbt_queue_depth_changed,
	.exit = wbt_exit,
#ifdef CONFIG_BLK_DEBUG_FS
	.debugfs_attrs = wbt_debugfs_attrs,
#endif
};

int wbt_init(struct request_queue *q)
{
	struct rq_wb *rwb;
	int i;

	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
	if (!rwb)
		return -ENOMEM;

	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
	if (!rwb->cb) {
		kfree(rwb);
		return -ENOMEM;
	}

	for (i = 0; i < WBT_NUM_RWQ; i++)
		rq_wait_init(&rwb->rq_wait[i]);

	rwb->rqos.id = RQ_QOS_WBT;
	rwb->rqos.ops = &wbt_rqos_ops;
	rwb->rqos.q = q;
	rwb->last_comp = rwb->last_issue = jiffies;
	rwb->win_nsec = RWB_WINDOW_NSEC;
	rwb->enable_state = WBT_STATE_ON_DEFAULT;
	rwb->wc = 1;
	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
	__wbt_update_limits(rwb);

	/*
	 * Assign rwb and add the stats callback.
	 */
	rq_qos_add(q, &rwb->rqos);
	blk_stat_add_callback(q, rwb->cb);

	rwb->min_lat_nsec = wbt_default_latency_nsec(q);

	wbt_queue_depth_changed(&rwb->rqos);
	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));

	return 0;
}