/*
 * buffered writeback throttling. loosely based on CoDel. We can't drop
 * packets for IO scheduling, so the logic is something like this:
 *
 * - Monitor latencies in a defined window of time.
 * - If the minimum latency in the above window exceeds some target, increment
 *   scaling step and scale down queue depth by a factor of 2x. The monitoring
 *   window is then shrunk to 100 / sqrt(scaling step + 1).
 * - For any window where we don't have solid data on what the latencies
 *   look like, retain status quo.
 * - If latencies look good, decrement scaling step.
 * - If we're only doing writes, allow the scaling step to go negative. This
 *   will temporarily boost write performance, snapping back to a stable
 *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
 *   positive scaling steps where we shrink the monitoring window, a negative
 *   scaling step retains the default step==0 window size.
 *
 * Copyright (C) 2016 Jens Axboe
 *
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>

#include "blk-wbt.h"
#include "blk-rq-qos.h"

#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>

static inline void wbt_clear_state(struct request *rq)
{
	rq->wbt_flags = 0;
}

static inline enum wbt_flags wbt_flags(struct request *rq)
{
	return rq->wbt_flags;
}

static inline bool wbt_is_tracked(struct request *rq)
{
	return rq->wbt_flags & WBT_TRACKED;
}

static inline bool wbt_is_read(struct request *rq)
{
	return rq->wbt_flags & WBT_READ;
}

enum {
	/*
	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
	 * from here depending on device stats
	 */
	RWB_DEF_DEPTH		= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats, if we don't meet this minimum
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows with not enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};

static inline bool rwb_enabled(struct rq_wb *rwb)
{
	return rwb && rwb->wb_normal != 0;
}

static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
{
	if (rwb_enabled(rwb)) {
		const unsigned long cur = jiffies;

		if (cur != *var)
			*var = cur;
	}
}
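/*
 * Illustrative note (editorial addition, not from the original source):
 * wb_timestamp() is the helper behind rwb->last_issue and rwb->last_comp.
 * Callers stamp the current jiffies value (only writing the variable when
 * it actually changed, presumably to avoid dirtying the cacheline on every
 * IO), and close_io() later in this file compares both stamps against "now"
 * to decide whether unrelated IO happened within the last HZ / 10 jiffies
 * (~100ms). Rough sketch of the intended usage:
 *
 *	wb_timestamp(rwb, &rwb->last_issue);	// non-tracked read issued
 *	wb_timestamp(rwb, &rwb->last_comp);	// non-tracked read completed
 *	// ~100ms later close_io() returns false again, so background
 *	// writeback may go back to the normal depth limit.
 */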
/*
 * If a task was rate throttled in balance_dirty_pages() within the last
 * second or so, use that to indicate a higher cleaning rate.
 */
static bool wb_recent_wait(struct rq_wb *rwb)
{
	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;

	return time_before(jiffies, wb->dirty_sleep + HZ);
}

static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	if (wb_acct & WBT_KSWAPD)
		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
	else if (wb_acct & WBT_DISCARD)
		return &rwb->rq_wait[WBT_RWQ_DISCARD];

	return &rwb->rq_wait[WBT_RWQ_BG];
}

static void rwb_wake_all(struct rq_wb *rwb)
{
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++) {
		struct rq_wait *rqw = &rwb->rq_wait[i];

		if (wq_has_sleeper(&rqw->wait))
			wake_up_all(&rqw->wait);
	}
}

static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * wbt got disabled with IO in flight. Wake up any potential
	 * waiters, we don't have to do more than that.
	 */
	if (unlikely(!rwb_enabled(rwb))) {
		rwb_wake_all(rwb);
		return;
	}

	/*
	 * For discards, our limit is always the background. For writes, if
	 * the device does write back caching, drop further down before we
	 * wake people up.
	 */
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

	/*
	 * Don't wake anyone up if we are above the normal limit.
	 */
	if (inflight && inflight >= limit)
		return;

	if (wq_has_sleeper(&rqw->wait)) {
		int diff = limit - inflight;

		if (!inflight || diff >= rwb->wb_background / 2)
			wake_up_all(&rqw->wait);
	}
}

static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct rq_wait *rqw;

	if (!(wb_acct & WBT_TRACKED))
		return;

	rqw = get_rq_wait(rwb, wb_acct);
	wbt_rqw_done(rwb, rqw, wb_acct);
}

/*
 * Called on completion of a request. Note that it's also called when
 * a request is merged, at the point where the request gets freed.
 */
static void wbt_done(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!wbt_is_tracked(rq)) {
		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	} else {
		WARN_ON_ONCE(rq == rwb->sync_cookie);
		__wbt_done(rqos, wbt_flags(rq));
	}
	wbt_clear_state(rq);
}
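/*
 * Worked example (editorial addition; numbers follow from calc_wb_limits()
 * below with max_depth == 16, i.e. wb_normal == 8 and wb_background == 4):
 * when tracked writes complete in wbt_rqw_done() above and the limit is
 * wb_normal,
 *
 *	inflight 9 -> 8:  8 >= limit, no wakeup
 *	inflight 8 -> 7:  diff = 1 < wb_background / 2 (= 2), no wakeup
 *	inflight 7 -> 6:  diff = 2 >= 2, wake_up_all()
 *
 * so waiters are woken in batches once a reasonable chunk of the limit has
 * freed up, rather than on every single completion.
 */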
static inline bool stat_sample_valid(struct blk_rq_stat *stat)
{
	/*
	 * We need at least one read sample, and a minimum of
	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
	 * that it's writes impacting us, and not just some sole read on
	 * a device that is in a lower power state.
	 */
	return (stat[READ].nr_samples >= 1 &&
		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
}

static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
	u64 now, issue = READ_ONCE(rwb->sync_issue);

	if (!issue || !rwb->sync_cookie)
		return 0;

	now = ktime_to_ns(ktime_get());
	return now - issue;
}

enum {
	LAT_OK = 1,
	LAT_UNKNOWN,
	LAT_UNKNOWN_WRITES,
	LAT_EXCEEDED,
};

static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;
	u64 thislat;

	/*
	 * If our stored sync issue exceeds the window size, or it
	 * exceeds our min target AND we haven't logged any entries,
	 * flag the latency as exceeded. wbt works off completion latencies,
	 * but for a flooded device, a single sync IO can take a long time
	 * to complete after being issued. If this time exceeds our
	 * monitoring window AND we didn't see any other completions in that
	 * window, then count that sync IO as a violation of the latency.
	 */
	thislat = rwb_sync_issue_lat(rwb);
	if (thislat > rwb->cur_win_nsec ||
	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
		trace_wbt_lat(bdi, thislat);
		return LAT_EXCEEDED;
	}

	/*
	 * No read/write mix, if stat isn't valid
	 */
	if (!stat_sample_valid(stat)) {
		/*
		 * If we had writes in this stat window and the window is
		 * current, we're only doing writes. If a task recently
		 * waited or still has writes in flight, consider us doing
		 * just writes as well.
		 */
		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
		    wbt_inflight(rwb))
			return LAT_UNKNOWN_WRITES;
		return LAT_UNKNOWN;
	}

	/*
	 * If the 'min' latency exceeds our target, step down.
	 */
	if (stat[READ].min > rwb->min_lat_nsec) {
		trace_wbt_lat(bdi, stat[READ].min);
		trace_wbt_stat(bdi, stat);
		return LAT_EXCEEDED;
	}

	if (rqd->scale_step)
		trace_wbt_stat(bdi, stat);

	return LAT_OK;
}

static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;

	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
}

static void calc_wb_limits(struct rq_wb *rwb)
{
	if (rwb->min_lat_nsec == 0) {
		rwb->wb_normal = rwb->wb_background = 0;
	} else if (rwb->rq_depth.max_depth <= 2) {
		rwb->wb_normal = rwb->rq_depth.max_depth;
		rwb->wb_background = 1;
	} else {
		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
	}
}

static void scale_up(struct rq_wb *rwb)
{
	rq_depth_scale_up(&rwb->rq_depth);
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_wake_all(rwb);
	rwb_trace_step(rwb, "scale up");
}

static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_trace_step(rwb, "scale down");
}
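/*
 * Worked example (editorial addition): rwb_arm_timer() below shrinks the
 * monitoring window to roughly win_nsec / sqrt(scale_step + 1), matching the
 * "100 / sqrt(scaling step + 1)" description at the top of this file. The
 * fixed-point form (win_nsec << 4) / int_sqrt((scale_step + 1) << 8) reduces
 * to the same thing, since int_sqrt((step + 1) << 8) is ~16 * sqrt(step + 1).
 * With the default 100ms window:
 *
 *	scale_step <= 0 -> 100ms (default window, shrink path skipped)
 *	scale_step  1   -> ~70ms (100 / sqrt(2))
 *	scale_step  3   -> ~50ms (100 / sqrt(4))
 *	scale_step  8   -> ~33ms (100 / sqrt(9))
 */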
static void rwb_arm_timer(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	if (rqd->scale_step > 0) {
		/*
		 * We should speed this up, using some variant of a fast
		 * integer inverse square root calculation. Since we only do
		 * this for every window expiration, it's not a huge deal,
		 * though.
		 */
		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
					int_sqrt((rqd->scale_step + 1) << 8));
	} else {
		/*
		 * For step < 0, we don't want to increase/decrease the
		 * window size.
		 */
		rwb->cur_win_nsec = rwb->win_nsec;
	}

	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
}

static void wb_timer_fn(struct blk_stat_callback *cb)
{
	struct rq_wb *rwb = cb->data;
	struct rq_depth *rqd = &rwb->rq_depth;
	unsigned int inflight = wbt_inflight(rwb);
	int status;

	status = latency_exceeded(rwb, cb->stat);

	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
			inflight);

	/*
	 * If we exceeded the latency target, step down. If we did not,
	 * step one level up. If we don't know enough to say either exceeded
	 * or ok, then don't do anything.
	 */
	switch (status) {
	case LAT_EXCEEDED:
		scale_down(rwb, true);
		break;
	case LAT_OK:
		scale_up(rwb);
		break;
	case LAT_UNKNOWN_WRITES:
		/*
		 * We started at the center step and don't have a valid
		 * read/write sample, but we do have writes going on.
		 * Allow the step to go negative, to increase write perf.
		 */
		scale_up(rwb);
		break;
	case LAT_UNKNOWN:
		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
			break;
		/*
		 * We get here when we previously scaled the depth, and we
		 * currently don't have a valid read/write sample. For that
		 * case, slowly return to the center state (step == 0).
		 */
		if (rqd->scale_step > 0)
			scale_up(rwb);
		else if (rqd->scale_step < 0)
			scale_down(rwb, false);
		break;
	default:
		break;
	}

	/*
	 * Re-arm timer, if we have IO in flight
	 */
	if (rqd->scale_step || inflight)
		rwb_arm_timer(rwb);
}

static void __wbt_update_limits(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	rqd->scale_step = 0;
	rqd->scaled_max = false;

	rq_depth_calc_max_depth(rqd);
	calc_wb_limits(rwb);

	rwb_wake_all(rwb);
}

void wbt_update_limits(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return;
	__wbt_update_limits(RQWB(rqos));
}

u64 wbt_get_min_lat(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return 0;
	return RQWB(rqos)->min_lat_nsec;
}

void wbt_set_min_lat(struct request_queue *q, u64 val)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return;
	RQWB(rqos)->min_lat_nsec = val;
	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
	__wbt_update_limits(RQWB(rqos));
}

static bool close_io(struct rq_wb *rwb)
{
	const unsigned long now = jiffies;

	return time_before(now, rwb->last_issue + HZ / 10) ||
		time_before(now, rwb->last_comp + HZ / 10);
}
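/*
 * Illustrative summary (editorial addition) of how get_limit() below picks a
 * depth for a tracked write, again assuming max_depth == 16, wb_normal == 8,
 * wb_background == 4:
 *
 *	REQ_SYNC/REQ_META/REQ_PRIO, kswapd, or a recent
 *	balance_dirty_pages() sleep			-> 16 (max_depth)
 *	REQ_BACKGROUND, or other IO seen within the
 *	last ~100ms per close_io()			->  4 (wb_background)
 *	everything else					->  8 (wb_normal)
 *
 * Discards are capped at wb_background up front, and a disabled rwb returns
 * UINT_MAX so the inc/dec accounting still balances out.
 */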
#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)

static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
	unsigned int limit;

	/*
	 * If we got disabled, just return UINT_MAX. This ensures that
	 * we'll properly inc a new IO, and dec+wakeup at the end.
	 */
	if (!rwb_enabled(rwb))
		return UINT_MAX;

	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
		return rwb->wb_background;

	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then
	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
	 * that. If the write is marked as a background write, then use
	 * the idle limit, or go to normal if we haven't had competing
	 * IO for a bit.
	 */
	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
		limit = rwb->rq_depth.max_depth;
	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
		/*
		 * If less than 100ms since we completed unrelated IO,
		 * limit us to half the depth for background writeback.
		 */
		limit = rwb->wb_background;
	} else
		limit = rwb->wb_normal;

	return limit;
}

struct wbt_wait_data {
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;
	unsigned long rw;
};

static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
}

static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}

/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       unsigned long rw)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.rw = rw,
	};

	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}

static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE:
		/*
		 * Don't throttle WRITE_ODIRECT
		 */
		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
		    (REQ_SYNC | REQ_IDLE))
			return false;
		/* fallthrough */
	case REQ_OP_DISCARD:
		return true;
	default:
		return false;
	}
}

static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
{
	enum wbt_flags flags = 0;

	if (!rwb_enabled(rwb))
		return 0;

	if (bio_op(bio) == REQ_OP_READ) {
		flags = WBT_READ;
	} else if (wbt_should_throttle(rwb, bio)) {
		if (current_is_kswapd())
			flags |= WBT_KSWAPD;
		if (bio_op(bio) == REQ_OP_DISCARD)
			flags |= WBT_DISCARD;
		flags |= WBT_TRACKED;
	}
	return flags;
}

static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags = bio_to_wbt_flags(rwb, bio);
	__wbt_done(rqos, flags);
}
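/*
 * Illustrative flow (editorial addition, derived from the code in this
 * file): for a typical background buffered-writeback bio, the throttle path
 * looks roughly like
 *
 *	wbt_wait(rqos, bio)
 *	  -> bio_to_wbt_flags()	marks it WBT_TRACKED
 *	  -> __wbt_wait()	picks the right rq_wait via get_rq_wait()
 *	     -> rq_qos_wait()	sleeps, retrying wbt_inflight_cb() until
 *				the inflight count is below get_limit()
 *	  -> rwb_arm_timer()	if the stats window isn't running yet
 *
 * and the matching decrement/wakeup happens from wbt_done() ->
 * __wbt_done() -> wbt_rqw_done() when the tracked request completes.
 */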
/*
 * May sleep, if we have exceeded the writeback limits. If we do sleep, we
 * are woken up again once enough tracked IO has completed (or wbt has been
 * disabled) and the inflight count drops back below the limit for this
 * class of IO.
 */
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags;

	flags = bio_to_wbt_flags(rwb, bio);
	if (!(flags & WBT_TRACKED)) {
		if (flags & WBT_READ)
			wb_timestamp(rwb, &rwb->last_issue);
		return;
	}

	__wbt_wait(rwb, flags, bio->bi_opf);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);
}

static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
}

static void wbt_issue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!rwb_enabled(rwb))
		return;

	/*
	 * Track the issue time of a sync read, so we can react more quickly
	 * if it takes a long time to complete. Note that this is just a
	 * hint. The request can go away when it completes, so it's important
	 * we never dereference it. We only use the address to compare with,
	 * which is why we store the sync_issue time locally.
	 */
	if (wbt_is_read(rq) && !rwb->sync_issue) {
		rwb->sync_cookie = rq;
		rwb->sync_issue = rq->io_start_time_ns;
	}
}

static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);
	if (!rwb_enabled(rwb))
		return;
	if (rq == rwb->sync_cookie) {
		rwb->sync_issue = 0;
		rwb->sync_cookie = NULL;
	}
}

void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (rqos) {
		RQWB(rqos)->rq_depth.queue_depth = depth;
		__wbt_update_limits(RQWB(rqos));
	}
}

void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (rqos)
		RQWB(rqos)->wc = write_cache_on;
}

/*
 * Enable wbt if defaults are configured that way
 */
void wbt_enable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	/* Throttling already enabled? */
	if (rqos)
		return;

	/* Queue not registered? Maybe shutting down... */
	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
		return;

	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
		wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);

u64 wbt_default_latency_nsec(struct request_queue *q)
{
	/*
	 * We default to 2msec for non-rotational storage, and 75msec
	 * for rotational storage.
	 */
	if (blk_queue_nonrot(q))
		return 2000000ULL;
	else
		return 75000000ULL;
}

static int wbt_data_dir(const struct request *rq)
{
	const int op = req_op(rq);

	if (op == REQ_OP_READ)
		return READ;
	else if (op_is_write(op))
		return WRITE;

	/* don't account */
	return -1;
}

static void wbt_exit(struct rq_qos *rqos)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct request_queue *q = rqos->q;

	blk_stat_remove_callback(q, rwb->cb);
	blk_stat_free_callback(rwb->cb);
	kfree(rwb);
}
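/*
 * Illustrative note (editorial addition, an assumption drawn from how the
 * helpers in this file interact): wbt_enable_default() above only creates a
 * new rq_wb when none is attached and CONFIG_BLK_WBT_MQ is enabled, while
 * wbt_disable_default() below only switches throttling off while
 * enable_state is still WBT_STATE_ON_DEFAULT. A value set by the user
 * through wbt_set_min_lat() flips the state to WBT_STATE_ON_MANUAL, so a
 * caller that wants wbt out of the way (for example an IO scheduler doing
 * its own throttling) will not override an explicit user setting.
 */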
/*
 * Disable wbt, if enabled by default.
 */
void wbt_disable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	struct rq_wb *rwb;
	if (!rqos)
		return;
	rwb = RQWB(rqos);
	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
		blk_stat_deactivate(rwb->cb);
		rwb->wb_normal = 0;
	}
}
EXPORT_SYMBOL_GPL(wbt_disable_default);

#ifdef CONFIG_BLK_DEBUG_FS
static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
	return 0;
}

static int wbt_enabled_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%d\n", rwb->enable_state);
	return 0;
}

static int wbt_id_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;

	seq_printf(m, "%u\n", rqos->id);
	return 0;
}

static int wbt_inflight_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++)
		seq_printf(m, "%d: inflight %d\n", i,
			   atomic_read(&rwb->rq_wait[i].inflight));
	return 0;
}

static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->min_lat_nsec);
	return 0;
}

static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->unknown_cnt);
	return 0;
}

static int wbt_normal_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_normal);
	return 0;
}

static int wbt_background_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_background);
	return 0;
}

static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
	{"enabled", 0400, wbt_enabled_show},
	{"id", 0400, wbt_id_show},
	{"inflight", 0400, wbt_inflight_show},
	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
	{"wb_normal", 0400, wbt_normal_show},
	{"wb_background", 0400, wbt_background_show},
	{},
};
#endif

static struct rq_qos_ops wbt_rqos_ops = {
	.throttle = wbt_wait,
	.issue = wbt_issue,
	.track = wbt_track,
	.requeue = wbt_requeue,
	.done = wbt_done,
	.cleanup = wbt_cleanup,
	.exit = wbt_exit,
#ifdef CONFIG_BLK_DEBUG_FS
	.debugfs_attrs = wbt_debugfs_attrs,
#endif
};
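/*
 * Rough lifecycle sketch (editorial addition; an assumption about how
 * blk-mq drives these rq_qos hooks, since the call sites live outside this
 * file): .throttle runs as a bio enters the queue and may sleep, .track
 * copies the wbt flags onto the request, .issue records the dispatch time
 * of a tracked sync read, .requeue clears that record if the request
 * bounces back, .done and .cleanup release the inflight accounting on
 * completion (or for a bio that never became a request), and .exit tears
 * everything down when the policy is removed from the queue.
 */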
int wbt_init(struct request_queue *q)
{
	struct rq_wb *rwb;
	int i;

	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
	if (!rwb)
		return -ENOMEM;

	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
	if (!rwb->cb) {
		kfree(rwb);
		return -ENOMEM;
	}

	for (i = 0; i < WBT_NUM_RWQ; i++)
		rq_wait_init(&rwb->rq_wait[i]);

	rwb->rqos.id = RQ_QOS_WBT;
	rwb->rqos.ops = &wbt_rqos_ops;
	rwb->rqos.q = q;
	rwb->last_comp = rwb->last_issue = jiffies;
	rwb->win_nsec = RWB_WINDOW_NSEC;
	rwb->enable_state = WBT_STATE_ON_DEFAULT;
	rwb->wc = 1;
	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
	__wbt_update_limits(rwb);

	/*
	 * Assign rwb and add the stats callback.
	 */
	rq_qos_add(q, &rwb->rqos);
	blk_stat_add_callback(q, rwb->cb);

	rwb->min_lat_nsec = wbt_default_latency_nsec(q);

	wbt_set_queue_depth(q, blk_queue_depth(q));
	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));

	return 0;
}
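/*
 * Usage note (editorial addition, an assumption about the surrounding
 * plumbing rather than part of this file): the latency target picked by
 * wbt_default_latency_nsec() is what users tune at runtime. The sysfs knob
 * lives in blk-sysfs.c and takes microseconds, ending up in
 * wbt_set_min_lat() / wbt_get_min_lat() above, e.g.:
 *
 *	echo 75000 > /sys/block/<dev>/queue/wbt_lat_usec	(75ms target)
 *
 * Writing through that knob also marks the state WBT_STATE_ON_MANUAL, so
 * the value sticks instead of being reset by wbt_disable_default().
 */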