/*
 * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 * for the blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
                                           by the above parameters. For throughput. */

struct deadline_data {
        /*
         * run time data
         */

        /*
         * requests (deadline_rq s) are present on both sort_list and fifo_list
         */
        struct rb_root sort_list[2];
        struct list_head fifo_list[2];

        /*
         * next in sort order. read, write or both are NULL
         */
        struct request *next_rq[2];
        unsigned int batching;          /* number of sequential requests made */
        unsigned int starved;           /* times reads have starved writes */

        /*
         * settings that change how the i/o scheduler behaves
         */
        int fifo_expire[2];
        int fifo_batch;
        int writes_starved;
        int front_merges;

        spinlock_t lock;
        struct list_head dispatch;
};

static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
        return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
        struct rb_node *node = rb_next(&rq->rb_node);

        if (node)
                return rb_entry_rq(node);

        return NULL;
}

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
        struct rb_root *root = deadline_rb_root(dd, rq);

        elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
        const int data_dir = rq_data_dir(rq);

        if (dd->next_rq[data_dir] == rq)
                dd->next_rq[data_dir] = deadline_latter_request(rq);

        elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
 * remove rq from rbtree and fifo.
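 *
 * Called when a request leaves the scheduler: either because it has been
 * selected for dispatch (deadline_move_request()) or because it was merged
 * into another request (dd_merged_requests()).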
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
        struct deadline_data *dd = q->elevator->elevator_data;

        list_del_init(&rq->queuelist);

        /*
         * We might not be on the rbtree, if we are doing an insert merge
         */
        if (!RB_EMPTY_NODE(&rq->rb_node))
                deadline_del_rq_rb(dd, rq);

        elv_rqhash_del(q, rq);
        if (q->last_merge == rq)
                q->last_merge = NULL;
}

static void dd_request_merged(struct request_queue *q, struct request *req,
                              enum elv_merge type)
{
        struct deadline_data *dd = q->elevator->elevator_data;

        /*
         * if the merge was a front merge, we need to reposition request
         */
        if (type == ELEVATOR_FRONT_MERGE) {
                elv_rb_del(deadline_rb_root(dd, req), req);
                deadline_add_rq_rb(dd, req);
        }
}

static void dd_merged_requests(struct request_queue *q, struct request *req,
                               struct request *next)
{
        /*
         * if next expires before rq, assign its expire time to rq
         * and move into next position (next will be deleted) in fifo
         */
        if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
                if (time_before((unsigned long)next->fifo_time,
                                (unsigned long)req->fifo_time)) {
                        list_move(&req->queuelist, &next->queuelist);
                        req->fifo_time = next->fifo_time;
                }
        }

        /*
         * kill knowledge of next, this one is a goner
         */
        deadline_remove_request(q, next);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
        const int data_dir = rq_data_dir(rq);

        dd->next_rq[READ] = NULL;
        dd->next_rq[WRITE] = NULL;
        dd->next_rq[data_dir] = deadline_latter_request(rq);

        /*
         * take it off the sort and fifo list
         */
        deadline_remove_request(rq->q, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
        struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

        /*
         * rq is expired!
         */
        if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
                return 1;

        return 0;
}

/*
 * __dd_dispatch_request selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
        struct deadline_data *dd = hctx->queue->elevator->elevator_data;
        struct request *rq;
        bool reads, writes;
        int data_dir;

        if (!list_empty(&dd->dispatch)) {
                rq = list_first_entry(&dd->dispatch, struct request, queuelist);
                list_del_init(&rq->queuelist);
                goto done;
        }

        reads = !list_empty(&dd->fifo_list[READ]);
        writes = !list_empty(&dd->fifo_list[WRITE]);

        /*
         * batches are currently reads XOR writes
         */
        if (dd->next_rq[WRITE])
                rq = dd->next_rq[WRITE];
        else
                rq = dd->next_rq[READ];

        if (rq && dd->batching < dd->fifo_batch)
                /* we have a next request and are still entitled to batch */
                goto dispatch_request;

        /*
         * at this point we are not running a batch. select the appropriate
         * data direction (read / write)
         */
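        /*
         * Reads are preferred. dd->starved counts how many times a read was
         * selected while writes were also waiting; once it reaches
         * dd->writes_starved, the write direction is chosen instead and the
         * counter is reset.
         */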

        if (reads) {
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

                if (writes && (dd->starved++ >= dd->writes_starved))
                        goto dispatch_writes;

                data_dir = READ;

                goto dispatch_find_request;
        }

        /*
         * either there are no reads, or writes have been starved
         */

        if (writes) {
dispatch_writes:
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

                dd->starved = 0;

                data_dir = WRITE;

                goto dispatch_find_request;
        }

        return NULL;

dispatch_find_request:
        /*
         * we are not running a batch, find best request for selected data_dir
         */
        if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
                /*
                 * A deadline has expired, the last request was in the other
                 * direction, or we have run out of higher-sectored requests.
                 * Start again from the request with the earliest expiry time.
                 */
                rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
        } else {
                /*
                 * The last req was the same dir and we have a next request in
                 * sort order. No expired requests so continue on from here.
                 */
                rq = dd->next_rq[data_dir];
        }

        dd->batching = 0;

dispatch_request:
        /*
         * rq is the selected appropriate request.
         */
        dd->batching++;
        deadline_move_request(dd, rq);
done:
        rq->rq_flags |= RQF_STARTED;
        return rq;
}

static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
        struct deadline_data *dd = hctx->queue->elevator->elevator_data;
        struct request *rq;

        spin_lock(&dd->lock);
        rq = __dd_dispatch_request(hctx);
        spin_unlock(&dd->lock);

        return rq;
}

static void dd_exit_queue(struct elevator_queue *e)
{
        struct deadline_data *dd = e->elevator_data;

        BUG_ON(!list_empty(&dd->fifo_list[READ]));
        BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

        kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
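 *
 * Called when the scheduler is attached to a request queue. The defaults come
 * from the module-level tunables above and can be changed at runtime through
 * the sysfs attributes defined below.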
 */
static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
{
        struct deadline_data *dd;
        struct elevator_queue *eq;

        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
        if (!dd) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }
        eq->elevator_data = dd;

        INIT_LIST_HEAD(&dd->fifo_list[READ]);
        INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
        dd->sort_list[READ] = RB_ROOT;
        dd->sort_list[WRITE] = RB_ROOT;
        dd->fifo_expire[READ] = read_expire;
        dd->fifo_expire[WRITE] = write_expire;
        dd->writes_starved = writes_starved;
        dd->front_merges = 1;
        dd->fifo_batch = fifo_batch;
        spin_lock_init(&dd->lock);
        INIT_LIST_HEAD(&dd->dispatch);

        q->elevator = eq;
        return 0;
}

static int dd_request_merge(struct request_queue *q, struct request **rq,
                            struct bio *bio)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        sector_t sector = bio_end_sector(bio);
        struct request *__rq;

        if (!dd->front_merges)
                return ELEVATOR_NO_MERGE;

        __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
        if (__rq) {
                BUG_ON(sector != blk_rq_pos(__rq));

                if (elv_bio_merge_ok(__rq, bio)) {
                        *rq = __rq;
                        return ELEVATOR_FRONT_MERGE;
                }
        }

        return ELEVATOR_NO_MERGE;
}

static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
{
        struct request_queue *q = hctx->queue;
        struct deadline_data *dd = q->elevator->elevator_data;
        struct request *free = NULL;
        bool ret;

        spin_lock(&dd->lock);
        ret = blk_mq_sched_try_merge(q, bio, &free);
        spin_unlock(&dd->lock);

        if (free)
                blk_mq_free_request(free);

        return ret;
}

/*
 * add rq to rbtree and fifo
 */
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                              bool at_head)
{
        struct request_queue *q = hctx->queue;
        struct deadline_data *dd = q->elevator->elevator_data;
        const int data_dir = rq_data_dir(rq);

        if (blk_mq_sched_try_insert_merge(q, rq))
                return;

        blk_mq_sched_request_inserted(rq);

        if (at_head || blk_rq_is_passthrough(rq)) {
                if (at_head)
                        list_add(&rq->queuelist, &dd->dispatch);
                else
                        list_add_tail(&rq->queuelist, &dd->dispatch);
        } else {
                deadline_add_rq_rb(dd, rq);

                if (rq_mergeable(rq)) {
                        elv_rqhash_add(q, rq);
                        if (!q->last_merge)
                                q->last_merge = rq;
                }

                /*
                 * set expire time and add to fifo list
                 */
                rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
                list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
        }
}

static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
                               struct list_head *list, bool at_head)
{
        struct request_queue *q = hctx->queue;
        struct deadline_data *dd = q->elevator->elevator_data;

        spin_lock(&dd->lock);
        while (!list_empty(list)) {
                struct request *rq;

                rq = list_first_entry(list, struct request, queuelist);
                list_del_init(&rq->queuelist);
                dd_insert_request(hctx, rq, at_head);
        }
        spin_unlock(&dd->lock);
}

static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
{
        struct deadline_data *dd = hctx->queue->elevator->elevator_data;

        return !list_empty_careful(&dd->dispatch) ||
                !list_empty_careful(&dd->fifo_list[0]) ||
                !list_empty_careful(&dd->fifo_list[1]);
}

/*
 * sysfs parts below
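 *
 * Each tunable in struct deadline_data is exposed as a read-write attribute
 * under /sys/block/<dev>/queue/iosched/. The two expiry values are reported
 * in milliseconds and converted to jiffies when stored.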
 */
static ssize_t
deadline_var_show(int var, char *page)
{
        return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
        char *p = (char *) page;

        *var = simple_strtol(p, &p, 10);
        return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                            \
static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data = __VAR;                                             \
        if (__CONV)                                                     \
                __data = jiffies_to_msecs(__data);                      \
        return deadline_var_show(__data, (page));                       \
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data;                                                     \
        int ret = deadline_var_store(&__data, (page), count);           \
        if (__data < (MIN))                                             \
                __data = (MIN);                                         \
        else if (__data > (MAX))                                        \
                __data = (MAX);                                         \
        if (__CONV)                                                     \
                *(__PTR) = msecs_to_jiffies(__data);                    \
        else                                                            \
                *(__PTR) = __data;                                      \
        return ret;                                                     \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
                                      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
        DD_ATTR(read_expire),
        DD_ATTR(write_expire),
        DD_ATTR(writes_starved),
        DD_ATTR(front_merges),
        DD_ATTR(fifo_batch),
        __ATTR_NULL
};

#ifdef CONFIG_BLK_DEBUG_FS
#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name)                         \
static void *deadline_##name##_fifo_start(struct seq_file *m,           \
                                          loff_t *pos)                  \
        __acquires(&dd->lock)                                           \
{                                                                       \
        struct request_queue *q = m->private;                           \
        struct deadline_data *dd = q->elevator->elevator_data;          \
                                                                        \
        spin_lock(&dd->lock);                                           \
        return seq_list_start(&dd->fifo_list[ddir], *pos);              \
}                                                                       \
                                                                        \
static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,   \
                                         loff_t *pos)                   \
{                                                                       \
        struct request_queue *q = m->private;                           \
        struct deadline_data *dd = q->elevator->elevator_data;          \
                                                                        \
        return seq_list_next(v, &dd->fifo_list[ddir], pos);             \
}                                                                       \
                                                                        \
static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)    \
        __releases(&dd->lock)                                           \
{                                                                       \
        struct request_queue *q = m->private;                           \
        struct deadline_data *dd = q->elevator->elevator_data;          \
                                                                        \
        spin_unlock(&dd->lock);                                         \
}                                                                       \
                                                                        \
static const struct seq_operations deadline_##name##_fifo_seq_ops = {   \
        .start  = deadline_##name##_fifo_start,                         \
        .next   = deadline_##name##_fifo_next,                          \
        .stop   = deadline_##name##_fifo_stop,                          \
        .show   = blk_mq_debugfs_rq_show,                               \
};                                                                      \
                                                                        \
static int deadline_##name##_next_rq_show(void *data,                   \
                                          struct seq_file *m)           \
{                                                                       \
        struct request_queue *q = data;                                 \
        struct deadline_data *dd = q->elevator->elevator_data;          \
        struct request *rq = dd->next_rq[ddir];                         \
                                                                        \
        if (rq)                                                         \
                __blk_mq_debugfs_rq_show(m, rq);                        \
        return 0;                                                       \
}
DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read)
DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write)
#undef DEADLINE_DEBUGFS_DDIR_ATTRS

static int deadline_batching_show(void *data, struct seq_file *m)
{
        struct request_queue *q = data;
        struct deadline_data *dd = q->elevator->elevator_data;

        seq_printf(m, "%u\n", dd->batching);
        return 0;
}

static int deadline_starved_show(void *data, struct seq_file *m)
{
        struct request_queue *q = data;
        struct deadline_data *dd = q->elevator->elevator_data;

        seq_printf(m, "%u\n", dd->starved);
        return 0;
}

static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
        __acquires(&dd->lock)
{
        struct request_queue *q = m->private;
        struct deadline_data *dd = q->elevator->elevator_data;

        spin_lock(&dd->lock);
        return seq_list_start(&dd->dispatch, *pos);
}

static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct request_queue *q = m->private;
        struct deadline_data *dd = q->elevator->elevator_data;

        return seq_list_next(v, &dd->dispatch, pos);
}

static void deadline_dispatch_stop(struct seq_file *m, void *v)
        __releases(&dd->lock)
{
        struct request_queue *q = m->private;
        struct deadline_data *dd = q->elevator->elevator_data;

        spin_unlock(&dd->lock);
}

static const struct seq_operations deadline_dispatch_seq_ops = {
        .start  = deadline_dispatch_start,
        .next   = deadline_dispatch_next,
        .stop   = deadline_dispatch_stop,
        .show   = blk_mq_debugfs_rq_show,
};

#define DEADLINE_QUEUE_DDIR_ATTRS(name)                                         \
        {#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops}, \
        {#name "_next_rq", 0400, deadline_##name##_next_rq_show}
static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
        DEADLINE_QUEUE_DDIR_ATTRS(read),
        DEADLINE_QUEUE_DDIR_ATTRS(write),
        {"batching", 0400, deadline_batching_show},
        {"starved", 0400, deadline_starved_show},
        {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
        {},
};
#undef DEADLINE_QUEUE_DDIR_ATTRS
#endif

static struct elevator_type mq_deadline = {
        .ops.mq = {
                .insert_requests       = dd_insert_requests,
                .dispatch_request      = dd_dispatch_request,
                .next_request          = elv_rb_latter_request,
                .former_request        = elv_rb_former_request,
                .bio_merge             = dd_bio_merge,
                .request_merge         = dd_request_merge,
                .requests_merged       = dd_merged_requests,
                .request_merged        = dd_request_merged,
                .has_work              = dd_has_work,
                .init_sched            = dd_init_queue,
                .exit_sched            = dd_exit_queue,
        },

        .uses_mq        = true,
#ifdef CONFIG_BLK_DEBUG_FS
        .queue_debugfs_attrs = deadline_queue_debugfs_attrs,
#endif
        .elevator_attrs = deadline_attrs,
        .elevator_name = "mq-deadline",
        .elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
        return elv_register(&mq_deadline);
}

static void __exit deadline_exit(void)
{
        elv_unregister(&mq_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MQ deadline IO scheduler");