1 /* 2 * Block device elevator/IO-scheduler. 3 * 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 5 * 6 * 30042000 Jens Axboe <axboe@kernel.dk> : 7 * 8 * Split the elevator a bit so that it is possible to choose a different 9 * one or even write a new "plug in". There are three pieces: 10 * - elevator_fn, inserts a new request in the queue list 11 * - elevator_merge_fn, decides whether a new buffer can be merged with 12 * an existing request 13 * - elevator_dequeue_fn, called when a request is taken off the active list 14 * 15 * 20082000 Dave Jones <davej@suse.de> : 16 * Removed tests for max-bomb-segments, which was breaking elvtune 17 * when run without -bN 18 * 19 * Jens: 20 * - Rework again to work with bio instead of buffer_heads 21 * - loose bi_dev comparisons, partition handling is right now 22 * - completely modularize elevator setup and teardown 23 * 24 */ 25 #include <linux/kernel.h> 26 #include <linux/fs.h> 27 #include <linux/blkdev.h> 28 #include <linux/elevator.h> 29 #include <linux/bio.h> 30 #include <linux/module.h> 31 #include <linux/slab.h> 32 #include <linux/init.h> 33 #include <linux/compiler.h> 34 #include <linux/blktrace_api.h> 35 #include <linux/hash.h> 36 #include <linux/uaccess.h> 37 #include <linux/pm_runtime.h> 38 #include <linux/blk-cgroup.h> 39 40 #include <trace/events/block.h> 41 42 #include "blk.h" 43 #include "blk-mq-sched.h" 44 #include "blk-wbt.h" 45 46 static DEFINE_SPINLOCK(elv_list_lock); 47 static LIST_HEAD(elv_list); 48 49 /* 50 * Merge hash stuff. 51 */ 52 #define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) 53 54 /* 55 * Query io scheduler to see if the current process issuing bio may be 56 * merged with rq. 57 */ 58 static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) 59 { 60 struct request_queue *q = rq->q; 61 struct elevator_queue *e = q->elevator; 62 63 if (e->uses_mq && e->type->ops.mq.allow_merge) 64 return e->type->ops.mq.allow_merge(q, rq, bio); 65 else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn) 66 return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); 67 68 return 1; 69 } 70 71 /* 72 * can we safely merge with this request? 73 */ 74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio) 75 { 76 if (!blk_rq_merge_ok(rq, bio)) 77 return false; 78 79 if (!elv_iosched_allow_bio_merge(rq, bio)) 80 return false; 81 82 return true; 83 } 84 EXPORT_SYMBOL(elv_bio_merge_ok); 85 86 static struct elevator_type *elevator_find(const char *name) 87 { 88 struct elevator_type *e; 89 90 list_for_each_entry(e, &elv_list, list) { 91 if (!strcmp(e->elevator_name, name)) 92 return e; 93 } 94 95 return NULL; 96 } 97 98 static void elevator_put(struct elevator_type *e) 99 { 100 module_put(e->elevator_owner); 101 } 102 103 static struct elevator_type *elevator_get(const char *name, bool try_loading) 104 { 105 struct elevator_type *e; 106 107 spin_lock(&elv_list_lock); 108 109 e = elevator_find(name); 110 if (!e && try_loading) { 111 spin_unlock(&elv_list_lock); 112 request_module("%s-iosched", name); 113 spin_lock(&elv_list_lock); 114 e = elevator_find(name); 115 } 116 117 if (e && !try_module_get(e->elevator_owner)) 118 e = NULL; 119 120 spin_unlock(&elv_list_lock); 121 122 return e; 123 } 124 125 static char chosen_elevator[ELV_NAME_MAX]; 126 127 static int __init elevator_setup(char *str) 128 { 129 /* 130 * Be backwards-compatible with previous kernels, so users 131 * won't get the wrong elevator. 132 */ 133 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 134 return 1; 135 } 136 137 __setup("elevator=", elevator_setup); 138 139 /* called during boot to load the elevator chosen by the elevator param */ 140 void __init load_default_elevator_module(void) 141 { 142 struct elevator_type *e; 143 144 if (!chosen_elevator[0]) 145 return; 146 147 spin_lock(&elv_list_lock); 148 e = elevator_find(chosen_elevator); 149 spin_unlock(&elv_list_lock); 150 151 if (!e) 152 request_module("%s-iosched", chosen_elevator); 153 } 154 155 static struct kobj_type elv_ktype; 156 157 struct elevator_queue *elevator_alloc(struct request_queue *q, 158 struct elevator_type *e) 159 { 160 struct elevator_queue *eq; 161 162 eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); 163 if (unlikely(!eq)) 164 return NULL; 165 166 eq->type = e; 167 kobject_init(&eq->kobj, &elv_ktype); 168 mutex_init(&eq->sysfs_lock); 169 hash_init(eq->hash); 170 eq->uses_mq = e->uses_mq; 171 172 return eq; 173 } 174 EXPORT_SYMBOL(elevator_alloc); 175 176 static void elevator_release(struct kobject *kobj) 177 { 178 struct elevator_queue *e; 179 180 e = container_of(kobj, struct elevator_queue, kobj); 181 elevator_put(e->type); 182 kfree(e); 183 } 184 185 int elevator_init(struct request_queue *q, char *name) 186 { 187 struct elevator_type *e = NULL; 188 int err; 189 190 /* 191 * q->sysfs_lock must be held to provide mutual exclusion between 192 * elevator_switch() and here. 193 */ 194 lockdep_assert_held(&q->sysfs_lock); 195 196 if (unlikely(q->elevator)) 197 return 0; 198 199 INIT_LIST_HEAD(&q->queue_head); 200 q->last_merge = NULL; 201 q->end_sector = 0; 202 q->boundary_rq = NULL; 203 204 if (name) { 205 e = elevator_get(name, true); 206 if (!e) 207 return -EINVAL; 208 } 209 210 /* 211 * Use the default elevator specified by config boot param for 212 * non-mq devices, or by config option. Don't try to load modules 213 * as we could be running off async and request_module() isn't 214 * allowed from async. 215 */ 216 if (!e && !q->mq_ops && *chosen_elevator) { 217 e = elevator_get(chosen_elevator, false); 218 if (!e) 219 printk(KERN_ERR "I/O scheduler %s not found\n", 220 chosen_elevator); 221 } 222 223 if (!e) { 224 /* 225 * For blk-mq devices, we default to using mq-deadline, 226 * if available, for single queue devices. If deadline 227 * isn't available OR we have multiple queues, default 228 * to "none". 229 */ 230 if (q->mq_ops) { 231 if (q->nr_hw_queues == 1) 232 e = elevator_get("mq-deadline", false); 233 if (!e) 234 return 0; 235 } else 236 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); 237 238 if (!e) { 239 printk(KERN_ERR 240 "Default I/O scheduler not found. " \ 241 "Using noop.\n"); 242 e = elevator_get("noop", false); 243 } 244 } 245 246 if (e->uses_mq) 247 err = blk_mq_init_sched(q, e); 248 else 249 err = e->ops.sq.elevator_init_fn(q, e); 250 if (err) 251 elevator_put(e); 252 return err; 253 } 254 EXPORT_SYMBOL(elevator_init); 255 256 void elevator_exit(struct request_queue *q, struct elevator_queue *e) 257 { 258 mutex_lock(&e->sysfs_lock); 259 if (e->uses_mq && e->type->ops.mq.exit_sched) 260 blk_mq_exit_sched(q, e); 261 else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) 262 e->type->ops.sq.elevator_exit_fn(e); 263 mutex_unlock(&e->sysfs_lock); 264 265 kobject_put(&e->kobj); 266 } 267 EXPORT_SYMBOL(elevator_exit); 268 269 static inline void __elv_rqhash_del(struct request *rq) 270 { 271 hash_del(&rq->hash); 272 rq->rq_flags &= ~RQF_HASHED; 273 } 274 275 void elv_rqhash_del(struct request_queue *q, struct request *rq) 276 { 277 if (ELV_ON_HASH(rq)) 278 __elv_rqhash_del(rq); 279 } 280 EXPORT_SYMBOL_GPL(elv_rqhash_del); 281 282 void elv_rqhash_add(struct request_queue *q, struct request *rq) 283 { 284 struct elevator_queue *e = q->elevator; 285 286 BUG_ON(ELV_ON_HASH(rq)); 287 hash_add(e->hash, &rq->hash, rq_hash_key(rq)); 288 rq->rq_flags |= RQF_HASHED; 289 } 290 EXPORT_SYMBOL_GPL(elv_rqhash_add); 291 292 void elv_rqhash_reposition(struct request_queue *q, struct request *rq) 293 { 294 __elv_rqhash_del(rq); 295 elv_rqhash_add(q, rq); 296 } 297 298 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) 299 { 300 struct elevator_queue *e = q->elevator; 301 struct hlist_node *next; 302 struct request *rq; 303 304 hash_for_each_possible_safe(e->hash, rq, next, hash, offset) { 305 BUG_ON(!ELV_ON_HASH(rq)); 306 307 if (unlikely(!rq_mergeable(rq))) { 308 __elv_rqhash_del(rq); 309 continue; 310 } 311 312 if (rq_hash_key(rq) == offset) 313 return rq; 314 } 315 316 return NULL; 317 } 318 319 /* 320 * RB-tree support functions for inserting/lookup/removal of requests 321 * in a sorted RB tree. 322 */ 323 void elv_rb_add(struct rb_root *root, struct request *rq) 324 { 325 struct rb_node **p = &root->rb_node; 326 struct rb_node *parent = NULL; 327 struct request *__rq; 328 329 while (*p) { 330 parent = *p; 331 __rq = rb_entry(parent, struct request, rb_node); 332 333 if (blk_rq_pos(rq) < blk_rq_pos(__rq)) 334 p = &(*p)->rb_left; 335 else if (blk_rq_pos(rq) >= blk_rq_pos(__rq)) 336 p = &(*p)->rb_right; 337 } 338 339 rb_link_node(&rq->rb_node, parent, p); 340 rb_insert_color(&rq->rb_node, root); 341 } 342 EXPORT_SYMBOL(elv_rb_add); 343 344 void elv_rb_del(struct rb_root *root, struct request *rq) 345 { 346 BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); 347 rb_erase(&rq->rb_node, root); 348 RB_CLEAR_NODE(&rq->rb_node); 349 } 350 EXPORT_SYMBOL(elv_rb_del); 351 352 struct request *elv_rb_find(struct rb_root *root, sector_t sector) 353 { 354 struct rb_node *n = root->rb_node; 355 struct request *rq; 356 357 while (n) { 358 rq = rb_entry(n, struct request, rb_node); 359 360 if (sector < blk_rq_pos(rq)) 361 n = n->rb_left; 362 else if (sector > blk_rq_pos(rq)) 363 n = n->rb_right; 364 else 365 return rq; 366 } 367 368 return NULL; 369 } 370 EXPORT_SYMBOL(elv_rb_find); 371 372 /* 373 * Insert rq into dispatch queue of q. Queue lock must be held on 374 * entry. rq is sort instead into the dispatch queue. To be used by 375 * specific elevators. 376 */ 377 void elv_dispatch_sort(struct request_queue *q, struct request *rq) 378 { 379 sector_t boundary; 380 struct list_head *entry; 381 382 if (q->last_merge == rq) 383 q->last_merge = NULL; 384 385 elv_rqhash_del(q, rq); 386 387 q->nr_sorted--; 388 389 boundary = q->end_sector; 390 list_for_each_prev(entry, &q->queue_head) { 391 struct request *pos = list_entry_rq(entry); 392 393 if (req_op(rq) != req_op(pos)) 394 break; 395 if (rq_data_dir(rq) != rq_data_dir(pos)) 396 break; 397 if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER)) 398 break; 399 if (blk_rq_pos(rq) >= boundary) { 400 if (blk_rq_pos(pos) < boundary) 401 continue; 402 } else { 403 if (blk_rq_pos(pos) >= boundary) 404 break; 405 } 406 if (blk_rq_pos(rq) >= blk_rq_pos(pos)) 407 break; 408 } 409 410 list_add(&rq->queuelist, entry); 411 } 412 EXPORT_SYMBOL(elv_dispatch_sort); 413 414 /* 415 * Insert rq into dispatch queue of q. Queue lock must be held on 416 * entry. rq is added to the back of the dispatch queue. To be used by 417 * specific elevators. 418 */ 419 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) 420 { 421 if (q->last_merge == rq) 422 q->last_merge = NULL; 423 424 elv_rqhash_del(q, rq); 425 426 q->nr_sorted--; 427 428 q->end_sector = rq_end_sector(rq); 429 q->boundary_rq = rq; 430 list_add_tail(&rq->queuelist, &q->queue_head); 431 } 432 EXPORT_SYMBOL(elv_dispatch_add_tail); 433 434 enum elv_merge elv_merge(struct request_queue *q, struct request **req, 435 struct bio *bio) 436 { 437 struct elevator_queue *e = q->elevator; 438 struct request *__rq; 439 440 /* 441 * Levels of merges: 442 * nomerges: No merges at all attempted 443 * noxmerges: Only simple one-hit cache try 444 * merges: All merge tries attempted 445 */ 446 if (blk_queue_nomerges(q) || !bio_mergeable(bio)) 447 return ELEVATOR_NO_MERGE; 448 449 /* 450 * First try one-hit cache. 451 */ 452 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { 453 enum elv_merge ret = blk_try_merge(q->last_merge, bio); 454 455 if (ret != ELEVATOR_NO_MERGE) { 456 *req = q->last_merge; 457 return ret; 458 } 459 } 460 461 if (blk_queue_noxmerges(q)) 462 return ELEVATOR_NO_MERGE; 463 464 /* 465 * See if our hash lookup can find a potential backmerge. 466 */ 467 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); 468 if (__rq && elv_bio_merge_ok(__rq, bio)) { 469 *req = __rq; 470 return ELEVATOR_BACK_MERGE; 471 } 472 473 if (e->uses_mq && e->type->ops.mq.request_merge) 474 return e->type->ops.mq.request_merge(q, req, bio); 475 else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn) 476 return e->type->ops.sq.elevator_merge_fn(q, req, bio); 477 478 return ELEVATOR_NO_MERGE; 479 } 480 481 /* 482 * Attempt to do an insertion back merge. Only check for the case where 483 * we can append 'rq' to an existing request, so we can throw 'rq' away 484 * afterwards. 485 * 486 * Returns true if we merged, false otherwise 487 */ 488 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) 489 { 490 struct request *__rq; 491 bool ret; 492 493 if (blk_queue_nomerges(q)) 494 return false; 495 496 /* 497 * First try one-hit cache. 498 */ 499 if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) 500 return true; 501 502 if (blk_queue_noxmerges(q)) 503 return false; 504 505 ret = false; 506 /* 507 * See if our hash lookup can find a potential backmerge. 508 */ 509 while (1) { 510 __rq = elv_rqhash_find(q, blk_rq_pos(rq)); 511 if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) 512 break; 513 514 /* The merged request could be merged with others, try again */ 515 ret = true; 516 rq = __rq; 517 } 518 519 return ret; 520 } 521 522 void elv_merged_request(struct request_queue *q, struct request *rq, 523 enum elv_merge type) 524 { 525 struct elevator_queue *e = q->elevator; 526 527 if (e->uses_mq && e->type->ops.mq.request_merged) 528 e->type->ops.mq.request_merged(q, rq, type); 529 else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn) 530 e->type->ops.sq.elevator_merged_fn(q, rq, type); 531 532 if (type == ELEVATOR_BACK_MERGE) 533 elv_rqhash_reposition(q, rq); 534 535 q->last_merge = rq; 536 } 537 538 void elv_merge_requests(struct request_queue *q, struct request *rq, 539 struct request *next) 540 { 541 struct elevator_queue *e = q->elevator; 542 bool next_sorted = false; 543 544 if (e->uses_mq && e->type->ops.mq.requests_merged) 545 e->type->ops.mq.requests_merged(q, rq, next); 546 else if (e->type->ops.sq.elevator_merge_req_fn) { 547 next_sorted = (__force bool)(next->rq_flags & RQF_SORTED); 548 if (next_sorted) 549 e->type->ops.sq.elevator_merge_req_fn(q, rq, next); 550 } 551 552 elv_rqhash_reposition(q, rq); 553 554 if (next_sorted) { 555 elv_rqhash_del(q, next); 556 q->nr_sorted--; 557 } 558 559 q->last_merge = rq; 560 } 561 562 void elv_bio_merged(struct request_queue *q, struct request *rq, 563 struct bio *bio) 564 { 565 struct elevator_queue *e = q->elevator; 566 567 if (WARN_ON_ONCE(e->uses_mq)) 568 return; 569 570 if (e->type->ops.sq.elevator_bio_merged_fn) 571 e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); 572 } 573 574 #ifdef CONFIG_PM 575 static void blk_pm_requeue_request(struct request *rq) 576 { 577 if (rq->q->dev && !(rq->rq_flags & RQF_PM)) 578 rq->q->nr_pending--; 579 } 580 581 static void blk_pm_add_request(struct request_queue *q, struct request *rq) 582 { 583 if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && 584 (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) 585 pm_request_resume(q->dev); 586 } 587 #else 588 static inline void blk_pm_requeue_request(struct request *rq) {} 589 static inline void blk_pm_add_request(struct request_queue *q, 590 struct request *rq) 591 { 592 } 593 #endif 594 595 void elv_requeue_request(struct request_queue *q, struct request *rq) 596 { 597 /* 598 * it already went through dequeue, we need to decrement the 599 * in_flight count again 600 */ 601 if (blk_account_rq(rq)) { 602 q->in_flight[rq_is_sync(rq)]--; 603 if (rq->rq_flags & RQF_SORTED) 604 elv_deactivate_rq(q, rq); 605 } 606 607 rq->rq_flags &= ~RQF_STARTED; 608 609 blk_pm_requeue_request(rq); 610 611 __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); 612 } 613 614 void elv_drain_elevator(struct request_queue *q) 615 { 616 struct elevator_queue *e = q->elevator; 617 static int printed; 618 619 if (WARN_ON_ONCE(e->uses_mq)) 620 return; 621 622 lockdep_assert_held(q->queue_lock); 623 624 while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) 625 ; 626 if (q->nr_sorted && printed++ < 10) { 627 printk(KERN_ERR "%s: forced dispatching is broken " 628 "(nr_sorted=%u), please report this\n", 629 q->elevator->type->elevator_name, q->nr_sorted); 630 } 631 } 632 633 void __elv_add_request(struct request_queue *q, struct request *rq, int where) 634 { 635 trace_block_rq_insert(q, rq); 636 637 blk_pm_add_request(q, rq); 638 639 rq->q = q; 640 641 if (rq->rq_flags & RQF_SOFTBARRIER) { 642 /* barriers are scheduling boundary, update end_sector */ 643 if (!blk_rq_is_passthrough(rq)) { 644 q->end_sector = rq_end_sector(rq); 645 q->boundary_rq = rq; 646 } 647 } else if (!(rq->rq_flags & RQF_ELVPRIV) && 648 (where == ELEVATOR_INSERT_SORT || 649 where == ELEVATOR_INSERT_SORT_MERGE)) 650 where = ELEVATOR_INSERT_BACK; 651 652 switch (where) { 653 case ELEVATOR_INSERT_REQUEUE: 654 case ELEVATOR_INSERT_FRONT: 655 rq->rq_flags |= RQF_SOFTBARRIER; 656 list_add(&rq->queuelist, &q->queue_head); 657 break; 658 659 case ELEVATOR_INSERT_BACK: 660 rq->rq_flags |= RQF_SOFTBARRIER; 661 elv_drain_elevator(q); 662 list_add_tail(&rq->queuelist, &q->queue_head); 663 /* 664 * We kick the queue here for the following reasons. 665 * - The elevator might have returned NULL previously 666 * to delay requests and returned them now. As the 667 * queue wasn't empty before this request, ll_rw_blk 668 * won't run the queue on return, resulting in hang. 669 * - Usually, back inserted requests won't be merged 670 * with anything. There's no point in delaying queue 671 * processing. 672 */ 673 __blk_run_queue(q); 674 break; 675 676 case ELEVATOR_INSERT_SORT_MERGE: 677 /* 678 * If we succeed in merging this request with one in the 679 * queue already, we are done - rq has now been freed, 680 * so no need to do anything further. 681 */ 682 if (elv_attempt_insert_merge(q, rq)) 683 break; 684 case ELEVATOR_INSERT_SORT: 685 BUG_ON(blk_rq_is_passthrough(rq)); 686 rq->rq_flags |= RQF_SORTED; 687 q->nr_sorted++; 688 if (rq_mergeable(rq)) { 689 elv_rqhash_add(q, rq); 690 if (!q->last_merge) 691 q->last_merge = rq; 692 } 693 694 /* 695 * Some ioscheds (cfq) run q->request_fn directly, so 696 * rq cannot be accessed after calling 697 * elevator_add_req_fn. 698 */ 699 q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); 700 break; 701 702 case ELEVATOR_INSERT_FLUSH: 703 rq->rq_flags |= RQF_SOFTBARRIER; 704 blk_insert_flush(rq); 705 break; 706 default: 707 printk(KERN_ERR "%s: bad insertion point %d\n", 708 __func__, where); 709 BUG(); 710 } 711 } 712 EXPORT_SYMBOL(__elv_add_request); 713 714 void elv_add_request(struct request_queue *q, struct request *rq, int where) 715 { 716 unsigned long flags; 717 718 spin_lock_irqsave(q->queue_lock, flags); 719 __elv_add_request(q, rq, where); 720 spin_unlock_irqrestore(q->queue_lock, flags); 721 } 722 EXPORT_SYMBOL(elv_add_request); 723 724 struct request *elv_latter_request(struct request_queue *q, struct request *rq) 725 { 726 struct elevator_queue *e = q->elevator; 727 728 if (e->uses_mq && e->type->ops.mq.next_request) 729 return e->type->ops.mq.next_request(q, rq); 730 else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn) 731 return e->type->ops.sq.elevator_latter_req_fn(q, rq); 732 733 return NULL; 734 } 735 736 struct request *elv_former_request(struct request_queue *q, struct request *rq) 737 { 738 struct elevator_queue *e = q->elevator; 739 740 if (e->uses_mq && e->type->ops.mq.former_request) 741 return e->type->ops.mq.former_request(q, rq); 742 if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn) 743 return e->type->ops.sq.elevator_former_req_fn(q, rq); 744 return NULL; 745 } 746 747 int elv_set_request(struct request_queue *q, struct request *rq, 748 struct bio *bio, gfp_t gfp_mask) 749 { 750 struct elevator_queue *e = q->elevator; 751 752 if (WARN_ON_ONCE(e->uses_mq)) 753 return 0; 754 755 if (e->type->ops.sq.elevator_set_req_fn) 756 return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); 757 return 0; 758 } 759 760 void elv_put_request(struct request_queue *q, struct request *rq) 761 { 762 struct elevator_queue *e = q->elevator; 763 764 if (WARN_ON_ONCE(e->uses_mq)) 765 return; 766 767 if (e->type->ops.sq.elevator_put_req_fn) 768 e->type->ops.sq.elevator_put_req_fn(rq); 769 } 770 771 int elv_may_queue(struct request_queue *q, unsigned int op) 772 { 773 struct elevator_queue *e = q->elevator; 774 775 if (WARN_ON_ONCE(e->uses_mq)) 776 return 0; 777 778 if (e->type->ops.sq.elevator_may_queue_fn) 779 return e->type->ops.sq.elevator_may_queue_fn(q, op); 780 781 return ELV_MQUEUE_MAY; 782 } 783 784 void elv_completed_request(struct request_queue *q, struct request *rq) 785 { 786 struct elevator_queue *e = q->elevator; 787 788 if (WARN_ON_ONCE(e->uses_mq)) 789 return; 790 791 /* 792 * request is released from the driver, io must be done 793 */ 794 if (blk_account_rq(rq)) { 795 q->in_flight[rq_is_sync(rq)]--; 796 if ((rq->rq_flags & RQF_SORTED) && 797 e->type->ops.sq.elevator_completed_req_fn) 798 e->type->ops.sq.elevator_completed_req_fn(q, rq); 799 } 800 } 801 802 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 803 804 static ssize_t 805 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 806 { 807 struct elv_fs_entry *entry = to_elv(attr); 808 struct elevator_queue *e; 809 ssize_t error; 810 811 if (!entry->show) 812 return -EIO; 813 814 e = container_of(kobj, struct elevator_queue, kobj); 815 mutex_lock(&e->sysfs_lock); 816 error = e->type ? entry->show(e, page) : -ENOENT; 817 mutex_unlock(&e->sysfs_lock); 818 return error; 819 } 820 821 static ssize_t 822 elv_attr_store(struct kobject *kobj, struct attribute *attr, 823 const char *page, size_t length) 824 { 825 struct elv_fs_entry *entry = to_elv(attr); 826 struct elevator_queue *e; 827 ssize_t error; 828 829 if (!entry->store) 830 return -EIO; 831 832 e = container_of(kobj, struct elevator_queue, kobj); 833 mutex_lock(&e->sysfs_lock); 834 error = e->type ? entry->store(e, page, length) : -ENOENT; 835 mutex_unlock(&e->sysfs_lock); 836 return error; 837 } 838 839 static const struct sysfs_ops elv_sysfs_ops = { 840 .show = elv_attr_show, 841 .store = elv_attr_store, 842 }; 843 844 static struct kobj_type elv_ktype = { 845 .sysfs_ops = &elv_sysfs_ops, 846 .release = elevator_release, 847 }; 848 849 int elv_register_queue(struct request_queue *q) 850 { 851 struct elevator_queue *e = q->elevator; 852 int error; 853 854 error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); 855 if (!error) { 856 struct elv_fs_entry *attr = e->type->elevator_attrs; 857 if (attr) { 858 while (attr->attr.name) { 859 if (sysfs_create_file(&e->kobj, &attr->attr)) 860 break; 861 attr++; 862 } 863 } 864 kobject_uevent(&e->kobj, KOBJ_ADD); 865 e->registered = 1; 866 if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) 867 e->type->ops.sq.elevator_registered_fn(q); 868 } 869 return error; 870 } 871 EXPORT_SYMBOL(elv_register_queue); 872 873 void elv_unregister_queue(struct request_queue *q) 874 { 875 if (q) { 876 struct elevator_queue *e = q->elevator; 877 878 kobject_uevent(&e->kobj, KOBJ_REMOVE); 879 kobject_del(&e->kobj); 880 e->registered = 0; 881 /* Re-enable throttling in case elevator disabled it */ 882 wbt_enable_default(q); 883 } 884 } 885 EXPORT_SYMBOL(elv_unregister_queue); 886 887 int elv_register(struct elevator_type *e) 888 { 889 char *def = ""; 890 891 /* create icq_cache if requested */ 892 if (e->icq_size) { 893 if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || 894 WARN_ON(e->icq_align < __alignof__(struct io_cq))) 895 return -EINVAL; 896 897 snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), 898 "%s_io_cq", e->elevator_name); 899 e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size, 900 e->icq_align, 0, NULL); 901 if (!e->icq_cache) 902 return -ENOMEM; 903 } 904 905 /* register, don't allow duplicate names */ 906 spin_lock(&elv_list_lock); 907 if (elevator_find(e->elevator_name)) { 908 spin_unlock(&elv_list_lock); 909 if (e->icq_cache) 910 kmem_cache_destroy(e->icq_cache); 911 return -EBUSY; 912 } 913 list_add_tail(&e->list, &elv_list); 914 spin_unlock(&elv_list_lock); 915 916 /* print pretty message */ 917 if (!strcmp(e->elevator_name, chosen_elevator) || 918 (!*chosen_elevator && 919 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 920 def = " (default)"; 921 922 printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, 923 def); 924 return 0; 925 } 926 EXPORT_SYMBOL_GPL(elv_register); 927 928 void elv_unregister(struct elevator_type *e) 929 { 930 /* unregister */ 931 spin_lock(&elv_list_lock); 932 list_del_init(&e->list); 933 spin_unlock(&elv_list_lock); 934 935 /* 936 * Destroy icq_cache if it exists. icq's are RCU managed. Make 937 * sure all RCU operations are complete before proceeding. 938 */ 939 if (e->icq_cache) { 940 rcu_barrier(); 941 kmem_cache_destroy(e->icq_cache); 942 e->icq_cache = NULL; 943 } 944 } 945 EXPORT_SYMBOL_GPL(elv_unregister); 946 947 static int elevator_switch_mq(struct request_queue *q, 948 struct elevator_type *new_e) 949 { 950 int ret; 951 952 blk_mq_freeze_queue(q); 953 954 if (q->elevator) { 955 if (q->elevator->registered) 956 elv_unregister_queue(q); 957 ioc_clear_queue(q); 958 elevator_exit(q, q->elevator); 959 } 960 961 ret = blk_mq_init_sched(q, new_e); 962 if (ret) 963 goto out; 964 965 if (new_e) { 966 ret = elv_register_queue(q); 967 if (ret) { 968 elevator_exit(q, q->elevator); 969 goto out; 970 } 971 } 972 973 if (new_e) 974 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 975 else 976 blk_add_trace_msg(q, "elv switch: none"); 977 978 out: 979 blk_mq_unfreeze_queue(q); 980 return ret; 981 } 982 983 /* 984 * switch to new_e io scheduler. be careful not to introduce deadlocks - 985 * we don't free the old io scheduler, before we have allocated what we 986 * need for the new one. this way we have a chance of going back to the old 987 * one, if the new one fails init for some reason. 988 */ 989 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) 990 { 991 struct elevator_queue *old = q->elevator; 992 bool old_registered = false; 993 int err; 994 995 if (q->mq_ops) 996 return elevator_switch_mq(q, new_e); 997 998 /* 999 * Turn on BYPASS and drain all requests w/ elevator private data. 1000 * Block layer doesn't call into a quiesced elevator - all requests 1001 * are directly put on the dispatch list without elevator data 1002 * using INSERT_BACK. All requests have SOFTBARRIER set and no 1003 * merge happens either. 1004 */ 1005 if (old) { 1006 old_registered = old->registered; 1007 1008 blk_queue_bypass_start(q); 1009 1010 /* unregister and clear all auxiliary data of the old elevator */ 1011 if (old_registered) 1012 elv_unregister_queue(q); 1013 1014 ioc_clear_queue(q); 1015 } 1016 1017 /* allocate, init and register new elevator */ 1018 err = new_e->ops.sq.elevator_init_fn(q, new_e); 1019 if (err) 1020 goto fail_init; 1021 1022 err = elv_register_queue(q); 1023 if (err) 1024 goto fail_register; 1025 1026 /* done, kill the old one and finish */ 1027 if (old) { 1028 elevator_exit(q, old); 1029 blk_queue_bypass_end(q); 1030 } 1031 1032 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 1033 1034 return 0; 1035 1036 fail_register: 1037 elevator_exit(q, q->elevator); 1038 fail_init: 1039 /* switch failed, restore and re-register old elevator */ 1040 if (old) { 1041 q->elevator = old; 1042 elv_register_queue(q); 1043 blk_queue_bypass_end(q); 1044 } 1045 1046 return err; 1047 } 1048 1049 /* 1050 * Switch this queue to the given IO scheduler. 1051 */ 1052 static int __elevator_change(struct request_queue *q, const char *name) 1053 { 1054 char elevator_name[ELV_NAME_MAX]; 1055 struct elevator_type *e; 1056 1057 /* 1058 * Special case for mq, turn off scheduling 1059 */ 1060 if (q->mq_ops && !strncmp(name, "none", 4)) 1061 return elevator_switch(q, NULL); 1062 1063 strlcpy(elevator_name, name, sizeof(elevator_name)); 1064 e = elevator_get(strstrip(elevator_name), true); 1065 if (!e) 1066 return -EINVAL; 1067 1068 if (q->elevator && 1069 !strcmp(elevator_name, q->elevator->type->elevator_name)) { 1070 elevator_put(e); 1071 return 0; 1072 } 1073 1074 if (!e->uses_mq && q->mq_ops) { 1075 elevator_put(e); 1076 return -EINVAL; 1077 } 1078 if (e->uses_mq && !q->mq_ops) { 1079 elevator_put(e); 1080 return -EINVAL; 1081 } 1082 1083 return elevator_switch(q, e); 1084 } 1085 1086 static inline bool elv_support_iosched(struct request_queue *q) 1087 { 1088 if (q->mq_ops && q->tag_set && (q->tag_set->flags & 1089 BLK_MQ_F_NO_SCHED)) 1090 return false; 1091 return true; 1092 } 1093 1094 ssize_t elv_iosched_store(struct request_queue *q, const char *name, 1095 size_t count) 1096 { 1097 int ret; 1098 1099 if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q)) 1100 return count; 1101 1102 ret = __elevator_change(q, name); 1103 if (!ret) 1104 return count; 1105 1106 return ret; 1107 } 1108 1109 ssize_t elv_iosched_show(struct request_queue *q, char *name) 1110 { 1111 struct elevator_queue *e = q->elevator; 1112 struct elevator_type *elv = NULL; 1113 struct elevator_type *__e; 1114 int len = 0; 1115 1116 if (!blk_queue_stackable(q)) 1117 return sprintf(name, "none\n"); 1118 1119 if (!q->elevator) 1120 len += sprintf(name+len, "[none] "); 1121 else 1122 elv = e->type; 1123 1124 spin_lock(&elv_list_lock); 1125 list_for_each_entry(__e, &elv_list, list) { 1126 if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) { 1127 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1128 continue; 1129 } 1130 if (__e->uses_mq && q->mq_ops && elv_support_iosched(q)) 1131 len += sprintf(name+len, "%s ", __e->elevator_name); 1132 else if (!__e->uses_mq && !q->mq_ops) 1133 len += sprintf(name+len, "%s ", __e->elevator_name); 1134 } 1135 spin_unlock(&elv_list_lock); 1136 1137 if (q->mq_ops && q->elevator) 1138 len += sprintf(name+len, "none"); 1139 1140 len += sprintf(len+name, "\n"); 1141 return len; 1142 } 1143 1144 struct request *elv_rb_former_request(struct request_queue *q, 1145 struct request *rq) 1146 { 1147 struct rb_node *rbprev = rb_prev(&rq->rb_node); 1148 1149 if (rbprev) 1150 return rb_entry_rq(rbprev); 1151 1152 return NULL; 1153 } 1154 EXPORT_SYMBOL(elv_rb_former_request); 1155 1156 struct request *elv_rb_latter_request(struct request_queue *q, 1157 struct request *rq) 1158 { 1159 struct rb_node *rbnext = rb_next(&rq->rb_node); 1160 1161 if (rbnext) 1162 return rb_entry_rq(rbnext); 1163 1164 return NULL; 1165 } 1166 EXPORT_SYMBOL(elv_rb_latter_request); 1167