1 /* 2 * Block device elevator/IO-scheduler. 3 * 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 5 * 6 * 30042000 Jens Axboe <axboe@kernel.dk> : 7 * 8 * Split the elevator a bit so that it is possible to choose a different 9 * one or even write a new "plug in". There are three pieces: 10 * - elevator_fn, inserts a new request in the queue list 11 * - elevator_merge_fn, decides whether a new buffer can be merged with 12 * an existing request 13 * - elevator_dequeue_fn, called when a request is taken off the active list 14 * 15 * 20082000 Dave Jones <davej@suse.de> : 16 * Removed tests for max-bomb-segments, which was breaking elvtune 17 * when run without -bN 18 * 19 * Jens: 20 * - Rework again to work with bio instead of buffer_heads 21 * - loose bi_dev comparisons, partition handling is right now 22 * - completely modularize elevator setup and teardown 23 * 24 */ 25 #include <linux/kernel.h> 26 #include <linux/fs.h> 27 #include <linux/blkdev.h> 28 #include <linux/elevator.h> 29 #include <linux/bio.h> 30 #include <linux/module.h> 31 #include <linux/slab.h> 32 #include <linux/init.h> 33 #include <linux/compiler.h> 34 #include <linux/blktrace_api.h> 35 #include <linux/hash.h> 36 #include <linux/uaccess.h> 37 #include <linux/pm_runtime.h> 38 #include <linux/blk-cgroup.h> 39 40 #include <trace/events/block.h> 41 42 #include "blk.h" 43 #include "blk-mq-sched.h" 44 #include "blk-wbt.h" 45 46 static DEFINE_SPINLOCK(elv_list_lock); 47 static LIST_HEAD(elv_list); 48 49 /* 50 * Merge hash stuff. 51 */ 52 #define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) 53 54 /* 55 * Query io scheduler to see if the current process issuing bio may be 56 * merged with rq. 
57 */ 58 static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) 59 { 60 struct request_queue *q = rq->q; 61 struct elevator_queue *e = q->elevator; 62 63 if (e->uses_mq && e->type->ops.mq.allow_merge) 64 return e->type->ops.mq.allow_merge(q, rq, bio); 65 else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn) 66 return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); 67 68 return 1; 69 } 70 71 /* 72 * can we safely merge with this request? 73 */ 74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio) 75 { 76 if (!blk_rq_merge_ok(rq, bio)) 77 return false; 78 79 if (!elv_iosched_allow_bio_merge(rq, bio)) 80 return false; 81 82 return true; 83 } 84 EXPORT_SYMBOL(elv_bio_merge_ok); 85 86 static struct elevator_type *elevator_find(const char *name) 87 { 88 struct elevator_type *e; 89 90 list_for_each_entry(e, &elv_list, list) { 91 if (!strcmp(e->elevator_name, name)) 92 return e; 93 } 94 95 return NULL; 96 } 97 98 static void elevator_put(struct elevator_type *e) 99 { 100 module_put(e->elevator_owner); 101 } 102 103 static struct elevator_type *elevator_get(const char *name, bool try_loading) 104 { 105 struct elevator_type *e; 106 107 spin_lock(&elv_list_lock); 108 109 e = elevator_find(name); 110 if (!e && try_loading) { 111 spin_unlock(&elv_list_lock); 112 request_module("%s-iosched", name); 113 spin_lock(&elv_list_lock); 114 e = elevator_find(name); 115 } 116 117 if (e && !try_module_get(e->elevator_owner)) 118 e = NULL; 119 120 spin_unlock(&elv_list_lock); 121 122 return e; 123 } 124 125 static char chosen_elevator[ELV_NAME_MAX]; 126 127 static int __init elevator_setup(char *str) 128 { 129 /* 130 * Be backwards-compatible with previous kernels, so users 131 * won't get the wrong elevator. 
132 */ 133 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 134 return 1; 135 } 136 137 __setup("elevator=", elevator_setup); 138 139 /* called during boot to load the elevator chosen by the elevator param */ 140 void __init load_default_elevator_module(void) 141 { 142 struct elevator_type *e; 143 144 if (!chosen_elevator[0]) 145 return; 146 147 spin_lock(&elv_list_lock); 148 e = elevator_find(chosen_elevator); 149 spin_unlock(&elv_list_lock); 150 151 if (!e) 152 request_module("%s-iosched", chosen_elevator); 153 } 154 155 static struct kobj_type elv_ktype; 156 157 struct elevator_queue *elevator_alloc(struct request_queue *q, 158 struct elevator_type *e) 159 { 160 struct elevator_queue *eq; 161 162 eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); 163 if (unlikely(!eq)) 164 return NULL; 165 166 eq->type = e; 167 kobject_init(&eq->kobj, &elv_ktype); 168 mutex_init(&eq->sysfs_lock); 169 hash_init(eq->hash); 170 eq->uses_mq = e->uses_mq; 171 172 return eq; 173 } 174 EXPORT_SYMBOL(elevator_alloc); 175 176 static void elevator_release(struct kobject *kobj) 177 { 178 struct elevator_queue *e; 179 180 e = container_of(kobj, struct elevator_queue, kobj); 181 elevator_put(e->type); 182 kfree(e); 183 } 184 185 int elevator_init(struct request_queue *q, char *name) 186 { 187 struct elevator_type *e = NULL; 188 int err; 189 190 /* 191 * q->sysfs_lock must be held to provide mutual exclusion between 192 * elevator_switch() and here. 193 */ 194 lockdep_assert_held(&q->sysfs_lock); 195 196 if (unlikely(q->elevator)) 197 return 0; 198 199 INIT_LIST_HEAD(&q->queue_head); 200 q->last_merge = NULL; 201 q->end_sector = 0; 202 q->boundary_rq = NULL; 203 204 if (name) { 205 e = elevator_get(name, true); 206 if (!e) 207 return -EINVAL; 208 } 209 210 /* 211 * Use the default elevator specified by config boot param for 212 * non-mq devices, or by config option. 
Don't try to load modules 213 * as we could be running off async and request_module() isn't 214 * allowed from async. 215 */ 216 if (!e && !q->mq_ops && *chosen_elevator) { 217 e = elevator_get(chosen_elevator, false); 218 if (!e) 219 printk(KERN_ERR "I/O scheduler %s not found\n", 220 chosen_elevator); 221 } 222 223 if (!e) { 224 /* 225 * For blk-mq devices, we default to using mq-deadline, 226 * if available, for single queue devices. If deadline 227 * isn't available OR we have multiple queues, default 228 * to "none". 229 */ 230 if (q->mq_ops) { 231 if (q->nr_hw_queues == 1) 232 e = elevator_get("mq-deadline", false); 233 if (!e) 234 return 0; 235 } else 236 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); 237 238 if (!e) { 239 printk(KERN_ERR 240 "Default I/O scheduler not found. " \ 241 "Using noop.\n"); 242 e = elevator_get("noop", false); 243 } 244 } 245 246 if (e->uses_mq) 247 err = blk_mq_init_sched(q, e); 248 else 249 err = e->ops.sq.elevator_init_fn(q, e); 250 if (err) 251 elevator_put(e); 252 return err; 253 } 254 EXPORT_SYMBOL(elevator_init); 255 256 void elevator_exit(struct request_queue *q, struct elevator_queue *e) 257 { 258 mutex_lock(&e->sysfs_lock); 259 if (e->uses_mq && e->type->ops.mq.exit_sched) 260 blk_mq_exit_sched(q, e); 261 else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) 262 e->type->ops.sq.elevator_exit_fn(e); 263 mutex_unlock(&e->sysfs_lock); 264 265 kobject_put(&e->kobj); 266 } 267 EXPORT_SYMBOL(elevator_exit); 268 269 static inline void __elv_rqhash_del(struct request *rq) 270 { 271 hash_del(&rq->hash); 272 rq->rq_flags &= ~RQF_HASHED; 273 } 274 275 void elv_rqhash_del(struct request_queue *q, struct request *rq) 276 { 277 if (ELV_ON_HASH(rq)) 278 __elv_rqhash_del(rq); 279 } 280 EXPORT_SYMBOL_GPL(elv_rqhash_del); 281 282 void elv_rqhash_add(struct request_queue *q, struct request *rq) 283 { 284 struct elevator_queue *e = q->elevator; 285 286 BUG_ON(ELV_ON_HASH(rq)); 287 hash_add(e->hash, &rq->hash, rq_hash_key(rq)); 288 
rq->rq_flags |= RQF_HASHED; 289 } 290 EXPORT_SYMBOL_GPL(elv_rqhash_add); 291 292 void elv_rqhash_reposition(struct request_queue *q, struct request *rq) 293 { 294 __elv_rqhash_del(rq); 295 elv_rqhash_add(q, rq); 296 } 297 298 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) 299 { 300 struct elevator_queue *e = q->elevator; 301 struct hlist_node *next; 302 struct request *rq; 303 304 hash_for_each_possible_safe(e->hash, rq, next, hash, offset) { 305 BUG_ON(!ELV_ON_HASH(rq)); 306 307 if (unlikely(!rq_mergeable(rq))) { 308 __elv_rqhash_del(rq); 309 continue; 310 } 311 312 if (rq_hash_key(rq) == offset) 313 return rq; 314 } 315 316 return NULL; 317 } 318 319 /* 320 * RB-tree support functions for inserting/lookup/removal of requests 321 * in a sorted RB tree. 322 */ 323 void elv_rb_add(struct rb_root *root, struct request *rq) 324 { 325 struct rb_node **p = &root->rb_node; 326 struct rb_node *parent = NULL; 327 struct request *__rq; 328 329 while (*p) { 330 parent = *p; 331 __rq = rb_entry(parent, struct request, rb_node); 332 333 if (blk_rq_pos(rq) < blk_rq_pos(__rq)) 334 p = &(*p)->rb_left; 335 else if (blk_rq_pos(rq) >= blk_rq_pos(__rq)) 336 p = &(*p)->rb_right; 337 } 338 339 rb_link_node(&rq->rb_node, parent, p); 340 rb_insert_color(&rq->rb_node, root); 341 } 342 EXPORT_SYMBOL(elv_rb_add); 343 344 void elv_rb_del(struct rb_root *root, struct request *rq) 345 { 346 BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); 347 rb_erase(&rq->rb_node, root); 348 RB_CLEAR_NODE(&rq->rb_node); 349 } 350 EXPORT_SYMBOL(elv_rb_del); 351 352 struct request *elv_rb_find(struct rb_root *root, sector_t sector) 353 { 354 struct rb_node *n = root->rb_node; 355 struct request *rq; 356 357 while (n) { 358 rq = rb_entry(n, struct request, rb_node); 359 360 if (sector < blk_rq_pos(rq)) 361 n = n->rb_left; 362 else if (sector > blk_rq_pos(rq)) 363 n = n->rb_right; 364 else 365 return rq; 366 } 367 368 return NULL; 369 } 370 EXPORT_SYMBOL(elv_rb_find); 371 372 /* 373 * Insert rq 
into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	/* rq leaves the scheduler: forget it as a merge candidate. */
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	/*
	 * Walk the dispatch list from the tail towards the head and stop at
	 * the entry rq has to be inserted after. If the walk runs off the
	 * list, entry is the list head and rq goes to the front.
	 */
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		/* Never sort past a request of a different op or direction. */
		if (req_op(rq) != req_op(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		/* Started requests and soft barriers must not be passed. */
		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
			break;
		/*
		 * Requests at or beyond the boundary sector are kept in
		 * front of requests below it: such an rq skips over
		 * lower-sector entries, while an rq below the boundary
		 * stops behind the first entry at or beyond it.
		 */
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		/* Same side of the boundary: keep ascending sector order. */
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
418 */ 419 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) 420 { 421 if (q->last_merge == rq) 422 q->last_merge = NULL; 423 424 elv_rqhash_del(q, rq); 425 426 q->nr_sorted--; 427 428 q->end_sector = rq_end_sector(rq); 429 q->boundary_rq = rq; 430 list_add_tail(&rq->queuelist, &q->queue_head); 431 } 432 EXPORT_SYMBOL(elv_dispatch_add_tail); 433 434 enum elv_merge elv_merge(struct request_queue *q, struct request **req, 435 struct bio *bio) 436 { 437 struct elevator_queue *e = q->elevator; 438 struct request *__rq; 439 440 /* 441 * Levels of merges: 442 * nomerges: No merges at all attempted 443 * noxmerges: Only simple one-hit cache try 444 * merges: All merge tries attempted 445 */ 446 if (blk_queue_nomerges(q) || !bio_mergeable(bio)) 447 return ELEVATOR_NO_MERGE; 448 449 /* 450 * First try one-hit cache. 451 */ 452 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { 453 enum elv_merge ret = blk_try_merge(q->last_merge, bio); 454 455 if (ret != ELEVATOR_NO_MERGE) { 456 *req = q->last_merge; 457 return ret; 458 } 459 } 460 461 if (blk_queue_noxmerges(q)) 462 return ELEVATOR_NO_MERGE; 463 464 /* 465 * See if our hash lookup can find a potential backmerge. 466 */ 467 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); 468 if (__rq && elv_bio_merge_ok(__rq, bio)) { 469 *req = __rq; 470 return ELEVATOR_BACK_MERGE; 471 } 472 473 if (e->uses_mq && e->type->ops.mq.request_merge) 474 return e->type->ops.mq.request_merge(q, req, bio); 475 else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn) 476 return e->type->ops.sq.elevator_merge_fn(q, req, bio); 477 478 return ELEVATOR_NO_MERGE; 479 } 480 481 /* 482 * Attempt to do an insertion back merge. Only check for the case where 483 * we can append 'rq' to an existing request, so we can throw 'rq' away 484 * afterwards. 
485 * 486 * Returns true if we merged, false otherwise 487 */ 488 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) 489 { 490 struct request *__rq; 491 bool ret; 492 493 if (blk_queue_nomerges(q)) 494 return false; 495 496 /* 497 * First try one-hit cache. 498 */ 499 if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) 500 return true; 501 502 if (blk_queue_noxmerges(q)) 503 return false; 504 505 ret = false; 506 /* 507 * See if our hash lookup can find a potential backmerge. 508 */ 509 while (1) { 510 __rq = elv_rqhash_find(q, blk_rq_pos(rq)); 511 if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) 512 break; 513 514 /* The merged request could be merged with others, try again */ 515 ret = true; 516 rq = __rq; 517 } 518 519 return ret; 520 } 521 522 void elv_merged_request(struct request_queue *q, struct request *rq, 523 enum elv_merge type) 524 { 525 struct elevator_queue *e = q->elevator; 526 527 if (e->uses_mq && e->type->ops.mq.request_merged) 528 e->type->ops.mq.request_merged(q, rq, type); 529 else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn) 530 e->type->ops.sq.elevator_merged_fn(q, rq, type); 531 532 if (type == ELEVATOR_BACK_MERGE) 533 elv_rqhash_reposition(q, rq); 534 535 q->last_merge = rq; 536 } 537 538 void elv_merge_requests(struct request_queue *q, struct request *rq, 539 struct request *next) 540 { 541 struct elevator_queue *e = q->elevator; 542 bool next_sorted = false; 543 544 if (e->uses_mq && e->type->ops.mq.requests_merged) 545 e->type->ops.mq.requests_merged(q, rq, next); 546 else if (e->type->ops.sq.elevator_merge_req_fn) { 547 next_sorted = (__force bool)(next->rq_flags & RQF_SORTED); 548 if (next_sorted) 549 e->type->ops.sq.elevator_merge_req_fn(q, rq, next); 550 } 551 552 elv_rqhash_reposition(q, rq); 553 554 if (next_sorted) { 555 elv_rqhash_del(q, next); 556 q->nr_sorted--; 557 } 558 559 q->last_merge = rq; 560 } 561 562 void elv_bio_merged(struct request_queue *q, struct request *rq, 563 
struct bio *bio) 564 { 565 struct elevator_queue *e = q->elevator; 566 567 if (WARN_ON_ONCE(e->uses_mq)) 568 return; 569 570 if (e->type->ops.sq.elevator_bio_merged_fn) 571 e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); 572 } 573 574 #ifdef CONFIG_PM 575 static void blk_pm_requeue_request(struct request *rq) 576 { 577 if (rq->q->dev && !(rq->rq_flags & RQF_PM)) 578 rq->q->nr_pending--; 579 } 580 581 static void blk_pm_add_request(struct request_queue *q, struct request *rq) 582 { 583 if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && 584 (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) 585 pm_request_resume(q->dev); 586 } 587 #else 588 static inline void blk_pm_requeue_request(struct request *rq) {} 589 static inline void blk_pm_add_request(struct request_queue *q, 590 struct request *rq) 591 { 592 } 593 #endif 594 595 void elv_requeue_request(struct request_queue *q, struct request *rq) 596 { 597 /* 598 * it already went through dequeue, we need to decrement the 599 * in_flight count again 600 */ 601 if (blk_account_rq(rq)) { 602 q->in_flight[rq_is_sync(rq)]--; 603 if (rq->rq_flags & RQF_SORTED) 604 elv_deactivate_rq(q, rq); 605 } 606 607 rq->rq_flags &= ~RQF_STARTED; 608 609 blk_pm_requeue_request(rq); 610 611 __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); 612 } 613 614 void elv_drain_elevator(struct request_queue *q) 615 { 616 struct elevator_queue *e = q->elevator; 617 static int printed; 618 619 if (WARN_ON_ONCE(e->uses_mq)) 620 return; 621 622 lockdep_assert_held(q->queue_lock); 623 624 while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) 625 ; 626 if (q->nr_sorted && printed++ < 10) { 627 printk(KERN_ERR "%s: forced dispatching is broken " 628 "(nr_sorted=%u), please report this\n", 629 q->elevator->type->elevator_name, q->nr_sorted); 630 } 631 } 632 633 void __elv_add_request(struct request_queue *q, struct request *rq, int where) 634 { 635 trace_block_rq_insert(q, rq); 636 637 blk_pm_add_request(q, rq); 638 639 
rq->q = q; 640 641 if (rq->rq_flags & RQF_SOFTBARRIER) { 642 /* barriers are scheduling boundary, update end_sector */ 643 if (!blk_rq_is_passthrough(rq)) { 644 q->end_sector = rq_end_sector(rq); 645 q->boundary_rq = rq; 646 } 647 } else if (!(rq->rq_flags & RQF_ELVPRIV) && 648 (where == ELEVATOR_INSERT_SORT || 649 where == ELEVATOR_INSERT_SORT_MERGE)) 650 where = ELEVATOR_INSERT_BACK; 651 652 switch (where) { 653 case ELEVATOR_INSERT_REQUEUE: 654 case ELEVATOR_INSERT_FRONT: 655 rq->rq_flags |= RQF_SOFTBARRIER; 656 list_add(&rq->queuelist, &q->queue_head); 657 break; 658 659 case ELEVATOR_INSERT_BACK: 660 rq->rq_flags |= RQF_SOFTBARRIER; 661 elv_drain_elevator(q); 662 list_add_tail(&rq->queuelist, &q->queue_head); 663 /* 664 * We kick the queue here for the following reasons. 665 * - The elevator might have returned NULL previously 666 * to delay requests and returned them now. As the 667 * queue wasn't empty before this request, ll_rw_blk 668 * won't run the queue on return, resulting in hang. 669 * - Usually, back inserted requests won't be merged 670 * with anything. There's no point in delaying queue 671 * processing. 672 */ 673 __blk_run_queue(q); 674 break; 675 676 case ELEVATOR_INSERT_SORT_MERGE: 677 /* 678 * If we succeed in merging this request with one in the 679 * queue already, we are done - rq has now been freed, 680 * so no need to do anything further. 681 */ 682 if (elv_attempt_insert_merge(q, rq)) 683 break; 684 /* fall through */ 685 case ELEVATOR_INSERT_SORT: 686 BUG_ON(blk_rq_is_passthrough(rq)); 687 rq->rq_flags |= RQF_SORTED; 688 q->nr_sorted++; 689 if (rq_mergeable(rq)) { 690 elv_rqhash_add(q, rq); 691 if (!q->last_merge) 692 q->last_merge = rq; 693 } 694 695 /* 696 * Some ioscheds (cfq) run q->request_fn directly, so 697 * rq cannot be accessed after calling 698 * elevator_add_req_fn. 
699 */ 700 q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); 701 break; 702 703 case ELEVATOR_INSERT_FLUSH: 704 rq->rq_flags |= RQF_SOFTBARRIER; 705 blk_insert_flush(rq); 706 break; 707 default: 708 printk(KERN_ERR "%s: bad insertion point %d\n", 709 __func__, where); 710 BUG(); 711 } 712 } 713 EXPORT_SYMBOL(__elv_add_request); 714 715 void elv_add_request(struct request_queue *q, struct request *rq, int where) 716 { 717 unsigned long flags; 718 719 spin_lock_irqsave(q->queue_lock, flags); 720 __elv_add_request(q, rq, where); 721 spin_unlock_irqrestore(q->queue_lock, flags); 722 } 723 EXPORT_SYMBOL(elv_add_request); 724 725 struct request *elv_latter_request(struct request_queue *q, struct request *rq) 726 { 727 struct elevator_queue *e = q->elevator; 728 729 if (e->uses_mq && e->type->ops.mq.next_request) 730 return e->type->ops.mq.next_request(q, rq); 731 else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn) 732 return e->type->ops.sq.elevator_latter_req_fn(q, rq); 733 734 return NULL; 735 } 736 737 struct request *elv_former_request(struct request_queue *q, struct request *rq) 738 { 739 struct elevator_queue *e = q->elevator; 740 741 if (e->uses_mq && e->type->ops.mq.former_request) 742 return e->type->ops.mq.former_request(q, rq); 743 if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn) 744 return e->type->ops.sq.elevator_former_req_fn(q, rq); 745 return NULL; 746 } 747 748 int elv_set_request(struct request_queue *q, struct request *rq, 749 struct bio *bio, gfp_t gfp_mask) 750 { 751 struct elevator_queue *e = q->elevator; 752 753 if (WARN_ON_ONCE(e->uses_mq)) 754 return 0; 755 756 if (e->type->ops.sq.elevator_set_req_fn) 757 return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); 758 return 0; 759 } 760 761 void elv_put_request(struct request_queue *q, struct request *rq) 762 { 763 struct elevator_queue *e = q->elevator; 764 765 if (WARN_ON_ONCE(e->uses_mq)) 766 return; 767 768 if (e->type->ops.sq.elevator_put_req_fn) 769 
e->type->ops.sq.elevator_put_req_fn(rq); 770 } 771 772 int elv_may_queue(struct request_queue *q, unsigned int op) 773 { 774 struct elevator_queue *e = q->elevator; 775 776 if (WARN_ON_ONCE(e->uses_mq)) 777 return 0; 778 779 if (e->type->ops.sq.elevator_may_queue_fn) 780 return e->type->ops.sq.elevator_may_queue_fn(q, op); 781 782 return ELV_MQUEUE_MAY; 783 } 784 785 void elv_completed_request(struct request_queue *q, struct request *rq) 786 { 787 struct elevator_queue *e = q->elevator; 788 789 if (WARN_ON_ONCE(e->uses_mq)) 790 return; 791 792 /* 793 * request is released from the driver, io must be done 794 */ 795 if (blk_account_rq(rq)) { 796 q->in_flight[rq_is_sync(rq)]--; 797 if ((rq->rq_flags & RQF_SORTED) && 798 e->type->ops.sq.elevator_completed_req_fn) 799 e->type->ops.sq.elevator_completed_req_fn(q, rq); 800 } 801 } 802 803 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 804 805 static ssize_t 806 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 807 { 808 struct elv_fs_entry *entry = to_elv(attr); 809 struct elevator_queue *e; 810 ssize_t error; 811 812 if (!entry->show) 813 return -EIO; 814 815 e = container_of(kobj, struct elevator_queue, kobj); 816 mutex_lock(&e->sysfs_lock); 817 error = e->type ? entry->show(e, page) : -ENOENT; 818 mutex_unlock(&e->sysfs_lock); 819 return error; 820 } 821 822 static ssize_t 823 elv_attr_store(struct kobject *kobj, struct attribute *attr, 824 const char *page, size_t length) 825 { 826 struct elv_fs_entry *entry = to_elv(attr); 827 struct elevator_queue *e; 828 ssize_t error; 829 830 if (!entry->store) 831 return -EIO; 832 833 e = container_of(kobj, struct elevator_queue, kobj); 834 mutex_lock(&e->sysfs_lock); 835 error = e->type ? 
entry->store(e, page, length) : -ENOENT; 836 mutex_unlock(&e->sysfs_lock); 837 return error; 838 } 839 840 static const struct sysfs_ops elv_sysfs_ops = { 841 .show = elv_attr_show, 842 .store = elv_attr_store, 843 }; 844 845 static struct kobj_type elv_ktype = { 846 .sysfs_ops = &elv_sysfs_ops, 847 .release = elevator_release, 848 }; 849 850 int elv_register_queue(struct request_queue *q) 851 { 852 struct elevator_queue *e = q->elevator; 853 int error; 854 855 error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); 856 if (!error) { 857 struct elv_fs_entry *attr = e->type->elevator_attrs; 858 if (attr) { 859 while (attr->attr.name) { 860 if (sysfs_create_file(&e->kobj, &attr->attr)) 861 break; 862 attr++; 863 } 864 } 865 kobject_uevent(&e->kobj, KOBJ_ADD); 866 e->registered = 1; 867 if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) 868 e->type->ops.sq.elevator_registered_fn(q); 869 } 870 return error; 871 } 872 EXPORT_SYMBOL(elv_register_queue); 873 874 void elv_unregister_queue(struct request_queue *q) 875 { 876 if (q) { 877 struct elevator_queue *e = q->elevator; 878 879 kobject_uevent(&e->kobj, KOBJ_REMOVE); 880 kobject_del(&e->kobj); 881 e->registered = 0; 882 /* Re-enable throttling in case elevator disabled it */ 883 wbt_enable_default(q); 884 } 885 } 886 EXPORT_SYMBOL(elv_unregister_queue); 887 888 int elv_register(struct elevator_type *e) 889 { 890 char *def = ""; 891 892 /* create icq_cache if requested */ 893 if (e->icq_size) { 894 if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || 895 WARN_ON(e->icq_align < __alignof__(struct io_cq))) 896 return -EINVAL; 897 898 snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), 899 "%s_io_cq", e->elevator_name); 900 e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size, 901 e->icq_align, 0, NULL); 902 if (!e->icq_cache) 903 return -ENOMEM; 904 } 905 906 /* register, don't allow duplicate names */ 907 spin_lock(&elv_list_lock); 908 if (elevator_find(e->elevator_name)) { 909 
spin_unlock(&elv_list_lock); 910 if (e->icq_cache) 911 kmem_cache_destroy(e->icq_cache); 912 return -EBUSY; 913 } 914 list_add_tail(&e->list, &elv_list); 915 spin_unlock(&elv_list_lock); 916 917 /* print pretty message */ 918 if (!strcmp(e->elevator_name, chosen_elevator) || 919 (!*chosen_elevator && 920 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 921 def = " (default)"; 922 923 printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, 924 def); 925 return 0; 926 } 927 EXPORT_SYMBOL_GPL(elv_register); 928 929 void elv_unregister(struct elevator_type *e) 930 { 931 /* unregister */ 932 spin_lock(&elv_list_lock); 933 list_del_init(&e->list); 934 spin_unlock(&elv_list_lock); 935 936 /* 937 * Destroy icq_cache if it exists. icq's are RCU managed. Make 938 * sure all RCU operations are complete before proceeding. 939 */ 940 if (e->icq_cache) { 941 rcu_barrier(); 942 kmem_cache_destroy(e->icq_cache); 943 e->icq_cache = NULL; 944 } 945 } 946 EXPORT_SYMBOL_GPL(elv_unregister); 947 948 static int elevator_switch_mq(struct request_queue *q, 949 struct elevator_type *new_e) 950 { 951 int ret; 952 953 blk_mq_freeze_queue(q); 954 955 if (q->elevator) { 956 if (q->elevator->registered) 957 elv_unregister_queue(q); 958 ioc_clear_queue(q); 959 elevator_exit(q, q->elevator); 960 } 961 962 ret = blk_mq_init_sched(q, new_e); 963 if (ret) 964 goto out; 965 966 if (new_e) { 967 ret = elv_register_queue(q); 968 if (ret) { 969 elevator_exit(q, q->elevator); 970 goto out; 971 } 972 } 973 974 if (new_e) 975 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 976 else 977 blk_add_trace_msg(q, "elv switch: none"); 978 979 out: 980 blk_mq_unfreeze_queue(q); 981 return ret; 982 } 983 984 /* 985 * switch to new_e io scheduler. be careful not to introduce deadlocks - 986 * we don't free the old io scheduler, before we have allocated what we 987 * need for the new one. 
this way we have a chance of going back to the old 988 * one, if the new one fails init for some reason. 989 */ 990 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) 991 { 992 struct elevator_queue *old = q->elevator; 993 bool old_registered = false; 994 int err; 995 996 if (q->mq_ops) 997 return elevator_switch_mq(q, new_e); 998 999 /* 1000 * Turn on BYPASS and drain all requests w/ elevator private data. 1001 * Block layer doesn't call into a quiesced elevator - all requests 1002 * are directly put on the dispatch list without elevator data 1003 * using INSERT_BACK. All requests have SOFTBARRIER set and no 1004 * merge happens either. 1005 */ 1006 if (old) { 1007 old_registered = old->registered; 1008 1009 blk_queue_bypass_start(q); 1010 1011 /* unregister and clear all auxiliary data of the old elevator */ 1012 if (old_registered) 1013 elv_unregister_queue(q); 1014 1015 ioc_clear_queue(q); 1016 } 1017 1018 /* allocate, init and register new elevator */ 1019 err = new_e->ops.sq.elevator_init_fn(q, new_e); 1020 if (err) 1021 goto fail_init; 1022 1023 err = elv_register_queue(q); 1024 if (err) 1025 goto fail_register; 1026 1027 /* done, kill the old one and finish */ 1028 if (old) { 1029 elevator_exit(q, old); 1030 blk_queue_bypass_end(q); 1031 } 1032 1033 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 1034 1035 return 0; 1036 1037 fail_register: 1038 elevator_exit(q, q->elevator); 1039 fail_init: 1040 /* switch failed, restore and re-register old elevator */ 1041 if (old) { 1042 q->elevator = old; 1043 elv_register_queue(q); 1044 blk_queue_bypass_end(q); 1045 } 1046 1047 return err; 1048 } 1049 1050 /* 1051 * Switch this queue to the given IO scheduler. 
1052 */ 1053 static int __elevator_change(struct request_queue *q, const char *name) 1054 { 1055 char elevator_name[ELV_NAME_MAX]; 1056 struct elevator_type *e; 1057 1058 /* Make sure queue is not in the middle of being removed */ 1059 if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) 1060 return -ENOENT; 1061 1062 /* 1063 * Special case for mq, turn off scheduling 1064 */ 1065 if (q->mq_ops && !strncmp(name, "none", 4)) 1066 return elevator_switch(q, NULL); 1067 1068 strlcpy(elevator_name, name, sizeof(elevator_name)); 1069 e = elevator_get(strstrip(elevator_name), true); 1070 if (!e) 1071 return -EINVAL; 1072 1073 if (q->elevator && 1074 !strcmp(elevator_name, q->elevator->type->elevator_name)) { 1075 elevator_put(e); 1076 return 0; 1077 } 1078 1079 if (!e->uses_mq && q->mq_ops) { 1080 elevator_put(e); 1081 return -EINVAL; 1082 } 1083 if (e->uses_mq && !q->mq_ops) { 1084 elevator_put(e); 1085 return -EINVAL; 1086 } 1087 1088 return elevator_switch(q, e); 1089 } 1090 1091 static inline bool elv_support_iosched(struct request_queue *q) 1092 { 1093 if (q->mq_ops && q->tag_set && (q->tag_set->flags & 1094 BLK_MQ_F_NO_SCHED)) 1095 return false; 1096 return true; 1097 } 1098 1099 ssize_t elv_iosched_store(struct request_queue *q, const char *name, 1100 size_t count) 1101 { 1102 int ret; 1103 1104 if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q)) 1105 return count; 1106 1107 ret = __elevator_change(q, name); 1108 if (!ret) 1109 return count; 1110 1111 return ret; 1112 } 1113 1114 ssize_t elv_iosched_show(struct request_queue *q, char *name) 1115 { 1116 struct elevator_queue *e = q->elevator; 1117 struct elevator_type *elv = NULL; 1118 struct elevator_type *__e; 1119 int len = 0; 1120 1121 if (!blk_queue_stackable(q)) 1122 return sprintf(name, "none\n"); 1123 1124 if (!q->elevator) 1125 len += sprintf(name+len, "[none] "); 1126 else 1127 elv = e->type; 1128 1129 spin_lock(&elv_list_lock); 1130 list_for_each_entry(__e, &elv_list, list) { 1131 if (elv && 
!strcmp(elv->elevator_name, __e->elevator_name)) { 1132 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1133 continue; 1134 } 1135 if (__e->uses_mq && q->mq_ops && elv_support_iosched(q)) 1136 len += sprintf(name+len, "%s ", __e->elevator_name); 1137 else if (!__e->uses_mq && !q->mq_ops) 1138 len += sprintf(name+len, "%s ", __e->elevator_name); 1139 } 1140 spin_unlock(&elv_list_lock); 1141 1142 if (q->mq_ops && q->elevator) 1143 len += sprintf(name+len, "none"); 1144 1145 len += sprintf(len+name, "\n"); 1146 return len; 1147 } 1148 1149 struct request *elv_rb_former_request(struct request_queue *q, 1150 struct request *rq) 1151 { 1152 struct rb_node *rbprev = rb_prev(&rq->rb_node); 1153 1154 if (rbprev) 1155 return rb_entry_rq(rbprev); 1156 1157 return NULL; 1158 } 1159 EXPORT_SYMBOL(elv_rb_former_request); 1160 1161 struct request *elv_rb_latter_request(struct request_queue *q, 1162 struct request *rq) 1163 { 1164 struct rb_node *rbnext = rb_next(&rq->rb_node); 1165 1166 if (rbnext) 1167 return rb_entry_rq(rbnext); 1168 1169 return NULL; 1170 } 1171 EXPORT_SYMBOL(elv_rb_latter_request); 1172