/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	\
		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	elevator_t *e = q->elevator;

	if (e->ops->elevator_allow_merge_fn)
		return e->ops->elevator_allow_merge_fn(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * must be same device and not a special request
	 */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
		return 0;

	if (!elv_iosched_allow_merge(rq, bio))
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e) {
		char elv[ELV_NAME_MAX + strlen("-iosched")];

		spin_unlock(&elv_list_lock);

		if (!strcmp(name, "anticipatory"))
			sprintf(elv, "as-iosched");
		else
			sprintf(elv, "%s-iosched", name);

		request_module(elv);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(struct request_queue *q,
				 struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
			   void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

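/*
 * Handle the "elevator=" boot parameter: remember which scheduler the
 * user asked for so elevator_init() can pick it up later.
 */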
static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	elevator_t *eq;
	int i;

	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
	if (unlikely(!eq))
		goto err;

	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
					GFP_KERNEL, q->node);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);

	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

int elevator_init(struct request_queue *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name) {
		e = elevator_get(name);
		if (!e)
			return -EINVAL;
	}

	if (!e && *chosen_elevator) {
		e = elevator_get(chosen_elevator);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
							chosen_elevator);
	}

	if (!e) {
		e = elevator_get(CONFIG_DEFAULT_IOSCHED);
		if (!e) {
			printk(KERN_ERR
				"Default I/O scheduler not found. " \
				"Using noop.\n");
			e = elevator_get("noop");
		}
	}

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static void elv_activate_rq(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_activate_req_fn)
		e->ops->elevator_activate_req_fn(q, rq);
}

static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_deactivate_req_fn)
		e->ops->elevator_deactivate_req_fn(q, rq);
}

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	elevator_t *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;
	int stop_flags;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->cmd_flags & stop_flags)
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

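/*
 * Find a request that bio may be merged with: try the one-hit
 * last_merge cache first, then the back merge hash, and finally the
 * io scheduler's own merge hook.  The candidate is returned in *req.
 */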
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			     struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq))
			elv_deactivate_rq(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(struct request_queue *q)
{
	static int printed;
	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

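/*
 * Insert rq into the dispatch queue or hand it to the io scheduler,
 * depending on where.  Queue lock must be held on entry.
 */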
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and is returning them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in a hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		/*
		 * Most requeues happen because of a busy condition,
		 * don't force unplug of the queue for that case.
		 */
		unplug_it = 0;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

void __elv_add_request(struct request_queue *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
		    where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(struct request_queue *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}

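/*
 * Return the next request for the driver to process: discard empty
 * barriers, notify the io scheduler the first time a request is seen,
 * and run the queue's prep function.  Returns NULL if nothing is ready.
 */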
struct request *elv_next_request(struct request_queue *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		/*
		 * Kill the empty barrier place holder, the driver must
		 * not ever see it.
		 */
		if (blk_empty_barrier(rq)) {
			end_queued_request(rq, 1);
			continue;
		}
		if (!(rq->cmd_flags & REQ_STARTED)) {
			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq))
				elv_activate_rq(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if (rq->cmd_flags & REQ_DONTPREP)
			break;

		if (q->dma_drain_size && rq->data_len) {
			/*
			 * make sure space for the drain appears.  We
			 * know we can do this because max_hw_segments
			 * has been adjusted to be one fewer than the
			 * device can handle
			 */
			rq->nr_phys_segments++;
			rq->nr_hw_segments++;
		}

		if (!q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			if (q->dma_drain_size && rq->data_len &&
			    !(rq->cmd_flags & REQ_DONTPREP)) {
				/*
				 * remove the space for the drain we added
				 * so that we don't add it again
				 */
				--rq->nr_phys_segments;
				--rq->nr_hw_segments;
			}

			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			rq->cmd_flags |= REQ_QUIET;
			end_queued_request(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
			       ret);
			break;
		}
	}

	return rq;
}
EXPORT_SYMBOL(elv_next_request);

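/*
 * Called when the driver takes rq off the dispatch list to start
 * processing it.
 */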
void elv_dequeue_request(struct request_queue *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and when it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}
EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(struct request_queue *q)
{
	elevator_t *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}
EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, int rw)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}

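/*
 * sysfs interface: expose the io scheduler's tunables under
 * /sys/block/<disk>/queue/iosched/.
 */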
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->show)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->store)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

void elv_register(struct elevator_type *e)
{
	char *def = "";

	spin_lock(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
		def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
								def);
}
EXPORT_SYMBOL_GPL(elv_register);

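/*
 * Unregister an io scheduler: drop its per-task io_context state via
 * the trim hook, then remove it from the global list of elevators.
 */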
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the process to remove the io contexts.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n",
							elevator_name);
	return count;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct elevator_type *__e;
	int len = 0;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);