/*
 * Block device elevator/IO-scheduler.
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
	request_queue_t *q = rq->q;
	elevator_t *e = q->elevator;

	if (e->ops->elevator_allow_merge_fn)
		return e->ops->elevator_allow_merge_fn(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
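 *
 * A merge is only allowed when rq_mergeable() still holds for the
 * request, the bio and the request move data in the same direction,
 * both target the same gendisk, the request is not a driver "special",
 * and the io scheduler does not veto it via elevator_allow_merge_fn
 * (see elv_iosched_allow_merge() above).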
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * must be same device and not a special request
	 */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
		return 0;

	if (!elv_iosched_allow_merge(rq, bio))
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
			    void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
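	 *
	 * "elevator=<name>" on the kernel command line (see the
	 * __setup() below) selects the default io scheduler by name,
	 * e.g. "elevator=deadline"; the old "as" shorthand is expanded
	 * to "anticipatory" here.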
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
{
	elevator_t *eq;
	int i;

	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		goto err;

	memset(eq, 0, sizeof(*eq));
	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj);
	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
	eq->kobj.ktype = &elv_ktype;
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
				GFP_KERNEL, q->node);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);

	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

int elevator_init(request_queue_t *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name && !(e = elevator_get(name)))
		return -EINVAL;

	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
		printk("I/O scheduler %s not found\n", chosen_elevator);

	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
		printk("Default I/O scheduler not found, using no-op\n");
		e = elevator_get("noop");
	}

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static void elv_activate_rq(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_activate_req_fn)
		e->ops->elevator_activate_req_fn(q, rq);
}

static void elv_deactivate_rq(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_deactivate_req_fn)
		e->ops->elevator_deactivate_req_fn(q, rq);
}

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}
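/*
 * Requests are hashed on the sector they end at (rq_hash_key() above),
 * so looking up a bio's start sector finds back merge candidates.  For
 * example, a request covering sectors 100..107 hashes under key 108,
 * and a bio beginning at sector 108 will find it via elv_rqhash_find()
 * (see the hash lookup in elv_merge() below).
 */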
static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
	elevator_t *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sort inserted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;

	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);

int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
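	 * q->last_merge caches the request most recently involved in a
	 * merge, so for the common case of a sequential stream of bios
	 * we can skip the hash lookup and the io scheduler's own
	 * elevator_merge_fn below.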
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
			struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq))
			elv_deactivate_rq(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
	static int printed;

	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

void elv_insert(request_queue_t *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in a hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
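		 * That is also why the hash and last_merge bookkeeping
		 * above is done before the request is handed over.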
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		/*
		 * Most requeues happen because of a busy condition,
		 * don't force unplug of the queue for that case.
		 */
		unplug_it = 0;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is a scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
		   where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}

struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
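			 * elv_activate_rq() is the counterpart of the
			 * elv_deactivate_rq() call made when a started
			 * request is requeued (see elv_requeue_request()).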
			 */
			if (blk_sorted_rq(rq))
				elv_activate_rq(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			blkdev_dequeue_request(rq);
			rq->cmd_flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
			       ret);
			break;
		}
	}

	return rq;
}

EXPORT_SYMBOL(elv_next_request);

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and when it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(request_queue_t *q, int rw)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}
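/*
 * elv_may_queue() above answers ELV_MQUEUE_MAY when the io scheduler has
 * no elevator_may_queue_fn; a scheduler that implements the hook can
 * return one of the other ELV_MQUEUE_* values (see <linux/elevator.h>)
 * to throttle or force request allocation in the block layer.
 */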
void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->show)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->store)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	e->kobj.parent = &q->kobj;

	error = kobject_add(&e->kobj);
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
	char *def = "";

	spin_lock(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	if (!strcmp(e->elevator_name, chosen_elevator) ||
	    (!*chosen_elevator &&
	     !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
		def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the system to remove the io contexts.
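	 * The trim hook lets the scheduler drop any per-process io_context
	 * state it cached, so nothing is left pointing into the module
	 * once it is unloaded.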
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler.  be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one.  this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
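	 * The old elevator is only torn down now that the new one has been
	 * registered; had elv_register_queue() failed we could still fall
	 * back to it (see fail_register below).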
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
	return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct elevator_type *__e;
	int len = 0;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	len += sprintf(name+len, "\n");
	return len;
}

struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);