/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

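/*
 * Note: a request is hashed on its end sector (rq_hash_key()), so
 * elv_merge() can look up a back merge candidate for a bio simply by
 * the bio's starting sector.  Only mergeable requests are kept on the
 * hash; elv_rqhash_find() lazily drops any that have become unmergeable.
 */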
/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
        request_queue_t *q = rq->q;
        elevator_t *e = q->elevator;

        if (e->ops->elevator_allow_merge_fn)
                return e->ops->elevator_allow_merge_fn(q, rq, bio);

        return 1;
}

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq))
                return 0;

        /*
         * different data direction or already started, don't merge
         */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return 0;

        /*
         * must be same device and not a special request
         */
        if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
                return 0;

        if (!elv_iosched_allow_merge(rq, bio))
                return 0;

        return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

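/*
 * Classify the merge: a back merge appends the bio to __rq (the bio
 * starts exactly where __rq ends), a front merge prepends it (the bio
 * ends exactly where __rq starts).
 */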
static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
        int ret = ELEVATOR_NO_MERGE;

        /*
         * we can merge and sequence is ok, check if it's possible
         */
        if (elv_rq_merge_ok(__rq, bio)) {
                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
                        ret = ELEVATOR_BACK_MERGE;
                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
                        ret = ELEVATOR_FRONT_MERGE;
        }

        return ret;
}

static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e;
        struct list_head *entry;

        list_for_each(entry, &elv_list) {

                e = list_entry(entry, struct elevator_type, list);

                if (!strcmp(e->elevator_name, name))
                        return e;
        }

        return NULL;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
        struct elevator_type *e;

        spin_lock(&elv_list_lock);

        e = elevator_find(name);
        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock(&elv_list_lock);

        return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
        return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
                            void *data)
{
        q->elevator = eq;
        eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        if (!strcmp(str, "as"))
                strcpy(chosen_elevator, "anticipatory");
        else
                strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
{
        elevator_t *eq;
        int i;

        eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
        if (unlikely(!eq))
                goto err;

        memset(eq, 0, sizeof(*eq));
        eq->ops = &e->ops;
        eq->elevator_type = e;
        kobject_init(&eq->kobj);
        snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
        eq->kobj.ktype = &elv_ktype;
        mutex_init(&eq->sysfs_lock);

        eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
                                GFP_KERNEL, q->node);
        if (!eq->hash)
                goto err;

        for (i = 0; i < ELV_HASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&eq->hash[i]);

        return eq;
err:
        kfree(eq);
        elevator_put(e);
        return NULL;
}

static void elevator_release(struct kobject *kobj)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);

        elevator_put(e->elevator_type);
        kfree(e->hash);
        kfree(e);
}

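/*
 * Pick and attach an io scheduler for this queue.  The scheduler is
 * chosen in this order: the name the caller passed in, the "elevator="
 * boot parameter, CONFIG_DEFAULT_IOSCHED, and finally "noop" as a last
 * resort.
 */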
int elevator_init(request_queue_t *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;
        void *data;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name && !(e = elevator_get(name)))
                return -EINVAL;

        if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
                printk("I/O scheduler %s not found\n", chosen_elevator);

        if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
                printk("Default I/O scheduler not found, using no-op\n");
                e = elevator_get("noop");
        }

        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        data = elevator_init_queue(q, eq);
        if (!data) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }

        elevator_attach(q, eq, data);
        return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static void elv_activate_rq(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_activate_req_fn)
                e->ops->elevator_activate_req_fn(q, rq);
}

static void elv_deactivate_rq(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_deactivate_req_fn)
                e->ops->elevator_deactivate_req_fn(q, rq);
}

static inline void __elv_rqhash_del(struct request *rq)
{
        hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
        if (ELV_ON_HASH(rq))
                __elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        BUG_ON(ELV_ON_HASH(rq));
        hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
        __elv_rqhash_del(rq);
        elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
        elevator_t *e = q->elevator;
        struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
        struct hlist_node *entry, *next;
        struct request *rq;

        hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
                BUG_ON(!ELV_ON_HASH(rq));

                if (unlikely(!rq_mergeable(rq))) {
                        __elv_rqhash_del(rq);
                        continue;
                }

                if (rq_hash_key(rq) == offset)
                        return rq;
        }

        return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (rq->sector < __rq->sector)
                        p = &(*p)->rb_left;
                else if (rq->sector > __rq->sector)
                        p = &(*p)->rb_right;
                else
                        return __rq;
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
        return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
        BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
        rb_erase(&rq->rb_node, root);
        RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
        struct rb_node *n = root->rb_node;
        struct request *rq;

        while (n) {
                rq = rb_entry(n, struct request, rb_node);

                if (sector < rq->sector)
                        n = n->rb_left;
                else if (sector > rq->sector)
                        n = n->rb_right;
                else
                        return rq;
        }

        return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
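/*
 * The scan below walks the dispatch list from the back: rq is kept in
 * ascending sector order relative to the q->end_sector boundary (a
 * one-way elevator), and is never placed ahead of a request in the
 * other data direction, a barrier, or a request that has already been
 * started.
 */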
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        boundary = q->end_sector;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
                if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
                                continue;
                } else {
                        if (pos->sector >= boundary)
                                break;
                }
                if (rq->sector >= pos->sector)
                        break;
        }

        list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        q->end_sector = rq_end_sector(rq);
        q->boundary_rq = rq;
        list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);

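/*
 * Try to find a request that bio can be merged into.  Three attempts are
 * made in order: the last_merge one-hit cache, a hash lookup on the bio's
 * start sector for a back merge, and finally the io scheduler's own
 * elevator_merge_fn (which may e.g. find a front merge in its sort list).
 */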
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        elevator_t *e = q->elevator;
        struct request *__rq;
        int ret;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        /*
         * See if our hash lookup can find a potential backmerge.
         */
        __rq = elv_rqhash_find(q, bio->bi_sector);
        if (__rq && elv_rq_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_BACK_MERGE;
        }

        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merged_fn)
                e->ops->elevator_merged_fn(q, rq, type);

        if (type == ELEVATOR_BACK_MERGE)
                elv_rqhash_reposition(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
                        struct request *next)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);

        elv_rqhash_reposition(q, rq);
        elv_rqhash_del(q, next);

        q->nr_sorted--;
        q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq))
                        elv_deactivate_rq(q, rq);
        }

        rq->cmd_flags &= ~REQ_STARTED;

        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
        static int printed;
        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted == 0)
                return;
        if (printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->elevator_type->elevator_name, q->nr_sorted);
        }
}

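/*
 * Insertion points: FRONT and BACK go straight onto the dispatch queue
 * (BACK drains the elevator first and then kicks the queue), SORT hands
 * the request to the io scheduler and hashes it for merging, and REQUEUE
 * is a front insertion unless an ordered flush is in progress, in which
 * case the request is placed according to its ordered sequence.
 */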
void elv_insert(request_queue_t *q, struct request *rq, int where)
{
        struct list_head *pos;
        unsigned ordseq;
        int unplug_it = 1;

        blk_add_trace_rq(q, rq, BLK_TA_INSERT);

        rq->q = q;

        switch (where) {
        case ELEVATOR_INSERT_FRONT:
                rq->cmd_flags |= REQ_SOFTBARRIER;

                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->cmd_flags |= REQ_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in a hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                blk_remove_plug(q);
                q->request_fn(q);
                break;

        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
                rq->cmd_flags |= REQ_SORTED;
                q->nr_sorted++;
                if (rq_mergeable(rq)) {
                        elv_rqhash_add(q, rq);
                        if (!q->last_merge)
                                q->last_merge = rq;
                }

                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_REQUEUE:
                /*
                 * If ordered flush isn't in progress, we do front
                 * insertion; otherwise, requests should be requeued
                 * in ordseq order.
                 */
                rq->cmd_flags |= REQ_SOFTBARRIER;

                /*
                 * Most requeues happen because of a busy condition,
                 * don't force unplug of the queue for that case.
                 */
                unplug_it = 0;

                if (q->ordseq == 0) {
                        list_add(&rq->queuelist, &q->queue_head);
                        break;
                }

                ordseq = blk_ordered_req_seq(rq);

                list_for_each(pos, &q->queue_head) {
                        struct request *pos_rq = list_entry_rq(pos);
                        if (ordseq <= blk_ordered_req_seq(pos_rq))
                                break;
                }

                list_add_tail(&rq->queuelist, pos);
                break;

        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __FUNCTION__, where);
                BUG();
        }

        if (unplug_it && blk_queue_plugged(q)) {
                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
                        - q->in_flight;

                if (nrq >= q->unplug_thresh)
                        __generic_unplug_device(q);
        }
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
{
        if (q->ordcolor)
                rq->cmd_flags |= REQ_ORDERED_COLOR;

        if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * toggle ordered color
                 */
                if (blk_barrier_rq(rq))
                        q->ordcolor ^= 1;

                /*
                 * barriers implicitly indicate back insertion
                 */
                if (where == ELEVATOR_INSERT_SORT)
                        where = ELEVATOR_INSERT_BACK;

                /*
                 * this request is a scheduling boundary, update
                 * end_sector
                 */
                if (blk_fs_request(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;

        if (plug)
                blk_plug_device(q);

        elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where, plug);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
        struct request *rq;

        while (1) {
                while (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
                        if (blk_do_ordered(q, &rq))
                                return rq;
                }

                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
        }
}

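/*
 * Return the next request for the driver.  The request is marked
 * REQ_STARTED so later requests cannot pass it, then run through
 * q->prep_rq_fn: BLKPREP_OK hands it out, BLKPREP_DEFER leaves it at
 * the head of the queue and returns NULL, and BLKPREP_KILL completes
 * it with an error and moves on to the next one.
 */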
struct request *elv_next_request(request_queue_t *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->cmd_flags & REQ_STARTED)) {
                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (blk_sorted_rq(rq))
                                elv_activate_rq(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->cmd_flags |= REQ_STARTED;
                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        int nr_bytes = rq->hard_nr_sectors << 9;

                        if (!nr_bytes)
                                nr_bytes = rq->data_len;

                        blkdev_dequeue_request(rq);
                        rq->cmd_flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq, 0);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
                               ret);
                        break;
                }
        }

        return rq;
}

EXPORT_SYMBOL(elv_next_request);

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
        BUG_ON(list_empty(&rq->queuelist));
        BUG_ON(ELV_ON_HASH(rq));

        list_del_init(&rq->queuelist);

        /*
         * the time frame between a request being removed from the lists
         * and when it is freed is accounted as io that is in progress at
         * the driver side.
         */
        if (blk_account_rq(rq))
                q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
        elevator_t *e = q->elevator;

        if (!list_empty(&q->queue_head))
                return 0;

        if (e->ops->elevator_queue_empty_fn)
                return e->ops->elevator_queue_empty_fn(q);

        return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_latter_req_fn)
                return e->ops->elevator_latter_req_fn(q, rq);
        return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_former_req_fn)
                return e->ops->elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

        rq->elevator_private = NULL;
        return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_put_req_fn)
                e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(request_queue_t *q, int rw)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_may_queue_fn)
                return e->ops->elevator_may_queue_fn(q, rw);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }

        /*
         * Check if the queue is waiting for fs requests to be
         * drained for flush sequence.
         */
        if (unlikely(q->ordseq)) {
                struct request *first_rq = list_entry_rq(q->queue_head.next);
                if (q->in_flight == 0 &&
                    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
                    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
                        blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
                        q->request_fn(q);
                }
        }
}

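/*
 * sysfs attribute show/store is serialized against elevator teardown
 * via e->sysfs_lock: elevator_exit() clears e->ops under that lock,
 * and the handlers below return -ENOENT once that has happened.
 */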
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->show)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->store)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
        elevator_t *e = q->elevator;
        int error;

        e->kobj.parent = &q->kobj;

        error = kobject_add(&e->kobj);
        if (!error) {
                struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
        }
        return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
        kobject_uevent(&e->kobj, KOBJ_REMOVE);
        kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
        if (q)
                __elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
        char *def = "";

        spin_lock(&elv_list_lock);
        BUG_ON(elevator_find(e->elevator_name));
        list_add_tail(&e->list, &elv_list);
        spin_unlock(&elv_list_lock);

        if (!strcmp(e->elevator_name, chosen_elevator) ||
            (!*chosen_elevator &&
             !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                def = " (default)";

        printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
        struct task_struct *g, *p;

        /*
         * Iterate every thread in the system to remove the io contexts.
         */
        if (e->ops.trim) {
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
                        task_lock(p);
                        if (p->io_context)
                                e->ops.trim(p->io_context);
                        task_unlock(p);
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }

        spin_lock(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler.  be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one.  this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
        elevator_t *old_elevator, *e;
        void *data;

        /*
         * Allocate new elevator
         */
        e = elevator_alloc(q, new_e);
        if (!e)
                return 0;

        data = elevator_init_queue(q, e);
        if (!data) {
                kobject_put(&e->kobj);
                return 0;
        }

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data
         */
        spin_lock_irq(q->queue_lock);

        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

        elv_drain_elevator(q);

        while (q->rq.elvpriv) {
                blk_remove_plug(q);
                q->request_fn(q);
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
                elv_drain_elevator(q);
        }

        /*
         * Remember old elevator.
         */
        old_elevator = q->elevator;

        /*
         * attach and start new elevator
         */
        elevator_attach(q, e, data);

        spin_unlock_irq(q->queue_lock);

        __elv_unregister_queue(old_elevator);

        if (elv_register_queue(q))
                goto fail_register;

        /*
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 1;

fail_register:
        /*
         * switch failed, exit the new io scheduler and reattach the old
         * one again (along with re-adding the sysfs dir)
         */
        elevator_exit(e);
        q->elevator = old_elevator;
        elv_register_queue(q);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 0;
}

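/*
 * sysfs interface for the per-queue "scheduler" attribute: store switches
 * the queue to the named io scheduler via elevator_switch(), show lists
 * the registered schedulers with the active one in square brackets.
 */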
ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
        char elevator_name[ELV_NAME_MAX];
        size_t len;
        struct elevator_type *e;

        elevator_name[sizeof(elevator_name) - 1] = '\0';
        strncpy(elevator_name, name, sizeof(elevator_name) - 1);
        len = strlen(elevator_name);

        if (len && elevator_name[len - 1] == '\n')
                elevator_name[len - 1] = '\0';

        e = elevator_get(elevator_name);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
                elevator_put(e);
                return count;
        }

        if (!elevator_switch(q, e))
                printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
        return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
        elevator_t *e = q->elevator;
        struct elevator_type *elv = e->elevator_type;
        struct list_head *entry;
        int len = 0;

        spin_lock(&elv_list_lock);
        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);
                if (!strcmp(elv->elevator_name, __e->elevator_name))
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                else
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock(&elv_list_lock);

        len += sprintf(len+name, "\n");
        return len;
}

struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
        struct rb_node *rbprev = rb_prev(&rq->rb_node);

        if (rbprev)
                return rb_entry_rq(rbprev);

        return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
        struct rb_node *rbnext = rb_next(&rq->rb_node);

        if (rbnext)
                return rb_entry_rq(rbnext);

        return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);