xref: /openbmc/linux/block/elevator.c (revision 64c70b1c)
1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@kernel.dk> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - lose the bi_dev comparisons, partition handling is correct now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/delay.h>
35 #include <linux/blktrace_api.h>
36 #include <linux/hash.h>
37 
38 #include <asm/uaccess.h>
39 
40 static DEFINE_SPINLOCK(elv_list_lock);
41 static LIST_HEAD(elv_list);
42 
43 /*
44  * Merge hash stuff.
45  */
46 static const int elv_hash_shift = 6;
47 #define ELV_HASH_BLOCK(sec)	((sec) >> 3)
48 #define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49 #define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
50 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
51 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
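
/*
 * Worked example (illustrative, the numbers are hypothetical): a request
 * covering sectors 2048..2055 has rq_hash_key() == 2056, i.e. the sector
 * just past its end.  ELV_HASH_BLOCK(2056) == 257, and ELV_HASH_FN() folds
 * that into one of the ELV_HASH_ENTRIES == 64 buckets.  A bio starting at
 * sector 2056 looks up the same key in elv_rqhash_find() below and is thus
 * a back merge candidate for this request.
 */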
52 
53 /*
54  * Query the io scheduler to see if a bio issued by the current process
55  * may be merged with rq.
56  */
57 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
58 {
59 	request_queue_t *q = rq->q;
60 	elevator_t *e = q->elevator;
61 
62 	if (e->ops->elevator_allow_merge_fn)
63 		return e->ops->elevator_allow_merge_fn(q, rq, bio);
64 
65 	return 1;
66 }
67 
68 /*
69  * can we safely merge with this request?
70  */
71 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
72 {
73 	if (!rq_mergeable(rq))
74 		return 0;
75 
76 	/*
77 	 * different data direction or already started, don't merge
78 	 */
79 	if (bio_data_dir(bio) != rq_data_dir(rq))
80 		return 0;
81 
82 	/*
83 	 * must be same device and not a special request
84 	 */
85 	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
86 		return 0;
87 
88 	if (!elv_iosched_allow_merge(rq, bio))
89 		return 0;
90 
91 	return 1;
92 }
93 EXPORT_SYMBOL(elv_rq_merge_ok);
94 
95 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
96 {
97 	int ret = ELEVATOR_NO_MERGE;
98 
99 	/*
100 	 * if a merge is allowed, check whether bio is adjacent (back/front)
101 	 */
102 	if (elv_rq_merge_ok(__rq, bio)) {
103 		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
104 			ret = ELEVATOR_BACK_MERGE;
105 		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
106 			ret = ELEVATOR_FRONT_MERGE;
107 	}
108 
109 	return ret;
110 }
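
/*
 * Illustrative example (hypothetical numbers): for a request covering
 * sectors 100..107 (sector == 100, nr_sectors == 8), a bio starting at
 * sector 108 matches the first test and is a back merge, while an 8-sector
 * bio starting at sector 92 matches the second test and is a front merge.
 * Any other bio position yields ELEVATOR_NO_MERGE here, though the io
 * scheduler may still find a merge candidate via elevator_merge_fn.
 */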
111 
112 static struct elevator_type *elevator_find(const char *name)
113 {
114 	struct elevator_type *e;
115 
116 	list_for_each_entry(e, &elv_list, list) {
117 		if (!strcmp(e->elevator_name, name))
118 			return e;
119 	}
120 
121 	return NULL;
122 }
123 
124 static void elevator_put(struct elevator_type *e)
125 {
126 	module_put(e->elevator_owner);
127 }
128 
129 static struct elevator_type *elevator_get(const char *name)
130 {
131 	struct elevator_type *e;
132 
133 	spin_lock(&elv_list_lock);
134 
135 	e = elevator_find(name);
136 	if (e && !try_module_get(e->elevator_owner))
137 		e = NULL;
138 
139 	spin_unlock(&elv_list_lock);
140 
141 	return e;
142 }
143 
144 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
145 {
146 	return eq->ops->elevator_init_fn(q);
147 }
148 
149 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
150 			   void *data)
151 {
152 	q->elevator = eq;
153 	eq->elevator_data = data;
154 }
155 
156 static char chosen_elevator[16];
157 
158 static int __init elevator_setup(char *str)
159 {
160 	/*
161 	 * Be backwards-compatible with previous kernels, so users
162 	 * won't get the wrong elevator.
163 	 */
164 	if (!strcmp(str, "as"))
165 		strcpy(chosen_elevator, "anticipatory");
166 	else
167 		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
168 	return 1;
169 }
170 
171 __setup("elevator=", elevator_setup);
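
/*
 * Example usage on the kernel command line (the name is only honoured if
 * the corresponding scheduler is available by the time the queue is set up):
 *
 *	elevator=deadline	pick the deadline io scheduler at boot
 *	elevator=as		legacy spelling, mapped to "anticipatory" above
 *
 * If the chosen scheduler cannot be found, elevator_init() below falls back
 * to CONFIG_DEFAULT_IOSCHED and finally to "noop".
 */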
172 
173 static struct kobj_type elv_ktype;
174 
175 static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
176 {
177 	elevator_t *eq;
178 	int i;
179 
180 	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
181 	if (unlikely(!eq))
182 		goto err;
183 
184 	memset(eq, 0, sizeof(*eq));
185 	eq->ops = &e->ops;
186 	eq->elevator_type = e;
187 	kobject_init(&eq->kobj);
188 	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
189 	eq->kobj.ktype = &elv_ktype;
190 	mutex_init(&eq->sysfs_lock);
191 
192 	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
193 					GFP_KERNEL, q->node);
194 	if (!eq->hash)
195 		goto err;
196 
197 	for (i = 0; i < ELV_HASH_ENTRIES; i++)
198 		INIT_HLIST_HEAD(&eq->hash[i]);
199 
200 	return eq;
201 err:
202 	kfree(eq);
203 	elevator_put(e);
204 	return NULL;
205 }
206 
207 static void elevator_release(struct kobject *kobj)
208 {
209 	elevator_t *e = container_of(kobj, elevator_t, kobj);
210 
211 	elevator_put(e->elevator_type);
212 	kfree(e->hash);
213 	kfree(e);
214 }
215 
216 int elevator_init(request_queue_t *q, char *name)
217 {
218 	struct elevator_type *e = NULL;
219 	struct elevator_queue *eq;
220 	int ret = 0;
221 	void *data;
222 
223 	INIT_LIST_HEAD(&q->queue_head);
224 	q->last_merge = NULL;
225 	q->end_sector = 0;
226 	q->boundary_rq = NULL;
227 
228 	if (name && !(e = elevator_get(name)))
229 		return -EINVAL;
230 
231 	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
232 		printk("I/O scheduler %s not found\n", chosen_elevator);
233 
234 	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
235 		printk("Default I/O scheduler not found, using no-op\n");
236 		e = elevator_get("noop");
237 	}
238 
239 	eq = elevator_alloc(q, e);
240 	if (!eq)
241 		return -ENOMEM;
242 
243 	data = elevator_init_queue(q, eq);
244 	if (!data) {
245 		kobject_put(&eq->kobj);
246 		return -ENOMEM;
247 	}
248 
249 	elevator_attach(q, eq, data);
250 	return ret;
251 }
252 
253 EXPORT_SYMBOL(elevator_init);
254 
255 void elevator_exit(elevator_t *e)
256 {
257 	mutex_lock(&e->sysfs_lock);
258 	if (e->ops->elevator_exit_fn)
259 		e->ops->elevator_exit_fn(e);
260 	e->ops = NULL;
261 	mutex_unlock(&e->sysfs_lock);
262 
263 	kobject_put(&e->kobj);
264 }
265 
266 EXPORT_SYMBOL(elevator_exit);
267 
268 static void elv_activate_rq(request_queue_t *q, struct request *rq)
269 {
270 	elevator_t *e = q->elevator;
271 
272 	if (e->ops->elevator_activate_req_fn)
273 		e->ops->elevator_activate_req_fn(q, rq);
274 }
275 
276 static void elv_deactivate_rq(request_queue_t *q, struct request *rq)
277 {
278 	elevator_t *e = q->elevator;
279 
280 	if (e->ops->elevator_deactivate_req_fn)
281 		e->ops->elevator_deactivate_req_fn(q, rq);
282 }
283 
284 static inline void __elv_rqhash_del(struct request *rq)
285 {
286 	hlist_del_init(&rq->hash);
287 }
288 
289 static void elv_rqhash_del(request_queue_t *q, struct request *rq)
290 {
291 	if (ELV_ON_HASH(rq))
292 		__elv_rqhash_del(rq);
293 }
294 
295 static void elv_rqhash_add(request_queue_t *q, struct request *rq)
296 {
297 	elevator_t *e = q->elevator;
298 
299 	BUG_ON(ELV_ON_HASH(rq));
300 	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
301 }
302 
303 static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
304 {
305 	__elv_rqhash_del(rq);
306 	elv_rqhash_add(q, rq);
307 }
308 
309 static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
310 {
311 	elevator_t *e = q->elevator;
312 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
313 	struct hlist_node *entry, *next;
314 	struct request *rq;
315 
316 	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
317 		BUG_ON(!ELV_ON_HASH(rq));
318 
319 		if (unlikely(!rq_mergeable(rq))) {
320 			__elv_rqhash_del(rq);
321 			continue;
322 		}
323 
324 		if (rq_hash_key(rq) == offset)
325 			return rq;
326 	}
327 
328 	return NULL;
329 }
330 
331 /*
332  * RB-tree support functions for insertion/lookup/removal of requests
333  * in a sorted RB tree.
334  */
335 struct request *elv_rb_add(struct rb_root *root, struct request *rq)
336 {
337 	struct rb_node **p = &root->rb_node;
338 	struct rb_node *parent = NULL;
339 	struct request *__rq;
340 
341 	while (*p) {
342 		parent = *p;
343 		__rq = rb_entry(parent, struct request, rb_node);
344 
345 		if (rq->sector < __rq->sector)
346 			p = &(*p)->rb_left;
347 		else if (rq->sector > __rq->sector)
348 			p = &(*p)->rb_right;
349 		else
350 			return __rq;
351 	}
352 
353 	rb_link_node(&rq->rb_node, parent, p);
354 	rb_insert_color(&rq->rb_node, root);
355 	return NULL;
356 }
357 
358 EXPORT_SYMBOL(elv_rb_add);
359 
360 void elv_rb_del(struct rb_root *root, struct request *rq)
361 {
362 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
363 	rb_erase(&rq->rb_node, root);
364 	RB_CLEAR_NODE(&rq->rb_node);
365 }
366 
367 EXPORT_SYMBOL(elv_rb_del);
368 
369 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
370 {
371 	struct rb_node *n = root->rb_node;
372 	struct request *rq;
373 
374 	while (n) {
375 		rq = rb_entry(n, struct request, rb_node);
376 
377 		if (sector < rq->sector)
378 			n = n->rb_left;
379 		else if (sector > rq->sector)
380 			n = n->rb_right;
381 		else
382 			return rq;
383 	}
384 
385 	return NULL;
386 }
387 
388 EXPORT_SYMBOL(elv_rb_find);
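
/*
 * Usage sketch (hypothetical scheduler code, not part of this file): an io
 * scheduler typically keeps one rb_root per data direction and drives these
 * helpers from its elevator_ops callbacks, along the lines of
 *
 *	static void example_add_request(request_queue_t *q, struct request *rq)
 *	{
 *		struct example_data *ed = q->elevator->elevator_data;
 *
 *		elv_rb_add(&ed->sort_list[rq_data_dir(rq)], rq);
 *	}
 *
 * A real scheduler must handle the request elv_rb_add() returns when an
 * entry at the same sector is already queued; the sketch ignores it.
 * elv_rb_find() is then used from elevator_merge_fn to locate a front merge
 * candidate at bio->bi_sector + bio_sectors(bio), and a request whose start
 * sector changed after a front merge is repositioned with elv_rb_del()
 * followed by elv_rb_add().
 */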
389 
390 /*
391  * Insert rq into dispatch queue of q.  Queue lock must be held on
392  * entry.  rq is sort-inserted into the dispatch queue. To be used by
393  * specific elevators.
394  */
395 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
396 {
397 	sector_t boundary;
398 	struct list_head *entry;
399 
400 	if (q->last_merge == rq)
401 		q->last_merge = NULL;
402 
403 	elv_rqhash_del(q, rq);
404 
405 	q->nr_sorted--;
406 
407 	boundary = q->end_sector;
408 
409 	list_for_each_prev(entry, &q->queue_head) {
410 		struct request *pos = list_entry_rq(entry);
411 
412 		if (rq_data_dir(rq) != rq_data_dir(pos))
413 			break;
414 		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
415 			break;
416 		if (rq->sector >= boundary) {
417 			if (pos->sector < boundary)
418 				continue;
419 		} else {
420 			if (pos->sector >= boundary)
421 				break;
422 		}
423 		if (rq->sector >= pos->sector)
424 			break;
425 	}
426 
427 	list_add(&rq->queuelist, entry);
428 }
429 
430 EXPORT_SYMBOL(elv_dispatch_sort);
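
/*
 * Worked example (hypothetical sectors, boundary at 1000): with a dispatch
 * queue holding requests at sectors 1200, 1400, 200, 600 (head to tail), a
 * new request at 1300 is inserted between 1200 and 1400, while one at 400
 * lands between 200 and 600.  Requests at or above the boundary stay sorted
 * ahead of the wrapped-around ones below it, preserving one-way head
 * movement.
 */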
431 
432 /*
433  * Insert rq into dispatch queue of q.  Queue lock must be held on
434  * entry.  rq is added to the back of the dispatch queue. To be used by
435  * specific elevators.
436  */
437 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
438 {
439 	if (q->last_merge == rq)
440 		q->last_merge = NULL;
441 
442 	elv_rqhash_del(q, rq);
443 
444 	q->nr_sorted--;
445 
446 	q->end_sector = rq_end_sector(rq);
447 	q->boundary_rq = rq;
448 	list_add_tail(&rq->queuelist, &q->queue_head);
449 }
450 
451 EXPORT_SYMBOL(elv_dispatch_add_tail);
452 
453 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
454 {
455 	elevator_t *e = q->elevator;
456 	struct request *__rq;
457 	int ret;
458 
459 	/*
460 	 * First try one-hit cache.
461 	 */
462 	if (q->last_merge) {
463 		ret = elv_try_merge(q->last_merge, bio);
464 		if (ret != ELEVATOR_NO_MERGE) {
465 			*req = q->last_merge;
466 			return ret;
467 		}
468 	}
469 
470 	/*
471 	 * See if our hash lookup can find a potential backmerge.
472 	 */
473 	__rq = elv_rqhash_find(q, bio->bi_sector);
474 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
475 		*req = __rq;
476 		return ELEVATOR_BACK_MERGE;
477 	}
478 
479 	if (e->ops->elevator_merge_fn)
480 		return e->ops->elevator_merge_fn(q, req, bio);
481 
482 	return ELEVATOR_NO_MERGE;
483 }
484 
485 void elv_merged_request(request_queue_t *q, struct request *rq, int type)
486 {
487 	elevator_t *e = q->elevator;
488 
489 	if (e->ops->elevator_merged_fn)
490 		e->ops->elevator_merged_fn(q, rq, type);
491 
492 	if (type == ELEVATOR_BACK_MERGE)
493 		elv_rqhash_reposition(q, rq);
494 
495 	q->last_merge = rq;
496 }
497 
498 void elv_merge_requests(request_queue_t *q, struct request *rq,
499 			     struct request *next)
500 {
501 	elevator_t *e = q->elevator;
502 
503 	if (e->ops->elevator_merge_req_fn)
504 		e->ops->elevator_merge_req_fn(q, rq, next);
505 
506 	elv_rqhash_reposition(q, rq);
507 	elv_rqhash_del(q, next);
508 
509 	q->nr_sorted--;
510 	q->last_merge = rq;
511 }
512 
513 void elv_requeue_request(request_queue_t *q, struct request *rq)
514 {
515 	/*
516 	 * it already went through dequeue, we need to decrement the
517 	 * in_flight count again
518 	 */
519 	if (blk_account_rq(rq)) {
520 		q->in_flight--;
521 		if (blk_sorted_rq(rq))
522 			elv_deactivate_rq(q, rq);
523 	}
524 
525 	rq->cmd_flags &= ~REQ_STARTED;
526 
527 	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
528 }
529 
530 static void elv_drain_elevator(request_queue_t *q)
531 {
532 	static int printed;
533 	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
534 		;
535 	if (q->nr_sorted == 0)
536 		return;
537 	if (printed++ < 10) {
538 		printk(KERN_ERR "%s: forced dispatching is broken "
539 		       "(nr_sorted=%u), please report this\n",
540 		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
541 	}
542 }
543 
544 void elv_insert(request_queue_t *q, struct request *rq, int where)
545 {
546 	struct list_head *pos;
547 	unsigned ordseq;
548 	int unplug_it = 1;
549 
550 	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
551 
552 	rq->q = q;
553 
554 	switch (where) {
555 	case ELEVATOR_INSERT_FRONT:
556 		rq->cmd_flags |= REQ_SOFTBARRIER;
557 
558 		list_add(&rq->queuelist, &q->queue_head);
559 		break;
560 
561 	case ELEVATOR_INSERT_BACK:
562 		rq->cmd_flags |= REQ_SOFTBARRIER;
563 		elv_drain_elevator(q);
564 		list_add_tail(&rq->queuelist, &q->queue_head);
565 		/*
566 		 * We kick the queue here for the following reasons.
567 		 * - The elevator might have returned NULL previously
568 		 *   to delay requests and is returning them now.  As the
569 		 *   queue wasn't empty before this request, ll_rw_blk
570 		 *   won't run the queue on return, resulting in a hang.
571 		 * - Usually, back-inserted requests won't be merged
572 		 *   with anything.  There's no point in delaying queue
573 		 *   processing.
574 		 */
575 		blk_remove_plug(q);
576 		q->request_fn(q);
577 		break;
578 
579 	case ELEVATOR_INSERT_SORT:
580 		BUG_ON(!blk_fs_request(rq));
581 		rq->cmd_flags |= REQ_SORTED;
582 		q->nr_sorted++;
583 		if (rq_mergeable(rq)) {
584 			elv_rqhash_add(q, rq);
585 			if (!q->last_merge)
586 				q->last_merge = rq;
587 		}
588 
589 		/*
590 		 * Some ioscheds (cfq) run q->request_fn directly, so
591 		 * rq cannot be accessed after calling
592 		 * elevator_add_req_fn.
593 		 */
594 		q->elevator->ops->elevator_add_req_fn(q, rq);
595 		break;
596 
597 	case ELEVATOR_INSERT_REQUEUE:
598 		/*
599 		 * If ordered flush isn't in progress, we do front
600 		 * insertion; otherwise, requests should be requeued
601 		 * in ordseq order.
602 		 */
603 		rq->cmd_flags |= REQ_SOFTBARRIER;
604 
605 		/*
606 		 * Most requeues happen because of a busy condition,
607 		 * so don't force an unplug of the queue in that case.
608 		 */
609 		unplug_it = 0;
610 
611 		if (q->ordseq == 0) {
612 			list_add(&rq->queuelist, &q->queue_head);
613 			break;
614 		}
615 
616 		ordseq = blk_ordered_req_seq(rq);
617 
618 		list_for_each(pos, &q->queue_head) {
619 			struct request *pos_rq = list_entry_rq(pos);
620 			if (ordseq <= blk_ordered_req_seq(pos_rq))
621 				break;
622 		}
623 
624 		list_add_tail(&rq->queuelist, pos);
625 		break;
626 
627 	default:
628 		printk(KERN_ERR "%s: bad insertion point %d\n",
629 		       __FUNCTION__, where);
630 		BUG();
631 	}
632 
633 	if (unplug_it && blk_queue_plugged(q)) {
634 		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
635 			- q->in_flight;
636 
637 		if (nrq >= q->unplug_thresh)
638 			__generic_unplug_device(q);
639 	}
640 }
641 
642 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
643 		       int plug)
644 {
645 	if (q->ordcolor)
646 		rq->cmd_flags |= REQ_ORDERED_COLOR;
647 
648 	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
649 		/*
650 		 * toggle ordered color
651 		 */
652 		if (blk_barrier_rq(rq))
653 			q->ordcolor ^= 1;
654 
655 		/*
656 		 * barriers implicitly indicate back insertion
657 		 */
658 		if (where == ELEVATOR_INSERT_SORT)
659 			where = ELEVATOR_INSERT_BACK;
660 
661 		/*
662 		 * this request is a scheduling boundary, update
663 		 * end_sector
664 		 */
665 		if (blk_fs_request(rq)) {
666 			q->end_sector = rq_end_sector(rq);
667 			q->boundary_rq = rq;
668 		}
669 	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
670 		where = ELEVATOR_INSERT_BACK;
671 
672 	if (plug)
673 		blk_plug_device(q);
674 
675 	elv_insert(q, rq, where);
676 }
677 
678 EXPORT_SYMBOL(__elv_add_request);
679 
680 void elv_add_request(request_queue_t *q, struct request *rq, int where,
681 		     int plug)
682 {
683 	unsigned long flags;
684 
685 	spin_lock_irqsave(q->queue_lock, flags);
686 	__elv_add_request(q, rq, where, plug);
687 	spin_unlock_irqrestore(q->queue_lock, flags);
688 }
689 
690 EXPORT_SYMBOL(elv_add_request);
691 
692 static inline struct request *__elv_next_request(request_queue_t *q)
693 {
694 	struct request *rq;
695 
696 	while (1) {
697 		while (!list_empty(&q->queue_head)) {
698 			rq = list_entry_rq(q->queue_head.next);
699 			if (blk_do_ordered(q, &rq))
700 				return rq;
701 		}
702 
703 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
704 			return NULL;
705 	}
706 }
707 
708 struct request *elv_next_request(request_queue_t *q)
709 {
710 	struct request *rq;
711 	int ret;
712 
713 	while ((rq = __elv_next_request(q)) != NULL) {
714 		if (!(rq->cmd_flags & REQ_STARTED)) {
715 			/*
716 			 * This is the first time the device driver
717 			 * sees this request (possibly after
718 			 * requeueing).  Notify IO scheduler.
719 			 */
720 			if (blk_sorted_rq(rq))
721 				elv_activate_rq(q, rq);
722 
723 			/*
724 			 * just mark it as started even if we don't start
725 			 * it; a request that has been delayed should
726 			 * not be passed by new incoming requests
727 			 */
728 			rq->cmd_flags |= REQ_STARTED;
729 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
730 		}
731 
732 		if (!q->boundary_rq || q->boundary_rq == rq) {
733 			q->end_sector = rq_end_sector(rq);
734 			q->boundary_rq = NULL;
735 		}
736 
737 		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
738 			break;
739 
740 		ret = q->prep_rq_fn(q, rq);
741 		if (ret == BLKPREP_OK) {
742 			break;
743 		} else if (ret == BLKPREP_DEFER) {
744 			/*
745 			 * the request may have been (partially) prepped.
746 			 * we need to keep this request in the front to
747 			 * avoid resource deadlock.  REQ_STARTED will
748 			 * prevent other fs requests from passing this one.
749 			 */
750 			rq = NULL;
751 			break;
752 		} else if (ret == BLKPREP_KILL) {
753 			int nr_bytes = rq->hard_nr_sectors << 9;
754 
755 			if (!nr_bytes)
756 				nr_bytes = rq->data_len;
757 
758 			blkdev_dequeue_request(rq);
759 			rq->cmd_flags |= REQ_QUIET;
760 			end_that_request_chunk(rq, 0, nr_bytes);
761 			end_that_request_last(rq, 0);
762 		} else {
763 			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
764 								ret);
765 			break;
766 		}
767 	}
768 
769 	return rq;
770 }
771 
772 EXPORT_SYMBOL(elv_next_request);
773 
774 void elv_dequeue_request(request_queue_t *q, struct request *rq)
775 {
776 	BUG_ON(list_empty(&rq->queuelist));
777 	BUG_ON(ELV_ON_HASH(rq));
778 
779 	list_del_init(&rq->queuelist);
780 
781 	/*
782 	 * the time frame between a request being removed from the lists
783 	 * and when it is freed is accounted as io that is in progress at
784 	 * the driver side.
785 	 */
786 	if (blk_account_rq(rq))
787 		q->in_flight++;
788 }
789 
790 EXPORT_SYMBOL(elv_dequeue_request);
791 
792 int elv_queue_empty(request_queue_t *q)
793 {
794 	elevator_t *e = q->elevator;
795 
796 	if (!list_empty(&q->queue_head))
797 		return 0;
798 
799 	if (e->ops->elevator_queue_empty_fn)
800 		return e->ops->elevator_queue_empty_fn(q);
801 
802 	return 1;
803 }
804 
805 EXPORT_SYMBOL(elv_queue_empty);
806 
807 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
808 {
809 	elevator_t *e = q->elevator;
810 
811 	if (e->ops->elevator_latter_req_fn)
812 		return e->ops->elevator_latter_req_fn(q, rq);
813 	return NULL;
814 }
815 
816 struct request *elv_former_request(request_queue_t *q, struct request *rq)
817 {
818 	elevator_t *e = q->elevator;
819 
820 	if (e->ops->elevator_former_req_fn)
821 		return e->ops->elevator_former_req_fn(q, rq);
822 	return NULL;
823 }
824 
825 int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
826 {
827 	elevator_t *e = q->elevator;
828 
829 	if (e->ops->elevator_set_req_fn)
830 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
831 
832 	rq->elevator_private = NULL;
833 	return 0;
834 }
835 
836 void elv_put_request(request_queue_t *q, struct request *rq)
837 {
838 	elevator_t *e = q->elevator;
839 
840 	if (e->ops->elevator_put_req_fn)
841 		e->ops->elevator_put_req_fn(rq);
842 }
843 
844 int elv_may_queue(request_queue_t *q, int rw)
845 {
846 	elevator_t *e = q->elevator;
847 
848 	if (e->ops->elevator_may_queue_fn)
849 		return e->ops->elevator_may_queue_fn(q, rw);
850 
851 	return ELV_MQUEUE_MAY;
852 }
853 
854 void elv_completed_request(request_queue_t *q, struct request *rq)
855 {
856 	elevator_t *e = q->elevator;
857 
858 	/*
859 	 * the request has been released by the driver, so its io must be done
860 	 */
861 	if (blk_account_rq(rq)) {
862 		q->in_flight--;
863 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
864 			e->ops->elevator_completed_req_fn(q, rq);
865 	}
866 
867 	/*
868 	 * Check if the queue is waiting for fs requests to be
869 	 * drained for the flush sequence.
870 	 */
871 	if (unlikely(q->ordseq)) {
872 		struct request *first_rq = list_entry_rq(q->queue_head.next);
873 		if (q->in_flight == 0 &&
874 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
875 		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
876 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
877 			q->request_fn(q);
878 		}
879 	}
880 }
881 
882 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
883 
884 static ssize_t
885 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
886 {
887 	elevator_t *e = container_of(kobj, elevator_t, kobj);
888 	struct elv_fs_entry *entry = to_elv(attr);
889 	ssize_t error;
890 
891 	if (!entry->show)
892 		return -EIO;
893 
894 	mutex_lock(&e->sysfs_lock);
895 	error = e->ops ? entry->show(e, page) : -ENOENT;
896 	mutex_unlock(&e->sysfs_lock);
897 	return error;
898 }
899 
900 static ssize_t
901 elv_attr_store(struct kobject *kobj, struct attribute *attr,
902 	       const char *page, size_t length)
903 {
904 	elevator_t *e = container_of(kobj, elevator_t, kobj);
905 	struct elv_fs_entry *entry = to_elv(attr);
906 	ssize_t error;
907 
908 	if (!entry->store)
909 		return -EIO;
910 
911 	mutex_lock(&e->sysfs_lock);
912 	error = e->ops ? entry->store(e, page, length) : -ENOENT;
913 	mutex_unlock(&e->sysfs_lock);
914 	return error;
915 }
916 
917 static struct sysfs_ops elv_sysfs_ops = {
918 	.show	= elv_attr_show,
919 	.store	= elv_attr_store,
920 };
921 
922 static struct kobj_type elv_ktype = {
923 	.sysfs_ops	= &elv_sysfs_ops,
924 	.release	= elevator_release,
925 };
926 
927 int elv_register_queue(struct request_queue *q)
928 {
929 	elevator_t *e = q->elevator;
930 	int error;
931 
932 	e->kobj.parent = &q->kobj;
933 
934 	error = kobject_add(&e->kobj);
935 	if (!error) {
936 		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
937 		if (attr) {
938 			while (attr->attr.name) {
939 				if (sysfs_create_file(&e->kobj, &attr->attr))
940 					break;
941 				attr++;
942 			}
943 		}
944 		kobject_uevent(&e->kobj, KOBJ_ADD);
945 	}
946 	return error;
947 }
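
/*
 * The attribute table walked above comes from the scheduler's struct
 * elevator_type.  A minimal sketch (names invented for illustration):
 *
 *	static struct elv_fs_entry example_attrs[] = {
 *		__ATTR(example_expire, S_IRUGO|S_IWUSR,
 *		       example_expire_show, example_expire_store),
 *		__ATTR_NULL
 *	};
 *
 * Each entry appears as /sys/block/<dev>/queue/iosched/<name>, and reads
 * and writes are routed through elv_attr_show()/elv_attr_store() above
 * under e->sysfs_lock.
 */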
948 
949 static void __elv_unregister_queue(elevator_t *e)
950 {
951 	kobject_uevent(&e->kobj, KOBJ_REMOVE);
952 	kobject_del(&e->kobj);
953 }
954 
955 void elv_unregister_queue(struct request_queue *q)
956 {
957 	if (q)
958 		__elv_unregister_queue(q->elevator);
959 }
960 
961 int elv_register(struct elevator_type *e)
962 {
963 	char *def = "";
964 
965 	spin_lock(&elv_list_lock);
966 	BUG_ON(elevator_find(e->elevator_name));
967 	list_add_tail(&e->list, &elv_list);
968 	spin_unlock(&elv_list_lock);
969 
970 	if (!strcmp(e->elevator_name, chosen_elevator) ||
971 			(!*chosen_elevator &&
972 			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
973 				def = " (default)";
974 
975 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
976 	return 0;
977 }
978 EXPORT_SYMBOL_GPL(elv_register);
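
/*
 * Registration sketch (hypothetical module, identifiers invented here):
 *
 *	static struct elevator_type iosched_example = {
 *		.ops = {
 *			.elevator_merge_fn	= example_merge,
 *			.elevator_dispatch_fn	= example_dispatch,
 *			.elevator_add_req_fn	= example_add_request,
 *			.elevator_init_fn	= example_init_queue,
 *			.elevator_exit_fn	= example_exit_queue,
 *		},
 *		.elevator_name	= "example",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return elv_register(&iosched_example);
 *	}
 *
 * Once registered, the scheduler can be selected with "elevator=example" at
 * boot or through the sysfs interface served by elv_iosched_store() below.
 */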
979 
980 void elv_unregister(struct elevator_type *e)
981 {
982 	struct task_struct *g, *p;
983 
984 	/*
985 	 * Iterate over every thread in the system and trim its io context.
986 	 */
987 	if (e->ops.trim) {
988 		read_lock(&tasklist_lock);
989 		do_each_thread(g, p) {
990 			task_lock(p);
991 			if (p->io_context)
992 				e->ops.trim(p->io_context);
993 			task_unlock(p);
994 		} while_each_thread(g, p);
995 		read_unlock(&tasklist_lock);
996 	}
997 
998 	spin_lock(&elv_list_lock);
999 	list_del_init(&e->list);
1000 	spin_unlock(&elv_list_lock);
1001 }
1002 EXPORT_SYMBOL_GPL(elv_unregister);
1003 
1004 /*
1005  * Switch to the new_e io scheduler.  Be careful not to introduce
1006  * deadlocks - we don't free the old io scheduler before we have allocated
1007  * what we need for the new one; this way we have a chance of going back
1008  * to the old one if the new one fails init for some reason.
1009  */
1010 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
1011 {
1012 	elevator_t *old_elevator, *e;
1013 	void *data;
1014 
1015 	/*
1016 	 * Allocate new elevator
1017 	 */
1018 	e = elevator_alloc(q, new_e);
1019 	if (!e)
1020 		return 0;
1021 
1022 	data = elevator_init_queue(q, e);
1023 	if (!data) {
1024 		kobject_put(&e->kobj);
1025 		return 0;
1026 	}
1027 
1028 	/*
1029 	 * Turn on BYPASS and drain all requests w/ elevator private data
1030 	 */
1031 	spin_lock_irq(q->queue_lock);
1032 
1033 	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1034 
1035 	elv_drain_elevator(q);
1036 
1037 	while (q->rq.elvpriv) {
1038 		blk_remove_plug(q);
1039 		q->request_fn(q);
1040 		spin_unlock_irq(q->queue_lock);
1041 		msleep(10);
1042 		spin_lock_irq(q->queue_lock);
1043 		elv_drain_elevator(q);
1044 	}
1045 
1046 	/*
1047 	 * Remember old elevator.
1048 	 */
1049 	old_elevator = q->elevator;
1050 
1051 	/*
1052 	 * attach and start new elevator
1053 	 */
1054 	elevator_attach(q, e, data);
1055 
1056 	spin_unlock_irq(q->queue_lock);
1057 
1058 	__elv_unregister_queue(old_elevator);
1059 
1060 	if (elv_register_queue(q))
1061 		goto fail_register;
1062 
1063 	/*
1064 	 * finally exit old elevator and turn off BYPASS.
1065 	 */
1066 	elevator_exit(old_elevator);
1067 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1068 	return 1;
1069 
1070 fail_register:
1071 	/*
1072 	 * switch failed, exit the new io scheduler and reattach the old
1073 	 * one again (along with re-adding the sysfs dir)
1074 	 */
1075 	elevator_exit(e);
1076 	q->elevator = old_elevator;
1077 	elv_register_queue(q);
1078 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1079 	return 0;
1080 }
1081 
1082 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
1083 {
1084 	char elevator_name[ELV_NAME_MAX];
1085 	size_t len;
1086 	struct elevator_type *e;
1087 
1088 	elevator_name[sizeof(elevator_name) - 1] = '\0';
1089 	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
1090 	len = strlen(elevator_name);
1091 
1092 	if (len && elevator_name[len - 1] == '\n')
1093 		elevator_name[len - 1] = '\0';
1094 
1095 	e = elevator_get(elevator_name);
1096 	if (!e) {
1097 		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
1098 		return -EINVAL;
1099 	}
1100 
1101 	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
1102 		elevator_put(e);
1103 		return count;
1104 	}
1105 
1106 	if (!elevator_switch(q, e))
1107 		printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
1108 	return count;
1109 }
1110 
1111 ssize_t elv_iosched_show(request_queue_t *q, char *name)
1112 {
1113 	elevator_t *e = q->elevator;
1114 	struct elevator_type *elv = e->elevator_type;
1115 	struct elevator_type *__e;
1116 	int len = 0;
1117 
1118 	spin_lock(&elv_list_lock);
1119 	list_for_each_entry(__e, &elv_list, list) {
1120 		if (!strcmp(elv->elevator_name, __e->elevator_name))
1121 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
1122 		else
1123 			len += sprintf(name+len, "%s ", __e->elevator_name);
1124 	}
1125 	spin_unlock(&elv_list_lock);
1126 
1127 	len += sprintf(len+name, "\n");
1128 	return len;
1129 }
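
/*
 * From user space these two functions back the per-queue "scheduler"
 * attribute, e.g. (illustrative shell session, device name hypothetical):
 *
 *	# cat /sys/block/sda/queue/scheduler
 *	noop anticipatory deadline [cfq]
 *	# echo deadline > /sys/block/sda/queue/scheduler
 *
 * A read lists every registered scheduler with the active one in brackets;
 * a write looks the name up and calls elevator_switch() above.
 */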
1130 
1131 struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
1132 {
1133 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
1134 
1135 	if (rbprev)
1136 		return rb_entry_rq(rbprev);
1137 
1138 	return NULL;
1139 }
1140 
1141 EXPORT_SYMBOL(elv_rb_former_request);
1142 
1143 struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
1144 {
1145 	struct rb_node *rbnext = rb_next(&rq->rb_node);
1146 
1147 	if (rbnext)
1148 		return rb_entry_rq(rbnext);
1149 
1150 	return NULL;
1151 }
1152 
1153 EXPORT_SYMBOL(elv_rb_latter_request);
1154