xref: /openbmc/linux/block/elevator.c (revision aac5987a)
1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@kernel.dk> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug-in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - lose bi_dev comparisons; partition handling is correct now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hash.h>
36 #include <linux/uaccess.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/blk-cgroup.h>
39 
40 #include <trace/events/block.h>
41 
42 #include "blk.h"
43 #include "blk-mq-sched.h"
44 
45 static DEFINE_SPINLOCK(elv_list_lock);
46 static LIST_HEAD(elv_list);
47 
48 /*
49  * Merge hash stuff.
50  */
51 #define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
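
/*
 * The hash key is the sector that immediately follows the request, i.e. the
 * position a back-merge candidate would have to start at.  For example, a
 * request starting at sector 100 and spanning 8 sectors hashes at 108, so a
 * bio whose first sector is 108 can be found via elv_rqhash_find() and
 * appended as a back merge (see elv_merge() below).
 */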
52 
53 /*
54  * Query the io scheduler to see if a bio being issued by the current
55  * process may be merged with rq.
56  */
57 static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
58 {
59 	struct request_queue *q = rq->q;
60 	struct elevator_queue *e = q->elevator;
61 
62 	if (e->uses_mq && e->type->ops.mq.allow_merge)
63 		return e->type->ops.mq.allow_merge(q, rq, bio);
64 	else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
65 		return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
66 
67 	return 1;
68 }
69 
70 /*
71  * can we safely merge with this request?
72  */
73 bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
74 {
75 	if (!blk_rq_merge_ok(rq, bio))
76 		return false;
77 
78 	if (!elv_iosched_allow_bio_merge(rq, bio))
79 		return false;
80 
81 	return true;
82 }
83 EXPORT_SYMBOL(elv_bio_merge_ok);
84 
85 static struct elevator_type *elevator_find(const char *name)
86 {
87 	struct elevator_type *e;
88 
89 	list_for_each_entry(e, &elv_list, list) {
90 		if (!strcmp(e->elevator_name, name))
91 			return e;
92 	}
93 
94 	return NULL;
95 }
96 
97 static void elevator_put(struct elevator_type *e)
98 {
99 	module_put(e->elevator_owner);
100 }
101 
102 static struct elevator_type *elevator_get(const char *name, bool try_loading)
103 {
104 	struct elevator_type *e;
105 
106 	spin_lock(&elv_list_lock);
107 
108 	e = elevator_find(name);
109 	if (!e && try_loading) {
110 		spin_unlock(&elv_list_lock);
111 		request_module("%s-iosched", name);
112 		spin_lock(&elv_list_lock);
113 		e = elevator_find(name);
114 	}
115 
116 	if (e && !try_module_get(e->elevator_owner))
117 		e = NULL;
118 
119 	spin_unlock(&elv_list_lock);
120 
121 	return e;
122 }
123 
124 static char chosen_elevator[ELV_NAME_MAX];
125 
126 static int __init elevator_setup(char *str)
127 {
128 	/*
129 	 * Be backwards-compatible with previous kernels, so users
130 	 * won't get the wrong elevator.
131 	 */
132 	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
133 	return 1;
134 }
135 
136 __setup("elevator=", elevator_setup);
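
/*
 * Example: booting with "elevator=deadline" on the kernel command line
 * records "deadline" in chosen_elevator; load_default_elevator_module()
 * below then tries request_module("deadline-iosched") if that scheduler
 * is not built in.
 */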
137 
138 /* called during boot to load the elevator chosen by the elevator param */
139 void __init load_default_elevator_module(void)
140 {
141 	struct elevator_type *e;
142 
143 	if (!chosen_elevator[0])
144 		return;
145 
146 	spin_lock(&elv_list_lock);
147 	e = elevator_find(chosen_elevator);
148 	spin_unlock(&elv_list_lock);
149 
150 	if (!e)
151 		request_module("%s-iosched", chosen_elevator);
152 }
153 
154 static struct kobj_type elv_ktype;
155 
156 struct elevator_queue *elevator_alloc(struct request_queue *q,
157 				  struct elevator_type *e)
158 {
159 	struct elevator_queue *eq;
160 
161 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
162 	if (unlikely(!eq))
163 		return NULL;
164 
165 	eq->type = e;
166 	kobject_init(&eq->kobj, &elv_ktype);
167 	mutex_init(&eq->sysfs_lock);
168 	hash_init(eq->hash);
169 	eq->uses_mq = e->uses_mq;
170 
171 	return eq;
172 }
173 EXPORT_SYMBOL(elevator_alloc);
174 
175 static void elevator_release(struct kobject *kobj)
176 {
177 	struct elevator_queue *e;
178 
179 	e = container_of(kobj, struct elevator_queue, kobj);
180 	elevator_put(e->type);
181 	kfree(e);
182 }
183 
184 int elevator_init(struct request_queue *q, char *name)
185 {
186 	struct elevator_type *e = NULL;
187 	int err;
188 
189 	/*
190 	 * q->sysfs_lock must be held to provide mutual exclusion between
191 	 * elevator_switch() and here.
192 	 */
193 	lockdep_assert_held(&q->sysfs_lock);
194 
195 	if (unlikely(q->elevator))
196 		return 0;
197 
198 	INIT_LIST_HEAD(&q->queue_head);
199 	q->last_merge = NULL;
200 	q->end_sector = 0;
201 	q->boundary_rq = NULL;
202 
203 	if (name) {
204 		e = elevator_get(name, true);
205 		if (!e)
206 			return -EINVAL;
207 	}
208 
209 	/*
210 	 * For non-mq devices, use the default elevator selected by the
211 	 * "elevator=" boot parameter or by the config option. Don't try to
212 	 * load modules here, as we could be running from async context and
213 	 * request_module() isn't allowed from async.
214 	 */
215 	if (!e && !q->mq_ops && *chosen_elevator) {
216 		e = elevator_get(chosen_elevator, false);
217 		if (!e)
218 			printk(KERN_ERR "I/O scheduler %s not found\n",
219 							chosen_elevator);
220 	}
221 
222 	if (!e) {
223 		/*
224 		 * For blk-mq devices with a single hardware queue, default to
225 		 * mq-deadline if it is available. If mq-deadline isn't
226 		 * available or the device has multiple hardware queues,
227 		 * default to "none".
228 		 */
229 		if (q->mq_ops) {
230 			if (q->nr_hw_queues == 1)
231 				e = elevator_get("mq-deadline", false);
232 			if (!e)
233 				return 0;
234 		} else
235 			e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
236 
237 		if (!e) {
238 			printk(KERN_ERR
239 				"Default I/O scheduler not found. "
240 				"Using noop.\n");
241 			e = elevator_get("noop", false);
242 		}
243 	}
244 
245 	if (e->uses_mq) {
246 		err = blk_mq_sched_setup(q);
247 		if (!err)
248 			err = e->ops.mq.init_sched(q, e);
249 	} else
250 		err = e->ops.sq.elevator_init_fn(q, e);
251 	if (err) {
252 		if (e->uses_mq)
253 			blk_mq_sched_teardown(q);
254 		elevator_put(e);
255 	}
256 	return err;
257 }
258 EXPORT_SYMBOL(elevator_init);
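
/*
 * To summarize the selection above: an explicit name from the caller wins
 * (and is an error if not found); otherwise legacy (non-mq) queues honour
 * the "elevator=" boot parameter; blk-mq devices with one hardware queue
 * try mq-deadline and otherwise run with "none"; legacy queues fall back
 * to CONFIG_DEFAULT_IOSCHED and finally to noop.
 */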
259 
260 void elevator_exit(struct elevator_queue *e)
261 {
262 	mutex_lock(&e->sysfs_lock);
263 	if (e->uses_mq && e->type->ops.mq.exit_sched)
264 		e->type->ops.mq.exit_sched(e);
265 	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
266 		e->type->ops.sq.elevator_exit_fn(e);
267 	mutex_unlock(&e->sysfs_lock);
268 
269 	kobject_put(&e->kobj);
270 }
271 EXPORT_SYMBOL(elevator_exit);
272 
273 static inline void __elv_rqhash_del(struct request *rq)
274 {
275 	hash_del(&rq->hash);
276 	rq->rq_flags &= ~RQF_HASHED;
277 }
278 
279 void elv_rqhash_del(struct request_queue *q, struct request *rq)
280 {
281 	if (ELV_ON_HASH(rq))
282 		__elv_rqhash_del(rq);
283 }
284 EXPORT_SYMBOL_GPL(elv_rqhash_del);
285 
286 void elv_rqhash_add(struct request_queue *q, struct request *rq)
287 {
288 	struct elevator_queue *e = q->elevator;
289 
290 	BUG_ON(ELV_ON_HASH(rq));
291 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
292 	rq->rq_flags |= RQF_HASHED;
293 }
294 EXPORT_SYMBOL_GPL(elv_rqhash_add);
295 
296 void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
297 {
298 	__elv_rqhash_del(rq);
299 	elv_rqhash_add(q, rq);
300 }
301 
302 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
303 {
304 	struct elevator_queue *e = q->elevator;
305 	struct hlist_node *next;
306 	struct request *rq;
307 
308 	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
309 		BUG_ON(!ELV_ON_HASH(rq));
310 
311 		if (unlikely(!rq_mergeable(rq))) {
312 			__elv_rqhash_del(rq);
313 			continue;
314 		}
315 
316 		if (rq_hash_key(rq) == offset)
317 			return rq;
318 	}
319 
320 	return NULL;
321 }
322 
323 /*
324  * RB-tree support functions for insertion/lookup/removal of requests
325  * in a sorted RB tree.
326  */
327 void elv_rb_add(struct rb_root *root, struct request *rq)
328 {
329 	struct rb_node **p = &root->rb_node;
330 	struct rb_node *parent = NULL;
331 	struct request *__rq;
332 
333 	while (*p) {
334 		parent = *p;
335 		__rq = rb_entry(parent, struct request, rb_node);
336 
337 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
338 			p = &(*p)->rb_left;
339 		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
340 			p = &(*p)->rb_right;
341 	}
342 
343 	rb_link_node(&rq->rb_node, parent, p);
344 	rb_insert_color(&rq->rb_node, root);
345 }
346 EXPORT_SYMBOL(elv_rb_add);
347 
348 void elv_rb_del(struct rb_root *root, struct request *rq)
349 {
350 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
351 	rb_erase(&rq->rb_node, root);
352 	RB_CLEAR_NODE(&rq->rb_node);
353 }
354 EXPORT_SYMBOL(elv_rb_del);
355 
356 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
357 {
358 	struct rb_node *n = root->rb_node;
359 	struct request *rq;
360 
361 	while (n) {
362 		rq = rb_entry(n, struct request, rb_node);
363 
364 		if (sector < blk_rq_pos(rq))
365 			n = n->rb_left;
366 		else if (sector > blk_rq_pos(rq))
367 			n = n->rb_right;
368 		else
369 			return rq;
370 	}
371 
372 	return NULL;
373 }
374 EXPORT_SYMBOL(elv_rb_find);
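
/*
 * A minimal sketch of how a single-queue io scheduler might use the rbtree
 * helpers above to keep a sector-sorted list of requests.  The names
 * "my_data", "my_add_request" and "my_find_request" are illustrative only
 * and do not exist in this file:
 *
 *	struct my_data {
 *		struct rb_root sort_list;
 *	};
 *
 *	static void my_add_request(struct my_data *md, struct request *rq)
 *	{
 *		elv_rb_add(&md->sort_list, rq);
 *	}
 *
 *	static struct request *my_find_request(struct my_data *md,
 *					       sector_t sector)
 *	{
 *		return elv_rb_find(&md->sort_list, sector);
 *	}
 */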
375 
376 /*
377  * Insert rq into dispatch queue of q.  Queue lock must be held on
378  * entry.  rq is inserted in sorted order into the dispatch queue. To
379  * be used by specific elevators.
380  */
381 void elv_dispatch_sort(struct request_queue *q, struct request *rq)
382 {
383 	sector_t boundary;
384 	struct list_head *entry;
385 
386 	if (q->last_merge == rq)
387 		q->last_merge = NULL;
388 
389 	elv_rqhash_del(q, rq);
390 
391 	q->nr_sorted--;
392 
393 	boundary = q->end_sector;
394 	list_for_each_prev(entry, &q->queue_head) {
395 		struct request *pos = list_entry_rq(entry);
396 
397 		if (req_op(rq) != req_op(pos))
398 			break;
399 		if (rq_data_dir(rq) != rq_data_dir(pos))
400 			break;
401 		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
402 			break;
403 		if (blk_rq_pos(rq) >= boundary) {
404 			if (blk_rq_pos(pos) < boundary)
405 				continue;
406 		} else {
407 			if (blk_rq_pos(pos) >= boundary)
408 				break;
409 		}
410 		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
411 			break;
412 	}
413 
414 	list_add(&rq->queuelist, entry);
415 }
416 EXPORT_SYMBOL(elv_dispatch_sort);
417 
418 /*
419  * Insert rq into dispatch queue of q.  Queue lock must be held on
420  * entry.  rq is added to the back of the dispatch queue. To be used by
421  * specific elevators.
422  */
423 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
424 {
425 	if (q->last_merge == rq)
426 		q->last_merge = NULL;
427 
428 	elv_rqhash_del(q, rq);
429 
430 	q->nr_sorted--;
431 
432 	q->end_sector = rq_end_sector(rq);
433 	q->boundary_rq = rq;
434 	list_add_tail(&rq->queuelist, &q->queue_head);
435 }
436 EXPORT_SYMBOL(elv_dispatch_add_tail);
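
/*
 * A minimal sketch of a legacy elevator's dispatch callback using the
 * helpers above.  "my_next_rq" is an illustrative function that would pull
 * the next request from the scheduler's own data structures; the callback
 * returns the number of requests moved to the dispatch queue:
 *
 *	static int my_dispatch(struct request_queue *q, int force)
 *	{
 *		struct request *rq = my_next_rq(q);
 *
 *		if (!rq)
 *			return 0;
 *		elv_dispatch_add_tail(q, rq);
 *		return 1;
 *	}
 */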
437 
438 enum elv_merge elv_merge(struct request_queue *q, struct request **req,
439 		struct bio *bio)
440 {
441 	struct elevator_queue *e = q->elevator;
442 	struct request *__rq;
443 
444 	/*
445 	 * Levels of merges:
446 	 * 	nomerges:  No merges at all attempted
447 	 * 	noxmerges: Only simple one-hit cache try
448 	 * 	merges:	   All merge tries attempted
449 	 */
450 	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
451 		return ELEVATOR_NO_MERGE;
452 
453 	/*
454 	 * First try one-hit cache.
455 	 */
456 	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
457 		enum elv_merge ret = blk_try_merge(q->last_merge, bio);
458 
459 		if (ret != ELEVATOR_NO_MERGE) {
460 			*req = q->last_merge;
461 			return ret;
462 		}
463 	}
464 
465 	if (blk_queue_noxmerges(q))
466 		return ELEVATOR_NO_MERGE;
467 
468 	/*
469 	 * See if our hash lookup can find a potential backmerge.
470 	 */
471 	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
472 	if (__rq && elv_bio_merge_ok(__rq, bio)) {
473 		*req = __rq;
474 		return ELEVATOR_BACK_MERGE;
475 	}
476 
477 	if (e->uses_mq && e->type->ops.mq.request_merge)
478 		return e->type->ops.mq.request_merge(q, req, bio);
479 	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
480 		return e->type->ops.sq.elevator_merge_fn(q, req, bio);
481 
482 	return ELEVATOR_NO_MERGE;
483 }
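
/*
 * The merge levels above correspond to /sys/block/<dev>/queue/nomerges:
 * 0 allows all merge attempts, 1 enables "noxmerges" (only the one-hit
 * cache is tried), and 2 enables "nomerges" (no merging at all).
 */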
484 
485 /*
486  * Attempt to do an insertion back merge. Only check for the case where
487  * we can append 'rq' to an existing request, so we can throw 'rq' away
488  * afterwards.
489  *
490  * Returns true if we merged, false otherwise
491  */
492 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
493 {
494 	struct request *__rq;
495 	bool ret;
496 
497 	if (blk_queue_nomerges(q))
498 		return false;
499 
500 	/*
501 	 * First try one-hit cache.
502 	 */
503 	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
504 		return true;
505 
506 	if (blk_queue_noxmerges(q))
507 		return false;
508 
509 	ret = false;
510 	/*
511 	 * See if our hash lookup can find a potential backmerge.
512 	 */
513 	while (1) {
514 		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
515 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
516 			break;
517 
518 		/* The merged request could be merged with others, try again */
519 		ret = true;
520 		rq = __rq;
521 	}
522 
523 	return ret;
524 }
525 
526 void elv_merged_request(struct request_queue *q, struct request *rq,
527 		enum elv_merge type)
528 {
529 	struct elevator_queue *e = q->elevator;
530 
531 	if (e->uses_mq && e->type->ops.mq.request_merged)
532 		e->type->ops.mq.request_merged(q, rq, type);
533 	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
534 		e->type->ops.sq.elevator_merged_fn(q, rq, type);
535 
536 	if (type == ELEVATOR_BACK_MERGE)
537 		elv_rqhash_reposition(q, rq);
538 
539 	q->last_merge = rq;
540 }
541 
542 void elv_merge_requests(struct request_queue *q, struct request *rq,
543 			     struct request *next)
544 {
545 	struct elevator_queue *e = q->elevator;
546 	bool next_sorted = false;
547 
548 	if (e->uses_mq && e->type->ops.mq.requests_merged)
549 		e->type->ops.mq.requests_merged(q, rq, next);
550 	else if (e->type->ops.sq.elevator_merge_req_fn) {
551 		next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
552 		if (next_sorted)
553 			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
554 	}
555 
556 	elv_rqhash_reposition(q, rq);
557 
558 	if (next_sorted) {
559 		elv_rqhash_del(q, next);
560 		q->nr_sorted--;
561 	}
562 
563 	q->last_merge = rq;
564 }
565 
566 void elv_bio_merged(struct request_queue *q, struct request *rq,
567 			struct bio *bio)
568 {
569 	struct elevator_queue *e = q->elevator;
570 
571 	if (WARN_ON_ONCE(e->uses_mq))
572 		return;
573 
574 	if (e->type->ops.sq.elevator_bio_merged_fn)
575 		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
576 }
577 
578 #ifdef CONFIG_PM
579 static void blk_pm_requeue_request(struct request *rq)
580 {
581 	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
582 		rq->q->nr_pending--;
583 }
584 
585 static void blk_pm_add_request(struct request_queue *q, struct request *rq)
586 {
587 	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
588 	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
589 		pm_request_resume(q->dev);
590 }
591 #else
592 static inline void blk_pm_requeue_request(struct request *rq) {}
593 static inline void blk_pm_add_request(struct request_queue *q,
594 				      struct request *rq)
595 {
596 }
597 #endif
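
/*
 * Note: q->nr_pending above counts in-flight requests that are not
 * themselves power-management requests.  Adding the first such request to
 * a runtime-suspended (or suspending) queue asks the PM core to resume the
 * device via pm_request_resume().
 */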
598 
599 void elv_requeue_request(struct request_queue *q, struct request *rq)
600 {
601 	/*
602 	 * The request already went through dequeue; we need to decrement
603 	 * the in_flight count again.
604 	 */
605 	if (blk_account_rq(rq)) {
606 		q->in_flight[rq_is_sync(rq)]--;
607 		if (rq->rq_flags & RQF_SORTED)
608 			elv_deactivate_rq(q, rq);
609 	}
610 
611 	rq->rq_flags &= ~RQF_STARTED;
612 
613 	blk_pm_requeue_request(rq);
614 
615 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
616 }
617 
618 void elv_drain_elevator(struct request_queue *q)
619 {
620 	struct elevator_queue *e = q->elevator;
621 	static int printed;
622 
623 	if (WARN_ON_ONCE(e->uses_mq))
624 		return;
625 
626 	lockdep_assert_held(q->queue_lock);
627 
628 	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
629 		;
630 	if (q->nr_sorted && printed++ < 10) {
631 		printk(KERN_ERR "%s: forced dispatching is broken "
632 		       "(nr_sorted=%u), please report this\n",
633 		       q->elevator->type->elevator_name, q->nr_sorted);
634 	}
635 }
636 
637 void __elv_add_request(struct request_queue *q, struct request *rq, int where)
638 {
639 	trace_block_rq_insert(q, rq);
640 
641 	blk_pm_add_request(q, rq);
642 
643 	rq->q = q;
644 
645 	if (rq->rq_flags & RQF_SOFTBARRIER) {
646 		/* barriers are scheduling boundary, update end_sector */
647 		if (!blk_rq_is_passthrough(rq)) {
648 			q->end_sector = rq_end_sector(rq);
649 			q->boundary_rq = rq;
650 		}
651 	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
652 		    (where == ELEVATOR_INSERT_SORT ||
653 		     where == ELEVATOR_INSERT_SORT_MERGE))
654 		where = ELEVATOR_INSERT_BACK;
655 
656 	switch (where) {
657 	case ELEVATOR_INSERT_REQUEUE:
658 	case ELEVATOR_INSERT_FRONT:
659 		rq->rq_flags |= RQF_SOFTBARRIER;
660 		list_add(&rq->queuelist, &q->queue_head);
661 		break;
662 
663 	case ELEVATOR_INSERT_BACK:
664 		rq->rq_flags |= RQF_SOFTBARRIER;
665 		elv_drain_elevator(q);
666 		list_add_tail(&rq->queuelist, &q->queue_head);
667 		/*
668 		 * We kick the queue here for the following reasons.
669 		 * - The elevator might have returned NULL previously
670 		 *   to delay requests and is returning them now.  As the
671 		 *   queue wasn't empty before this request, ll_rw_blk
672 		 *   won't run the queue on return, resulting in a hang.
673 		 * - Usually, back inserted requests won't be merged
674 		 *   with anything.  There's no point in delaying queue
675 		 *   processing.
676 		 */
677 		__blk_run_queue(q);
678 		break;
679 
680 	case ELEVATOR_INSERT_SORT_MERGE:
681 		/*
682 		 * If we succeed in merging this request with one in the
683 		 * queue already, we are done - rq has now been freed,
684 		 * so no need to do anything further.
685 		 */
686 		if (elv_attempt_insert_merge(q, rq))
687 			break;
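		/* if nothing was merged, fall through and sort-insert rq */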
688 	case ELEVATOR_INSERT_SORT:
689 		BUG_ON(blk_rq_is_passthrough(rq));
690 		rq->rq_flags |= RQF_SORTED;
691 		q->nr_sorted++;
692 		if (rq_mergeable(rq)) {
693 			elv_rqhash_add(q, rq);
694 			if (!q->last_merge)
695 				q->last_merge = rq;
696 		}
697 
698 		/*
699 		 * Some ioscheds (cfq) run q->request_fn directly, so
700 		 * rq cannot be accessed after calling
701 		 * elevator_add_req_fn.
702 		 */
703 		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
704 		break;
705 
706 	case ELEVATOR_INSERT_FLUSH:
707 		rq->rq_flags |= RQF_SOFTBARRIER;
708 		blk_insert_flush(rq);
709 		break;
710 	default:
711 		printk(KERN_ERR "%s: bad insertion point %d\n",
712 		       __func__, where);
713 		BUG();
714 	}
715 }
716 EXPORT_SYMBOL(__elv_add_request);
717 
718 void elv_add_request(struct request_queue *q, struct request *rq, int where)
719 {
720 	unsigned long flags;
721 
722 	spin_lock_irqsave(q->queue_lock, flags);
723 	__elv_add_request(q, rq, where);
724 	spin_unlock_irqrestore(q->queue_lock, flags);
725 }
726 EXPORT_SYMBOL(elv_add_request);
727 
728 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
729 {
730 	struct elevator_queue *e = q->elevator;
731 
732 	if (e->uses_mq && e->type->ops.mq.next_request)
733 		return e->type->ops.mq.next_request(q, rq);
734 	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
735 		return e->type->ops.sq.elevator_latter_req_fn(q, rq);
736 
737 	return NULL;
738 }
739 
740 struct request *elv_former_request(struct request_queue *q, struct request *rq)
741 {
742 	struct elevator_queue *e = q->elevator;
743 
744 	if (e->uses_mq && e->type->ops.mq.former_request)
745 		return e->type->ops.mq.former_request(q, rq);
746 	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
747 		return e->type->ops.sq.elevator_former_req_fn(q, rq);
748 	return NULL;
749 }
750 
751 int elv_set_request(struct request_queue *q, struct request *rq,
752 		    struct bio *bio, gfp_t gfp_mask)
753 {
754 	struct elevator_queue *e = q->elevator;
755 
756 	if (WARN_ON_ONCE(e->uses_mq))
757 		return 0;
758 
759 	if (e->type->ops.sq.elevator_set_req_fn)
760 		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
761 	return 0;
762 }
763 
764 void elv_put_request(struct request_queue *q, struct request *rq)
765 {
766 	struct elevator_queue *e = q->elevator;
767 
768 	if (WARN_ON_ONCE(e->uses_mq))
769 		return;
770 
771 	if (e->type->ops.sq.elevator_put_req_fn)
772 		e->type->ops.sq.elevator_put_req_fn(rq);
773 }
774 
775 int elv_may_queue(struct request_queue *q, unsigned int op)
776 {
777 	struct elevator_queue *e = q->elevator;
778 
779 	if (WARN_ON_ONCE(e->uses_mq))
780 		return 0;
781 
782 	if (e->type->ops.sq.elevator_may_queue_fn)
783 		return e->type->ops.sq.elevator_may_queue_fn(q, op);
784 
785 	return ELV_MQUEUE_MAY;
786 }
787 
788 void elv_completed_request(struct request_queue *q, struct request *rq)
789 {
790 	struct elevator_queue *e = q->elevator;
791 
792 	if (WARN_ON_ONCE(e->uses_mq))
793 		return;
794 
795 	/*
796 	 * The request has been released by the driver, so its io must be done.
797 	 */
798 	if (blk_account_rq(rq)) {
799 		q->in_flight[rq_is_sync(rq)]--;
800 		if ((rq->rq_flags & RQF_SORTED) &&
801 		    e->type->ops.sq.elevator_completed_req_fn)
802 			e->type->ops.sq.elevator_completed_req_fn(q, rq);
803 	}
804 }
805 
806 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
807 
808 static ssize_t
809 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
810 {
811 	struct elv_fs_entry *entry = to_elv(attr);
812 	struct elevator_queue *e;
813 	ssize_t error;
814 
815 	if (!entry->show)
816 		return -EIO;
817 
818 	e = container_of(kobj, struct elevator_queue, kobj);
819 	mutex_lock(&e->sysfs_lock);
820 	error = e->type ? entry->show(e, page) : -ENOENT;
821 	mutex_unlock(&e->sysfs_lock);
822 	return error;
823 }
824 
825 static ssize_t
826 elv_attr_store(struct kobject *kobj, struct attribute *attr,
827 	       const char *page, size_t length)
828 {
829 	struct elv_fs_entry *entry = to_elv(attr);
830 	struct elevator_queue *e;
831 	ssize_t error;
832 
833 	if (!entry->store)
834 		return -EIO;
835 
836 	e = container_of(kobj, struct elevator_queue, kobj);
837 	mutex_lock(&e->sysfs_lock);
838 	error = e->type ? entry->store(e, page, length) : -ENOENT;
839 	mutex_unlock(&e->sysfs_lock);
840 	return error;
841 }
842 
843 static const struct sysfs_ops elv_sysfs_ops = {
844 	.show	= elv_attr_show,
845 	.store	= elv_attr_store,
846 };
847 
848 static struct kobj_type elv_ktype = {
849 	.sysfs_ops	= &elv_sysfs_ops,
850 	.release	= elevator_release,
851 };
852 
853 int elv_register_queue(struct request_queue *q)
854 {
855 	struct elevator_queue *e = q->elevator;
856 	int error;
857 
858 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
859 	if (!error) {
860 		struct elv_fs_entry *attr = e->type->elevator_attrs;
861 		if (attr) {
862 			while (attr->attr.name) {
863 				if (sysfs_create_file(&e->kobj, &attr->attr))
864 					break;
865 				attr++;
866 			}
867 		}
868 		kobject_uevent(&e->kobj, KOBJ_ADD);
869 		e->registered = 1;
870 		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
871 			e->type->ops.sq.elevator_registered_fn(q);
872 	}
873 	return error;
874 }
875 EXPORT_SYMBOL(elv_register_queue);
876 
877 void elv_unregister_queue(struct request_queue *q)
878 {
879 	if (q) {
880 		struct elevator_queue *e = q->elevator;
881 
882 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
883 		kobject_del(&e->kobj);
884 		e->registered = 0;
885 	}
886 }
887 EXPORT_SYMBOL(elv_unregister_queue);
888 
889 int elv_register(struct elevator_type *e)
890 {
891 	char *def = "";
892 
893 	/* create icq_cache if requested */
894 	if (e->icq_size) {
895 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
896 		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
897 			return -EINVAL;
898 
899 		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
900 			 "%s_io_cq", e->elevator_name);
901 		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
902 						 e->icq_align, 0, NULL);
903 		if (!e->icq_cache)
904 			return -ENOMEM;
905 	}
906 
907 	/* register, don't allow duplicate names */
908 	spin_lock(&elv_list_lock);
909 	if (elevator_find(e->elevator_name)) {
910 		spin_unlock(&elv_list_lock);
911 		if (e->icq_cache)
912 			kmem_cache_destroy(e->icq_cache);
913 		return -EBUSY;
914 	}
915 	list_add_tail(&e->list, &elv_list);
916 	spin_unlock(&elv_list_lock);
917 
918 	/* print pretty message */
919 	if (!strcmp(e->elevator_name, chosen_elevator) ||
920 			(!*chosen_elevator &&
921 			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
922 				def = " (default)";
923 
924 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
925 								def);
926 	return 0;
927 }
928 EXPORT_SYMBOL_GPL(elv_register);
929 
930 void elv_unregister(struct elevator_type *e)
931 {
932 	/* unregister */
933 	spin_lock(&elv_list_lock);
934 	list_del_init(&e->list);
935 	spin_unlock(&elv_list_lock);
936 
937 	/*
938 	 * Destroy icq_cache if it exists.  icqs are RCU managed.  Make
939 	 * sure all RCU operations are complete before proceeding.
940 	 */
941 	if (e->icq_cache) {
942 		rcu_barrier();
943 		kmem_cache_destroy(e->icq_cache);
944 		e->icq_cache = NULL;
945 	}
946 }
947 EXPORT_SYMBOL_GPL(elv_unregister);
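
/*
 * A minimal sketch of how an io scheduler module registers itself with the
 * elevator core.  The type "my_iosched" and its callbacks are illustrative;
 * real schedulers fill in .ops.sq (or .ops.mq) with their callbacks:
 *
 *	static struct elevator_type my_iosched = {
 *		.ops.sq		= {
 *			.elevator_dispatch_fn	= my_dispatch,
 *			.elevator_add_req_fn	= my_add_request,
 *		},
 *		.elevator_name	= "my-iosched",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init my_iosched_init(void)
 *	{
 *		return elv_register(&my_iosched);
 *	}
 *
 *	static void __exit my_iosched_exit(void)
 *	{
 *		elv_unregister(&my_iosched);
 *	}
 *
 *	module_init(my_iosched_init);
 *	module_exit(my_iosched_exit);
 */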
948 
949 /*
950  * Switch to new_e io scheduler. Be careful not to introduce deadlocks:
951  * we don't free the old io scheduler before we have allocated what we
952  * need for the new one. This way we have a chance of going back to the
953  * old one if the new one fails init for some reason.
954  */
955 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
956 {
957 	struct elevator_queue *old = q->elevator;
958 	bool old_registered = false;
959 	int err;
960 
961 	if (q->mq_ops) {
962 		blk_mq_freeze_queue(q);
963 		blk_mq_quiesce_queue(q);
964 	}
965 
966 	/*
967 	 * Turn on BYPASS and drain all requests w/ elevator private data.
968 	 * Block layer doesn't call into a quiesced elevator - all requests
969 	 * are directly put on the dispatch list without elevator data
970 	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
971 	 * merge happens either.
972 	 */
973 	if (old) {
974 		old_registered = old->registered;
975 
976 		if (old->uses_mq)
977 			blk_mq_sched_teardown(q);
978 
979 		if (!q->mq_ops)
980 			blk_queue_bypass_start(q);
981 
982 		/* unregister and clear all auxiliary data of the old elevator */
983 		if (old_registered)
984 			elv_unregister_queue(q);
985 
986 		ioc_clear_queue(q);
987 	}
988 
989 	/* allocate, init and register new elevator */
990 	if (new_e) {
991 		if (new_e->uses_mq) {
992 			err = blk_mq_sched_setup(q);
993 			if (!err)
994 				err = new_e->ops.mq.init_sched(q, new_e);
995 		} else
996 			err = new_e->ops.sq.elevator_init_fn(q, new_e);
997 		if (err)
998 			goto fail_init;
999 
1000 		err = elv_register_queue(q);
1001 		if (err)
1002 			goto fail_register;
1003 	} else
1004 		q->elevator = NULL;
1005 
1006 	/* done, kill the old one and finish */
1007 	if (old) {
1008 		elevator_exit(old);
1009 		if (!q->mq_ops)
1010 			blk_queue_bypass_end(q);
1011 	}
1012 
1013 	if (q->mq_ops) {
1014 		blk_mq_unfreeze_queue(q);
1015 		blk_mq_start_stopped_hw_queues(q, true);
1016 	}
1017 
1018 	if (new_e)
1019 		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
1020 	else
1021 		blk_add_trace_msg(q, "elv switch: none");
1022 
1023 	return 0;
1024 
1025 fail_register:
1026 	if (q->mq_ops)
1027 		blk_mq_sched_teardown(q);
1028 	elevator_exit(q->elevator);
1029 fail_init:
1030 	/* switch failed, restore and re-register old elevator */
1031 	if (old) {
1032 		q->elevator = old;
1033 		elv_register_queue(q);
1034 		if (!q->mq_ops)
1035 			blk_queue_bypass_end(q);
1036 	}
1037 	if (q->mq_ops) {
1038 		blk_mq_unfreeze_queue(q);
1039 		blk_mq_start_stopped_hw_queues(q, true);
1040 	}
1041 
1042 	return err;
1043 }
1044 
1045 /*
1046  * Switch this queue to the given IO scheduler.
1047  */
1048 static int __elevator_change(struct request_queue *q, const char *name)
1049 {
1050 	char elevator_name[ELV_NAME_MAX];
1051 	struct elevator_type *e;
1052 
1053 	/*
1054 	 * Special case for mq, turn off scheduling
1055 	 */
1056 	if (q->mq_ops && !strncmp(name, "none", 4))
1057 		return elevator_switch(q, NULL);
1058 
1059 	strlcpy(elevator_name, name, sizeof(elevator_name));
1060 	e = elevator_get(strstrip(elevator_name), true);
1061 	if (!e) {
1062 		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
1063 		return -EINVAL;
1064 	}
1065 
1066 	if (q->elevator &&
1067 	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
1068 		elevator_put(e);
1069 		return 0;
1070 	}
1071 
1072 	if (!e->uses_mq && q->mq_ops) {
1073 		elevator_put(e);
1074 		return -EINVAL;
1075 	}
1076 	if (e->uses_mq && !q->mq_ops) {
1077 		elevator_put(e);
1078 		return -EINVAL;
1079 	}
1080 
1081 	return elevator_switch(q, e);
1082 }
1083 
1084 int elevator_change(struct request_queue *q, const char *name)
1085 {
1086 	int ret;
1087 
1088 	/* Protect q->elevator from elevator_init() */
1089 	mutex_lock(&q->sysfs_lock);
1090 	ret = __elevator_change(q, name);
1091 	mutex_unlock(&q->sysfs_lock);
1092 
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL(elevator_change);
1096 
1097 ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1098 			  size_t count)
1099 {
1100 	int ret;
1101 
1102 	if (!(q->mq_ops || q->request_fn))
1103 		return count;
1104 
1105 	ret = __elevator_change(q, name);
1106 	if (!ret)
1107 		return count;
1108 
1109 	printk(KERN_ERR "elevator: switch to %s failed\n", name);
1110 	return ret;
1111 }
1112 
1113 ssize_t elv_iosched_show(struct request_queue *q, char *name)
1114 {
1115 	struct elevator_queue *e = q->elevator;
1116 	struct elevator_type *elv = NULL;
1117 	struct elevator_type *__e;
1118 	int len = 0;
1119 
1120 	if (!blk_queue_stackable(q))
1121 		return sprintf(name, "none\n");
1122 
1123 	if (!q->elevator)
1124 		len += sprintf(name+len, "[none] ");
1125 	else
1126 		elv = e->type;
1127 
1128 	spin_lock(&elv_list_lock);
1129 	list_for_each_entry(__e, &elv_list, list) {
1130 		if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
1131 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
1132 			continue;
1133 		}
1134 		if (__e->uses_mq && q->mq_ops)
1135 			len += sprintf(name+len, "%s ", __e->elevator_name);
1136 		else if (!__e->uses_mq && !q->mq_ops)
1137 			len += sprintf(name+len, "%s ", __e->elevator_name);
1138 	}
1139 	spin_unlock(&elv_list_lock);
1140 
1141 	if (q->mq_ops && q->elevator)
1142 		len += sprintf(name+len, "none");
1143 
1144 	len += sprintf(len+name, "\n");
1145 	return len;
1146 }
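
/*
 * elv_iosched_show() and elv_iosched_store() above back the per-queue
 * "scheduler" sysfs attribute, e.g. (device name and scheduler set are
 * only an example):
 *
 *	# cat /sys/block/sda/queue/scheduler
 *	noop deadline [cfq]
 *	# echo deadline > /sys/block/sda/queue/scheduler
 */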
1147 
1148 struct request *elv_rb_former_request(struct request_queue *q,
1149 				      struct request *rq)
1150 {
1151 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
1152 
1153 	if (rbprev)
1154 		return rb_entry_rq(rbprev);
1155 
1156 	return NULL;
1157 }
1158 EXPORT_SYMBOL(elv_rb_former_request);
1159 
1160 struct request *elv_rb_latter_request(struct request_queue *q,
1161 				      struct request *rq)
1162 {
1163 	struct rb_node *rbnext = rb_next(&rq->rb_node);
1164 
1165 	if (rbnext)
1166 		return rb_entry_rq(rbnext);
1167 
1168 	return NULL;
1169 }
1170 EXPORT_SYMBOL(elv_rb_latter_request);
1171