xref: /openbmc/linux/block/elevator.c (revision 8bd1369b)
1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@kernel.dk> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - loose bi_dev comparisons, partition handling is right now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hash.h>
36 #include <linux/uaccess.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/blk-cgroup.h>
39 
40 #include <trace/events/block.h>
41 
42 #include "blk.h"
43 #include "blk-mq-sched.h"
44 #include "blk-wbt.h"
45 
46 static DEFINE_SPINLOCK(elv_list_lock);
47 static LIST_HEAD(elv_list);
48 
49 /*
50  * Merge hash stuff.
51  */
52 #define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
53 
54 /*
55  * Query io scheduler to see if the current process issuing bio may be
56  * merged with rq.
57  */
58 static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
59 {
60 	struct request_queue *q = rq->q;
61 	struct elevator_queue *e = q->elevator;
62 
63 	if (e->uses_mq && e->type->ops.mq.allow_merge)
64 		return e->type->ops.mq.allow_merge(q, rq, bio);
65 	else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
66 		return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
67 
68 	return 1;
69 }
70 
71 /*
72  * can we safely merge with this request?
73  */
74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
75 {
76 	if (!blk_rq_merge_ok(rq, bio))
77 		return false;
78 
79 	if (!elv_iosched_allow_bio_merge(rq, bio))
80 		return false;
81 
82 	return true;
83 }
84 EXPORT_SYMBOL(elv_bio_merge_ok);
85 
86 static bool elevator_match(const struct elevator_type *e, const char *name)
87 {
88 	if (!strcmp(e->elevator_name, name))
89 		return true;
90 	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
91 		return true;
92 
93 	return false;
94 }
95 
96 /*
97  * Return scheduler with name 'name' and with matching 'mq capability
98  */
99 static struct elevator_type *elevator_find(const char *name, bool mq)
100 {
101 	struct elevator_type *e;
102 
103 	list_for_each_entry(e, &elv_list, list) {
104 		if (elevator_match(e, name) && (mq == e->uses_mq))
105 			return e;
106 	}
107 
108 	return NULL;
109 }
110 
111 static void elevator_put(struct elevator_type *e)
112 {
113 	module_put(e->elevator_owner);
114 }
115 
116 static struct elevator_type *elevator_get(struct request_queue *q,
117 					  const char *name, bool try_loading)
118 {
119 	struct elevator_type *e;
120 
121 	spin_lock(&elv_list_lock);
122 
123 	e = elevator_find(name, q->mq_ops != NULL);
124 	if (!e && try_loading) {
125 		spin_unlock(&elv_list_lock);
126 		request_module("%s-iosched", name);
127 		spin_lock(&elv_list_lock);
128 		e = elevator_find(name, q->mq_ops != NULL);
129 	}
130 
131 	if (e && !try_module_get(e->elevator_owner))
132 		e = NULL;
133 
134 	spin_unlock(&elv_list_lock);
135 	return e;
136 }
137 
138 static char chosen_elevator[ELV_NAME_MAX];
139 
140 static int __init elevator_setup(char *str)
141 {
142 	/*
143 	 * Be backwards-compatible with previous kernels, so users
144 	 * won't get the wrong elevator.
145 	 */
146 	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
147 	return 1;
148 }
149 
150 __setup("elevator=", elevator_setup);
151 
152 /* called during boot to load the elevator chosen by the elevator param */
153 void __init load_default_elevator_module(void)
154 {
155 	struct elevator_type *e;
156 
157 	if (!chosen_elevator[0])
158 		return;
159 
160 	/*
161 	 * Boot parameter is deprecated, we haven't supported that for MQ.
162 	 * Only look for non-mq schedulers from here.
163 	 */
164 	spin_lock(&elv_list_lock);
165 	e = elevator_find(chosen_elevator, false);
166 	spin_unlock(&elv_list_lock);
167 
168 	if (!e)
169 		request_module("%s-iosched", chosen_elevator);
170 }
171 
172 static struct kobj_type elv_ktype;
173 
174 struct elevator_queue *elevator_alloc(struct request_queue *q,
175 				  struct elevator_type *e)
176 {
177 	struct elevator_queue *eq;
178 
179 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
180 	if (unlikely(!eq))
181 		return NULL;
182 
183 	eq->type = e;
184 	kobject_init(&eq->kobj, &elv_ktype);
185 	mutex_init(&eq->sysfs_lock);
186 	hash_init(eq->hash);
187 	eq->uses_mq = e->uses_mq;
188 
189 	return eq;
190 }
191 EXPORT_SYMBOL(elevator_alloc);
192 
193 static void elevator_release(struct kobject *kobj)
194 {
195 	struct elevator_queue *e;
196 
197 	e = container_of(kobj, struct elevator_queue, kobj);
198 	elevator_put(e->type);
199 	kfree(e);
200 }
201 
202 /*
203  * Use the default elevator specified by config boot param for non-mq devices,
204  * or by config option.  Don't try to load modules as we could be running off
205  * async and request_module() isn't allowed from async.
206  */
207 int elevator_init(struct request_queue *q)
208 {
209 	struct elevator_type *e = NULL;
210 	int err = 0;
211 
212 	/*
213 	 * q->sysfs_lock must be held to provide mutual exclusion between
214 	 * elevator_switch() and here.
215 	 */
216 	mutex_lock(&q->sysfs_lock);
217 	if (unlikely(q->elevator))
218 		goto out_unlock;
219 
220 	if (*chosen_elevator) {
221 		e = elevator_get(q, chosen_elevator, false);
222 		if (!e)
223 			printk(KERN_ERR "I/O scheduler %s not found\n",
224 							chosen_elevator);
225 	}
226 
227 	if (!e)
228 		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
229 	if (!e) {
230 		printk(KERN_ERR
231 			"Default I/O scheduler not found. Using noop.\n");
232 		e = elevator_get(q, "noop", false);
233 	}
234 
235 	err = e->ops.sq.elevator_init_fn(q, e);
236 	if (err)
237 		elevator_put(e);
238 out_unlock:
239 	mutex_unlock(&q->sysfs_lock);
240 	return err;
241 }
242 
243 void elevator_exit(struct request_queue *q, struct elevator_queue *e)
244 {
245 	mutex_lock(&e->sysfs_lock);
246 	if (e->uses_mq && e->type->ops.mq.exit_sched)
247 		blk_mq_exit_sched(q, e);
248 	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
249 		e->type->ops.sq.elevator_exit_fn(e);
250 	mutex_unlock(&e->sysfs_lock);
251 
252 	kobject_put(&e->kobj);
253 }
254 
255 static inline void __elv_rqhash_del(struct request *rq)
256 {
257 	hash_del(&rq->hash);
258 	rq->rq_flags &= ~RQF_HASHED;
259 }
260 
261 void elv_rqhash_del(struct request_queue *q, struct request *rq)
262 {
263 	if (ELV_ON_HASH(rq))
264 		__elv_rqhash_del(rq);
265 }
266 EXPORT_SYMBOL_GPL(elv_rqhash_del);
267 
268 void elv_rqhash_add(struct request_queue *q, struct request *rq)
269 {
270 	struct elevator_queue *e = q->elevator;
271 
272 	BUG_ON(ELV_ON_HASH(rq));
273 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
274 	rq->rq_flags |= RQF_HASHED;
275 }
276 EXPORT_SYMBOL_GPL(elv_rqhash_add);
277 
278 void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
279 {
280 	__elv_rqhash_del(rq);
281 	elv_rqhash_add(q, rq);
282 }
283 
284 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
285 {
286 	struct elevator_queue *e = q->elevator;
287 	struct hlist_node *next;
288 	struct request *rq;
289 
290 	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
291 		BUG_ON(!ELV_ON_HASH(rq));
292 
293 		if (unlikely(!rq_mergeable(rq))) {
294 			__elv_rqhash_del(rq);
295 			continue;
296 		}
297 
298 		if (rq_hash_key(rq) == offset)
299 			return rq;
300 	}
301 
302 	return NULL;
303 }
304 
305 /*
306  * RB-tree support functions for inserting/lookup/removal of requests
307  * in a sorted RB tree.
308  */
309 void elv_rb_add(struct rb_root *root, struct request *rq)
310 {
311 	struct rb_node **p = &root->rb_node;
312 	struct rb_node *parent = NULL;
313 	struct request *__rq;
314 
315 	while (*p) {
316 		parent = *p;
317 		__rq = rb_entry(parent, struct request, rb_node);
318 
319 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
320 			p = &(*p)->rb_left;
321 		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
322 			p = &(*p)->rb_right;
323 	}
324 
325 	rb_link_node(&rq->rb_node, parent, p);
326 	rb_insert_color(&rq->rb_node, root);
327 }
328 EXPORT_SYMBOL(elv_rb_add);
329 
330 void elv_rb_del(struct rb_root *root, struct request *rq)
331 {
332 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
333 	rb_erase(&rq->rb_node, root);
334 	RB_CLEAR_NODE(&rq->rb_node);
335 }
336 EXPORT_SYMBOL(elv_rb_del);
337 
338 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
339 {
340 	struct rb_node *n = root->rb_node;
341 	struct request *rq;
342 
343 	while (n) {
344 		rq = rb_entry(n, struct request, rb_node);
345 
346 		if (sector < blk_rq_pos(rq))
347 			n = n->rb_left;
348 		else if (sector > blk_rq_pos(rq))
349 			n = n->rb_right;
350 		else
351 			return rq;
352 	}
353 
354 	return NULL;
355 }
356 EXPORT_SYMBOL(elv_rb_find);
357 
358 /*
359  * Insert rq into dispatch queue of q.  Queue lock must be held on
360  * entry.  rq is sort instead into the dispatch queue. To be used by
361  * specific elevators.
362  */
363 void elv_dispatch_sort(struct request_queue *q, struct request *rq)
364 {
365 	sector_t boundary;
366 	struct list_head *entry;
367 
368 	if (q->last_merge == rq)
369 		q->last_merge = NULL;
370 
371 	elv_rqhash_del(q, rq);
372 
373 	q->nr_sorted--;
374 
375 	boundary = q->end_sector;
376 	list_for_each_prev(entry, &q->queue_head) {
377 		struct request *pos = list_entry_rq(entry);
378 
379 		if (req_op(rq) != req_op(pos))
380 			break;
381 		if (rq_data_dir(rq) != rq_data_dir(pos))
382 			break;
383 		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
384 			break;
385 		if (blk_rq_pos(rq) >= boundary) {
386 			if (blk_rq_pos(pos) < boundary)
387 				continue;
388 		} else {
389 			if (blk_rq_pos(pos) >= boundary)
390 				break;
391 		}
392 		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
393 			break;
394 	}
395 
396 	list_add(&rq->queuelist, entry);
397 }
398 EXPORT_SYMBOL(elv_dispatch_sort);
399 
400 /*
401  * Insert rq into dispatch queue of q.  Queue lock must be held on
402  * entry.  rq is added to the back of the dispatch queue. To be used by
403  * specific elevators.
404  */
405 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
406 {
407 	if (q->last_merge == rq)
408 		q->last_merge = NULL;
409 
410 	elv_rqhash_del(q, rq);
411 
412 	q->nr_sorted--;
413 
414 	q->end_sector = rq_end_sector(rq);
415 	q->boundary_rq = rq;
416 	list_add_tail(&rq->queuelist, &q->queue_head);
417 }
418 EXPORT_SYMBOL(elv_dispatch_add_tail);
419 
420 enum elv_merge elv_merge(struct request_queue *q, struct request **req,
421 		struct bio *bio)
422 {
423 	struct elevator_queue *e = q->elevator;
424 	struct request *__rq;
425 
426 	/*
427 	 * Levels of merges:
428 	 * 	nomerges:  No merges at all attempted
429 	 * 	noxmerges: Only simple one-hit cache try
430 	 * 	merges:	   All merge tries attempted
431 	 */
432 	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
433 		return ELEVATOR_NO_MERGE;
434 
435 	/*
436 	 * First try one-hit cache.
437 	 */
438 	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
439 		enum elv_merge ret = blk_try_merge(q->last_merge, bio);
440 
441 		if (ret != ELEVATOR_NO_MERGE) {
442 			*req = q->last_merge;
443 			return ret;
444 		}
445 	}
446 
447 	if (blk_queue_noxmerges(q))
448 		return ELEVATOR_NO_MERGE;
449 
450 	/*
451 	 * See if our hash lookup can find a potential backmerge.
452 	 */
453 	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
454 	if (__rq && elv_bio_merge_ok(__rq, bio)) {
455 		*req = __rq;
456 		return ELEVATOR_BACK_MERGE;
457 	}
458 
459 	if (e->uses_mq && e->type->ops.mq.request_merge)
460 		return e->type->ops.mq.request_merge(q, req, bio);
461 	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
462 		return e->type->ops.sq.elevator_merge_fn(q, req, bio);
463 
464 	return ELEVATOR_NO_MERGE;
465 }
466 
467 /*
468  * Attempt to do an insertion back merge. Only check for the case where
469  * we can append 'rq' to an existing request, so we can throw 'rq' away
470  * afterwards.
471  *
472  * Returns true if we merged, false otherwise
473  */
474 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
475 {
476 	struct request *__rq;
477 	bool ret;
478 
479 	if (blk_queue_nomerges(q))
480 		return false;
481 
482 	/*
483 	 * First try one-hit cache.
484 	 */
485 	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
486 		return true;
487 
488 	if (blk_queue_noxmerges(q))
489 		return false;
490 
491 	ret = false;
492 	/*
493 	 * See if our hash lookup can find a potential backmerge.
494 	 */
495 	while (1) {
496 		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
497 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
498 			break;
499 
500 		/* The merged request could be merged with others, try again */
501 		ret = true;
502 		rq = __rq;
503 	}
504 
505 	return ret;
506 }
507 
508 void elv_merged_request(struct request_queue *q, struct request *rq,
509 		enum elv_merge type)
510 {
511 	struct elevator_queue *e = q->elevator;
512 
513 	if (e->uses_mq && e->type->ops.mq.request_merged)
514 		e->type->ops.mq.request_merged(q, rq, type);
515 	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
516 		e->type->ops.sq.elevator_merged_fn(q, rq, type);
517 
518 	if (type == ELEVATOR_BACK_MERGE)
519 		elv_rqhash_reposition(q, rq);
520 
521 	q->last_merge = rq;
522 }
523 
524 void elv_merge_requests(struct request_queue *q, struct request *rq,
525 			     struct request *next)
526 {
527 	struct elevator_queue *e = q->elevator;
528 	bool next_sorted = false;
529 
530 	if (e->uses_mq && e->type->ops.mq.requests_merged)
531 		e->type->ops.mq.requests_merged(q, rq, next);
532 	else if (e->type->ops.sq.elevator_merge_req_fn) {
533 		next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
534 		if (next_sorted)
535 			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
536 	}
537 
538 	elv_rqhash_reposition(q, rq);
539 
540 	if (next_sorted) {
541 		elv_rqhash_del(q, next);
542 		q->nr_sorted--;
543 	}
544 
545 	q->last_merge = rq;
546 }
547 
548 void elv_bio_merged(struct request_queue *q, struct request *rq,
549 			struct bio *bio)
550 {
551 	struct elevator_queue *e = q->elevator;
552 
553 	if (WARN_ON_ONCE(e->uses_mq))
554 		return;
555 
556 	if (e->type->ops.sq.elevator_bio_merged_fn)
557 		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
558 }
559 
560 #ifdef CONFIG_PM
561 static void blk_pm_requeue_request(struct request *rq)
562 {
563 	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
564 		rq->q->nr_pending--;
565 }
566 
567 static void blk_pm_add_request(struct request_queue *q, struct request *rq)
568 {
569 	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
570 	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
571 		pm_request_resume(q->dev);
572 }
573 #else
574 static inline void blk_pm_requeue_request(struct request *rq) {}
575 static inline void blk_pm_add_request(struct request_queue *q,
576 				      struct request *rq)
577 {
578 }
579 #endif
580 
581 void elv_requeue_request(struct request_queue *q, struct request *rq)
582 {
583 	/*
584 	 * it already went through dequeue, we need to decrement the
585 	 * in_flight count again
586 	 */
587 	if (blk_account_rq(rq)) {
588 		q->in_flight[rq_is_sync(rq)]--;
589 		if (rq->rq_flags & RQF_SORTED)
590 			elv_deactivate_rq(q, rq);
591 	}
592 
593 	rq->rq_flags &= ~RQF_STARTED;
594 
595 	blk_pm_requeue_request(rq);
596 
597 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
598 }
599 
600 void elv_drain_elevator(struct request_queue *q)
601 {
602 	struct elevator_queue *e = q->elevator;
603 	static int printed;
604 
605 	if (WARN_ON_ONCE(e->uses_mq))
606 		return;
607 
608 	lockdep_assert_held(q->queue_lock);
609 
610 	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
611 		;
612 	if (q->nr_sorted && printed++ < 10) {
613 		printk(KERN_ERR "%s: forced dispatching is broken "
614 		       "(nr_sorted=%u), please report this\n",
615 		       q->elevator->type->elevator_name, q->nr_sorted);
616 	}
617 }
618 
619 void __elv_add_request(struct request_queue *q, struct request *rq, int where)
620 {
621 	trace_block_rq_insert(q, rq);
622 
623 	blk_pm_add_request(q, rq);
624 
625 	rq->q = q;
626 
627 	if (rq->rq_flags & RQF_SOFTBARRIER) {
628 		/* barriers are scheduling boundary, update end_sector */
629 		if (!blk_rq_is_passthrough(rq)) {
630 			q->end_sector = rq_end_sector(rq);
631 			q->boundary_rq = rq;
632 		}
633 	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
634 		    (where == ELEVATOR_INSERT_SORT ||
635 		     where == ELEVATOR_INSERT_SORT_MERGE))
636 		where = ELEVATOR_INSERT_BACK;
637 
638 	switch (where) {
639 	case ELEVATOR_INSERT_REQUEUE:
640 	case ELEVATOR_INSERT_FRONT:
641 		rq->rq_flags |= RQF_SOFTBARRIER;
642 		list_add(&rq->queuelist, &q->queue_head);
643 		break;
644 
645 	case ELEVATOR_INSERT_BACK:
646 		rq->rq_flags |= RQF_SOFTBARRIER;
647 		elv_drain_elevator(q);
648 		list_add_tail(&rq->queuelist, &q->queue_head);
649 		/*
650 		 * We kick the queue here for the following reasons.
651 		 * - The elevator might have returned NULL previously
652 		 *   to delay requests and returned them now.  As the
653 		 *   queue wasn't empty before this request, ll_rw_blk
654 		 *   won't run the queue on return, resulting in hang.
655 		 * - Usually, back inserted requests won't be merged
656 		 *   with anything.  There's no point in delaying queue
657 		 *   processing.
658 		 */
659 		__blk_run_queue(q);
660 		break;
661 
662 	case ELEVATOR_INSERT_SORT_MERGE:
663 		/*
664 		 * If we succeed in merging this request with one in the
665 		 * queue already, we are done - rq has now been freed,
666 		 * so no need to do anything further.
667 		 */
668 		if (elv_attempt_insert_merge(q, rq))
669 			break;
670 		/* fall through */
671 	case ELEVATOR_INSERT_SORT:
672 		BUG_ON(blk_rq_is_passthrough(rq));
673 		rq->rq_flags |= RQF_SORTED;
674 		q->nr_sorted++;
675 		if (rq_mergeable(rq)) {
676 			elv_rqhash_add(q, rq);
677 			if (!q->last_merge)
678 				q->last_merge = rq;
679 		}
680 
681 		/*
682 		 * Some ioscheds (cfq) run q->request_fn directly, so
683 		 * rq cannot be accessed after calling
684 		 * elevator_add_req_fn.
685 		 */
686 		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
687 		break;
688 
689 	case ELEVATOR_INSERT_FLUSH:
690 		rq->rq_flags |= RQF_SOFTBARRIER;
691 		blk_insert_flush(rq);
692 		break;
693 	default:
694 		printk(KERN_ERR "%s: bad insertion point %d\n",
695 		       __func__, where);
696 		BUG();
697 	}
698 }
699 EXPORT_SYMBOL(__elv_add_request);
700 
701 void elv_add_request(struct request_queue *q, struct request *rq, int where)
702 {
703 	unsigned long flags;
704 
705 	spin_lock_irqsave(q->queue_lock, flags);
706 	__elv_add_request(q, rq, where);
707 	spin_unlock_irqrestore(q->queue_lock, flags);
708 }
709 EXPORT_SYMBOL(elv_add_request);
710 
711 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
712 {
713 	struct elevator_queue *e = q->elevator;
714 
715 	if (e->uses_mq && e->type->ops.mq.next_request)
716 		return e->type->ops.mq.next_request(q, rq);
717 	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
718 		return e->type->ops.sq.elevator_latter_req_fn(q, rq);
719 
720 	return NULL;
721 }
722 
723 struct request *elv_former_request(struct request_queue *q, struct request *rq)
724 {
725 	struct elevator_queue *e = q->elevator;
726 
727 	if (e->uses_mq && e->type->ops.mq.former_request)
728 		return e->type->ops.mq.former_request(q, rq);
729 	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
730 		return e->type->ops.sq.elevator_former_req_fn(q, rq);
731 	return NULL;
732 }
733 
734 int elv_set_request(struct request_queue *q, struct request *rq,
735 		    struct bio *bio, gfp_t gfp_mask)
736 {
737 	struct elevator_queue *e = q->elevator;
738 
739 	if (WARN_ON_ONCE(e->uses_mq))
740 		return 0;
741 
742 	if (e->type->ops.sq.elevator_set_req_fn)
743 		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
744 	return 0;
745 }
746 
747 void elv_put_request(struct request_queue *q, struct request *rq)
748 {
749 	struct elevator_queue *e = q->elevator;
750 
751 	if (WARN_ON_ONCE(e->uses_mq))
752 		return;
753 
754 	if (e->type->ops.sq.elevator_put_req_fn)
755 		e->type->ops.sq.elevator_put_req_fn(rq);
756 }
757 
758 int elv_may_queue(struct request_queue *q, unsigned int op)
759 {
760 	struct elevator_queue *e = q->elevator;
761 
762 	if (WARN_ON_ONCE(e->uses_mq))
763 		return 0;
764 
765 	if (e->type->ops.sq.elevator_may_queue_fn)
766 		return e->type->ops.sq.elevator_may_queue_fn(q, op);
767 
768 	return ELV_MQUEUE_MAY;
769 }
770 
771 void elv_completed_request(struct request_queue *q, struct request *rq)
772 {
773 	struct elevator_queue *e = q->elevator;
774 
775 	if (WARN_ON_ONCE(e->uses_mq))
776 		return;
777 
778 	/*
779 	 * request is released from the driver, io must be done
780 	 */
781 	if (blk_account_rq(rq)) {
782 		q->in_flight[rq_is_sync(rq)]--;
783 		if ((rq->rq_flags & RQF_SORTED) &&
784 		    e->type->ops.sq.elevator_completed_req_fn)
785 			e->type->ops.sq.elevator_completed_req_fn(q, rq);
786 	}
787 }
788 
789 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
790 
791 static ssize_t
792 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
793 {
794 	struct elv_fs_entry *entry = to_elv(attr);
795 	struct elevator_queue *e;
796 	ssize_t error;
797 
798 	if (!entry->show)
799 		return -EIO;
800 
801 	e = container_of(kobj, struct elevator_queue, kobj);
802 	mutex_lock(&e->sysfs_lock);
803 	error = e->type ? entry->show(e, page) : -ENOENT;
804 	mutex_unlock(&e->sysfs_lock);
805 	return error;
806 }
807 
808 static ssize_t
809 elv_attr_store(struct kobject *kobj, struct attribute *attr,
810 	       const char *page, size_t length)
811 {
812 	struct elv_fs_entry *entry = to_elv(attr);
813 	struct elevator_queue *e;
814 	ssize_t error;
815 
816 	if (!entry->store)
817 		return -EIO;
818 
819 	e = container_of(kobj, struct elevator_queue, kobj);
820 	mutex_lock(&e->sysfs_lock);
821 	error = e->type ? entry->store(e, page, length) : -ENOENT;
822 	mutex_unlock(&e->sysfs_lock);
823 	return error;
824 }
825 
826 static const struct sysfs_ops elv_sysfs_ops = {
827 	.show	= elv_attr_show,
828 	.store	= elv_attr_store,
829 };
830 
831 static struct kobj_type elv_ktype = {
832 	.sysfs_ops	= &elv_sysfs_ops,
833 	.release	= elevator_release,
834 };
835 
836 int elv_register_queue(struct request_queue *q)
837 {
838 	struct elevator_queue *e = q->elevator;
839 	int error;
840 
841 	lockdep_assert_held(&q->sysfs_lock);
842 
843 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
844 	if (!error) {
845 		struct elv_fs_entry *attr = e->type->elevator_attrs;
846 		if (attr) {
847 			while (attr->attr.name) {
848 				if (sysfs_create_file(&e->kobj, &attr->attr))
849 					break;
850 				attr++;
851 			}
852 		}
853 		kobject_uevent(&e->kobj, KOBJ_ADD);
854 		e->registered = 1;
855 		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
856 			e->type->ops.sq.elevator_registered_fn(q);
857 	}
858 	return error;
859 }
860 
861 void elv_unregister_queue(struct request_queue *q)
862 {
863 	lockdep_assert_held(&q->sysfs_lock);
864 
865 	if (q) {
866 		struct elevator_queue *e = q->elevator;
867 
868 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
869 		kobject_del(&e->kobj);
870 		e->registered = 0;
871 		/* Re-enable throttling in case elevator disabled it */
872 		wbt_enable_default(q);
873 	}
874 }
875 
876 int elv_register(struct elevator_type *e)
877 {
878 	char *def = "";
879 
880 	/* create icq_cache if requested */
881 	if (e->icq_size) {
882 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
883 		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
884 			return -EINVAL;
885 
886 		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
887 			 "%s_io_cq", e->elevator_name);
888 		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
889 						 e->icq_align, 0, NULL);
890 		if (!e->icq_cache)
891 			return -ENOMEM;
892 	}
893 
894 	/* register, don't allow duplicate names */
895 	spin_lock(&elv_list_lock);
896 	if (elevator_find(e->elevator_name, e->uses_mq)) {
897 		spin_unlock(&elv_list_lock);
898 		if (e->icq_cache)
899 			kmem_cache_destroy(e->icq_cache);
900 		return -EBUSY;
901 	}
902 	list_add_tail(&e->list, &elv_list);
903 	spin_unlock(&elv_list_lock);
904 
905 	/* print pretty message */
906 	if (elevator_match(e, chosen_elevator) ||
907 			(!*chosen_elevator &&
908 			 elevator_match(e, CONFIG_DEFAULT_IOSCHED)))
909 				def = " (default)";
910 
911 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
912 								def);
913 	return 0;
914 }
915 EXPORT_SYMBOL_GPL(elv_register);
916 
917 void elv_unregister(struct elevator_type *e)
918 {
919 	/* unregister */
920 	spin_lock(&elv_list_lock);
921 	list_del_init(&e->list);
922 	spin_unlock(&elv_list_lock);
923 
924 	/*
925 	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
926 	 * sure all RCU operations are complete before proceeding.
927 	 */
928 	if (e->icq_cache) {
929 		rcu_barrier();
930 		kmem_cache_destroy(e->icq_cache);
931 		e->icq_cache = NULL;
932 	}
933 }
934 EXPORT_SYMBOL_GPL(elv_unregister);
935 
936 static int elevator_switch_mq(struct request_queue *q,
937 			      struct elevator_type *new_e)
938 {
939 	int ret;
940 
941 	lockdep_assert_held(&q->sysfs_lock);
942 
943 	blk_mq_freeze_queue(q);
944 	blk_mq_quiesce_queue(q);
945 
946 	if (q->elevator) {
947 		if (q->elevator->registered)
948 			elv_unregister_queue(q);
949 		ioc_clear_queue(q);
950 		elevator_exit(q, q->elevator);
951 	}
952 
953 	ret = blk_mq_init_sched(q, new_e);
954 	if (ret)
955 		goto out;
956 
957 	if (new_e) {
958 		ret = elv_register_queue(q);
959 		if (ret) {
960 			elevator_exit(q, q->elevator);
961 			goto out;
962 		}
963 	}
964 
965 	if (new_e)
966 		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
967 	else
968 		blk_add_trace_msg(q, "elv switch: none");
969 
970 out:
971 	blk_mq_unquiesce_queue(q);
972 	blk_mq_unfreeze_queue(q);
973 	return ret;
974 }
975 
976 /*
977  * For blk-mq devices, we default to using mq-deadline, if available, for single
978  * queue devices.  If deadline isn't available OR we have multiple queues,
979  * default to "none".
980  */
981 int elevator_init_mq(struct request_queue *q)
982 {
983 	struct elevator_type *e;
984 	int err = 0;
985 
986 	if (q->nr_hw_queues != 1)
987 		return 0;
988 
989 	/*
990 	 * q->sysfs_lock must be held to provide mutual exclusion between
991 	 * elevator_switch() and here.
992 	 */
993 	mutex_lock(&q->sysfs_lock);
994 	if (unlikely(q->elevator))
995 		goto out_unlock;
996 
997 	e = elevator_get(q, "mq-deadline", false);
998 	if (!e)
999 		goto out_unlock;
1000 
1001 	err = blk_mq_init_sched(q, e);
1002 	if (err)
1003 		elevator_put(e);
1004 out_unlock:
1005 	mutex_unlock(&q->sysfs_lock);
1006 	return err;
1007 }
1008 
1009 
1010 /*
1011  * switch to new_e io scheduler. be careful not to introduce deadlocks -
1012  * we don't free the old io scheduler, before we have allocated what we
1013  * need for the new one. this way we have a chance of going back to the old
1014  * one, if the new one fails init for some reason.
1015  */
1016 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1017 {
1018 	struct elevator_queue *old = q->elevator;
1019 	bool old_registered = false;
1020 	int err;
1021 
1022 	lockdep_assert_held(&q->sysfs_lock);
1023 
1024 	if (q->mq_ops)
1025 		return elevator_switch_mq(q, new_e);
1026 
1027 	/*
1028 	 * Turn on BYPASS and drain all requests w/ elevator private data.
1029 	 * Block layer doesn't call into a quiesced elevator - all requests
1030 	 * are directly put on the dispatch list without elevator data
1031 	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
1032 	 * merge happens either.
1033 	 */
1034 	if (old) {
1035 		old_registered = old->registered;
1036 
1037 		blk_queue_bypass_start(q);
1038 
1039 		/* unregister and clear all auxiliary data of the old elevator */
1040 		if (old_registered)
1041 			elv_unregister_queue(q);
1042 
1043 		ioc_clear_queue(q);
1044 	}
1045 
1046 	/* allocate, init and register new elevator */
1047 	err = new_e->ops.sq.elevator_init_fn(q, new_e);
1048 	if (err)
1049 		goto fail_init;
1050 
1051 	err = elv_register_queue(q);
1052 	if (err)
1053 		goto fail_register;
1054 
1055 	/* done, kill the old one and finish */
1056 	if (old) {
1057 		elevator_exit(q, old);
1058 		blk_queue_bypass_end(q);
1059 	}
1060 
1061 	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
1062 
1063 	return 0;
1064 
1065 fail_register:
1066 	elevator_exit(q, q->elevator);
1067 fail_init:
1068 	/* switch failed, restore and re-register old elevator */
1069 	if (old) {
1070 		q->elevator = old;
1071 		elv_register_queue(q);
1072 		blk_queue_bypass_end(q);
1073 	}
1074 
1075 	return err;
1076 }
1077 
1078 /*
1079  * Switch this queue to the given IO scheduler.
1080  */
1081 static int __elevator_change(struct request_queue *q, const char *name)
1082 {
1083 	char elevator_name[ELV_NAME_MAX];
1084 	struct elevator_type *e;
1085 
1086 	/* Make sure queue is not in the middle of being removed */
1087 	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
1088 		return -ENOENT;
1089 
1090 	/*
1091 	 * Special case for mq, turn off scheduling
1092 	 */
1093 	if (q->mq_ops && !strncmp(name, "none", 4))
1094 		return elevator_switch(q, NULL);
1095 
1096 	strlcpy(elevator_name, name, sizeof(elevator_name));
1097 	e = elevator_get(q, strstrip(elevator_name), true);
1098 	if (!e)
1099 		return -EINVAL;
1100 
1101 	if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
1102 		elevator_put(e);
1103 		return 0;
1104 	}
1105 
1106 	return elevator_switch(q, e);
1107 }
1108 
1109 static inline bool elv_support_iosched(struct request_queue *q)
1110 {
1111 	if (q->mq_ops && q->tag_set && (q->tag_set->flags &
1112 				BLK_MQ_F_NO_SCHED))
1113 		return false;
1114 	return true;
1115 }
1116 
1117 ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1118 			  size_t count)
1119 {
1120 	int ret;
1121 
1122 	if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
1123 		return count;
1124 
1125 	ret = __elevator_change(q, name);
1126 	if (!ret)
1127 		return count;
1128 
1129 	return ret;
1130 }
1131 
1132 ssize_t elv_iosched_show(struct request_queue *q, char *name)
1133 {
1134 	struct elevator_queue *e = q->elevator;
1135 	struct elevator_type *elv = NULL;
1136 	struct elevator_type *__e;
1137 	bool uses_mq = q->mq_ops != NULL;
1138 	int len = 0;
1139 
1140 	if (!queue_is_rq_based(q))
1141 		return sprintf(name, "none\n");
1142 
1143 	if (!q->elevator)
1144 		len += sprintf(name+len, "[none] ");
1145 	else
1146 		elv = e->type;
1147 
1148 	spin_lock(&elv_list_lock);
1149 	list_for_each_entry(__e, &elv_list, list) {
1150 		if (elv && elevator_match(elv, __e->elevator_name) &&
1151 		    (__e->uses_mq == uses_mq)) {
1152 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
1153 			continue;
1154 		}
1155 		if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
1156 			len += sprintf(name+len, "%s ", __e->elevator_name);
1157 		else if (!__e->uses_mq && !q->mq_ops)
1158 			len += sprintf(name+len, "%s ", __e->elevator_name);
1159 	}
1160 	spin_unlock(&elv_list_lock);
1161 
1162 	if (q->mq_ops && q->elevator)
1163 		len += sprintf(name+len, "none");
1164 
1165 	len += sprintf(len+name, "\n");
1166 	return len;
1167 }
1168 
1169 struct request *elv_rb_former_request(struct request_queue *q,
1170 				      struct request *rq)
1171 {
1172 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
1173 
1174 	if (rbprev)
1175 		return rb_entry_rq(rbprev);
1176 
1177 	return NULL;
1178 }
1179 EXPORT_SYMBOL(elv_rb_former_request);
1180 
1181 struct request *elv_rb_latter_request(struct request_queue *q,
1182 				      struct request *rq)
1183 {
1184 	struct rb_node *rbnext = rb_next(&rq->rb_node);
1185 
1186 	if (rbnext)
1187 		return rb_entry_rq(rbnext);
1188 
1189 	return NULL;
1190 }
1191 EXPORT_SYMBOL(elv_rb_latter_request);
1192