bfq-iosched.c: 03e565e4204c6cf8687d995de5cafd0341503b4e (old) vs. 73d58118498b14e4d2f2391105459b997b586ddc (new)
1/*
2 * Budget Fair Queueing (BFQ) I/O scheduler.
3 *
4 * Based on ideas and code from CFQ:
5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6 *
7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8 * Paolo Valente <paolo.valente@unimore.it>

--- 610 unchanged lines hidden ---

619 if (!__bfqq) {
620 rb_link_node(&bfqq->pos_node, parent, p);
621 rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
622 } else
623 bfqq->pos_root = NULL;
624}
625

--- old version (03e565e4204c), lines 626-673 ---

626/*
627 * Tell whether there are active queues with different weights or
628 * active groups.
629 */
630static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
631{
632 /*
633 * For queue weights to differ, queue_weights_tree must contain
634 * at least two nodes.
635 */
636 return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
637 (bfqd->queue_weights_tree.rb_node->rb_left ||
638 bfqd->queue_weights_tree.rb_node->rb_right)
639#ifdef CONFIG_BFQ_GROUP_IOSCHED
640 ) ||
641 (bfqd->num_groups_with_pending_reqs > 0
642#endif
643 );
644}
645
646/*
647 * The following function returns true if every queue must receive the
648 * same share of the throughput (this condition is used when deciding
649 * whether idling may be disabled, see the comments in the function
650 * bfq_better_to_idle()).
651 *
652 * Such a scenario occurs when:
653 * 1) all active queues have the same weight,
654 * 2) all active groups at the same level in the groups tree have the same
655 * weight,
656 * 3) all active groups at the same level in the groups tree have the same
657 * number of children.
658 *
659 * Unfortunately, keeping the necessary state for evaluating exactly
660 * the last two symmetry sub-conditions above would be quite complex
661 * and time consuming. Therefore this function evaluates, instead,
662 * only the following stronger two sub-conditions, for which it is
663 * much easier to maintain the needed state:
664 * 1) all active queues have the same weight,
665 * 2) there are no active groups.
666 * In particular, the last condition is always true if hierarchical
667 * support or the cgroups interface are not enabled, thus no state
668 * needs to be maintained in this case.
669 */
670static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
671{
672 return !bfq_varied_queue_weights_or_active_groups(bfqd);
673}

--- new version (73d58118498b), lines 626-676 ---

626/*
627 * The following function returns true if every queue must receive the
628 * same share of the throughput (this condition is used when deciding
629 * whether idling may be disabled, see the comments in the function
630 * bfq_better_to_idle()).
631 *
632 * Such a scenario occurs when:
633 * 1) all active queues have the same weight,
634 * 2) all active queues belong to the same I/O-priority class,
635 * 3) all active groups at the same level in the groups tree have the same
636 * weight,
637 * 4) all active groups at the same level in the groups tree have the same
638 * number of children.
639 *
640 * Unfortunately, keeping the necessary state for evaluating exactly
641 * the last two symmetry sub-conditions above would be quite complex
642 * and time consuming. Therefore this function evaluates, instead,
643 * only the following stronger three sub-conditions, for which it is
644 * much easier to maintain the needed state:
645 * 1) all active queues have the same weight,
646 * 2) all active queues belong to the same I/O-priority class,
647 * 3) there are no active groups.
648 * In particular, the last condition is always true if hierarchical
649 * support or the cgroups interface are not enabled, thus no state
650 * needs to be maintained in this case.
651 */
652static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
653{
654 /*
655 * For queue weights to differ, queue_weights_tree must contain
656 * at least two nodes.
657 */
658 bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
659 (bfqd->queue_weights_tree.rb_node->rb_left ||
660 bfqd->queue_weights_tree.rb_node->rb_right);
661
662 bool multiple_classes_busy =
663 (bfqd->busy_queues[0] && bfqd->busy_queues[1]) ||
664 (bfqd->busy_queues[0] && bfqd->busy_queues[2]) ||
665 (bfqd->busy_queues[1] && bfqd->busy_queues[2]);
666
667 /*
668 * For queue weights to differ, queue_weights_tree must contain
669 * at least two nodes.
670 */
671 return !(varied_queue_weights || multiple_classes_busy
672#ifdef BFQ_GROUP_IOSCHED_ENABLED
673 || bfqd->num_groups_with_pending_reqs > 0
674#endif
675 );
676}
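
The reworked check above can be illustrated with a small user-space sketch. Everything below is an illustrative stand-in, not kernel code: bfq_data_stub, its three-slot busy_queues array (loosely mirroring the RT/BE/IDLE classes) and the varied_queue_weights flag replace the real rb-tree and per-class bookkeeping, and only the shape of the symmetry test is preserved.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the per-class busy-queue counters the new
 * code consults; index 0/1/2 loosely mirrors the RT/BE/IDLE classes. */
struct bfq_data_stub {
	unsigned int busy_queues[3];
	bool varied_queue_weights;	/* stand-in for the rb-tree test */
	unsigned int num_groups_with_pending_reqs;
};

/* Mirrors the shape of the reworked bfq_symmetric_scenario(): the
 * scenario is symmetric only if no sub-condition signals asymmetry. */
static bool symmetric_scenario(const struct bfq_data_stub *d)
{
	bool multiple_classes_busy =
		(d->busy_queues[0] && d->busy_queues[1]) ||
		(d->busy_queues[0] && d->busy_queues[2]) ||
		(d->busy_queues[1] && d->busy_queues[2]);

	return !(d->varied_queue_weights || multiple_classes_busy ||
		 d->num_groups_with_pending_reqs > 0);
}

int main(void)
{
	struct bfq_data_stub only_be = { .busy_queues = { 0, 4, 0 } };
	struct bfq_data_stub be_plus_idle = { .busy_queues = { 0, 3, 1 } };

	/* Equal weights, one class busy: idling may be disabled. */
	printf("only BE busy   -> symmetric = %d\n", symmetric_scenario(&only_be));
	/* Two classes busy: asymmetric even with equal weights. */
	printf("BE + IDLE busy -> symmetric = %d\n", symmetric_scenario(&be_plus_idle));
	return 0;
}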
674
675/*
676 * If the weight-counter tree passed as input contains no counter for
677 * the weight of the input queue, then add that counter; otherwise just
678 * increment the existing counter.
679 *
680 * Note that weight-counter trees contain few nodes in mostly symmetric

--- 42 unchanged lines hidden ---

723 }
724
725 bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
726 GFP_ATOMIC);
727
728 /*
729 * In the unlucky event of an allocation failure, we just
730 * exit. This will cause the weight of queue to not be
731 * considered in bfq_varied_queue_weights_or_active_groups,
732 * which, in its turn, causes the scenario to be deemed
733 * wrongly symmetric in case bfqq's weight would have been
734 * the only weight making the scenario asymmetric. On the
735 * bright side, no unbalance will however occur when bfqq
736 * becomes inactive again (the invocation of this function
737 * is triggered by an activation of queue). In fact,
738 * bfq_weights_tree_remove does nothing if
739 * !bfqq->weight_counter.

--- new version (73d58118498b), lines 734-741 ---

734 * considered in bfq_symmetric_scenario, which, in its turn,
735 * causes the scenario to be deemed wrongly symmetric in case
736 * bfqq's weight would have been the only weight making the
737 * scenario asymmetric. On the bright side, no unbalance will
738 * however occur when bfqq becomes inactive again (the
739 * invocation of this function is triggered by an activation
740 * of queue). In fact, bfq_weights_tree_remove does nothing
741 * if !bfqq->weight_counter.
740 */
741 if (unlikely(!bfqq->weight_counter))
742 return;
743
744 bfqq->weight_counter->weight = entity->weight;
745 rb_link_node(&bfqq->weight_counter->weights_node, parent, new);
746 rb_insert_color(&bfqq->weight_counter->weights_node, root);
747
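
The allocation-failure handling described in the comment above can be shown with a deliberately simplified toy: unlike the real code, which shares one rb-tree node per distinct weight, the sketch below attaches a heap-allocated counter to a single queue, but it preserves the invariant the comment relies on, namely that a removal with no attached counter is a no-op, so a failed allocation never unbalances the accounting. All names here (toy_queue, weights_add, weights_remove) are hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* Toy weight counter: one counter object with a count of how many
 * activations currently reference it. */
struct weight_counter {
	int weight;
	unsigned int num_active;
};

struct toy_queue {
	int weight;
	struct weight_counter *weight_counter;	/* NULL if never accounted */
};

/* On activation: try to attach a counter; on allocation failure just
 * return, exactly as the kernel comment describes. */
static void weights_add(struct toy_queue *q)
{
	if (q->weight_counter) {
		q->weight_counter->num_active++;
		return;
	}
	q->weight_counter = calloc(1, sizeof(*q->weight_counter));
	if (!q->weight_counter)
		return;	/* scenario may be mis-read as symmetric, but state stays consistent */
	q->weight_counter->weight = q->weight;
	q->weight_counter->num_active = 1;
}

/* On deactivation: doing nothing when no counter is attached is what
 * keeps the accounting balanced after a failed allocation. */
static void weights_remove(struct toy_queue *q)
{
	if (!q->weight_counter)
		return;
	if (--q->weight_counter->num_active == 0) {
		free(q->weight_counter);
		q->weight_counter = NULL;
	}
}

int main(void)
{
	struct toy_queue q = { .weight = 100 };

	weights_add(&q);	/* activation */
	weights_remove(&q);	/* deactivation: counter freed, no underflow */
	weights_remove(&q);	/* spurious remove is harmless: counter is NULL */
	printf("counter after removals: %p\n", (void *)q.weight_counter);
	return 0;
}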

--- 1474 unchanged lines hidden ---

2222
2223 if (bfqq->new_bfqq)
2224 return bfqq->new_bfqq;
2225
2226 if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
2227 return NULL;
2228
2229 /* If there is only one backlogged queue, don't search. */
2230 if (bfqd->busy_queues == 1)
2232 if (bfq_tot_busy_queues(bfqd) == 1)
2231 return NULL;
2232
2233 in_service_bfqq = bfqd->in_service_queue;
2234
2235 if (in_service_bfqq && in_service_bfqq != bfqq &&
2236 likely(in_service_bfqq != &bfqd->oom_bfqq) &&
2237 bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
2238 bfqq->entity.parent == in_service_bfqq->entity.parent &&

--- 1437 unchanged lines hidden ---
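
The bfq_tot_busy_queues() calls introduced throughout this change (first used in the backlogged-queue test above) are not defined in this hunk; the helper is added in bfq-iosched.h by the same change, together with the switch from a scalar bfqd->busy_queues to a per-I/O-priority-class array. Its exact definition is not shown in this diff, so the sketch below is only an assumption of its likely shape (a sum of the per-class counters), wrapped in a stub type so it compiles stand-alone.

#include <stdio.h>

/* Minimal stand-in for the part of struct bfq_data this diff relies on:
 * busy_queues is now a per-ioprio-class array rather than a scalar. */
struct bfq_data_stub {
	unsigned int busy_queues[3];
};

/* Assumed shape of the helper introduced alongside this change (not
 * visible here): total busy queues is the sum over the classes. */
static inline unsigned int tot_busy_queues(const struct bfq_data_stub *d)
{
	return d->busy_queues[0] + d->busy_queues[1] + d->busy_queues[2];
}

int main(void)
{
	struct bfq_data_stub d = { .busy_queues = { 1, 2, 0 } };

	/* The old code compared the scalar bfqd->busy_queues against 1;
	 * the new code needs the sum across classes for the same test. */
	printf("total busy queues: %u\n", tot_busy_queues(&d));
	printf("only one backlogged queue: %s\n",
	       tot_busy_queues(&d) == 1 ? "yes" : "no");
	return 0;
}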

3676 * disabled in a time period during which all symmetry
3677 * sub-conditions hold, and hence the device is allowed to
3678 * enqueue many requests, but at some later point in time some
3679 * sub-condition stops to hold, then it may become impossible
3680 * to let requests be served in the desired order until all
3681 * the requests already queued in the device have been served.
3682 */

--- old version (03e565e4204c), lines 3683-3685 ---

3683 asymmetric_scenario = (bfqq->wr_coeff > 1 &&
3684 bfqd->wr_busy_queues < bfqd->busy_queues) ||
3685 !bfq_symmetric_scenario(bfqd);

--- new version (73d58118498b), lines 3685-3688 ---

3685 asymmetric_scenario = (bfqq->wr_coeff > 1 &&
3686 bfqd->wr_busy_queues <
3687 bfq_tot_busy_queues(bfqd)) ||
3688 !bfq_symmetric_scenario(bfqd);
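
The asymmetric_scenario expression above combines two tests: bfqq is weight-raised while some busy queues are not, or the weight/class/group symmetry check fails. The stand-alone sketch below reproduces only the shape of that expression; idle_inputs and its fields are stubs, not the scheduler's real state.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative inputs for the guarantee-driven part of the idling
 * decision; the names mirror the fields used above, but this is a
 * stub, not struct bfq_data. */
struct idle_inputs {
	unsigned int wr_coeff;		/* >1 means bfqq is weight-raised */
	unsigned int wr_busy_queues;	/* busy weight-raised queues */
	unsigned int tot_busy_queues;	/* all busy queues, all classes */
	bool symmetric_scenario;	/* result of bfq_symmetric_scenario() */
};

/* Same shape as the expression computed above: asymmetry holds if bfqq
 * is weight-raised while some busy queues are not, or if the
 * weight/class/group symmetry check fails. */
static bool asymmetric_scenario(const struct idle_inputs *in)
{
	return (in->wr_coeff > 1 &&
		in->wr_busy_queues < in->tot_busy_queues) ||
	       !in->symmetric_scenario;
}

int main(void)
{
	struct idle_inputs all_raised = {
		.wr_coeff = 30, .wr_busy_queues = 3,
		.tot_busy_queues = 3, .symmetric_scenario = true,
	};
	struct idle_inputs partly_raised = {
		.wr_coeff = 30, .wr_busy_queues = 1,
		.tot_busy_queues = 3, .symmetric_scenario = true,
	};

	printf("all busy queues raised -> asymmetric = %d\n",
	       asymmetric_scenario(&all_raised));
	printf("only bfqq raised       -> asymmetric = %d\n",
	       asymmetric_scenario(&partly_raised));
	return 0;
}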
3686
3687 /*
3688 * Finally, there is a case where maximizing throughput is the
3689 * best choice even if it may cause unfairness toward
3690 * bfqq. Such a case is when bfqq became active in a burst of
3691 * queue activations. Queues that became active during a large
3692 * burst benefit only from throughput, as discussed in the

--- 262 unchanged lines hidden ---

3955 */
3956 bfq_update_wr_data(bfqd, bfqq);
3957
3958 /*
3959 * Expire bfqq, pretending that its budget expired, if bfqq
3960 * belongs to CLASS_IDLE and other queues are waiting for
3961 * service.
3962 */
3963 if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq)))
3966 if (!(bfq_tot_busy_queues(bfqd) > 1 && bfq_class_idle(bfqq)))
3964 goto return_rq;
3965
3966 bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
3967
3968return_rq:
3969 return rq;
3970}
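
The guard above expires a CLASS_IDLE queue, pretending its budget is exhausted, only when other queues are waiting for service. A minimal stand-alone illustration of that decision, using hypothetical stub types rather than bfq's own:

#include <stdbool.h>
#include <stdio.h>

enum ioprio_class_stub { CLASS_RT, CLASS_BE, CLASS_IDLE };

/* Mirrors the guard above: an idle-class queue is expired early only
 * when at least one other queue is busy; otherwise it keeps the
 * device. Stub types and names, not kernel ones. */
static bool should_expire_idle_class(enum ioprio_class_stub cls,
				     unsigned int tot_busy_queues)
{
	return tot_busy_queues > 1 && cls == CLASS_IDLE;
}

int main(void)
{
	printf("IDLE queue, 3 busy -> expire = %d\n",
	       should_expire_idle_class(CLASS_IDLE, 3));
	printf("IDLE queue, 1 busy -> expire = %d\n",
	       should_expire_idle_class(CLASS_IDLE, 1));
	printf("BE queue,   3 busy -> expire = %d\n",
	       should_expire_idle_class(CLASS_BE, 3));
	return 0;
}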
3971
3972static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
3973{
3974 struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
3975
3976 /*
3977 * Avoiding lock: a race on bfqd->busy_queues should cause at
3978 * most a call to dispatch for nothing
3979 */
3980 return !list_empty_careful(&bfqd->dispatch) ||
3981 bfqd->busy_queues > 0;
3984 bfq_tot_busy_queues(bfqd) > 0;
3982}
3983
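
bfq_has_work() above deliberately reads shared state without taking the lock; the worst a race can cause is one dispatch call that finds nothing to do. The sketch below mirrors that check with stub types (sched_state and has_work are illustrative names), assuming the total-busy count is the sum of the per-class counters.

#include <stdbool.h>
#include <stdio.h>

/* Stub of the two pieces of state bfq_has_work() looks at; in the
 * kernel these are bfqd->dispatch and the per-class busy counters. */
struct sched_state {
	int dispatch_list_len;		/* stand-in for list_empty_careful() */
	unsigned int busy_queues[3];
};

static unsigned int tot_busy_queues(const struct sched_state *s)
{
	return s->busy_queues[0] + s->busy_queues[1] + s->busy_queues[2];
}

/* Mirrors the lockless check above: report work if either the dispatch
 * list is non-empty or any queue, in any I/O-priority class, is busy. */
static bool has_work(const struct sched_state *s)
{
	return s->dispatch_list_len > 0 || tot_busy_queues(s) > 0;
}

int main(void)
{
	struct sched_state idle = { 0 };
	struct sched_state busy = { .busy_queues = { 0, 1, 0 } };

	printf("idle scheduler has work: %d\n", has_work(&idle));
	printf("busy scheduler has work: %d\n", has_work(&busy));
	return 0;
}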
3984static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
3985{
3986 struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
3987 struct request *rq = NULL;
3988 struct bfq_queue *bfqq = NULL;
3989

--- 37 unchanged lines hidden ---

4027 * but it would entail using an extra interface
4028 * function. This cost seems higher than the benefit,
4029 * being the frequency of non-elevator-private
4030 * requests very low.
4031 */
4032 goto start_rq;
4033 }
4034

--- old version (03e565e4204c), lines 4035-4037 ---

4035 bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
4036
4037 if (bfqd->busy_queues == 0)

--- new version (73d58118498b), lines 4038-4041 ---

4038 bfq_log(bfqd, "dispatch requests: %d busy queues",
4039 bfq_tot_busy_queues(bfqd));
4040
4041 if (bfq_tot_busy_queues(bfqd) == 0)
4038 goto exit;
4039
4040 /*
4041 * Force device to serve one request at a time if
4042 * strict_guarantees is true. Forcing this service scheme is
4043 * currently the ONLY way to guarantee that the request
4044 * service order enforced by the scheduler is respected by a
4045 * queueing device. Otherwise the device is free even to make

--- 1829 unchanged lines hidden ---