xref: /openbmc/linux/net/sched/sch_codel.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
176e3cc12SEric Dumazet /*
276e3cc12SEric Dumazet  * Codel - The Controlled-Delay Active Queue Management algorithm
376e3cc12SEric Dumazet  *
476e3cc12SEric Dumazet  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
576e3cc12SEric Dumazet  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
676e3cc12SEric Dumazet  *
776e3cc12SEric Dumazet  *  Implemented on linux by :
876e3cc12SEric Dumazet  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
980ba92faSEric Dumazet  *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
1076e3cc12SEric Dumazet  *
1176e3cc12SEric Dumazet  * Redistribution and use in source and binary forms, with or without
1276e3cc12SEric Dumazet  * modification, are permitted provided that the following conditions
1376e3cc12SEric Dumazet  * are met:
1476e3cc12SEric Dumazet  * 1. Redistributions of source code must retain the above copyright
1576e3cc12SEric Dumazet  *    notice, this list of conditions, and the following disclaimer,
1676e3cc12SEric Dumazet  *    without modification.
1776e3cc12SEric Dumazet  * 2. Redistributions in binary form must reproduce the above copyright
1876e3cc12SEric Dumazet  *    notice, this list of conditions and the following disclaimer in the
1976e3cc12SEric Dumazet  *    documentation and/or other materials provided with the distribution.
2076e3cc12SEric Dumazet  * 3. The names of the authors may not be used to endorse or promote products
2176e3cc12SEric Dumazet  *    derived from this software without specific prior written permission.
2276e3cc12SEric Dumazet  *
2376e3cc12SEric Dumazet  * Alternatively, provided that this notice is retained in full, this
2476e3cc12SEric Dumazet  * software may be distributed under the terms of the GNU General
2576e3cc12SEric Dumazet  * Public License ("GPL") version 2, in which case the provisions of the
2676e3cc12SEric Dumazet  * GPL apply INSTEAD OF those given above.
2776e3cc12SEric Dumazet  *
2876e3cc12SEric Dumazet  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2976e3cc12SEric Dumazet  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
3076e3cc12SEric Dumazet  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
3176e3cc12SEric Dumazet  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
3276e3cc12SEric Dumazet  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3376e3cc12SEric Dumazet  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3476e3cc12SEric Dumazet  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3576e3cc12SEric Dumazet  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3676e3cc12SEric Dumazet  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3776e3cc12SEric Dumazet  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3876e3cc12SEric Dumazet  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
3976e3cc12SEric Dumazet  * DAMAGE.
4076e3cc12SEric Dumazet  *
4176e3cc12SEric Dumazet  */
4276e3cc12SEric Dumazet 
4376e3cc12SEric Dumazet #include <linux/module.h>
4476e3cc12SEric Dumazet #include <linux/slab.h>
4576e3cc12SEric Dumazet #include <linux/types.h>
4676e3cc12SEric Dumazet #include <linux/kernel.h>
4776e3cc12SEric Dumazet #include <linux/errno.h>
4876e3cc12SEric Dumazet #include <linux/skbuff.h>
49ce5b4b97SGeert Uytterhoeven #include <linux/prefetch.h>
5076e3cc12SEric Dumazet #include <net/pkt_sched.h>
5176e3cc12SEric Dumazet #include <net/codel.h>
52d068ca2aSMichal Kazior #include <net/codel_impl.h>
53d068ca2aSMichal Kazior #include <net/codel_qdisc.h>
5476e3cc12SEric Dumazet 
5576e3cc12SEric Dumazet 
5676e3cc12SEric Dumazet #define DEFAULT_CODEL_LIMIT 1000
5776e3cc12SEric Dumazet 
5876e3cc12SEric Dumazet struct codel_sched_data {
5976e3cc12SEric Dumazet 	struct codel_params	params;
6076e3cc12SEric Dumazet 	struct codel_vars	vars;
6176e3cc12SEric Dumazet 	struct codel_stats	stats;
6276e3cc12SEric Dumazet 	u32			drop_overlimit;
6376e3cc12SEric Dumazet };
6476e3cc12SEric Dumazet 
6576e3cc12SEric Dumazet /* This is the specific function called from codel_dequeue()
6676e3cc12SEric Dumazet  * to dequeue a packet from queue. Note: backlog is handled in
6776e3cc12SEric Dumazet  * codel, we dont need to reduce it here.
6876e3cc12SEric Dumazet  */
dequeue_func(struct codel_vars * vars,void * ctx)6979bdc4c8SMichal Kazior static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
7076e3cc12SEric Dumazet {
7179bdc4c8SMichal Kazior 	struct Qdisc *sch = ctx;
72ed760cb8SFlorian Westphal 	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
7376e3cc12SEric Dumazet 
74*051c7b39SJia-Ju Bai 	if (skb) {
7579bdc4c8SMichal Kazior 		sch->qstats.backlog -= qdisc_pkt_len(skb);
7676e3cc12SEric Dumazet 		prefetch(&skb->end); /* we'll need skb_shinfo() */
77*051c7b39SJia-Ju Bai 	}
7876e3cc12SEric Dumazet 	return skb;
7976e3cc12SEric Dumazet }
8076e3cc12SEric Dumazet 
drop_func(struct sk_buff * skb,void * ctx)8179bdc4c8SMichal Kazior static void drop_func(struct sk_buff *skb, void *ctx)
8279bdc4c8SMichal Kazior {
8379bdc4c8SMichal Kazior 	struct Qdisc *sch = ctx;
8479bdc4c8SMichal Kazior 
85520ac30fSEric Dumazet 	kfree_skb(skb);
86520ac30fSEric Dumazet 	qdisc_qstats_drop(sch);
8779bdc4c8SMichal Kazior }
8879bdc4c8SMichal Kazior 
codel_qdisc_dequeue(struct Qdisc * sch)8976e3cc12SEric Dumazet static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
9076e3cc12SEric Dumazet {
9176e3cc12SEric Dumazet 	struct codel_sched_data *q = qdisc_priv(sch);
9276e3cc12SEric Dumazet 	struct sk_buff *skb;
9376e3cc12SEric Dumazet 
9479bdc4c8SMichal Kazior 	skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars,
9579bdc4c8SMichal Kazior 			    &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
9679bdc4c8SMichal Kazior 			    drop_func, dequeue_func);
97865ec552SEric Dumazet 
982ccccf5fSWANG Cong 	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
9976e3cc12SEric Dumazet 	 * or HTB crashes. Defer it for next round.
10076e3cc12SEric Dumazet 	 */
10176e3cc12SEric Dumazet 	if (q->stats.drop_count && sch->q.qlen) {
1022ccccf5fSWANG Cong 		qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
10376e3cc12SEric Dumazet 		q->stats.drop_count = 0;
1042ccccf5fSWANG Cong 		q->stats.drop_len = 0;
10576e3cc12SEric Dumazet 	}
10676e3cc12SEric Dumazet 	if (skb)
10776e3cc12SEric Dumazet 		qdisc_bstats_update(sch, skb);
10876e3cc12SEric Dumazet 	return skb;
10976e3cc12SEric Dumazet }
11076e3cc12SEric Dumazet 
codel_qdisc_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)111520ac30fSEric Dumazet static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
112520ac30fSEric Dumazet 			       struct sk_buff **to_free)
11376e3cc12SEric Dumazet {
11476e3cc12SEric Dumazet 	struct codel_sched_data *q;
11576e3cc12SEric Dumazet 
11676e3cc12SEric Dumazet 	if (likely(qdisc_qlen(sch) < sch->limit)) {
11776e3cc12SEric Dumazet 		codel_set_enqueue_time(skb);
11876e3cc12SEric Dumazet 		return qdisc_enqueue_tail(skb, sch);
11976e3cc12SEric Dumazet 	}
12076e3cc12SEric Dumazet 	q = qdisc_priv(sch);
12176e3cc12SEric Dumazet 	q->drop_overlimit++;
122520ac30fSEric Dumazet 	return qdisc_drop(skb, sch, to_free);
12376e3cc12SEric Dumazet }
12476e3cc12SEric Dumazet 
12576e3cc12SEric Dumazet static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
12676e3cc12SEric Dumazet 	[TCA_CODEL_TARGET]	= { .type = NLA_U32 },
12776e3cc12SEric Dumazet 	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 },
12876e3cc12SEric Dumazet 	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 },
12976e3cc12SEric Dumazet 	[TCA_CODEL_ECN]		= { .type = NLA_U32 },
13080ba92faSEric Dumazet 	[TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
13176e3cc12SEric Dumazet };
13276e3cc12SEric Dumazet 
codel_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)1332030721cSAlexander Aring static int codel_change(struct Qdisc *sch, struct nlattr *opt,
1342030721cSAlexander Aring 			struct netlink_ext_ack *extack)
13576e3cc12SEric Dumazet {
13676e3cc12SEric Dumazet 	struct codel_sched_data *q = qdisc_priv(sch);
13776e3cc12SEric Dumazet 	struct nlattr *tb[TCA_CODEL_MAX + 1];
1382ccccf5fSWANG Cong 	unsigned int qlen, dropped = 0;
13976e3cc12SEric Dumazet 	int err;
14076e3cc12SEric Dumazet 
1418cb08174SJohannes Berg 	err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
1428cb08174SJohannes Berg 					  codel_policy, NULL);
14376e3cc12SEric Dumazet 	if (err < 0)
14476e3cc12SEric Dumazet 		return err;
14576e3cc12SEric Dumazet 
14676e3cc12SEric Dumazet 	sch_tree_lock(sch);
14776e3cc12SEric Dumazet 
14876e3cc12SEric Dumazet 	if (tb[TCA_CODEL_TARGET]) {
14976e3cc12SEric Dumazet 		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
15076e3cc12SEric Dumazet 
15176e3cc12SEric Dumazet 		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
15276e3cc12SEric Dumazet 	}
15376e3cc12SEric Dumazet 
15480ba92faSEric Dumazet 	if (tb[TCA_CODEL_CE_THRESHOLD]) {
15580ba92faSEric Dumazet 		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
15680ba92faSEric Dumazet 
15780ba92faSEric Dumazet 		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
15880ba92faSEric Dumazet 	}
15980ba92faSEric Dumazet 
16076e3cc12SEric Dumazet 	if (tb[TCA_CODEL_INTERVAL]) {
16176e3cc12SEric Dumazet 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
16276e3cc12SEric Dumazet 
16376e3cc12SEric Dumazet 		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
16476e3cc12SEric Dumazet 	}
16576e3cc12SEric Dumazet 
16676e3cc12SEric Dumazet 	if (tb[TCA_CODEL_LIMIT])
16776e3cc12SEric Dumazet 		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
16876e3cc12SEric Dumazet 
16976e3cc12SEric Dumazet 	if (tb[TCA_CODEL_ECN])
17076e3cc12SEric Dumazet 		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
17176e3cc12SEric Dumazet 
17276e3cc12SEric Dumazet 	qlen = sch->q.qlen;
17376e3cc12SEric Dumazet 	while (sch->q.qlen > sch->limit) {
174ed760cb8SFlorian Westphal 		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
17576e3cc12SEric Dumazet 
1762ccccf5fSWANG Cong 		dropped += qdisc_pkt_len(skb);
17725331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
178b3d7e2b2SEric Dumazet 		rtnl_qdisc_drop(skb, sch);
17976e3cc12SEric Dumazet 	}
1802ccccf5fSWANG Cong 	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
18176e3cc12SEric Dumazet 
18276e3cc12SEric Dumazet 	sch_tree_unlock(sch);
18376e3cc12SEric Dumazet 	return 0;
18476e3cc12SEric Dumazet }
18576e3cc12SEric Dumazet 
codel_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)186e63d7dfdSAlexander Aring static int codel_init(struct Qdisc *sch, struct nlattr *opt,
187e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
18876e3cc12SEric Dumazet {
18976e3cc12SEric Dumazet 	struct codel_sched_data *q = qdisc_priv(sch);
19076e3cc12SEric Dumazet 
19176e3cc12SEric Dumazet 	sch->limit = DEFAULT_CODEL_LIMIT;
19276e3cc12SEric Dumazet 
19379bdc4c8SMichal Kazior 	codel_params_init(&q->params);
19476e3cc12SEric Dumazet 	codel_vars_init(&q->vars);
19576e3cc12SEric Dumazet 	codel_stats_init(&q->stats);
19679bdc4c8SMichal Kazior 	q->params.mtu = psched_mtu(qdisc_dev(sch));
19776e3cc12SEric Dumazet 
19876e3cc12SEric Dumazet 	if (opt) {
1992030721cSAlexander Aring 		int err = codel_change(sch, opt, extack);
20076e3cc12SEric Dumazet 
20176e3cc12SEric Dumazet 		if (err)
20276e3cc12SEric Dumazet 			return err;
20376e3cc12SEric Dumazet 	}
20476e3cc12SEric Dumazet 
20576e3cc12SEric Dumazet 	if (sch->limit >= 1)
20676e3cc12SEric Dumazet 		sch->flags |= TCQ_F_CAN_BYPASS;
20776e3cc12SEric Dumazet 	else
20876e3cc12SEric Dumazet 		sch->flags &= ~TCQ_F_CAN_BYPASS;
20976e3cc12SEric Dumazet 
21076e3cc12SEric Dumazet 	return 0;
21176e3cc12SEric Dumazet }
21276e3cc12SEric Dumazet 
codel_dump(struct Qdisc * sch,struct sk_buff * skb)21376e3cc12SEric Dumazet static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
21476e3cc12SEric Dumazet {
21576e3cc12SEric Dumazet 	struct codel_sched_data *q = qdisc_priv(sch);
21676e3cc12SEric Dumazet 	struct nlattr *opts;
21776e3cc12SEric Dumazet 
218ae0be8deSMichal Kubecek 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
21976e3cc12SEric Dumazet 	if (opts == NULL)
22076e3cc12SEric Dumazet 		goto nla_put_failure;
22176e3cc12SEric Dumazet 
22276e3cc12SEric Dumazet 	if (nla_put_u32(skb, TCA_CODEL_TARGET,
22376e3cc12SEric Dumazet 			codel_time_to_us(q->params.target)) ||
22476e3cc12SEric Dumazet 	    nla_put_u32(skb, TCA_CODEL_LIMIT,
22576e3cc12SEric Dumazet 			sch->limit) ||
22676e3cc12SEric Dumazet 	    nla_put_u32(skb, TCA_CODEL_INTERVAL,
22776e3cc12SEric Dumazet 			codel_time_to_us(q->params.interval)) ||
22876e3cc12SEric Dumazet 	    nla_put_u32(skb, TCA_CODEL_ECN,
22976e3cc12SEric Dumazet 			q->params.ecn))
23076e3cc12SEric Dumazet 		goto nla_put_failure;
23180ba92faSEric Dumazet 	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
23280ba92faSEric Dumazet 	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
23380ba92faSEric Dumazet 			codel_time_to_us(q->params.ce_threshold)))
23480ba92faSEric Dumazet 		goto nla_put_failure;
23576e3cc12SEric Dumazet 	return nla_nest_end(skb, opts);
23676e3cc12SEric Dumazet 
23776e3cc12SEric Dumazet nla_put_failure:
23876e3cc12SEric Dumazet 	nla_nest_cancel(skb, opts);
23976e3cc12SEric Dumazet 	return -1;
24076e3cc12SEric Dumazet }
24176e3cc12SEric Dumazet 
codel_dump_stats(struct Qdisc * sch,struct gnet_dump * d)24276e3cc12SEric Dumazet static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
24376e3cc12SEric Dumazet {
24476e3cc12SEric Dumazet 	const struct codel_sched_data *q = qdisc_priv(sch);
24576e3cc12SEric Dumazet 	struct tc_codel_xstats st = {
24676e3cc12SEric Dumazet 		.maxpacket	= q->stats.maxpacket,
24776e3cc12SEric Dumazet 		.count		= q->vars.count,
24876e3cc12SEric Dumazet 		.lastcount	= q->vars.lastcount,
24976e3cc12SEric Dumazet 		.drop_overlimit = q->drop_overlimit,
25076e3cc12SEric Dumazet 		.ldelay		= codel_time_to_us(q->vars.ldelay),
25176e3cc12SEric Dumazet 		.dropping	= q->vars.dropping,
25276e3cc12SEric Dumazet 		.ecn_mark	= q->stats.ecn_mark,
25380ba92faSEric Dumazet 		.ce_mark	= q->stats.ce_mark,
25476e3cc12SEric Dumazet 	};
25576e3cc12SEric Dumazet 
25676e3cc12SEric Dumazet 	if (q->vars.dropping) {
25776e3cc12SEric Dumazet 		codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
25876e3cc12SEric Dumazet 
25976e3cc12SEric Dumazet 		if (delta >= 0)
26076e3cc12SEric Dumazet 			st.drop_next = codel_time_to_us(delta);
26176e3cc12SEric Dumazet 		else
26276e3cc12SEric Dumazet 			st.drop_next = -codel_time_to_us(-delta);
26376e3cc12SEric Dumazet 	}
26476e3cc12SEric Dumazet 
26576e3cc12SEric Dumazet 	return gnet_stats_copy_app(d, &st, sizeof(st));
26676e3cc12SEric Dumazet }
26776e3cc12SEric Dumazet 
codel_reset(struct Qdisc * sch)26876e3cc12SEric Dumazet static void codel_reset(struct Qdisc *sch)
26976e3cc12SEric Dumazet {
27076e3cc12SEric Dumazet 	struct codel_sched_data *q = qdisc_priv(sch);
27176e3cc12SEric Dumazet 
27276e3cc12SEric Dumazet 	qdisc_reset_queue(sch);
27376e3cc12SEric Dumazet 	codel_vars_init(&q->vars);
27476e3cc12SEric Dumazet }
27576e3cc12SEric Dumazet 
27676e3cc12SEric Dumazet static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
27776e3cc12SEric Dumazet 	.id		=	"codel",
27876e3cc12SEric Dumazet 	.priv_size	=	sizeof(struct codel_sched_data),
27976e3cc12SEric Dumazet 
28076e3cc12SEric Dumazet 	.enqueue	=	codel_qdisc_enqueue,
28176e3cc12SEric Dumazet 	.dequeue	=	codel_qdisc_dequeue,
28276e3cc12SEric Dumazet 	.peek		=	qdisc_peek_dequeued,
28376e3cc12SEric Dumazet 	.init		=	codel_init,
28476e3cc12SEric Dumazet 	.reset		=	codel_reset,
28576e3cc12SEric Dumazet 	.change 	=	codel_change,
28676e3cc12SEric Dumazet 	.dump		=	codel_dump,
28776e3cc12SEric Dumazet 	.dump_stats	=	codel_dump_stats,
28876e3cc12SEric Dumazet 	.owner		=	THIS_MODULE,
28976e3cc12SEric Dumazet };
29076e3cc12SEric Dumazet 
codel_module_init(void)29176e3cc12SEric Dumazet static int __init codel_module_init(void)
29276e3cc12SEric Dumazet {
29376e3cc12SEric Dumazet 	return register_qdisc(&codel_qdisc_ops);
29476e3cc12SEric Dumazet }
29576e3cc12SEric Dumazet 
codel_module_exit(void)29676e3cc12SEric Dumazet static void __exit codel_module_exit(void)
29776e3cc12SEric Dumazet {
29876e3cc12SEric Dumazet 	unregister_qdisc(&codel_qdisc_ops);
29976e3cc12SEric Dumazet }
30076e3cc12SEric Dumazet 
30176e3cc12SEric Dumazet module_init(codel_module_init)
30276e3cc12SEric Dumazet module_exit(codel_module_exit)
30376e3cc12SEric Dumazet 
30476e3cc12SEric Dumazet MODULE_DESCRIPTION("Controlled Delay queue discipline");
30576e3cc12SEric Dumazet MODULE_AUTHOR("Dave Taht");
30676e3cc12SEric Dumazet MODULE_AUTHOR("Eric Dumazet");
30776e3cc12SEric Dumazet MODULE_LICENSE("Dual BSD/GPL");
308