176e3cc12SEric Dumazet /*
276e3cc12SEric Dumazet * Codel - The Controlled-Delay Active Queue Management algorithm
376e3cc12SEric Dumazet *
476e3cc12SEric Dumazet * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
576e3cc12SEric Dumazet * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
676e3cc12SEric Dumazet *
776e3cc12SEric Dumazet * Implemented on linux by :
876e3cc12SEric Dumazet * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
980ba92faSEric Dumazet * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
1076e3cc12SEric Dumazet *
1176e3cc12SEric Dumazet * Redistribution and use in source and binary forms, with or without
1276e3cc12SEric Dumazet * modification, are permitted provided that the following conditions
1376e3cc12SEric Dumazet * are met:
1476e3cc12SEric Dumazet * 1. Redistributions of source code must retain the above copyright
1576e3cc12SEric Dumazet * notice, this list of conditions, and the following disclaimer,
1676e3cc12SEric Dumazet * without modification.
1776e3cc12SEric Dumazet * 2. Redistributions in binary form must reproduce the above copyright
1876e3cc12SEric Dumazet * notice, this list of conditions and the following disclaimer in the
1976e3cc12SEric Dumazet * documentation and/or other materials provided with the distribution.
2076e3cc12SEric Dumazet * 3. The names of the authors may not be used to endorse or promote products
2176e3cc12SEric Dumazet * derived from this software without specific prior written permission.
2276e3cc12SEric Dumazet *
2376e3cc12SEric Dumazet * Alternatively, provided that this notice is retained in full, this
2476e3cc12SEric Dumazet * software may be distributed under the terms of the GNU General
2576e3cc12SEric Dumazet * Public License ("GPL") version 2, in which case the provisions of the
2676e3cc12SEric Dumazet * GPL apply INSTEAD OF those given above.
2776e3cc12SEric Dumazet *
2876e3cc12SEric Dumazet * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2976e3cc12SEric Dumazet * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
3076e3cc12SEric Dumazet * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
3176e3cc12SEric Dumazet * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
3276e3cc12SEric Dumazet * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3376e3cc12SEric Dumazet * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3476e3cc12SEric Dumazet * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3576e3cc12SEric Dumazet * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3676e3cc12SEric Dumazet * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3776e3cc12SEric Dumazet * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3876e3cc12SEric Dumazet * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
3976e3cc12SEric Dumazet * DAMAGE.
4076e3cc12SEric Dumazet *
4176e3cc12SEric Dumazet */
4276e3cc12SEric Dumazet
4376e3cc12SEric Dumazet #include <linux/module.h>
4476e3cc12SEric Dumazet #include <linux/slab.h>
4576e3cc12SEric Dumazet #include <linux/types.h>
4676e3cc12SEric Dumazet #include <linux/kernel.h>
4776e3cc12SEric Dumazet #include <linux/errno.h>
4876e3cc12SEric Dumazet #include <linux/skbuff.h>
49ce5b4b97SGeert Uytterhoeven #include <linux/prefetch.h>
5076e3cc12SEric Dumazet #include <net/pkt_sched.h>
5176e3cc12SEric Dumazet #include <net/codel.h>
52d068ca2aSMichal Kazior #include <net/codel_impl.h>
53d068ca2aSMichal Kazior #include <net/codel_qdisc.h>
5476e3cc12SEric Dumazet
5576e3cc12SEric Dumazet
5676e3cc12SEric Dumazet #define DEFAULT_CODEL_LIMIT 1000
5776e3cc12SEric Dumazet
5876e3cc12SEric Dumazet struct codel_sched_data {
5976e3cc12SEric Dumazet struct codel_params params;
6076e3cc12SEric Dumazet struct codel_vars vars;
6176e3cc12SEric Dumazet struct codel_stats stats;
6276e3cc12SEric Dumazet u32 drop_overlimit;
6376e3cc12SEric Dumazet };
6476e3cc12SEric Dumazet
6576e3cc12SEric Dumazet /* This is the specific function called from codel_dequeue()
6676e3cc12SEric Dumazet * to dequeue a packet from queue. Note: backlog is handled in
6776e3cc12SEric Dumazet * codel, we dont need to reduce it here.
6876e3cc12SEric Dumazet */
dequeue_func(struct codel_vars * vars,void * ctx)6979bdc4c8SMichal Kazior static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
7076e3cc12SEric Dumazet {
7179bdc4c8SMichal Kazior struct Qdisc *sch = ctx;
72ed760cb8SFlorian Westphal struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
7376e3cc12SEric Dumazet
74*051c7b39SJia-Ju Bai if (skb) {
7579bdc4c8SMichal Kazior sch->qstats.backlog -= qdisc_pkt_len(skb);
7676e3cc12SEric Dumazet prefetch(&skb->end); /* we'll need skb_shinfo() */
77*051c7b39SJia-Ju Bai }
7876e3cc12SEric Dumazet return skb;
7976e3cc12SEric Dumazet }
8076e3cc12SEric Dumazet
drop_func(struct sk_buff * skb,void * ctx)8179bdc4c8SMichal Kazior static void drop_func(struct sk_buff *skb, void *ctx)
8279bdc4c8SMichal Kazior {
8379bdc4c8SMichal Kazior struct Qdisc *sch = ctx;
8479bdc4c8SMichal Kazior
85520ac30fSEric Dumazet kfree_skb(skb);
86520ac30fSEric Dumazet qdisc_qstats_drop(sch);
8779bdc4c8SMichal Kazior }
8879bdc4c8SMichal Kazior
codel_qdisc_dequeue(struct Qdisc * sch)8976e3cc12SEric Dumazet static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
9076e3cc12SEric Dumazet {
9176e3cc12SEric Dumazet struct codel_sched_data *q = qdisc_priv(sch);
9276e3cc12SEric Dumazet struct sk_buff *skb;
9376e3cc12SEric Dumazet
9479bdc4c8SMichal Kazior skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars,
9579bdc4c8SMichal Kazior &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
9679bdc4c8SMichal Kazior drop_func, dequeue_func);
97865ec552SEric Dumazet
982ccccf5fSWANG Cong /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
9976e3cc12SEric Dumazet * or HTB crashes. Defer it for next round.
10076e3cc12SEric Dumazet */
10176e3cc12SEric Dumazet if (q->stats.drop_count && sch->q.qlen) {
1022ccccf5fSWANG Cong qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
10376e3cc12SEric Dumazet q->stats.drop_count = 0;
1042ccccf5fSWANG Cong q->stats.drop_len = 0;
10576e3cc12SEric Dumazet }
10676e3cc12SEric Dumazet if (skb)
10776e3cc12SEric Dumazet qdisc_bstats_update(sch, skb);
10876e3cc12SEric Dumazet return skb;
10976e3cc12SEric Dumazet }
11076e3cc12SEric Dumazet
codel_qdisc_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)111520ac30fSEric Dumazet static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
112520ac30fSEric Dumazet struct sk_buff **to_free)
11376e3cc12SEric Dumazet {
11476e3cc12SEric Dumazet struct codel_sched_data *q;
11576e3cc12SEric Dumazet
11676e3cc12SEric Dumazet if (likely(qdisc_qlen(sch) < sch->limit)) {
11776e3cc12SEric Dumazet codel_set_enqueue_time(skb);
11876e3cc12SEric Dumazet return qdisc_enqueue_tail(skb, sch);
11976e3cc12SEric Dumazet }
12076e3cc12SEric Dumazet q = qdisc_priv(sch);
12176e3cc12SEric Dumazet q->drop_overlimit++;
122520ac30fSEric Dumazet return qdisc_drop(skb, sch, to_free);
12376e3cc12SEric Dumazet }
12476e3cc12SEric Dumazet
12576e3cc12SEric Dumazet static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
12676e3cc12SEric Dumazet [TCA_CODEL_TARGET] = { .type = NLA_U32 },
12776e3cc12SEric Dumazet [TCA_CODEL_LIMIT] = { .type = NLA_U32 },
12876e3cc12SEric Dumazet [TCA_CODEL_INTERVAL] = { .type = NLA_U32 },
12976e3cc12SEric Dumazet [TCA_CODEL_ECN] = { .type = NLA_U32 },
13080ba92faSEric Dumazet [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
13176e3cc12SEric Dumazet };
13276e3cc12SEric Dumazet
codel_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)1332030721cSAlexander Aring static int codel_change(struct Qdisc *sch, struct nlattr *opt,
1342030721cSAlexander Aring struct netlink_ext_ack *extack)
13576e3cc12SEric Dumazet {
13676e3cc12SEric Dumazet struct codel_sched_data *q = qdisc_priv(sch);
13776e3cc12SEric Dumazet struct nlattr *tb[TCA_CODEL_MAX + 1];
1382ccccf5fSWANG Cong unsigned int qlen, dropped = 0;
13976e3cc12SEric Dumazet int err;
14076e3cc12SEric Dumazet
1418cb08174SJohannes Berg err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
1428cb08174SJohannes Berg codel_policy, NULL);
14376e3cc12SEric Dumazet if (err < 0)
14476e3cc12SEric Dumazet return err;
14576e3cc12SEric Dumazet
14676e3cc12SEric Dumazet sch_tree_lock(sch);
14776e3cc12SEric Dumazet
14876e3cc12SEric Dumazet if (tb[TCA_CODEL_TARGET]) {
14976e3cc12SEric Dumazet u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
15076e3cc12SEric Dumazet
15176e3cc12SEric Dumazet q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
15276e3cc12SEric Dumazet }
15376e3cc12SEric Dumazet
15480ba92faSEric Dumazet if (tb[TCA_CODEL_CE_THRESHOLD]) {
15580ba92faSEric Dumazet u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
15680ba92faSEric Dumazet
15780ba92faSEric Dumazet q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
15880ba92faSEric Dumazet }
15980ba92faSEric Dumazet
16076e3cc12SEric Dumazet if (tb[TCA_CODEL_INTERVAL]) {
16176e3cc12SEric Dumazet u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
16276e3cc12SEric Dumazet
16376e3cc12SEric Dumazet q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
16476e3cc12SEric Dumazet }
16576e3cc12SEric Dumazet
16676e3cc12SEric Dumazet if (tb[TCA_CODEL_LIMIT])
16776e3cc12SEric Dumazet sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
16876e3cc12SEric Dumazet
16976e3cc12SEric Dumazet if (tb[TCA_CODEL_ECN])
17076e3cc12SEric Dumazet q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
17176e3cc12SEric Dumazet
17276e3cc12SEric Dumazet qlen = sch->q.qlen;
17376e3cc12SEric Dumazet while (sch->q.qlen > sch->limit) {
174ed760cb8SFlorian Westphal struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
17576e3cc12SEric Dumazet
1762ccccf5fSWANG Cong dropped += qdisc_pkt_len(skb);
17725331d6cSJohn Fastabend qdisc_qstats_backlog_dec(sch, skb);
178b3d7e2b2SEric Dumazet rtnl_qdisc_drop(skb, sch);
17976e3cc12SEric Dumazet }
1802ccccf5fSWANG Cong qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
18176e3cc12SEric Dumazet
18276e3cc12SEric Dumazet sch_tree_unlock(sch);
18376e3cc12SEric Dumazet return 0;
18476e3cc12SEric Dumazet }
18576e3cc12SEric Dumazet
codel_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)186e63d7dfdSAlexander Aring static int codel_init(struct Qdisc *sch, struct nlattr *opt,
187e63d7dfdSAlexander Aring struct netlink_ext_ack *extack)
18876e3cc12SEric Dumazet {
18976e3cc12SEric Dumazet struct codel_sched_data *q = qdisc_priv(sch);
19076e3cc12SEric Dumazet
19176e3cc12SEric Dumazet sch->limit = DEFAULT_CODEL_LIMIT;
19276e3cc12SEric Dumazet
19379bdc4c8SMichal Kazior codel_params_init(&q->params);
19476e3cc12SEric Dumazet codel_vars_init(&q->vars);
19576e3cc12SEric Dumazet codel_stats_init(&q->stats);
19679bdc4c8SMichal Kazior q->params.mtu = psched_mtu(qdisc_dev(sch));
19776e3cc12SEric Dumazet
19876e3cc12SEric Dumazet if (opt) {
1992030721cSAlexander Aring int err = codel_change(sch, opt, extack);
20076e3cc12SEric Dumazet
20176e3cc12SEric Dumazet if (err)
20276e3cc12SEric Dumazet return err;
20376e3cc12SEric Dumazet }
20476e3cc12SEric Dumazet
20576e3cc12SEric Dumazet if (sch->limit >= 1)
20676e3cc12SEric Dumazet sch->flags |= TCQ_F_CAN_BYPASS;
20776e3cc12SEric Dumazet else
20876e3cc12SEric Dumazet sch->flags &= ~TCQ_F_CAN_BYPASS;
20976e3cc12SEric Dumazet
21076e3cc12SEric Dumazet return 0;
21176e3cc12SEric Dumazet }
21276e3cc12SEric Dumazet
codel_dump(struct Qdisc * sch,struct sk_buff * skb)21376e3cc12SEric Dumazet static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
21476e3cc12SEric Dumazet {
21576e3cc12SEric Dumazet struct codel_sched_data *q = qdisc_priv(sch);
21676e3cc12SEric Dumazet struct nlattr *opts;
21776e3cc12SEric Dumazet
218ae0be8deSMichal Kubecek opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
21976e3cc12SEric Dumazet if (opts == NULL)
22076e3cc12SEric Dumazet goto nla_put_failure;
22176e3cc12SEric Dumazet
22276e3cc12SEric Dumazet if (nla_put_u32(skb, TCA_CODEL_TARGET,
22376e3cc12SEric Dumazet codel_time_to_us(q->params.target)) ||
22476e3cc12SEric Dumazet nla_put_u32(skb, TCA_CODEL_LIMIT,
22576e3cc12SEric Dumazet sch->limit) ||
22676e3cc12SEric Dumazet nla_put_u32(skb, TCA_CODEL_INTERVAL,
22776e3cc12SEric Dumazet codel_time_to_us(q->params.interval)) ||
22876e3cc12SEric Dumazet nla_put_u32(skb, TCA_CODEL_ECN,
22976e3cc12SEric Dumazet q->params.ecn))
23076e3cc12SEric Dumazet goto nla_put_failure;
23180ba92faSEric Dumazet if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
23280ba92faSEric Dumazet nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
23380ba92faSEric Dumazet codel_time_to_us(q->params.ce_threshold)))
23480ba92faSEric Dumazet goto nla_put_failure;
23576e3cc12SEric Dumazet return nla_nest_end(skb, opts);
23676e3cc12SEric Dumazet
23776e3cc12SEric Dumazet nla_put_failure:
23876e3cc12SEric Dumazet nla_nest_cancel(skb, opts);
23976e3cc12SEric Dumazet return -1;
24076e3cc12SEric Dumazet }
24176e3cc12SEric Dumazet
codel_dump_stats(struct Qdisc * sch,struct gnet_dump * d)24276e3cc12SEric Dumazet static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
24376e3cc12SEric Dumazet {
24476e3cc12SEric Dumazet const struct codel_sched_data *q = qdisc_priv(sch);
24576e3cc12SEric Dumazet struct tc_codel_xstats st = {
24676e3cc12SEric Dumazet .maxpacket = q->stats.maxpacket,
24776e3cc12SEric Dumazet .count = q->vars.count,
24876e3cc12SEric Dumazet .lastcount = q->vars.lastcount,
24976e3cc12SEric Dumazet .drop_overlimit = q->drop_overlimit,
25076e3cc12SEric Dumazet .ldelay = codel_time_to_us(q->vars.ldelay),
25176e3cc12SEric Dumazet .dropping = q->vars.dropping,
25276e3cc12SEric Dumazet .ecn_mark = q->stats.ecn_mark,
25380ba92faSEric Dumazet .ce_mark = q->stats.ce_mark,
25476e3cc12SEric Dumazet };
25576e3cc12SEric Dumazet
25676e3cc12SEric Dumazet if (q->vars.dropping) {
25776e3cc12SEric Dumazet codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
25876e3cc12SEric Dumazet
25976e3cc12SEric Dumazet if (delta >= 0)
26076e3cc12SEric Dumazet st.drop_next = codel_time_to_us(delta);
26176e3cc12SEric Dumazet else
26276e3cc12SEric Dumazet st.drop_next = -codel_time_to_us(-delta);
26376e3cc12SEric Dumazet }
26476e3cc12SEric Dumazet
26576e3cc12SEric Dumazet return gnet_stats_copy_app(d, &st, sizeof(st));
26676e3cc12SEric Dumazet }
26776e3cc12SEric Dumazet
codel_reset(struct Qdisc * sch)26876e3cc12SEric Dumazet static void codel_reset(struct Qdisc *sch)
26976e3cc12SEric Dumazet {
27076e3cc12SEric Dumazet struct codel_sched_data *q = qdisc_priv(sch);
27176e3cc12SEric Dumazet
27276e3cc12SEric Dumazet qdisc_reset_queue(sch);
27376e3cc12SEric Dumazet codel_vars_init(&q->vars);
27476e3cc12SEric Dumazet }
27576e3cc12SEric Dumazet
27676e3cc12SEric Dumazet static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
27776e3cc12SEric Dumazet .id = "codel",
27876e3cc12SEric Dumazet .priv_size = sizeof(struct codel_sched_data),
27976e3cc12SEric Dumazet
28076e3cc12SEric Dumazet .enqueue = codel_qdisc_enqueue,
28176e3cc12SEric Dumazet .dequeue = codel_qdisc_dequeue,
28276e3cc12SEric Dumazet .peek = qdisc_peek_dequeued,
28376e3cc12SEric Dumazet .init = codel_init,
28476e3cc12SEric Dumazet .reset = codel_reset,
28576e3cc12SEric Dumazet .change = codel_change,
28676e3cc12SEric Dumazet .dump = codel_dump,
28776e3cc12SEric Dumazet .dump_stats = codel_dump_stats,
28876e3cc12SEric Dumazet .owner = THIS_MODULE,
28976e3cc12SEric Dumazet };
29076e3cc12SEric Dumazet
codel_module_init(void)29176e3cc12SEric Dumazet static int __init codel_module_init(void)
29276e3cc12SEric Dumazet {
29376e3cc12SEric Dumazet return register_qdisc(&codel_qdisc_ops);
29476e3cc12SEric Dumazet }
29576e3cc12SEric Dumazet
codel_module_exit(void)29676e3cc12SEric Dumazet static void __exit codel_module_exit(void)
29776e3cc12SEric Dumazet {
29876e3cc12SEric Dumazet unregister_qdisc(&codel_qdisc_ops);
29976e3cc12SEric Dumazet }
30076e3cc12SEric Dumazet
30176e3cc12SEric Dumazet module_init(codel_module_init)
30276e3cc12SEric Dumazet module_exit(codel_module_exit)
30376e3cc12SEric Dumazet
30476e3cc12SEric Dumazet MODULE_DESCRIPTION("Controlled Delay queue discipline");
30576e3cc12SEric Dumazet MODULE_AUTHOR("Dave Taht");
30676e3cc12SEric Dumazet MODULE_AUTHOR("Eric Dumazet");
30776e3cc12SEric Dumazet MODULE_LICENSE("Dual BSD/GPL");
308