/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816: ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

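/*
 * The remaining RED parameters (qth_min, qth_max, Wlog, Plog, Scell_log
 * and the Stab table) are passed straight through to the generic helpers
 * in <net/red.h>.  Roughly speaking (a sketch, not the exact fixed-point
 * arithmetic used there), the average backlog is an EWMA of the child
 * queue's backlog:
 *
 *	qavg := qavg + (backlog - qavg) * 2^-Wlog
 *
 * and packets are marked/dropped with a probability that rises from 0 at
 * qth_min up to max_P at qth_max, and unconditionally above qth_max.
 *
 * Illustrative configuration from user space (syntax per tc-red(8); not
 * verified against any particular iproute2 version):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 */
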
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE */
	struct timer_list	adapt_timer;	/* periodic re-tuning for adaptive RED */
	struct red_parms	parms;		/* configured parameters */
	struct red_vars		vars;		/* runtime averaging state */
	struct red_stats	stats;		/* mark/drop counters */
	struct Qdisc		*qdisc;		/* bfifo child enforcing 'limit' */
};

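/*
 * Flag helpers: with TC_RED_ECN, eligible packets are CE-marked instead
 * of dropped; with TC_RED_HARDDROP, packets above qth_max are dropped
 * even if they could have been ECN-marked.
 */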
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

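/*
 * Enqueue: refresh the average queue estimate from the child qdisc's
 * backlog, then let red_action() decide whether to pass the packet on
 * unchanged, probabilistically mark/drop it, or forcibly mark/drop it.
 * ECN marking is attempted only when TC_RED_ECN is set and the packet is
 * ECN-capable; otherwise the packet is dropped and NET_XMIT_CN signals
 * congestion back to the caller.
 */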
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

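/*
 * Dequeue from the child qdisc.  When the child runs empty, record the
 * start of an idle period so the averaging code can age qavg correctly
 * on the next enqueue (the 990814 fix noted in the header).
 */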
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	qdisc_destroy(q->qdisc);
}

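/*
 * Netlink attribute policy: a fixed-size tc_red_qopt parameter block, the
 * RED_STAB_SIZE-byte lookup table consumed by the averaging code in
 * <net/red.h>, and an optional 32-bit max_P.
 */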
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

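/*
 * (Re)configure: TCA_RED_PARMS and TCA_RED_STAB are mandatory,
 * TCA_RED_MAX_P is optional.  A bfifo child sized to 'limit' bytes
 * enforces the hard limit; the old child is swapped out under the qdisc
 * tree lock with its backlog accounted via qdisc_tree_reduce_backlog().
 * With TC_RED_ADAPTATIVE set, the adaptation timer is (re)armed to fire
 * every HZ/2 jiffies (500 ms).
 */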
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	return 0;
}

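/*
 * Adaptive RED: every HZ/2 re-tune the marking probability from the
 * current average queue (red_adaptative_algo() in <net/red.h>), holding
 * the root qdisc lock to serialize against the data path.
 */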
static inline void red_adaptative_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct red_sched_data *q = qdisc_priv(sch);
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
	return red_change(sch, opt);
}

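/*
 * Dump the current configuration.  qth_min and qth_max are stored
 * internally scaled by 2^Wlog, so they are shifted back down before
 * being reported to user space.
 */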
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

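/*
 * Extended statistics: 'early' aggregates probabilistic and forced drops,
 * 'marked' the corresponding ECN marks, and 'pdrop' counts packets
 * refused by the child qdisc (normally because the hard byte limit was
 * exceeded).
 */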
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

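/*
 * RED exposes a single class (minor 1) holding the child qdisc, so the
 * class operations below are essentially pass-throughs.
 */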
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_put(struct Qdisc *sch, unsigned long arg)
{
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.get		=	red_get,
	.put		=	red_put,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");