/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
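
/* Illustrative configuration from userspace (values are arbitrary and
 * only meant as a typical tc-red(8) invocation, not a recommendation):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 bandwidth 10Mbit ecn adaptative
 *
 * "limit" is the hard byte limit above; min/max map to qth_min/qth_max.
 */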

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN, TC_RED_HARDDROP, ... */
	struct timer_list	adapt_timer;	/* periodic max_P adaptation */
	struct red_parms	parms;		/* user-supplied RED parameters */
	struct red_vars		vars;		/* average queue state (qavg, idle) */
	struct red_stats	stats;		/* mark/drop counters for xstats */
	struct Qdisc		*qdisc;		/* child queue (bfifo by default) */
};

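/* Helpers testing the TC_RED_* flag bits supplied by userspace. */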
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

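/*
 * Enqueue: update the EWMA average queue length from the child backlog,
 * then let red_action() decide whether to pass the packet through,
 * probabilistically mark/drop it, or unconditionally mark/drop it.
 * ECN marking is used instead of dropping when TC_RED_ECN is set and the
 * packet is ECN-capable; otherwise the packet is congestion-dropped.
 */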
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

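/*
 * Dequeue from the child qdisc.  When the child runs empty, note the
 * start of an idle period so that the average queue length keeps
 * decaying while the link is idle.
 */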
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

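/*
 * ->drop() hook: ask the child to drop one packet and account for it as
 * an "other" drop.  If the child has nothing to drop, the queue is empty
 * and an idle period may start here as well.
 */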
static unsigned int red_drop(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;

	if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
		q->stats.other++;
		qdisc_qstats_drop(sch);
		sch->qstats.backlog -= len;
		sch->q.qlen--;
		return len;
	}

	if (!red_is_idling(&q->vars))
		red_start_of_idle_period(&q->vars);

	return 0;
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	qdisc_destroy(q->qdisc);
}

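/* Netlink attribute policy used when parsing RED configuration. */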
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

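/*
 * Parse the netlink configuration (TCA_RED_PARMS/STAB/MAX_P), create a
 * bfifo child sized by the hard limit, and install the new RED
 * parameters under the qdisc tree lock.  The adaptation timer is
 * (re)armed when the adaptative flag is set.
 */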
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	return 0;
}

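/*
 * Adaptive RED: every 500ms red_adaptative_algo() nudges max_P so that
 * the average queue length tends to stay between qth_min and qth_max.
 * The root lock is taken explicitly because the timer runs outside the
 * normal qdisc locking paths.
 */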
static inline void red_adaptative_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct red_sched_data *q = qdisc_priv(sch);
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
	return red_change(sch, opt);
}

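/*
 * Dump the current configuration back to userspace.  qth_min/qth_max are
 * stored left-shifted by Wlog internally, so they are shifted back here.
 */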
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

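/*
 * RED-specific statistics: "early" counts RED drops (probabilistic plus
 * forced), "marked" counts ECN marks, "pdrop" counts drops from the
 * child fifo on enqueue, and "other" counts drops via the ->drop() hook.
 */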
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

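/*
 * Class operations: RED behaves as a classful qdisc with exactly one
 * child class (minor 1).  Its leaf is the internal child qdisc, and
 * grafting replaces that child.
 */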
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_put(struct Qdisc *sch, unsigned long arg)
{
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.get		=	red_get,
	.put		=	red_put,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.drop		=	red_drop,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");