xref: /openbmc/linux/net/sched/sch_red.c (revision 0edff03d)
1 /*
2  * net/sched/sch_red.c	Random Early Detection queue.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914:	computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16 
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/pkt_cls.h>
23 #include <net/inet_ecn.h>
24 #include <net/red.h>
25 
26 
27 /*	Parameters, settable by user:
28 	-----------------------------
29 
30 	limit		- bytes (must be > qth_max + burst)
31 
	Hard limit on the queue length; it should be chosen larger than
	qth_max to allow for packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen arbitrarily
	high (well, less than the RAM size). In practice this limit
	will never be reached if RED works correctly.
38  */
39 
/*
 * Private state of one RED qdisc instance, obtained with qdisc_priv().
 *
 * limit	- hard maximal queue length, copied from the user request
 * flags	- TC_RED_* option bits copied from the user request
 * adapt_timer	- timer armed every HZ/2 while TC_RED_ADAPTATIVE is set
 * sch		- back-pointer to the owning qdisc, used by the timer callback
 * parms	- RED parameters installed by red_set_parms()
 * vars		- RED running state (queue average, idle tracking)
 * stats	- mark/drop counters exported by red_dump_stats()
 * qdisc	- child qdisc that actually holds the packets
 */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};
50 
51 static inline int red_use_ecn(struct red_sched_data *q)
52 {
53 	return q->flags & TC_RED_ECN;
54 }
55 
56 static inline int red_use_harddrop(struct red_sched_data *q)
57 {
58 	return q->flags & TC_RED_HARDDROP;
59 }
60 
61 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
62 		       struct sk_buff **to_free)
63 {
64 	struct red_sched_data *q = qdisc_priv(sch);
65 	struct Qdisc *child = q->qdisc;
66 	int ret;
67 
68 	q->vars.qavg = red_calc_qavg(&q->parms,
69 				     &q->vars,
70 				     child->qstats.backlog);
71 
72 	if (red_is_idling(&q->vars))
73 		red_end_of_idle_period(&q->vars);
74 
75 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
76 	case RED_DONT_MARK:
77 		break;
78 
79 	case RED_PROB_MARK:
80 		qdisc_qstats_overlimit(sch);
81 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
82 			q->stats.prob_drop++;
83 			goto congestion_drop;
84 		}
85 
86 		q->stats.prob_mark++;
87 		break;
88 
89 	case RED_HARD_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
92 		    !INET_ECN_set_ce(skb)) {
93 			q->stats.forced_drop++;
94 			goto congestion_drop;
95 		}
96 
97 		q->stats.forced_mark++;
98 		break;
99 	}
100 
101 	ret = qdisc_enqueue(skb, child, to_free);
102 	if (likely(ret == NET_XMIT_SUCCESS)) {
103 		qdisc_qstats_backlog_inc(sch, skb);
104 		sch->q.qlen++;
105 	} else if (net_xmit_drop_count(ret)) {
106 		q->stats.pdrop++;
107 		qdisc_qstats_drop(sch);
108 	}
109 	return ret;
110 
111 congestion_drop:
112 	qdisc_drop(skb, sch, to_free);
113 	return NET_XMIT_CN;
114 }
115 
116 static struct sk_buff *red_dequeue(struct Qdisc *sch)
117 {
118 	struct sk_buff *skb;
119 	struct red_sched_data *q = qdisc_priv(sch);
120 	struct Qdisc *child = q->qdisc;
121 
122 	skb = child->dequeue(child);
123 	if (skb) {
124 		qdisc_bstats_update(sch, skb);
125 		qdisc_qstats_backlog_dec(sch, skb);
126 		sch->q.qlen--;
127 	} else {
128 		if (!red_is_idling(&q->vars))
129 			red_start_of_idle_period(&q->vars);
130 	}
131 	return skb;
132 }
133 
134 static struct sk_buff *red_peek(struct Qdisc *sch)
135 {
136 	struct red_sched_data *q = qdisc_priv(sch);
137 	struct Qdisc *child = q->qdisc;
138 
139 	return child->ops->peek(child);
140 }
141 
142 static void red_reset(struct Qdisc *sch)
143 {
144 	struct red_sched_data *q = qdisc_priv(sch);
145 
146 	qdisc_reset(q->qdisc);
147 	sch->qstats.backlog = 0;
148 	sch->q.qlen = 0;
149 	red_restart(&q->vars);
150 }
151 
152 static int red_offload(struct Qdisc *sch, bool enable)
153 {
154 	struct red_sched_data *q = qdisc_priv(sch);
155 	struct net_device *dev = qdisc_dev(sch);
156 	struct tc_red_qopt_offload opt = {
157 		.handle = sch->handle,
158 		.parent = sch->parent,
159 	};
160 	int err;
161 
162 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
163 		return -EOPNOTSUPP;
164 
165 	if (enable) {
166 		opt.command = TC_RED_REPLACE;
167 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
168 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
169 		opt.set.probability = q->parms.max_P;
170 		opt.set.is_ecn = red_use_ecn(q);
171 	} else {
172 		opt.command = TC_RED_DESTROY;
173 	}
174 
175 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176 
177 	if (!err && enable)
178 		sch->flags |= TCQ_F_OFFLOADED;
179 	else
180 		sch->flags &= ~TCQ_F_OFFLOADED;
181 
182 	return err;
183 }
184 
185 static void red_destroy(struct Qdisc *sch)
186 {
187 	struct red_sched_data *q = qdisc_priv(sch);
188 
189 	del_timer_sync(&q->adapt_timer);
190 	red_offload(sch, false);
191 	qdisc_destroy(q->qdisc);
192 }
193 
194 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
195 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
196 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
197 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
198 };
199 
200 static int red_change(struct Qdisc *sch, struct nlattr *opt)
201 {
202 	struct red_sched_data *q = qdisc_priv(sch);
203 	struct nlattr *tb[TCA_RED_MAX + 1];
204 	struct tc_red_qopt *ctl;
205 	struct Qdisc *child = NULL;
206 	int err;
207 	u32 max_P;
208 
209 	if (opt == NULL)
210 		return -EINVAL;
211 
212 	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
213 	if (err < 0)
214 		return err;
215 
216 	if (tb[TCA_RED_PARMS] == NULL ||
217 	    tb[TCA_RED_STAB] == NULL)
218 		return -EINVAL;
219 
220 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
221 
222 	ctl = nla_data(tb[TCA_RED_PARMS]);
223 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
224 		return -EINVAL;
225 
226 	if (ctl->limit > 0) {
227 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
228 		if (IS_ERR(child))
229 			return PTR_ERR(child);
230 	}
231 
232 	if (child != &noop_qdisc)
233 		qdisc_hash_add(child, true);
234 	sch_tree_lock(sch);
235 	q->flags = ctl->flags;
236 	q->limit = ctl->limit;
237 	if (child) {
238 		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
239 					  q->qdisc->qstats.backlog);
240 		qdisc_destroy(q->qdisc);
241 		q->qdisc = child;
242 	}
243 
244 	red_set_parms(&q->parms,
245 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
246 		      ctl->Plog, ctl->Scell_log,
247 		      nla_data(tb[TCA_RED_STAB]),
248 		      max_P);
249 	red_set_vars(&q->vars);
250 
251 	del_timer(&q->adapt_timer);
252 	if (ctl->flags & TC_RED_ADAPTATIVE)
253 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
254 
255 	if (!q->qdisc->q.qlen)
256 		red_start_of_idle_period(&q->vars);
257 
258 	sch_tree_unlock(sch);
259 	red_offload(sch, true);
260 	return 0;
261 }
262 
263 static inline void red_adaptative_timer(struct timer_list *t)
264 {
265 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
266 	struct Qdisc *sch = q->sch;
267 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
268 
269 	spin_lock(root_lock);
270 	red_adaptative_algo(&q->parms, &q->vars);
271 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
272 	spin_unlock(root_lock);
273 }
274 
275 static int red_init(struct Qdisc *sch, struct nlattr *opt)
276 {
277 	struct red_sched_data *q = qdisc_priv(sch);
278 
279 	q->qdisc = &noop_qdisc;
280 	q->sch = sch;
281 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
282 	return red_change(sch, opt);
283 }
284 
285 static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
286 {
287 	struct net_device *dev = qdisc_dev(sch);
288 	struct tc_red_qopt_offload hw_stats = {
289 		.command = TC_RED_STATS,
290 		.handle = sch->handle,
291 		.parent = sch->parent,
292 		{
293 			.stats.bstats = &sch->bstats,
294 			.stats.qstats = &sch->qstats,
295 		},
296 	};
297 
298 	if (!(sch->flags & TCQ_F_OFFLOADED))
299 		return 0;
300 
301 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
302 					     &hw_stats);
303 }
304 
305 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
306 {
307 	struct red_sched_data *q = qdisc_priv(sch);
308 	struct nlattr *opts = NULL;
309 	struct tc_red_qopt opt = {
310 		.limit		= q->limit,
311 		.flags		= q->flags,
312 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
313 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
314 		.Wlog		= q->parms.Wlog,
315 		.Plog		= q->parms.Plog,
316 		.Scell_log	= q->parms.Scell_log,
317 	};
318 	int err;
319 
320 	sch->qstats.backlog = q->qdisc->qstats.backlog;
321 	err = red_dump_offload_stats(sch, &opt);
322 	if (err)
323 		goto nla_put_failure;
324 
325 	opts = nla_nest_start(skb, TCA_OPTIONS);
326 	if (opts == NULL)
327 		goto nla_put_failure;
328 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
329 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
330 		goto nla_put_failure;
331 	return nla_nest_end(skb, opts);
332 
333 nla_put_failure:
334 	nla_nest_cancel(skb, opts);
335 	return -EMSGSIZE;
336 }
337 
338 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
339 {
340 	struct red_sched_data *q = qdisc_priv(sch);
341 	struct net_device *dev = qdisc_dev(sch);
342 	struct tc_red_xstats st = {
343 		.early	= q->stats.prob_drop + q->stats.forced_drop,
344 		.pdrop	= q->stats.pdrop,
345 		.other	= q->stats.other,
346 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
347 	};
348 
349 	if (sch->flags & TCQ_F_OFFLOADED) {
350 		struct red_stats hw_stats = {0};
351 		struct tc_red_qopt_offload hw_stats_request = {
352 			.command = TC_RED_XSTATS,
353 			.handle = sch->handle,
354 			.parent = sch->parent,
355 			{
356 				.xstats = &hw_stats,
357 			},
358 		};
359 		if (!dev->netdev_ops->ndo_setup_tc(dev,
360 						   TC_SETUP_QDISC_RED,
361 						   &hw_stats_request)) {
362 			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
363 			st.pdrop += hw_stats.pdrop;
364 			st.other += hw_stats.other;
365 			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
366 		}
367 	}
368 
369 	return gnet_stats_copy_app(d, &st, sizeof(st));
370 }
371 
372 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
373 			  struct sk_buff *skb, struct tcmsg *tcm)
374 {
375 	struct red_sched_data *q = qdisc_priv(sch);
376 
377 	tcm->tcm_handle |= TC_H_MIN(1);
378 	tcm->tcm_info = q->qdisc->handle;
379 	return 0;
380 }
381 
382 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
383 		     struct Qdisc **old)
384 {
385 	struct red_sched_data *q = qdisc_priv(sch);
386 
387 	if (new == NULL)
388 		new = &noop_qdisc;
389 
390 	*old = qdisc_replace(sch, new, &q->qdisc);
391 	return 0;
392 }
393 
394 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
395 {
396 	struct red_sched_data *q = qdisc_priv(sch);
397 	return q->qdisc;
398 }
399 
400 static unsigned long red_find(struct Qdisc *sch, u32 classid)
401 {
402 	return 1;
403 }
404 
405 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
406 {
407 	if (!walker->stop) {
408 		if (walker->count >= walker->skip)
409 			if (walker->fn(sch, 1, walker) < 0) {
410 				walker->stop = 1;
411 				return;
412 			}
413 		walker->count++;
414 	}
415 }
416 
417 static const struct Qdisc_class_ops red_class_ops = {
418 	.graft		=	red_graft,
419 	.leaf		=	red_leaf,
420 	.find		=	red_find,
421 	.walk		=	red_walk,
422 	.dump		=	red_dump_class,
423 };
424 
425 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
426 	.id		=	"red",
427 	.priv_size	=	sizeof(struct red_sched_data),
428 	.cl_ops		=	&red_class_ops,
429 	.enqueue	=	red_enqueue,
430 	.dequeue	=	red_dequeue,
431 	.peek		=	red_peek,
432 	.init		=	red_init,
433 	.reset		=	red_reset,
434 	.destroy	=	red_destroy,
435 	.change		=	red_change,
436 	.dump		=	red_dump,
437 	.dump_stats	=	red_dump_stats,
438 	.owner		=	THIS_MODULE,
439 };
440 
441 static int __init red_module_init(void)
442 {
443 	return register_qdisc(&red_qdisc_ops);
444 }
445 
446 static void __exit red_module_exit(void)
447 {
448 	unregister_qdisc(&red_qdisc_ops);
449 }
450 
451 module_init(red_module_init)
452 module_exit(red_module_exit)
453 
454 MODULE_LICENSE("GPL");
455