/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

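/*	For illustration, a typical way this qdisc is configured from
 *	user space (values are arbitrary, not a recommendation):
 *
 *	    tc qdisc add dev eth0 root red limit 400000 \
 *	        min 30000 max 90000 avpkt 1000 burst 55 \
 *	        ecn adaptive bandwidth 10Mbit
 *
 *	tc derives Wlog, Plog and Scell_log from these values and passes
 *	the result down in TCA_RED_PARMS plus a TCA_RED_STAB lookup table.
 */
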
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN, TC_RED_HARDDROP, ... */
	struct timer_list	adapt_timer;	/* periodic timer for adaptive RED */
	struct Qdisc		*sch;		/* back-pointer for the timer callback */
	struct red_parms	parms;		/* user-supplied RED parameters */
	struct red_vars		vars;		/* RED state (qavg, idle period, ...) */
	struct red_stats	stats;
	struct Qdisc		*qdisc;		/* child (bfifo) qdisc holding packets */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

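/* Enqueue: compute the EWMA queue average over the child's backlog, then
 * let red_action() decide whether to pass the packet through, mark it
 * (ECN, if enabled and the packet supports it) or drop it.
 */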
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

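/* Dequeue from the child qdisc; when the queue runs empty, start the
 * idle period so qavg decays correctly while the link is idle.
 */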
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

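/* Install or remove the RED configuration in hardware via ndo_setup_tc.
 * qth_min/qth_max are scaled back from fixed point (>> Wlog) so the
 * driver sees the same byte thresholds the user configured.
 */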
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

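/* Parse and apply a new configuration; also runs at init time, since
 * red_init() calls straight into here. A fresh bfifo child qdisc is
 * created outside the tree lock, and the old child's backlog is
 * subtracted from the hierarchy before it is destroyed.
 */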
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is a fifo, no need to check for noop_qdisc;
		 * hashing only inside this branch also avoids calling
		 * qdisc_hash_add() on a NULL child when ctl->limit == 0.
		 */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

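/* Adaptive RED: re-tune max_P every 500 ms so that the average queue
 * length is kept between the min and max thresholds.
 */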
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

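/* Fetch base stats from the hardware, if offloaded. TCQ_F_OFFLOADED is
 * (re)set here, so the xstats path below knows whether to query the
 * driver as well.
 */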
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

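/* Software xstats, plus the hardware counters when the qdisc is
 * offloaded.
 */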
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &hw_stats,
			},
		};
		if (!dev->netdev_ops->ndo_setup_tc(dev,
						   TC_SETUP_QDISC_RED,
						   &hw_stats_request)) {
			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
			st.pdrop += hw_stats.pdrop;
			st.other += hw_stats.other;
			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
		}
	}

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
460