/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length: it should be chosen larger than
	qth_max to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
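
/*
 * Illustrative configuration (a sketch only; see tc-red(8) for the
 * authoritative syntax and for how burst/avpkt are translated into the
 * Wlog/Plog/Scell_log values this module actually receives):
 *
 *	tc qdisc add dev eth0 root red limit 400000 \
 *		min 30000 max 90000 avpkt 1000 burst 55 \
 *		probability 0.02 bandwidth 10Mbit ecn
 *
 * min, max and limit are in bytes; "ecn" sets TC_RED_ECN so eligible
 * packets are marked instead of dropped between the thresholds.
 */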

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN, TC_RED_HARDDROP,
						 * TC_RED_ADAPTATIVE
						 */
	struct timer_list	adapt_timer;	/* periodic max_P adaptation */
	struct Qdisc		*sch;
	struct red_parms	parms;		/* user-configured parameters */
	struct red_vars		vars;		/* qavg and idle-time state */
	struct red_stats	stats;
	struct Qdisc		*qdisc;		/* child (bfifo) holding packets */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
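
/*
 * Enqueue path. The average queue length is an EWMA over the *child's*
 * backlog, qavg <- qavg + (backlog - qavg) * 2^-Wlog, maintained in
 * fixed point by red_calc_qavg(). Depending on where qavg falls
 * relative to qth_min/qth_max, the packet is passed through,
 * probabilistically ECN-marked or dropped, or unconditionally
 * marked/dropped.
 */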
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}
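
/*
 * When the child runs empty we enter an "idle period": on the next
 * enqueue, red_calc_qavg() decays qavg based on the elapsed idle time
 * (this is the 990814 fix noted in the changelog above).
 */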
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}
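
/*
 * Mirror the configuration into (or remove it from) hardware via
 * ndo_setup_tc. qth_min/qth_max are stored left-shifted by Wlog
 * inside red_parms, so they are shifted back to byte values before
 * being handed to the driver.
 */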
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};
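
/*
 * Parse the TCA_RED_* netlink attributes and (re)configure the qdisc.
 * The packets live in a bfifo child sized to ctl->limit; a replacement
 * child is swapped in under the tree lock, and the old one is released
 * only after the lock has been dropped.
 */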
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;
}
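
/*
 * Adaptive RED: every ~500ms, red_adaptative_algo() nudges max_P up or
 * down so that qavg converges toward the target region between qth_min
 * and qth_max. The root lock is taken because this touches the same
 * state as the datapath. (The "adaptative" spelling is historical and
 * matches the TC_RED_ADAPTATIVE flag.)
 */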
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}
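
/*
 * Dump path: fold in hardware counters first when the qdisc is
 * offloaded, then emit TCA_RED_PARMS and TCA_RED_MAX_P, undoing the
 * Wlog scaling so userspace sees byte thresholds again.
 */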
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}
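
/*
 * RED exposes a single pseudo-class (minor 1) wrapping the child
 * qdisc, which is why red_find() unconditionally returns 1 and
 * red_walk() visits exactly one element.
 */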
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");