xref: /openbmc/linux/net/sched/sch_red.c (revision d2574c33)
1 /*
2  * net/sched/sch_red.c	Random Early Detection queue.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914:	computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16 
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/pkt_cls.h>
23 #include <net/inet_ecn.h>
24 #include <net/red.h>
25 
26 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
39 
40 struct red_sched_data {
41 	u32			limit;		/* HARD maximal queue length */
42 	unsigned char		flags;
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 };
50 
51 static inline int red_use_ecn(struct red_sched_data *q)
52 {
53 	return q->flags & TC_RED_ECN;
54 }
55 
56 static inline int red_use_harddrop(struct red_sched_data *q)
57 {
58 	return q->flags & TC_RED_HARDDROP;
59 }
60 
61 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
62 		       struct sk_buff **to_free)
63 {
64 	struct red_sched_data *q = qdisc_priv(sch);
65 	struct Qdisc *child = q->qdisc;
66 	int ret;
67 
68 	q->vars.qavg = red_calc_qavg(&q->parms,
69 				     &q->vars,
70 				     child->qstats.backlog);
71 
72 	if (red_is_idling(&q->vars))
73 		red_end_of_idle_period(&q->vars);
74 
75 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
76 	case RED_DONT_MARK:
77 		break;
78 
79 	case RED_PROB_MARK:
80 		qdisc_qstats_overlimit(sch);
81 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
82 			q->stats.prob_drop++;
83 			goto congestion_drop;
84 		}
85 
86 		q->stats.prob_mark++;
87 		break;
88 
89 	case RED_HARD_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
92 		    !INET_ECN_set_ce(skb)) {
93 			q->stats.forced_drop++;
94 			goto congestion_drop;
95 		}
96 
97 		q->stats.forced_mark++;
98 		break;
99 	}
100 
101 	ret = qdisc_enqueue(skb, child, to_free);
102 	if (likely(ret == NET_XMIT_SUCCESS)) {
103 		qdisc_qstats_backlog_inc(sch, skb);
104 		sch->q.qlen++;
105 	} else if (net_xmit_drop_count(ret)) {
106 		q->stats.pdrop++;
107 		qdisc_qstats_drop(sch);
108 	}
109 	return ret;
110 
111 congestion_drop:
112 	qdisc_drop(skb, sch, to_free);
113 	return NET_XMIT_CN;
114 }
115 
116 static struct sk_buff *red_dequeue(struct Qdisc *sch)
117 {
118 	struct sk_buff *skb;
119 	struct red_sched_data *q = qdisc_priv(sch);
120 	struct Qdisc *child = q->qdisc;
121 
122 	skb = child->dequeue(child);
123 	if (skb) {
124 		qdisc_bstats_update(sch, skb);
125 		qdisc_qstats_backlog_dec(sch, skb);
126 		sch->q.qlen--;
127 	} else {
128 		if (!red_is_idling(&q->vars))
129 			red_start_of_idle_period(&q->vars);
130 	}
131 	return skb;
132 }
133 
134 static struct sk_buff *red_peek(struct Qdisc *sch)
135 {
136 	struct red_sched_data *q = qdisc_priv(sch);
137 	struct Qdisc *child = q->qdisc;
138 
139 	return child->ops->peek(child);
140 }
141 
142 static void red_reset(struct Qdisc *sch)
143 {
144 	struct red_sched_data *q = qdisc_priv(sch);
145 
146 	qdisc_reset(q->qdisc);
147 	sch->qstats.backlog = 0;
148 	sch->q.qlen = 0;
149 	red_restart(&q->vars);
150 }
151 
152 static int red_offload(struct Qdisc *sch, bool enable)
153 {
154 	struct red_sched_data *q = qdisc_priv(sch);
155 	struct net_device *dev = qdisc_dev(sch);
156 	struct tc_red_qopt_offload opt = {
157 		.handle = sch->handle,
158 		.parent = sch->parent,
159 	};
160 
161 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 		return -EOPNOTSUPP;
163 
164 	if (enable) {
165 		opt.command = TC_RED_REPLACE;
166 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 		opt.set.probability = q->parms.max_P;
169 		opt.set.limit = q->limit;
170 		opt.set.is_ecn = red_use_ecn(q);
171 		opt.set.is_harddrop = red_use_harddrop(q);
172 		opt.set.qstats = &sch->qstats;
173 	} else {
174 		opt.command = TC_RED_DESTROY;
175 	}
176 
177 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
178 }
179 
180 static void red_destroy(struct Qdisc *sch)
181 {
182 	struct red_sched_data *q = qdisc_priv(sch);
183 
184 	del_timer_sync(&q->adapt_timer);
185 	red_offload(sch, false);
186 	qdisc_put(q->qdisc);
187 }
188 
189 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
190 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
191 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
192 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
193 };
194 
195 static int red_change(struct Qdisc *sch, struct nlattr *opt,
196 		      struct netlink_ext_ack *extack)
197 {
198 	struct Qdisc *old_child = NULL, *child = NULL;
199 	struct red_sched_data *q = qdisc_priv(sch);
200 	struct nlattr *tb[TCA_RED_MAX + 1];
201 	struct tc_red_qopt *ctl;
202 	int err;
203 	u32 max_P;
204 
205 	if (opt == NULL)
206 		return -EINVAL;
207 
208 	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
209 	if (err < 0)
210 		return err;
211 
212 	if (tb[TCA_RED_PARMS] == NULL ||
213 	    tb[TCA_RED_STAB] == NULL)
214 		return -EINVAL;
215 
216 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
217 
218 	ctl = nla_data(tb[TCA_RED_PARMS]);
219 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
220 		return -EINVAL;
221 
222 	if (ctl->limit > 0) {
223 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
224 					 extack);
225 		if (IS_ERR(child))
226 			return PTR_ERR(child);
227 
228 		/* child is fifo, no need to check for noop_qdisc */
229 		qdisc_hash_add(child, true);
230 	}
231 
232 	sch_tree_lock(sch);
233 	q->flags = ctl->flags;
234 	q->limit = ctl->limit;
235 	if (child) {
236 		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
237 					  q->qdisc->qstats.backlog);
238 		old_child = q->qdisc;
239 		q->qdisc = child;
240 	}
241 
242 	red_set_parms(&q->parms,
243 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
244 		      ctl->Plog, ctl->Scell_log,
245 		      nla_data(tb[TCA_RED_STAB]),
246 		      max_P);
247 	red_set_vars(&q->vars);
248 
249 	del_timer(&q->adapt_timer);
250 	if (ctl->flags & TC_RED_ADAPTATIVE)
251 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
252 
253 	if (!q->qdisc->q.qlen)
254 		red_start_of_idle_period(&q->vars);
255 
256 	sch_tree_unlock(sch);
257 
258 	red_offload(sch, true);
259 
260 	if (old_child)
261 		qdisc_put(old_child);
262 	return 0;
263 }
264 
265 static inline void red_adaptative_timer(struct timer_list *t)
266 {
267 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
268 	struct Qdisc *sch = q->sch;
269 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
270 
271 	spin_lock(root_lock);
272 	red_adaptative_algo(&q->parms, &q->vars);
273 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
274 	spin_unlock(root_lock);
275 }
276 
277 static int red_init(struct Qdisc *sch, struct nlattr *opt,
278 		    struct netlink_ext_ack *extack)
279 {
280 	struct red_sched_data *q = qdisc_priv(sch);
281 
282 	q->qdisc = &noop_qdisc;
283 	q->sch = sch;
284 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
285 	return red_change(sch, opt, extack);
286 }
287 
288 static int red_dump_offload_stats(struct Qdisc *sch)
289 {
290 	struct tc_red_qopt_offload hw_stats = {
291 		.command = TC_RED_STATS,
292 		.handle = sch->handle,
293 		.parent = sch->parent,
294 		{
295 			.stats.bstats = &sch->bstats,
296 			.stats.qstats = &sch->qstats,
297 		},
298 	};
299 
300 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
301 }
302 
303 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
304 {
305 	struct red_sched_data *q = qdisc_priv(sch);
306 	struct nlattr *opts = NULL;
307 	struct tc_red_qopt opt = {
308 		.limit		= q->limit,
309 		.flags		= q->flags,
310 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
311 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
312 		.Wlog		= q->parms.Wlog,
313 		.Plog		= q->parms.Plog,
314 		.Scell_log	= q->parms.Scell_log,
315 	};
316 	int err;
317 
318 	err = red_dump_offload_stats(sch);
319 	if (err)
320 		goto nla_put_failure;
321 
322 	opts = nla_nest_start(skb, TCA_OPTIONS);
323 	if (opts == NULL)
324 		goto nla_put_failure;
325 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
326 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
327 		goto nla_put_failure;
328 	return nla_nest_end(skb, opts);
329 
330 nla_put_failure:
331 	nla_nest_cancel(skb, opts);
332 	return -EMSGSIZE;
333 }
334 
335 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
336 {
337 	struct red_sched_data *q = qdisc_priv(sch);
338 	struct net_device *dev = qdisc_dev(sch);
339 	struct tc_red_xstats st = {0};
340 
341 	if (sch->flags & TCQ_F_OFFLOADED) {
342 		struct tc_red_qopt_offload hw_stats_request = {
343 			.command = TC_RED_XSTATS,
344 			.handle = sch->handle,
345 			.parent = sch->parent,
346 			{
347 				.xstats = &q->stats,
348 			},
349 		};
350 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
351 					      &hw_stats_request);
352 	}
353 	st.early = q->stats.prob_drop + q->stats.forced_drop;
354 	st.pdrop = q->stats.pdrop;
355 	st.other = q->stats.other;
356 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
357 
358 	return gnet_stats_copy_app(d, &st, sizeof(st));
359 }
360 
361 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
362 			  struct sk_buff *skb, struct tcmsg *tcm)
363 {
364 	struct red_sched_data *q = qdisc_priv(sch);
365 
366 	tcm->tcm_handle |= TC_H_MIN(1);
367 	tcm->tcm_info = q->qdisc->handle;
368 	return 0;
369 }
370 
371 static void red_graft_offload(struct Qdisc *sch,
372 			      struct Qdisc *new, struct Qdisc *old,
373 			      struct netlink_ext_ack *extack)
374 {
375 	struct tc_red_qopt_offload graft_offload = {
376 		.handle		= sch->handle,
377 		.parent		= sch->parent,
378 		.child_handle	= new->handle,
379 		.command	= TC_RED_GRAFT,
380 	};
381 
382 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
383 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
384 }
385 
386 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
387 		     struct Qdisc **old, struct netlink_ext_ack *extack)
388 {
389 	struct red_sched_data *q = qdisc_priv(sch);
390 
391 	if (new == NULL)
392 		new = &noop_qdisc;
393 
394 	*old = qdisc_replace(sch, new, &q->qdisc);
395 
396 	red_graft_offload(sch, new, *old, extack);
397 	return 0;
398 }
399 
400 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
401 {
402 	struct red_sched_data *q = qdisc_priv(sch);
403 	return q->qdisc;
404 }
405 
406 static unsigned long red_find(struct Qdisc *sch, u32 classid)
407 {
408 	return 1;
409 }
410 
411 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
412 {
413 	if (!walker->stop) {
414 		if (walker->count >= walker->skip)
415 			if (walker->fn(sch, 1, walker) < 0) {
416 				walker->stop = 1;
417 				return;
418 			}
419 		walker->count++;
420 	}
421 }
422 
423 static const struct Qdisc_class_ops red_class_ops = {
424 	.graft		=	red_graft,
425 	.leaf		=	red_leaf,
426 	.find		=	red_find,
427 	.walk		=	red_walk,
428 	.dump		=	red_dump_class,
429 };
430 
431 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
432 	.id		=	"red",
433 	.priv_size	=	sizeof(struct red_sched_data),
434 	.cl_ops		=	&red_class_ops,
435 	.enqueue	=	red_enqueue,
436 	.dequeue	=	red_dequeue,
437 	.peek		=	red_peek,
438 	.init		=	red_init,
439 	.reset		=	red_reset,
440 	.destroy	=	red_destroy,
441 	.change		=	red_change,
442 	.dump		=	red_dump,
443 	.dump_stats	=	red_dump_stats,
444 	.owner		=	THIS_MODULE,
445 };
446 
447 static int __init red_module_init(void)
448 {
449 	return register_qdisc(&red_qdisc_ops);
450 }
451 
452 static void __exit red_module_exit(void)
453 {
454 	unregister_qdisc(&red_qdisc_ops);
455 }
456 
457 module_init(red_module_init)
458 module_exit(red_module_exit)
459 
460 MODULE_LICENSE("GPL");
461