xref: /openbmc/linux/net/sched/sch_red.c (revision 82df5b73)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length; it should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	In practice this limit will never be reached
	if RED works correctly.
 */
35 
36 struct red_sched_data {
37 	u32			limit;		/* HARD maximal queue length */
38 
39 	unsigned char		flags;
40 	/* Non-flags in tc_red_qopt.flags. */
41 	unsigned char		userbits;
42 
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 };
50 
/* Every flag bit this qdisc accepts: the historic set plus NODROP. */
#define TC_RED_SUPPORTED_FLAGS	(TC_RED_NODROP | TC_RED_HISTORIC_FLAGS)
52 
53 static inline int red_use_ecn(struct red_sched_data *q)
54 {
55 	return q->flags & TC_RED_ECN;
56 }
57 
58 static inline int red_use_harddrop(struct red_sched_data *q)
59 {
60 	return q->flags & TC_RED_HARDDROP;
61 }
62 
63 static int red_use_nodrop(struct red_sched_data *q)
64 {
65 	return q->flags & TC_RED_NODROP;
66 }
67 
68 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
69 		       struct sk_buff **to_free)
70 {
71 	struct red_sched_data *q = qdisc_priv(sch);
72 	struct Qdisc *child = q->qdisc;
73 	int ret;
74 
75 	q->vars.qavg = red_calc_qavg(&q->parms,
76 				     &q->vars,
77 				     child->qstats.backlog);
78 
79 	if (red_is_idling(&q->vars))
80 		red_end_of_idle_period(&q->vars);
81 
82 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
83 	case RED_DONT_MARK:
84 		break;
85 
86 	case RED_PROB_MARK:
87 		qdisc_qstats_overlimit(sch);
88 		if (!red_use_ecn(q)) {
89 			q->stats.prob_drop++;
90 			goto congestion_drop;
91 		}
92 
93 		if (INET_ECN_set_ce(skb)) {
94 			q->stats.prob_mark++;
95 		} else if (!red_use_nodrop(q)) {
96 			q->stats.prob_drop++;
97 			goto congestion_drop;
98 		}
99 
100 		/* Non-ECT packet in ECN nodrop mode: queue it. */
101 		break;
102 
103 	case RED_HARD_MARK:
104 		qdisc_qstats_overlimit(sch);
105 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
106 			q->stats.forced_drop++;
107 			goto congestion_drop;
108 		}
109 
110 		if (INET_ECN_set_ce(skb)) {
111 			q->stats.forced_mark++;
112 		} else if (!red_use_nodrop(q)) {
113 			q->stats.forced_drop++;
114 			goto congestion_drop;
115 		}
116 
117 		/* Non-ECT packet in ECN nodrop mode: queue it. */
118 		break;
119 	}
120 
121 	ret = qdisc_enqueue(skb, child, to_free);
122 	if (likely(ret == NET_XMIT_SUCCESS)) {
123 		qdisc_qstats_backlog_inc(sch, skb);
124 		sch->q.qlen++;
125 	} else if (net_xmit_drop_count(ret)) {
126 		q->stats.pdrop++;
127 		qdisc_qstats_drop(sch);
128 	}
129 	return ret;
130 
131 congestion_drop:
132 	qdisc_drop(skb, sch, to_free);
133 	return NET_XMIT_CN;
134 }
135 
136 static struct sk_buff *red_dequeue(struct Qdisc *sch)
137 {
138 	struct sk_buff *skb;
139 	struct red_sched_data *q = qdisc_priv(sch);
140 	struct Qdisc *child = q->qdisc;
141 
142 	skb = child->dequeue(child);
143 	if (skb) {
144 		qdisc_bstats_update(sch, skb);
145 		qdisc_qstats_backlog_dec(sch, skb);
146 		sch->q.qlen--;
147 	} else {
148 		if (!red_is_idling(&q->vars))
149 			red_start_of_idle_period(&q->vars);
150 	}
151 	return skb;
152 }
153 
154 static struct sk_buff *red_peek(struct Qdisc *sch)
155 {
156 	struct red_sched_data *q = qdisc_priv(sch);
157 	struct Qdisc *child = q->qdisc;
158 
159 	return child->ops->peek(child);
160 }
161 
162 static void red_reset(struct Qdisc *sch)
163 {
164 	struct red_sched_data *q = qdisc_priv(sch);
165 
166 	qdisc_reset(q->qdisc);
167 	sch->qstats.backlog = 0;
168 	sch->q.qlen = 0;
169 	red_restart(&q->vars);
170 }
171 
172 static int red_offload(struct Qdisc *sch, bool enable)
173 {
174 	struct red_sched_data *q = qdisc_priv(sch);
175 	struct net_device *dev = qdisc_dev(sch);
176 	struct tc_red_qopt_offload opt = {
177 		.handle = sch->handle,
178 		.parent = sch->parent,
179 	};
180 
181 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
182 		return -EOPNOTSUPP;
183 
184 	if (enable) {
185 		opt.command = TC_RED_REPLACE;
186 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
187 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
188 		opt.set.probability = q->parms.max_P;
189 		opt.set.limit = q->limit;
190 		opt.set.is_ecn = red_use_ecn(q);
191 		opt.set.is_harddrop = red_use_harddrop(q);
192 		opt.set.is_nodrop = red_use_nodrop(q);
193 		opt.set.qstats = &sch->qstats;
194 	} else {
195 		opt.command = TC_RED_DESTROY;
196 	}
197 
198 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
199 }
200 
201 static void red_destroy(struct Qdisc *sch)
202 {
203 	struct red_sched_data *q = qdisc_priv(sch);
204 
205 	del_timer_sync(&q->adapt_timer);
206 	red_offload(sch, false);
207 	qdisc_put(q->qdisc);
208 }
209 
210 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
211 	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
212 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
213 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
214 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
215 	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
216 };
217 
218 static int red_change(struct Qdisc *sch, struct nlattr *opt,
219 		      struct netlink_ext_ack *extack)
220 {
221 	struct Qdisc *old_child = NULL, *child = NULL;
222 	struct red_sched_data *q = qdisc_priv(sch);
223 	struct nlattr *tb[TCA_RED_MAX + 1];
224 	struct nla_bitfield32 flags_bf;
225 	struct tc_red_qopt *ctl;
226 	unsigned char userbits;
227 	unsigned char flags;
228 	int err;
229 	u32 max_P;
230 
231 	if (opt == NULL)
232 		return -EINVAL;
233 
234 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
235 					  NULL);
236 	if (err < 0)
237 		return err;
238 
239 	if (tb[TCA_RED_PARMS] == NULL ||
240 	    tb[TCA_RED_STAB] == NULL)
241 		return -EINVAL;
242 
243 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
244 
245 	ctl = nla_data(tb[TCA_RED_PARMS]);
246 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
247 		return -EINVAL;
248 
249 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
250 			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
251 			    &flags_bf, &userbits, extack);
252 	if (err)
253 		return err;
254 
255 	if (ctl->limit > 0) {
256 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
257 					 extack);
258 		if (IS_ERR(child))
259 			return PTR_ERR(child);
260 
261 		/* child is fifo, no need to check for noop_qdisc */
262 		qdisc_hash_add(child, true);
263 	}
264 
265 	sch_tree_lock(sch);
266 
267 	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
268 	err = red_validate_flags(flags, extack);
269 	if (err)
270 		goto unlock_out;
271 
272 	q->flags = flags;
273 	q->userbits = userbits;
274 	q->limit = ctl->limit;
275 	if (child) {
276 		qdisc_tree_flush_backlog(q->qdisc);
277 		old_child = q->qdisc;
278 		q->qdisc = child;
279 	}
280 
281 	red_set_parms(&q->parms,
282 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
283 		      ctl->Plog, ctl->Scell_log,
284 		      nla_data(tb[TCA_RED_STAB]),
285 		      max_P);
286 	red_set_vars(&q->vars);
287 
288 	del_timer(&q->adapt_timer);
289 	if (ctl->flags & TC_RED_ADAPTATIVE)
290 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
291 
292 	if (!q->qdisc->q.qlen)
293 		red_start_of_idle_period(&q->vars);
294 
295 	sch_tree_unlock(sch);
296 
297 	red_offload(sch, true);
298 
299 	if (old_child)
300 		qdisc_put(old_child);
301 	return 0;
302 
303 unlock_out:
304 	sch_tree_unlock(sch);
305 	if (child)
306 		qdisc_put(child);
307 	return err;
308 }
309 
310 static inline void red_adaptative_timer(struct timer_list *t)
311 {
312 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
313 	struct Qdisc *sch = q->sch;
314 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
315 
316 	spin_lock(root_lock);
317 	red_adaptative_algo(&q->parms, &q->vars);
318 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
319 	spin_unlock(root_lock);
320 }
321 
322 static int red_init(struct Qdisc *sch, struct nlattr *opt,
323 		    struct netlink_ext_ack *extack)
324 {
325 	struct red_sched_data *q = qdisc_priv(sch);
326 
327 	q->qdisc = &noop_qdisc;
328 	q->sch = sch;
329 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
330 	return red_change(sch, opt, extack);
331 }
332 
333 static int red_dump_offload_stats(struct Qdisc *sch)
334 {
335 	struct tc_red_qopt_offload hw_stats = {
336 		.command = TC_RED_STATS,
337 		.handle = sch->handle,
338 		.parent = sch->parent,
339 		{
340 			.stats.bstats = &sch->bstats,
341 			.stats.qstats = &sch->qstats,
342 		},
343 	};
344 
345 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
346 }
347 
348 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
349 {
350 	struct red_sched_data *q = qdisc_priv(sch);
351 	struct nlattr *opts = NULL;
352 	struct tc_red_qopt opt = {
353 		.limit		= q->limit,
354 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
355 				  q->userbits,
356 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
357 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
358 		.Wlog		= q->parms.Wlog,
359 		.Plog		= q->parms.Plog,
360 		.Scell_log	= q->parms.Scell_log,
361 	};
362 	int err;
363 
364 	err = red_dump_offload_stats(sch);
365 	if (err)
366 		goto nla_put_failure;
367 
368 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
369 	if (opts == NULL)
370 		goto nla_put_failure;
371 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
372 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
373 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
374 			       q->flags, TC_RED_SUPPORTED_FLAGS))
375 		goto nla_put_failure;
376 	return nla_nest_end(skb, opts);
377 
378 nla_put_failure:
379 	nla_nest_cancel(skb, opts);
380 	return -EMSGSIZE;
381 }
382 
383 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
384 {
385 	struct red_sched_data *q = qdisc_priv(sch);
386 	struct net_device *dev = qdisc_dev(sch);
387 	struct tc_red_xstats st = {0};
388 
389 	if (sch->flags & TCQ_F_OFFLOADED) {
390 		struct tc_red_qopt_offload hw_stats_request = {
391 			.command = TC_RED_XSTATS,
392 			.handle = sch->handle,
393 			.parent = sch->parent,
394 			{
395 				.xstats = &q->stats,
396 			},
397 		};
398 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
399 					      &hw_stats_request);
400 	}
401 	st.early = q->stats.prob_drop + q->stats.forced_drop;
402 	st.pdrop = q->stats.pdrop;
403 	st.other = q->stats.other;
404 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
405 
406 	return gnet_stats_copy_app(d, &st, sizeof(st));
407 }
408 
409 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
410 			  struct sk_buff *skb, struct tcmsg *tcm)
411 {
412 	struct red_sched_data *q = qdisc_priv(sch);
413 
414 	tcm->tcm_handle |= TC_H_MIN(1);
415 	tcm->tcm_info = q->qdisc->handle;
416 	return 0;
417 }
418 
419 static void red_graft_offload(struct Qdisc *sch,
420 			      struct Qdisc *new, struct Qdisc *old,
421 			      struct netlink_ext_ack *extack)
422 {
423 	struct tc_red_qopt_offload graft_offload = {
424 		.handle		= sch->handle,
425 		.parent		= sch->parent,
426 		.child_handle	= new->handle,
427 		.command	= TC_RED_GRAFT,
428 	};
429 
430 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
431 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
432 }
433 
434 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
435 		     struct Qdisc **old, struct netlink_ext_ack *extack)
436 {
437 	struct red_sched_data *q = qdisc_priv(sch);
438 
439 	if (new == NULL)
440 		new = &noop_qdisc;
441 
442 	*old = qdisc_replace(sch, new, &q->qdisc);
443 
444 	red_graft_offload(sch, new, *old, extack);
445 	return 0;
446 }
447 
448 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
449 {
450 	struct red_sched_data *q = qdisc_priv(sch);
451 	return q->qdisc;
452 }
453 
454 static unsigned long red_find(struct Qdisc *sch, u32 classid)
455 {
456 	return 1;
457 }
458 
459 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
460 {
461 	if (!walker->stop) {
462 		if (walker->count >= walker->skip)
463 			if (walker->fn(sch, 1, walker) < 0) {
464 				walker->stop = 1;
465 				return;
466 			}
467 		walker->count++;
468 	}
469 }
470 
471 static const struct Qdisc_class_ops red_class_ops = {
472 	.graft		=	red_graft,
473 	.leaf		=	red_leaf,
474 	.find		=	red_find,
475 	.walk		=	red_walk,
476 	.dump		=	red_dump_class,
477 };
478 
479 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
480 	.id		=	"red",
481 	.priv_size	=	sizeof(struct red_sched_data),
482 	.cl_ops		=	&red_class_ops,
483 	.enqueue	=	red_enqueue,
484 	.dequeue	=	red_dequeue,
485 	.peek		=	red_peek,
486 	.init		=	red_init,
487 	.reset		=	red_reset,
488 	.destroy	=	red_destroy,
489 	.change		=	red_change,
490 	.dump		=	red_dump,
491 	.dump_stats	=	red_dump_stats,
492 	.owner		=	THIS_MODULE,
493 };
494 
495 static int __init red_module_init(void)
496 {
497 	return register_qdisc(&red_qdisc_ops);
498 }
499 
500 static void __exit red_module_exit(void)
501 {
502 	unregister_qdisc(&red_qdisc_ops);
503 }
504 
505 module_init(red_module_init)
506 module_exit(red_module_exit)
507 
508 MODULE_LICENSE("GPL");
509