xref: /openbmc/linux/net/sched/sch_red.c (revision 867e6d38)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
23 /*	Parameters, settable by user:
24 	-----------------------------
25 
26 	limit		- bytes (must be > qth_max + burst)
27 
28 	Hard limit on queue length, should be chosen >qth_max
29 	to allow packet bursts. This parameter does not
30 	affect the algorithms behaviour and can be chosen
31 	arbitrarily high (well, less than ram size)
32 	Really, this limit will never be reached
33 	if RED works correctly.
34  */
35 
36 struct red_sched_data {
37 	u32			limit;		/* HARD maximal queue length */
38 
39 	unsigned char		flags;
40 	/* Non-flags in tc_red_qopt.flags. */
41 	unsigned char		userbits;
42 
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 	struct tcf_qevent	qe_early_drop;
50 	struct tcf_qevent	qe_mark;
51 };
52 
53 #define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54 
55 static inline int red_use_ecn(struct red_sched_data *q)
56 {
57 	return q->flags & TC_RED_ECN;
58 }
59 
60 static inline int red_use_harddrop(struct red_sched_data *q)
61 {
62 	return q->flags & TC_RED_HARDDROP;
63 }
64 
65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 	return q->flags & TC_RED_NODROP;
68 }
69 
70 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
71 		       struct sk_buff **to_free)
72 {
73 	struct red_sched_data *q = qdisc_priv(sch);
74 	struct Qdisc *child = q->qdisc;
75 	int ret;
76 
77 	q->vars.qavg = red_calc_qavg(&q->parms,
78 				     &q->vars,
79 				     child->qstats.backlog);
80 
81 	if (red_is_idling(&q->vars))
82 		red_end_of_idle_period(&q->vars);
83 
84 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
85 	case RED_DONT_MARK:
86 		break;
87 
88 	case RED_PROB_MARK:
89 		qdisc_qstats_overlimit(sch);
90 		if (!red_use_ecn(q)) {
91 			q->stats.prob_drop++;
92 			goto congestion_drop;
93 		}
94 
95 		if (INET_ECN_set_ce(skb)) {
96 			q->stats.prob_mark++;
97 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
98 			if (!skb)
99 				return NET_XMIT_CN | ret;
100 		} else if (!red_use_nodrop(q)) {
101 			q->stats.prob_drop++;
102 			goto congestion_drop;
103 		}
104 
105 		/* Non-ECT packet in ECN nodrop mode: queue it. */
106 		break;
107 
108 	case RED_HARD_MARK:
109 		qdisc_qstats_overlimit(sch);
110 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
111 			q->stats.forced_drop++;
112 			goto congestion_drop;
113 		}
114 
115 		if (INET_ECN_set_ce(skb)) {
116 			q->stats.forced_mark++;
117 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
118 			if (!skb)
119 				return NET_XMIT_CN | ret;
120 		} else if (!red_use_nodrop(q)) {
121 			q->stats.forced_drop++;
122 			goto congestion_drop;
123 		}
124 
125 		/* Non-ECT packet in ECN nodrop mode: queue it. */
126 		break;
127 	}
128 
129 	ret = qdisc_enqueue(skb, child, to_free);
130 	if (likely(ret == NET_XMIT_SUCCESS)) {
131 		qdisc_qstats_backlog_inc(sch, skb);
132 		sch->q.qlen++;
133 	} else if (net_xmit_drop_count(ret)) {
134 		q->stats.pdrop++;
135 		qdisc_qstats_drop(sch);
136 	}
137 	return ret;
138 
139 congestion_drop:
140 	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
141 	if (!skb)
142 		return NET_XMIT_CN | ret;
143 
144 	qdisc_drop(skb, sch, to_free);
145 	return NET_XMIT_CN;
146 }
147 
148 static struct sk_buff *red_dequeue(struct Qdisc *sch)
149 {
150 	struct sk_buff *skb;
151 	struct red_sched_data *q = qdisc_priv(sch);
152 	struct Qdisc *child = q->qdisc;
153 
154 	skb = child->dequeue(child);
155 	if (skb) {
156 		qdisc_bstats_update(sch, skb);
157 		qdisc_qstats_backlog_dec(sch, skb);
158 		sch->q.qlen--;
159 	} else {
160 		if (!red_is_idling(&q->vars))
161 			red_start_of_idle_period(&q->vars);
162 	}
163 	return skb;
164 }
165 
166 static struct sk_buff *red_peek(struct Qdisc *sch)
167 {
168 	struct red_sched_data *q = qdisc_priv(sch);
169 	struct Qdisc *child = q->qdisc;
170 
171 	return child->ops->peek(child);
172 }
173 
174 static void red_reset(struct Qdisc *sch)
175 {
176 	struct red_sched_data *q = qdisc_priv(sch);
177 
178 	qdisc_reset(q->qdisc);
179 	sch->qstats.backlog = 0;
180 	sch->q.qlen = 0;
181 	red_restart(&q->vars);
182 }
183 
184 static int red_offload(struct Qdisc *sch, bool enable)
185 {
186 	struct red_sched_data *q = qdisc_priv(sch);
187 	struct net_device *dev = qdisc_dev(sch);
188 	struct tc_red_qopt_offload opt = {
189 		.handle = sch->handle,
190 		.parent = sch->parent,
191 	};
192 
193 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
194 		return -EOPNOTSUPP;
195 
196 	if (enable) {
197 		opt.command = TC_RED_REPLACE;
198 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
199 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
200 		opt.set.probability = q->parms.max_P;
201 		opt.set.limit = q->limit;
202 		opt.set.is_ecn = red_use_ecn(q);
203 		opt.set.is_harddrop = red_use_harddrop(q);
204 		opt.set.is_nodrop = red_use_nodrop(q);
205 		opt.set.qstats = &sch->qstats;
206 	} else {
207 		opt.command = TC_RED_DESTROY;
208 	}
209 
210 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
211 }
212 
213 static void red_destroy(struct Qdisc *sch)
214 {
215 	struct red_sched_data *q = qdisc_priv(sch);
216 
217 	tcf_qevent_destroy(&q->qe_mark, sch);
218 	tcf_qevent_destroy(&q->qe_early_drop, sch);
219 	del_timer_sync(&q->adapt_timer);
220 	red_offload(sch, false);
221 	qdisc_put(q->qdisc);
222 }
223 
224 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
225 	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
226 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
227 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
228 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
229 	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
230 	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
231 	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
232 };
233 
234 static int __red_change(struct Qdisc *sch, struct nlattr **tb,
235 			struct netlink_ext_ack *extack)
236 {
237 	struct Qdisc *old_child = NULL, *child = NULL;
238 	struct red_sched_data *q = qdisc_priv(sch);
239 	struct nla_bitfield32 flags_bf;
240 	struct tc_red_qopt *ctl;
241 	unsigned char userbits;
242 	unsigned char flags;
243 	int err;
244 	u32 max_P;
245 	u8 *stab;
246 
247 	if (tb[TCA_RED_PARMS] == NULL ||
248 	    tb[TCA_RED_STAB] == NULL)
249 		return -EINVAL;
250 
251 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
252 
253 	ctl = nla_data(tb[TCA_RED_PARMS]);
254 	stab = nla_data(tb[TCA_RED_STAB]);
255 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
256 			      ctl->Scell_log, stab))
257 		return -EINVAL;
258 
259 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
260 			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
261 			    &flags_bf, &userbits, extack);
262 	if (err)
263 		return err;
264 
265 	if (ctl->limit > 0) {
266 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
267 					 extack);
268 		if (IS_ERR(child))
269 			return PTR_ERR(child);
270 
271 		/* child is fifo, no need to check for noop_qdisc */
272 		qdisc_hash_add(child, true);
273 	}
274 
275 	sch_tree_lock(sch);
276 
277 	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
278 	err = red_validate_flags(flags, extack);
279 	if (err)
280 		goto unlock_out;
281 
282 	q->flags = flags;
283 	q->userbits = userbits;
284 	q->limit = ctl->limit;
285 	if (child) {
286 		qdisc_tree_flush_backlog(q->qdisc);
287 		old_child = q->qdisc;
288 		q->qdisc = child;
289 	}
290 
291 	red_set_parms(&q->parms,
292 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
293 		      ctl->Plog, ctl->Scell_log,
294 		      stab,
295 		      max_P);
296 	red_set_vars(&q->vars);
297 
298 	del_timer(&q->adapt_timer);
299 	if (ctl->flags & TC_RED_ADAPTATIVE)
300 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
301 
302 	if (!q->qdisc->q.qlen)
303 		red_start_of_idle_period(&q->vars);
304 
305 	sch_tree_unlock(sch);
306 
307 	red_offload(sch, true);
308 
309 	if (old_child)
310 		qdisc_put(old_child);
311 	return 0;
312 
313 unlock_out:
314 	sch_tree_unlock(sch);
315 	if (child)
316 		qdisc_put(child);
317 	return err;
318 }
319 
320 static inline void red_adaptative_timer(struct timer_list *t)
321 {
322 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
323 	struct Qdisc *sch = q->sch;
324 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
325 
326 	spin_lock(root_lock);
327 	red_adaptative_algo(&q->parms, &q->vars);
328 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
329 	spin_unlock(root_lock);
330 }
331 
332 static int red_init(struct Qdisc *sch, struct nlattr *opt,
333 		    struct netlink_ext_ack *extack)
334 {
335 	struct red_sched_data *q = qdisc_priv(sch);
336 	struct nlattr *tb[TCA_RED_MAX + 1];
337 	int err;
338 
339 	q->qdisc = &noop_qdisc;
340 	q->sch = sch;
341 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
342 
343 	if (!opt)
344 		return -EINVAL;
345 
346 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
347 					  extack);
348 	if (err < 0)
349 		return err;
350 
351 	err = __red_change(sch, tb, extack);
352 	if (err)
353 		return err;
354 
355 	err = tcf_qevent_init(&q->qe_early_drop, sch,
356 			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
357 			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
358 	if (err)
359 		return err;
360 
361 	return tcf_qevent_init(&q->qe_mark, sch,
362 			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
363 			       tb[TCA_RED_MARK_BLOCK], extack);
364 }
365 
366 static int red_change(struct Qdisc *sch, struct nlattr *opt,
367 		      struct netlink_ext_ack *extack)
368 {
369 	struct red_sched_data *q = qdisc_priv(sch);
370 	struct nlattr *tb[TCA_RED_MAX + 1];
371 	int err;
372 
373 	if (!opt)
374 		return -EINVAL;
375 
376 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
377 					  extack);
378 	if (err < 0)
379 		return err;
380 
381 	err = tcf_qevent_validate_change(&q->qe_early_drop,
382 					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
383 	if (err)
384 		return err;
385 
386 	err = tcf_qevent_validate_change(&q->qe_mark,
387 					 tb[TCA_RED_MARK_BLOCK], extack);
388 	if (err)
389 		return err;
390 
391 	return __red_change(sch, tb, extack);
392 }
393 
394 static int red_dump_offload_stats(struct Qdisc *sch)
395 {
396 	struct tc_red_qopt_offload hw_stats = {
397 		.command = TC_RED_STATS,
398 		.handle = sch->handle,
399 		.parent = sch->parent,
400 		{
401 			.stats.bstats = &sch->bstats,
402 			.stats.qstats = &sch->qstats,
403 		},
404 	};
405 
406 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
407 }
408 
409 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
410 {
411 	struct red_sched_data *q = qdisc_priv(sch);
412 	struct nlattr *opts = NULL;
413 	struct tc_red_qopt opt = {
414 		.limit		= q->limit,
415 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
416 				  q->userbits,
417 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
418 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
419 		.Wlog		= q->parms.Wlog,
420 		.Plog		= q->parms.Plog,
421 		.Scell_log	= q->parms.Scell_log,
422 	};
423 	int err;
424 
425 	err = red_dump_offload_stats(sch);
426 	if (err)
427 		goto nla_put_failure;
428 
429 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
430 	if (opts == NULL)
431 		goto nla_put_failure;
432 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
433 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
434 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
435 			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
436 	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
437 	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
438 		goto nla_put_failure;
439 	return nla_nest_end(skb, opts);
440 
441 nla_put_failure:
442 	nla_nest_cancel(skb, opts);
443 	return -EMSGSIZE;
444 }
445 
446 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
447 {
448 	struct red_sched_data *q = qdisc_priv(sch);
449 	struct net_device *dev = qdisc_dev(sch);
450 	struct tc_red_xstats st = {0};
451 
452 	if (sch->flags & TCQ_F_OFFLOADED) {
453 		struct tc_red_qopt_offload hw_stats_request = {
454 			.command = TC_RED_XSTATS,
455 			.handle = sch->handle,
456 			.parent = sch->parent,
457 			{
458 				.xstats = &q->stats,
459 			},
460 		};
461 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
462 					      &hw_stats_request);
463 	}
464 	st.early = q->stats.prob_drop + q->stats.forced_drop;
465 	st.pdrop = q->stats.pdrop;
466 	st.other = q->stats.other;
467 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
468 
469 	return gnet_stats_copy_app(d, &st, sizeof(st));
470 }
471 
472 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
473 			  struct sk_buff *skb, struct tcmsg *tcm)
474 {
475 	struct red_sched_data *q = qdisc_priv(sch);
476 
477 	tcm->tcm_handle |= TC_H_MIN(1);
478 	tcm->tcm_info = q->qdisc->handle;
479 	return 0;
480 }
481 
482 static void red_graft_offload(struct Qdisc *sch,
483 			      struct Qdisc *new, struct Qdisc *old,
484 			      struct netlink_ext_ack *extack)
485 {
486 	struct tc_red_qopt_offload graft_offload = {
487 		.handle		= sch->handle,
488 		.parent		= sch->parent,
489 		.child_handle	= new->handle,
490 		.command	= TC_RED_GRAFT,
491 	};
492 
493 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
494 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
495 }
496 
497 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
498 		     struct Qdisc **old, struct netlink_ext_ack *extack)
499 {
500 	struct red_sched_data *q = qdisc_priv(sch);
501 
502 	if (new == NULL)
503 		new = &noop_qdisc;
504 
505 	*old = qdisc_replace(sch, new, &q->qdisc);
506 
507 	red_graft_offload(sch, new, *old, extack);
508 	return 0;
509 }
510 
511 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
512 {
513 	struct red_sched_data *q = qdisc_priv(sch);
514 	return q->qdisc;
515 }
516 
517 static unsigned long red_find(struct Qdisc *sch, u32 classid)
518 {
519 	return 1;
520 }
521 
522 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
523 {
524 	if (!walker->stop) {
525 		if (walker->count >= walker->skip)
526 			if (walker->fn(sch, 1, walker) < 0) {
527 				walker->stop = 1;
528 				return;
529 			}
530 		walker->count++;
531 	}
532 }
533 
534 static const struct Qdisc_class_ops red_class_ops = {
535 	.graft		=	red_graft,
536 	.leaf		=	red_leaf,
537 	.find		=	red_find,
538 	.walk		=	red_walk,
539 	.dump		=	red_dump_class,
540 };
541 
542 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
543 	.id		=	"red",
544 	.priv_size	=	sizeof(struct red_sched_data),
545 	.cl_ops		=	&red_class_ops,
546 	.enqueue	=	red_enqueue,
547 	.dequeue	=	red_dequeue,
548 	.peek		=	red_peek,
549 	.init		=	red_init,
550 	.reset		=	red_reset,
551 	.destroy	=	red_destroy,
552 	.change		=	red_change,
553 	.dump		=	red_dump,
554 	.dump_stats	=	red_dump_stats,
555 	.owner		=	THIS_MODULE,
556 };
557 
558 static int __init red_module_init(void)
559 {
560 	return register_qdisc(&red_qdisc_ops);
561 }
562 
563 static void __exit red_module_exit(void)
564 {
565 	unregister_qdisc(&red_qdisc_ops);
566 }
567 
568 module_init(red_module_init)
569 module_exit(red_module_exit)
570 
571 MODULE_LICENSE("GPL");
572