xref: /openbmc/linux/net/sched/sch_red.c (revision b03afaa8)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
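
/* In outline (a reading aid only -- the fixed-point details live in
 * include/net/red.h), classic RED keeps an EWMA of the backlog,
 *
 *	qavg = (1 - W) * qavg + W * backlog,	W = 2^(-Wlog),
 *
 * and, while qth_min < qavg < qth_max, marks or drops with probability
 * roughly
 *
 *	p = max_P * (qavg - qth_min) / (qth_max - qth_min).
 *
 * Illustrative (not normative) user-space setup:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 ecn adaptive
 */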

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

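/* Flags accepted through TCA_RED_FLAGS: the historic bits that are also
 * carried in tc_red_qopt.flags (ECN, HARDDROP, ADAPTATIVE) plus
 * TC_RED_NODROP, which is only settable via the bitfield attribute.
 */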
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

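	/* No early drop was taken (or the packet was ECN-marked): hand it
	 * to the child qdisc and account for the result.
	 */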
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

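/* Dequeue from the child qdisc.  When the child runs empty, record the
 * start of an idle period so that the average queue length is decayed
 * correctly once traffic resumes.
 */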
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

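/* Install the current RED configuration in, or remove it from, capable
 * hardware via ndo_setup_tc(TC_SETUP_QDISC_RED).
 */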
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

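/* Apply a configuration parsed from netlink attributes.  Shared by
 * red_init() and red_change().
 */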
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

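/* Adaptive RED: while TC_RED_ADAPTATIVE is set, re-tune max_P roughly
 * every 500 ms based on the observed average queue length.
 */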
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		goto err_early_drop_init;

	err = tcf_qevent_init(&q->qe_mark, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			      tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		goto err_mark_init;

	return 0;

err_mark_init:
	tcf_qevent_destroy(&q->qe_early_drop, sch);
err_early_drop_init:
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
	return err;
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

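/* Dump RED-specific statistics.  If the qdisc is offloaded, refresh
 * q->stats from the hardware counters first.
 */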
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

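/* RED exposes a single pseudo-class (minor 1), which holds the child
 * qdisc that carries the actual backlog.
 */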
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
581