xref: /openbmc/linux/net/sched/sch_red.c (revision 6a143a7c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length; it should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	In practice this limit will never be reached
	if RED works correctly.
 */
35 
/* Private per-qdisc state of a RED instance, obtained via qdisc_priv(). */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	/* TC_RED_* behaviour flags (ECN, HARDDROP, NODROP, ...). */
	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	/* Timer whose callback is red_adaptative_timer(). */
	struct timer_list	adapt_timer;
	/* Back-pointer to the owning qdisc; set in red_init(), read by the timer. */
	struct Qdisc		*sch;
	/* RED parameters, written by red_set_parms(). */
	struct red_parms	parms;
	/* RED run-time variables, including the average queue length qavg. */
	struct red_vars		vars;
	/* Drop/mark counters reported by red_dump_stats(). */
	struct red_stats	stats;
	/* Child qdisc that actually holds the packets. */
	struct Qdisc		*qdisc;
	/* qevent run before a packet is dropped early. */
	struct tcf_qevent	qe_early_drop;
	/* qevent run after a packet has been CE-marked. */
	struct tcf_qevent	qe_mark;
};
52 
/* All flag bits handled by this qdisc: the historic ones plus TC_RED_NODROP. */
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54 
55 static inline int red_use_ecn(struct red_sched_data *q)
56 {
57 	return q->flags & TC_RED_ECN;
58 }
59 
60 static inline int red_use_harddrop(struct red_sched_data *q)
61 {
62 	return q->flags & TC_RED_HARDDROP;
63 }
64 
65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 	return q->flags & TC_RED_NODROP;
68 }
69 
70 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
71 		       struct sk_buff **to_free)
72 {
73 	struct red_sched_data *q = qdisc_priv(sch);
74 	struct Qdisc *child = q->qdisc;
75 	int ret;
76 
77 	q->vars.qavg = red_calc_qavg(&q->parms,
78 				     &q->vars,
79 				     child->qstats.backlog);
80 
81 	if (red_is_idling(&q->vars))
82 		red_end_of_idle_period(&q->vars);
83 
84 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
85 	case RED_DONT_MARK:
86 		break;
87 
88 	case RED_PROB_MARK:
89 		qdisc_qstats_overlimit(sch);
90 		if (!red_use_ecn(q)) {
91 			q->stats.prob_drop++;
92 			goto congestion_drop;
93 		}
94 
95 		if (INET_ECN_set_ce(skb)) {
96 			q->stats.prob_mark++;
97 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
98 			if (!skb)
99 				return NET_XMIT_CN | ret;
100 		} else if (!red_use_nodrop(q)) {
101 			q->stats.prob_drop++;
102 			goto congestion_drop;
103 		}
104 
105 		/* Non-ECT packet in ECN nodrop mode: queue it. */
106 		break;
107 
108 	case RED_HARD_MARK:
109 		qdisc_qstats_overlimit(sch);
110 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
111 			q->stats.forced_drop++;
112 			goto congestion_drop;
113 		}
114 
115 		if (INET_ECN_set_ce(skb)) {
116 			q->stats.forced_mark++;
117 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
118 			if (!skb)
119 				return NET_XMIT_CN | ret;
120 		} else if (!red_use_nodrop(q)) {
121 			q->stats.forced_drop++;
122 			goto congestion_drop;
123 		}
124 
125 		/* Non-ECT packet in ECN nodrop mode: queue it. */
126 		break;
127 	}
128 
129 	ret = qdisc_enqueue(skb, child, to_free);
130 	if (likely(ret == NET_XMIT_SUCCESS)) {
131 		qdisc_qstats_backlog_inc(sch, skb);
132 		sch->q.qlen++;
133 	} else if (net_xmit_drop_count(ret)) {
134 		q->stats.pdrop++;
135 		qdisc_qstats_drop(sch);
136 	}
137 	return ret;
138 
139 congestion_drop:
140 	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
141 	if (!skb)
142 		return NET_XMIT_CN | ret;
143 
144 	qdisc_drop(skb, sch, to_free);
145 	return NET_XMIT_CN;
146 }
147 
148 static struct sk_buff *red_dequeue(struct Qdisc *sch)
149 {
150 	struct sk_buff *skb;
151 	struct red_sched_data *q = qdisc_priv(sch);
152 	struct Qdisc *child = q->qdisc;
153 
154 	skb = child->dequeue(child);
155 	if (skb) {
156 		qdisc_bstats_update(sch, skb);
157 		qdisc_qstats_backlog_dec(sch, skb);
158 		sch->q.qlen--;
159 	} else {
160 		if (!red_is_idling(&q->vars))
161 			red_start_of_idle_period(&q->vars);
162 	}
163 	return skb;
164 }
165 
166 static struct sk_buff *red_peek(struct Qdisc *sch)
167 {
168 	struct red_sched_data *q = qdisc_priv(sch);
169 	struct Qdisc *child = q->qdisc;
170 
171 	return child->ops->peek(child);
172 }
173 
174 static void red_reset(struct Qdisc *sch)
175 {
176 	struct red_sched_data *q = qdisc_priv(sch);
177 
178 	qdisc_reset(q->qdisc);
179 	sch->qstats.backlog = 0;
180 	sch->q.qlen = 0;
181 	red_restart(&q->vars);
182 }
183 
184 static int red_offload(struct Qdisc *sch, bool enable)
185 {
186 	struct red_sched_data *q = qdisc_priv(sch);
187 	struct net_device *dev = qdisc_dev(sch);
188 	struct tc_red_qopt_offload opt = {
189 		.handle = sch->handle,
190 		.parent = sch->parent,
191 	};
192 
193 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
194 		return -EOPNOTSUPP;
195 
196 	if (enable) {
197 		opt.command = TC_RED_REPLACE;
198 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
199 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
200 		opt.set.probability = q->parms.max_P;
201 		opt.set.limit = q->limit;
202 		opt.set.is_ecn = red_use_ecn(q);
203 		opt.set.is_harddrop = red_use_harddrop(q);
204 		opt.set.is_nodrop = red_use_nodrop(q);
205 		opt.set.qstats = &sch->qstats;
206 	} else {
207 		opt.command = TC_RED_DESTROY;
208 	}
209 
210 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
211 }
212 
213 static void red_destroy(struct Qdisc *sch)
214 {
215 	struct red_sched_data *q = qdisc_priv(sch);
216 
217 	tcf_qevent_destroy(&q->qe_mark, sch);
218 	tcf_qevent_destroy(&q->qe_early_drop, sch);
219 	del_timer_sync(&q->adapt_timer);
220 	red_offload(sch, false);
221 	qdisc_put(q->qdisc);
222 }
223 
/*
 * Netlink policy for the nested TCA_RED_* attributes, used by red_init()
 * and red_change() when parsing the options.
 */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	/* Carries a struct tc_red_qopt. */
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	/* Table passed to red_set_parms(), RED_STAB_SIZE bytes. */
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	/* Bitfield restricted to the supported flag bits. */
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
233 
/*
 * Apply the configuration in the parsed attribute table @tb to @sch.
 * Shared by red_init() and red_change().
 *
 * TCA_RED_PARMS and TCA_RED_STAB must be present; TCA_RED_MAX_P is optional
 * and treated as 0 when absent.  When the requested limit is non-zero a new
 * bfifo child with that limit replaces the current child.
 *
 * Returns 0 on success.  Returns -EINVAL when a mandatory attribute is
 * missing or red_check_params() rejects the parameters, otherwise the error
 * from red_get_flags(), fifo_create_dflt() or red_validate_flags().  All
 * error paths are taken before any field of the private data is written.
 */
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	/* The replacement child is created before the tree lock is taken. */
	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	/* Only bits named in the selector change; the rest keep their value. */
	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		/* Swap children; the old one is released after unlocking. */
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	/* Cancel any pending run; re-arm only if adaptive mode was requested. */
	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	/* An empty child means the queue starts out idle. */
	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	/* The return value of red_offload() is ignored. */
	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	/* Drop the child that was created but never installed. */
	if (child)
		qdisc_put(child);
	return err;
}
316 
317 static inline void red_adaptative_timer(struct timer_list *t)
318 {
319 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
320 	struct Qdisc *sch = q->sch;
321 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
322 
323 	spin_lock(root_lock);
324 	red_adaptative_algo(&q->parms, &q->vars);
325 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
326 	spin_unlock(root_lock);
327 }
328 
329 static int red_init(struct Qdisc *sch, struct nlattr *opt,
330 		    struct netlink_ext_ack *extack)
331 {
332 	struct red_sched_data *q = qdisc_priv(sch);
333 	struct nlattr *tb[TCA_RED_MAX + 1];
334 	int err;
335 
336 	q->qdisc = &noop_qdisc;
337 	q->sch = sch;
338 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
339 
340 	if (!opt)
341 		return -EINVAL;
342 
343 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
344 					  extack);
345 	if (err < 0)
346 		return err;
347 
348 	err = __red_change(sch, tb, extack);
349 	if (err)
350 		return err;
351 
352 	err = tcf_qevent_init(&q->qe_early_drop, sch,
353 			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
354 			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
355 	if (err)
356 		return err;
357 
358 	return tcf_qevent_init(&q->qe_mark, sch,
359 			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
360 			       tb[TCA_RED_MARK_BLOCK], extack);
361 }
362 
363 static int red_change(struct Qdisc *sch, struct nlattr *opt,
364 		      struct netlink_ext_ack *extack)
365 {
366 	struct red_sched_data *q = qdisc_priv(sch);
367 	struct nlattr *tb[TCA_RED_MAX + 1];
368 	int err;
369 
370 	if (!opt)
371 		return -EINVAL;
372 
373 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
374 					  extack);
375 	if (err < 0)
376 		return err;
377 
378 	err = tcf_qevent_validate_change(&q->qe_early_drop,
379 					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
380 	if (err)
381 		return err;
382 
383 	err = tcf_qevent_validate_change(&q->qe_mark,
384 					 tb[TCA_RED_MARK_BLOCK], extack);
385 	if (err)
386 		return err;
387 
388 	return __red_change(sch, tb, extack);
389 }
390 
391 static int red_dump_offload_stats(struct Qdisc *sch)
392 {
393 	struct tc_red_qopt_offload hw_stats = {
394 		.command = TC_RED_STATS,
395 		.handle = sch->handle,
396 		.parent = sch->parent,
397 		{
398 			.stats.bstats = &sch->bstats,
399 			.stats.qstats = &sch->qstats,
400 		},
401 	};
402 
403 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
404 }
405 
406 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
407 {
408 	struct red_sched_data *q = qdisc_priv(sch);
409 	struct nlattr *opts = NULL;
410 	struct tc_red_qopt opt = {
411 		.limit		= q->limit,
412 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
413 				  q->userbits,
414 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
415 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
416 		.Wlog		= q->parms.Wlog,
417 		.Plog		= q->parms.Plog,
418 		.Scell_log	= q->parms.Scell_log,
419 	};
420 	int err;
421 
422 	err = red_dump_offload_stats(sch);
423 	if (err)
424 		goto nla_put_failure;
425 
426 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
427 	if (opts == NULL)
428 		goto nla_put_failure;
429 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
430 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
431 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
432 			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
433 	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
434 	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
435 		goto nla_put_failure;
436 	return nla_nest_end(skb, opts);
437 
438 nla_put_failure:
439 	nla_nest_cancel(skb, opts);
440 	return -EMSGSIZE;
441 }
442 
443 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
444 {
445 	struct red_sched_data *q = qdisc_priv(sch);
446 	struct net_device *dev = qdisc_dev(sch);
447 	struct tc_red_xstats st = {0};
448 
449 	if (sch->flags & TCQ_F_OFFLOADED) {
450 		struct tc_red_qopt_offload hw_stats_request = {
451 			.command = TC_RED_XSTATS,
452 			.handle = sch->handle,
453 			.parent = sch->parent,
454 			{
455 				.xstats = &q->stats,
456 			},
457 		};
458 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
459 					      &hw_stats_request);
460 	}
461 	st.early = q->stats.prob_drop + q->stats.forced_drop;
462 	st.pdrop = q->stats.pdrop;
463 	st.other = q->stats.other;
464 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
465 
466 	return gnet_stats_copy_app(d, &st, sizeof(st));
467 }
468 
469 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
470 			  struct sk_buff *skb, struct tcmsg *tcm)
471 {
472 	struct red_sched_data *q = qdisc_priv(sch);
473 
474 	tcm->tcm_handle |= TC_H_MIN(1);
475 	tcm->tcm_info = q->qdisc->handle;
476 	return 0;
477 }
478 
479 static void red_graft_offload(struct Qdisc *sch,
480 			      struct Qdisc *new, struct Qdisc *old,
481 			      struct netlink_ext_ack *extack)
482 {
483 	struct tc_red_qopt_offload graft_offload = {
484 		.handle		= sch->handle,
485 		.parent		= sch->parent,
486 		.child_handle	= new->handle,
487 		.command	= TC_RED_GRAFT,
488 	};
489 
490 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
491 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
492 }
493 
494 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
495 		     struct Qdisc **old, struct netlink_ext_ack *extack)
496 {
497 	struct red_sched_data *q = qdisc_priv(sch);
498 
499 	if (new == NULL)
500 		new = &noop_qdisc;
501 
502 	*old = qdisc_replace(sch, new, &q->qdisc);
503 
504 	red_graft_offload(sch, new, *old, extack);
505 	return 0;
506 }
507 
508 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
509 {
510 	struct red_sched_data *q = qdisc_priv(sch);
511 	return q->qdisc;
512 }
513 
514 static unsigned long red_find(struct Qdisc *sch, u32 classid)
515 {
516 	return 1;
517 }
518 
519 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
520 {
521 	if (!walker->stop) {
522 		if (walker->count >= walker->skip)
523 			if (walker->fn(sch, 1, walker) < 0) {
524 				walker->stop = 1;
525 				return;
526 			}
527 		walker->count++;
528 	}
529 }
530 
/* Class operations of RED, referenced from red_qdisc_ops.cl_ops. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
538 
/*
 * Qdisc operations table for "red"; registered in red_module_init() and
 * unregistered in red_module_exit().
 */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
554 
555 static int __init red_module_init(void)
556 {
557 	return register_qdisc(&red_qdisc_ops);
558 }
559 
560 static void __exit red_module_exit(void)
561 {
562 	unregister_qdisc(&red_qdisc_ops);
563 }
564 
/* Hook the init/exit functions into module load and unload. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
569