xref: /openbmc/linux/net/sched/sch_red.c (revision e79e40c8)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on the queue length, which should be chosen larger than
	qth_max to allow packet bursts. This parameter does not affect the
	algorithm's behaviour and can be chosen arbitrarily high (well, less
	than the RAM size). In practice this limit is never reached if RED
	works correctly.
 */

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

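/* Enqueue path: update the EWMA average queue size from the child's
 * backlog and act on the RED verdict.  A probabilistic or hard mark
 * either sets CE on ECT packets (running the "mark" qevent block) or
 * drops the packet (running the "early_drop" qevent block); in nodrop
 * mode, non-ECT packets are queued instead of dropped.  Accepted
 * packets are handed to the child (bfifo) qdisc.
 */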
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

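/* Dequeue from the child qdisc.  When the child runs empty, start the
 * idle period so the average queue size keeps decaying while the link
 * is idle.
 */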
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

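/* Push the configuration to the device via ndo_setup_tc(): TC_RED_REPLACE
 * installs the parameters (thresholds scaled back from their Wlog-shifted
 * internal form), TC_RED_DESTROY removes the offloaded instance.
 */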
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

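/* Apply a configuration from already-parsed TCA_RED_* attributes (shared by
 * ->init() and ->change()): validate parameters and flags, create a bfifo
 * child sized to the configured limit (when non-zero), install everything
 * under the tree lock, re-arm the adaptative timer if TC_RED_ADAPTATIVE is
 * set, then offload the result and release the old child.
 */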
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

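/* Adaptative-mode timer, re-armed every HZ/2: re-tune max_P via
 * red_adaptative_algo() under the root qdisc lock.
 */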
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

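/* ->init(): start with a noop child and the adaptative timer set up, parse
 * the TCA_RED_* options, apply them through __red_change(), then bind the
 * early_drop and mark qevent blocks.
 */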
static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

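/* ->change(): re-parse the options and check that the qevent block bindings
 * are not being altered before applying the new parameters via
 * __red_change().
 */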
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

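/* For an offloaded qdisc, have the driver refresh bstats/qstats through a
 * TC_RED_STATS request before they are dumped.
 */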
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

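/* Dump the current configuration: tc_red_qopt (thresholds scaled back by
 * Wlog), max_P, the flag bitfield and the qevent blocks, nested under
 * TCA_OPTIONS.
 */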
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

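/* Dump tc_red_xstats: early (prob + forced) drops, tail drops and marks.
 * If the qdisc is offloaded, pull the counters from the driver first via
 * TC_RED_XSTATS.
 */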
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

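/* Replace the child qdisc (falling back to noop_qdisc) and notify an
 * offloading driver through TC_RED_GRAFT.
 */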
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

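/* RED exposes a single pseudo-class for its child slot; report it once,
 * honouring the walker's skip/count bookkeeping.
 */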
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");