xref: /openbmc/linux/net/sched/sch_red.c (revision cfacc4d8c04d177911ae2c6601d9c320c5c5c29e)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
23 /*	Parameters, settable by user:
24 	-----------------------------
25 
26 	limit		- bytes (must be > qth_max + burst)
27 
28 	Hard limit on queue length, should be chosen >qth_max
29 	to allow packet bursts. This parameter does not
30 	affect the algorithms behaviour and can be chosen
31 	arbitrarily high (well, less than ram size)
32 	Really, this limit will never be reached
33 	if RED works correctly.
34  */
35 
36 struct red_sched_data {
37 	u32			limit;		/* HARD maximal queue length */
38 
39 	unsigned char		flags;
40 	/* Non-flags in tc_red_qopt.flags. */
41 	unsigned char		userbits;
42 
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 	struct tcf_qevent	qe_early_drop;
50 	struct tcf_qevent	qe_mark;
51 };
52 
53 #define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54 
55 static inline int red_use_ecn(struct red_sched_data *q)
56 {
57 	return q->flags & TC_RED_ECN;
58 }
59 
60 static inline int red_use_harddrop(struct red_sched_data *q)
61 {
62 	return q->flags & TC_RED_HARDDROP;
63 }
64 
65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 	return q->flags & TC_RED_NODROP;
68 }
69 
70 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
71 		       struct sk_buff **to_free)
72 {
73 	struct red_sched_data *q = qdisc_priv(sch);
74 	struct Qdisc *child = q->qdisc;
75 	unsigned int len;
76 	int ret;
77 
78 	q->vars.qavg = red_calc_qavg(&q->parms,
79 				     &q->vars,
80 				     child->qstats.backlog);
81 
82 	if (red_is_idling(&q->vars))
83 		red_end_of_idle_period(&q->vars);
84 
85 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
86 	case RED_DONT_MARK:
87 		break;
88 
89 	case RED_PROB_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (!red_use_ecn(q)) {
92 			q->stats.prob_drop++;
93 			goto congestion_drop;
94 		}
95 
96 		if (INET_ECN_set_ce(skb)) {
97 			q->stats.prob_mark++;
98 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
99 			if (!skb)
100 				return NET_XMIT_CN | ret;
101 		} else if (!red_use_nodrop(q)) {
102 			q->stats.prob_drop++;
103 			goto congestion_drop;
104 		}
105 
106 		/* Non-ECT packet in ECN nodrop mode: queue it. */
107 		break;
108 
109 	case RED_HARD_MARK:
110 		qdisc_qstats_overlimit(sch);
111 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
112 			q->stats.forced_drop++;
113 			goto congestion_drop;
114 		}
115 
116 		if (INET_ECN_set_ce(skb)) {
117 			q->stats.forced_mark++;
118 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
119 			if (!skb)
120 				return NET_XMIT_CN | ret;
121 		} else if (!red_use_nodrop(q)) {
122 			q->stats.forced_drop++;
123 			goto congestion_drop;
124 		}
125 
126 		/* Non-ECT packet in ECN nodrop mode: queue it. */
127 		break;
128 	}
129 
130 	len = qdisc_pkt_len(skb);
131 	ret = qdisc_enqueue(skb, child, to_free);
132 	if (likely(ret == NET_XMIT_SUCCESS)) {
133 		sch->qstats.backlog += len;
134 		sch->q.qlen++;
135 	} else if (net_xmit_drop_count(ret)) {
136 		q->stats.pdrop++;
137 		qdisc_qstats_drop(sch);
138 	}
139 	return ret;
140 
141 congestion_drop:
142 	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
143 	if (!skb)
144 		return NET_XMIT_CN | ret;
145 
146 	qdisc_drop(skb, sch, to_free);
147 	return NET_XMIT_CN;
148 }
149 
150 static struct sk_buff *red_dequeue(struct Qdisc *sch)
151 {
152 	struct sk_buff *skb;
153 	struct red_sched_data *q = qdisc_priv(sch);
154 	struct Qdisc *child = q->qdisc;
155 
156 	skb = child->dequeue(child);
157 	if (skb) {
158 		qdisc_bstats_update(sch, skb);
159 		qdisc_qstats_backlog_dec(sch, skb);
160 		sch->q.qlen--;
161 	} else {
162 		if (!red_is_idling(&q->vars))
163 			red_start_of_idle_period(&q->vars);
164 	}
165 	return skb;
166 }
167 
168 static struct sk_buff *red_peek(struct Qdisc *sch)
169 {
170 	struct red_sched_data *q = qdisc_priv(sch);
171 	struct Qdisc *child = q->qdisc;
172 
173 	return child->ops->peek(child);
174 }
175 
176 static void red_reset(struct Qdisc *sch)
177 {
178 	struct red_sched_data *q = qdisc_priv(sch);
179 
180 	qdisc_reset(q->qdisc);
181 	red_restart(&q->vars);
182 }
183 
184 static int red_offload(struct Qdisc *sch, bool enable)
185 {
186 	struct red_sched_data *q = qdisc_priv(sch);
187 	struct net_device *dev = qdisc_dev(sch);
188 	struct tc_red_qopt_offload opt = {
189 		.handle = sch->handle,
190 		.parent = sch->parent,
191 	};
192 
193 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
194 		return -EOPNOTSUPP;
195 
196 	if (enable) {
197 		opt.command = TC_RED_REPLACE;
198 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
199 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
200 		opt.set.probability = q->parms.max_P;
201 		opt.set.limit = q->limit;
202 		opt.set.is_ecn = red_use_ecn(q);
203 		opt.set.is_harddrop = red_use_harddrop(q);
204 		opt.set.is_nodrop = red_use_nodrop(q);
205 		opt.set.qstats = &sch->qstats;
206 	} else {
207 		opt.command = TC_RED_DESTROY;
208 	}
209 
210 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
211 }
212 
213 static void red_destroy(struct Qdisc *sch)
214 {
215 	struct red_sched_data *q = qdisc_priv(sch);
216 
217 	tcf_qevent_destroy(&q->qe_mark, sch);
218 	tcf_qevent_destroy(&q->qe_early_drop, sch);
219 	del_timer_sync(&q->adapt_timer);
220 	red_offload(sch, false);
221 	qdisc_put(q->qdisc);
222 }
223 
224 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
225 	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
226 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
227 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
228 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
229 	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
230 	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
231 	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
232 };
233 
234 static int __red_change(struct Qdisc *sch, struct nlattr **tb,
235 			struct netlink_ext_ack *extack)
236 {
237 	struct Qdisc *old_child = NULL, *child = NULL;
238 	struct red_sched_data *q = qdisc_priv(sch);
239 	struct nla_bitfield32 flags_bf;
240 	struct tc_red_qopt *ctl;
241 	unsigned char userbits;
242 	unsigned char flags;
243 	int err;
244 	u32 max_P;
245 	u8 *stab;
246 
247 	if (tb[TCA_RED_PARMS] == NULL ||
248 	    tb[TCA_RED_STAB] == NULL)
249 		return -EINVAL;
250 
251 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
252 
253 	ctl = nla_data(tb[TCA_RED_PARMS]);
254 	stab = nla_data(tb[TCA_RED_STAB]);
255 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
256 			      ctl->Scell_log, stab))
257 		return -EINVAL;
258 
259 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
260 			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
261 			    &flags_bf, &userbits, extack);
262 	if (err)
263 		return err;
264 
265 	if (ctl->limit > 0) {
266 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
267 					 extack);
268 		if (IS_ERR(child))
269 			return PTR_ERR(child);
270 
271 		/* child is fifo, no need to check for noop_qdisc */
272 		qdisc_hash_add(child, true);
273 	}
274 
275 	sch_tree_lock(sch);
276 
277 	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
278 	err = red_validate_flags(flags, extack);
279 	if (err)
280 		goto unlock_out;
281 
282 	q->flags = flags;
283 	q->userbits = userbits;
284 	q->limit = ctl->limit;
285 	if (child) {
286 		qdisc_tree_flush_backlog(q->qdisc);
287 		old_child = q->qdisc;
288 		q->qdisc = child;
289 	}
290 
291 	red_set_parms(&q->parms,
292 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
293 		      ctl->Plog, ctl->Scell_log,
294 		      stab,
295 		      max_P);
296 	red_set_vars(&q->vars);
297 
298 	del_timer(&q->adapt_timer);
299 	if (ctl->flags & TC_RED_ADAPTATIVE)
300 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
301 
302 	if (!q->qdisc->q.qlen)
303 		red_start_of_idle_period(&q->vars);
304 
305 	sch_tree_unlock(sch);
306 
307 	red_offload(sch, true);
308 
309 	if (old_child)
310 		qdisc_put(old_child);
311 	return 0;
312 
313 unlock_out:
314 	sch_tree_unlock(sch);
315 	if (child)
316 		qdisc_put(child);
317 	return err;
318 }
319 
320 static inline void red_adaptative_timer(struct timer_list *t)
321 {
322 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
323 	struct Qdisc *sch = q->sch;
324 	spinlock_t *root_lock;
325 
326 	rcu_read_lock();
327 	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
328 	spin_lock(root_lock);
329 	red_adaptative_algo(&q->parms, &q->vars);
330 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
331 	spin_unlock(root_lock);
332 	rcu_read_unlock();
333 }
334 
335 static int red_init(struct Qdisc *sch, struct nlattr *opt,
336 		    struct netlink_ext_ack *extack)
337 {
338 	struct red_sched_data *q = qdisc_priv(sch);
339 	struct nlattr *tb[TCA_RED_MAX + 1];
340 	int err;
341 
342 	q->qdisc = &noop_qdisc;
343 	q->sch = sch;
344 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
345 
346 	if (!opt)
347 		return -EINVAL;
348 
349 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
350 					  extack);
351 	if (err < 0)
352 		return err;
353 
354 	err = __red_change(sch, tb, extack);
355 	if (err)
356 		return err;
357 
358 	err = tcf_qevent_init(&q->qe_early_drop, sch,
359 			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
360 			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
361 	if (err)
362 		return err;
363 
364 	return tcf_qevent_init(&q->qe_mark, sch,
365 			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
366 			       tb[TCA_RED_MARK_BLOCK], extack);
367 }
368 
369 static int red_change(struct Qdisc *sch, struct nlattr *opt,
370 		      struct netlink_ext_ack *extack)
371 {
372 	struct red_sched_data *q = qdisc_priv(sch);
373 	struct nlattr *tb[TCA_RED_MAX + 1];
374 	int err;
375 
376 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
377 					  extack);
378 	if (err < 0)
379 		return err;
380 
381 	err = tcf_qevent_validate_change(&q->qe_early_drop,
382 					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
383 	if (err)
384 		return err;
385 
386 	err = tcf_qevent_validate_change(&q->qe_mark,
387 					 tb[TCA_RED_MARK_BLOCK], extack);
388 	if (err)
389 		return err;
390 
391 	return __red_change(sch, tb, extack);
392 }
393 
394 static int red_dump_offload_stats(struct Qdisc *sch)
395 {
396 	struct tc_red_qopt_offload hw_stats = {
397 		.command = TC_RED_STATS,
398 		.handle = sch->handle,
399 		.parent = sch->parent,
400 		{
401 			.stats.bstats = &sch->bstats,
402 			.stats.qstats = &sch->qstats,
403 		},
404 	};
405 
406 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
407 }
408 
409 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
410 {
411 	struct red_sched_data *q = qdisc_priv(sch);
412 	struct nlattr *opts = NULL;
413 	struct tc_red_qopt opt = {
414 		.limit		= q->limit,
415 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
416 				  q->userbits,
417 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
418 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
419 		.Wlog		= q->parms.Wlog,
420 		.Plog		= q->parms.Plog,
421 		.Scell_log	= q->parms.Scell_log,
422 	};
423 	int err;
424 
425 	err = red_dump_offload_stats(sch);
426 	if (err)
427 		goto nla_put_failure;
428 
429 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
430 	if (opts == NULL)
431 		goto nla_put_failure;
432 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
433 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
434 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
435 			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
436 	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
437 	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
438 		goto nla_put_failure;
439 	return nla_nest_end(skb, opts);
440 
441 nla_put_failure:
442 	nla_nest_cancel(skb, opts);
443 	return -EMSGSIZE;
444 }
445 
446 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
447 {
448 	struct red_sched_data *q = qdisc_priv(sch);
449 	struct net_device *dev = qdisc_dev(sch);
450 	struct tc_red_xstats st = {0};
451 
452 	if (sch->flags & TCQ_F_OFFLOADED) {
453 		struct tc_red_qopt_offload hw_stats_request = {
454 			.command = TC_RED_XSTATS,
455 			.handle = sch->handle,
456 			.parent = sch->parent,
457 			{
458 				.xstats = &q->stats,
459 			},
460 		};
461 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
462 					      &hw_stats_request);
463 	}
464 	st.early = q->stats.prob_drop + q->stats.forced_drop;
465 	st.pdrop = q->stats.pdrop;
466 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
467 
468 	return gnet_stats_copy_app(d, &st, sizeof(st));
469 }
470 
471 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
472 			  struct sk_buff *skb, struct tcmsg *tcm)
473 {
474 	struct red_sched_data *q = qdisc_priv(sch);
475 
476 	tcm->tcm_handle |= TC_H_MIN(1);
477 	tcm->tcm_info = q->qdisc->handle;
478 	return 0;
479 }
480 
481 static void red_graft_offload(struct Qdisc *sch,
482 			      struct Qdisc *new, struct Qdisc *old,
483 			      struct netlink_ext_ack *extack)
484 {
485 	struct tc_red_qopt_offload graft_offload = {
486 		.handle		= sch->handle,
487 		.parent		= sch->parent,
488 		.child_handle	= new->handle,
489 		.command	= TC_RED_GRAFT,
490 	};
491 
492 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
493 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
494 }
495 
496 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
497 		     struct Qdisc **old, struct netlink_ext_ack *extack)
498 {
499 	struct red_sched_data *q = qdisc_priv(sch);
500 
501 	if (new == NULL)
502 		new = &noop_qdisc;
503 
504 	*old = qdisc_replace(sch, new, &q->qdisc);
505 
506 	red_graft_offload(sch, new, *old, extack);
507 	return 0;
508 }
509 
510 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
511 {
512 	struct red_sched_data *q = qdisc_priv(sch);
513 	return q->qdisc;
514 }
515 
516 static unsigned long red_find(struct Qdisc *sch, u32 classid)
517 {
518 	return 1;
519 }
520 
521 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
522 {
523 	if (!walker->stop) {
524 		tc_qdisc_stats_dump(sch, 1, walker);
525 	}
526 }
527 
528 static const struct Qdisc_class_ops red_class_ops = {
529 	.graft		=	red_graft,
530 	.leaf		=	red_leaf,
531 	.find		=	red_find,
532 	.walk		=	red_walk,
533 	.dump		=	red_dump_class,
534 };
535 
536 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
537 	.id		=	"red",
538 	.priv_size	=	sizeof(struct red_sched_data),
539 	.cl_ops		=	&red_class_ops,
540 	.enqueue	=	red_enqueue,
541 	.dequeue	=	red_dequeue,
542 	.peek		=	red_peek,
543 	.init		=	red_init,
544 	.reset		=	red_reset,
545 	.destroy	=	red_destroy,
546 	.change		=	red_change,
547 	.dump		=	red_dump,
548 	.dump_stats	=	red_dump_stats,
549 	.owner		=	THIS_MODULE,
550 };
551 
552 static int __init red_module_init(void)
553 {
554 	return register_qdisc(&red_qdisc_ops);
555 }
556 
557 static void __exit red_module_exit(void)
558 {
559 	unregister_qdisc(&red_qdisc_ops);
560 }
561 
562 module_init(red_module_init)
563 module_exit(red_module_exit)
564 
565 MODULE_LICENSE("GPL");
566