xref: /openbmc/linux/net/sched/sch_red.c (revision 5ebfa90bdd3d78f4967dc0095daf755989a999e0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
35 
36 struct red_sched_data {
37 	u32			limit;		/* HARD maximal queue length */
38 
39 	unsigned char		flags;
40 	/* Non-flags in tc_red_qopt.flags. */
41 	unsigned char		userbits;
42 
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 	struct tcf_qevent	qe_early_drop;
50 	struct tcf_qevent	qe_mark;
51 };
52 
/*
 * Mask of flag bits RED accepts: used as the TCA_RED_FLAGS bitfield policy
 * in red_policy, passed to red_get_flags(), and reported by red_dump().
 */
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54 
55 static inline int red_use_ecn(struct red_sched_data *q)
56 {
57 	return q->flags & TC_RED_ECN;
58 }
59 
60 static inline int red_use_harddrop(struct red_sched_data *q)
61 {
62 	return q->flags & TC_RED_HARDDROP;
63 }
64 
65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 	return q->flags & TC_RED_NODROP;
68 }
69 
70 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
71 		       struct sk_buff **to_free)
72 {
73 	struct red_sched_data *q = qdisc_priv(sch);
74 	struct Qdisc *child = q->qdisc;
75 	unsigned int len;
76 	int ret;
77 
78 	q->vars.qavg = red_calc_qavg(&q->parms,
79 				     &q->vars,
80 				     child->qstats.backlog);
81 
82 	if (red_is_idling(&q->vars))
83 		red_end_of_idle_period(&q->vars);
84 
85 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
86 	case RED_DONT_MARK:
87 		break;
88 
89 	case RED_PROB_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (!red_use_ecn(q)) {
92 			q->stats.prob_drop++;
93 			goto congestion_drop;
94 		}
95 
96 		if (INET_ECN_set_ce(skb)) {
97 			q->stats.prob_mark++;
98 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
99 			if (!skb)
100 				return NET_XMIT_CN | ret;
101 		} else if (!red_use_nodrop(q)) {
102 			q->stats.prob_drop++;
103 			goto congestion_drop;
104 		}
105 
106 		/* Non-ECT packet in ECN nodrop mode: queue it. */
107 		break;
108 
109 	case RED_HARD_MARK:
110 		qdisc_qstats_overlimit(sch);
111 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
112 			q->stats.forced_drop++;
113 			goto congestion_drop;
114 		}
115 
116 		if (INET_ECN_set_ce(skb)) {
117 			q->stats.forced_mark++;
118 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
119 			if (!skb)
120 				return NET_XMIT_CN | ret;
121 		} else if (!red_use_nodrop(q)) {
122 			q->stats.forced_drop++;
123 			goto congestion_drop;
124 		}
125 
126 		/* Non-ECT packet in ECN nodrop mode: queue it. */
127 		break;
128 	}
129 
130 	len = qdisc_pkt_len(skb);
131 	ret = qdisc_enqueue(skb, child, to_free);
132 	if (likely(ret == NET_XMIT_SUCCESS)) {
133 		sch->qstats.backlog += len;
134 		sch->q.qlen++;
135 	} else if (net_xmit_drop_count(ret)) {
136 		q->stats.pdrop++;
137 		qdisc_qstats_drop(sch);
138 	}
139 	return ret;
140 
141 congestion_drop:
142 	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
143 	if (!skb)
144 		return NET_XMIT_CN | ret;
145 
146 	qdisc_drop(skb, sch, to_free);
147 	return NET_XMIT_CN;
148 }
149 
150 static struct sk_buff *red_dequeue(struct Qdisc *sch)
151 {
152 	struct sk_buff *skb;
153 	struct red_sched_data *q = qdisc_priv(sch);
154 	struct Qdisc *child = q->qdisc;
155 
156 	skb = child->dequeue(child);
157 	if (skb) {
158 		qdisc_bstats_update(sch, skb);
159 		qdisc_qstats_backlog_dec(sch, skb);
160 		sch->q.qlen--;
161 	} else {
162 		if (!red_is_idling(&q->vars))
163 			red_start_of_idle_period(&q->vars);
164 	}
165 	return skb;
166 }
167 
168 static struct sk_buff *red_peek(struct Qdisc *sch)
169 {
170 	struct red_sched_data *q = qdisc_priv(sch);
171 	struct Qdisc *child = q->qdisc;
172 
173 	return child->ops->peek(child);
174 }
175 
176 static void red_reset(struct Qdisc *sch)
177 {
178 	struct red_sched_data *q = qdisc_priv(sch);
179 
180 	qdisc_reset(q->qdisc);
181 	red_restart(&q->vars);
182 }
183 
184 static int red_offload(struct Qdisc *sch, bool enable)
185 {
186 	struct red_sched_data *q = qdisc_priv(sch);
187 	struct net_device *dev = qdisc_dev(sch);
188 	struct tc_red_qopt_offload opt = {
189 		.handle = sch->handle,
190 		.parent = sch->parent,
191 	};
192 
193 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
194 		return -EOPNOTSUPP;
195 
196 	if (enable) {
197 		opt.command = TC_RED_REPLACE;
198 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
199 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
200 		opt.set.probability = q->parms.max_P;
201 		opt.set.limit = q->limit;
202 		opt.set.is_ecn = red_use_ecn(q);
203 		opt.set.is_harddrop = red_use_harddrop(q);
204 		opt.set.is_nodrop = red_use_nodrop(q);
205 		opt.set.qstats = &sch->qstats;
206 	} else {
207 		opt.command = TC_RED_DESTROY;
208 	}
209 
210 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
211 }
212 
213 static void red_destroy(struct Qdisc *sch)
214 {
215 	struct red_sched_data *q = qdisc_priv(sch);
216 
217 	tcf_qevent_destroy(&q->qe_mark, sch);
218 	tcf_qevent_destroy(&q->qe_early_drop, sch);
219 	del_timer_sync(&q->adapt_timer);
220 	red_offload(sch, false);
221 	qdisc_put(q->qdisc);
222 }
223 
224 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
225 	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
226 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
227 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
228 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
229 	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
230 	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
231 	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
232 };
233 
234 static int __red_change(struct Qdisc *sch, struct nlattr **tb,
235 			struct netlink_ext_ack *extack)
236 {
237 	struct Qdisc *old_child = NULL, *child = NULL;
238 	struct red_sched_data *q = qdisc_priv(sch);
239 	struct nla_bitfield32 flags_bf;
240 	struct tc_red_qopt *ctl;
241 	unsigned char userbits;
242 	unsigned char flags;
243 	int err;
244 	u32 max_P;
245 	u8 *stab;
246 
247 	if (tb[TCA_RED_PARMS] == NULL ||
248 	    tb[TCA_RED_STAB] == NULL)
249 		return -EINVAL;
250 
251 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
252 
253 	ctl = nla_data(tb[TCA_RED_PARMS]);
254 	stab = nla_data(tb[TCA_RED_STAB]);
255 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
256 			      ctl->Scell_log, stab))
257 		return -EINVAL;
258 
259 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
260 			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
261 			    &flags_bf, &userbits, extack);
262 	if (err)
263 		return err;
264 
265 	if (ctl->limit > 0) {
266 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
267 					 extack);
268 		if (IS_ERR(child))
269 			return PTR_ERR(child);
270 
271 		/* child is fifo, no need to check for noop_qdisc */
272 		qdisc_hash_add(child, true);
273 	}
274 
275 	sch_tree_lock(sch);
276 
277 	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
278 	err = red_validate_flags(flags, extack);
279 	if (err)
280 		goto unlock_out;
281 
282 	q->flags = flags;
283 	q->userbits = userbits;
284 	q->limit = ctl->limit;
285 	if (child) {
286 		qdisc_tree_flush_backlog(q->qdisc);
287 		old_child = q->qdisc;
288 		q->qdisc = child;
289 	}
290 
291 	red_set_parms(&q->parms,
292 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
293 		      ctl->Plog, ctl->Scell_log,
294 		      stab,
295 		      max_P);
296 	red_set_vars(&q->vars);
297 
298 	del_timer(&q->adapt_timer);
299 	if (ctl->flags & TC_RED_ADAPTATIVE)
300 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
301 
302 	if (!q->qdisc->q.qlen)
303 		red_start_of_idle_period(&q->vars);
304 
305 	sch_tree_unlock(sch);
306 
307 	red_offload(sch, true);
308 
309 	if (old_child)
310 		qdisc_put(old_child);
311 	return 0;
312 
313 unlock_out:
314 	sch_tree_unlock(sch);
315 	if (child)
316 		qdisc_put(child);
317 	return err;
318 }
319 
320 static inline void red_adaptative_timer(struct timer_list *t)
321 {
322 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
323 	struct Qdisc *sch = q->sch;
324 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
325 
326 	spin_lock(root_lock);
327 	red_adaptative_algo(&q->parms, &q->vars);
328 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
329 	spin_unlock(root_lock);
330 }
331 
332 static int red_init(struct Qdisc *sch, struct nlattr *opt,
333 		    struct netlink_ext_ack *extack)
334 {
335 	struct red_sched_data *q = qdisc_priv(sch);
336 	struct nlattr *tb[TCA_RED_MAX + 1];
337 	int err;
338 
339 	q->qdisc = &noop_qdisc;
340 	q->sch = sch;
341 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
342 
343 	if (!opt)
344 		return -EINVAL;
345 
346 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
347 					  extack);
348 	if (err < 0)
349 		return err;
350 
351 	err = __red_change(sch, tb, extack);
352 	if (err)
353 		return err;
354 
355 	err = tcf_qevent_init(&q->qe_early_drop, sch,
356 			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
357 			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
358 	if (err)
359 		return err;
360 
361 	return tcf_qevent_init(&q->qe_mark, sch,
362 			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
363 			       tb[TCA_RED_MARK_BLOCK], extack);
364 }
365 
366 static int red_change(struct Qdisc *sch, struct nlattr *opt,
367 		      struct netlink_ext_ack *extack)
368 {
369 	struct red_sched_data *q = qdisc_priv(sch);
370 	struct nlattr *tb[TCA_RED_MAX + 1];
371 	int err;
372 
373 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
374 					  extack);
375 	if (err < 0)
376 		return err;
377 
378 	err = tcf_qevent_validate_change(&q->qe_early_drop,
379 					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
380 	if (err)
381 		return err;
382 
383 	err = tcf_qevent_validate_change(&q->qe_mark,
384 					 tb[TCA_RED_MARK_BLOCK], extack);
385 	if (err)
386 		return err;
387 
388 	return __red_change(sch, tb, extack);
389 }
390 
391 static int red_dump_offload_stats(struct Qdisc *sch)
392 {
393 	struct tc_red_qopt_offload hw_stats = {
394 		.command = TC_RED_STATS,
395 		.handle = sch->handle,
396 		.parent = sch->parent,
397 		{
398 			.stats.bstats = &sch->bstats,
399 			.stats.qstats = &sch->qstats,
400 		},
401 	};
402 
403 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
404 }
405 
406 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
407 {
408 	struct red_sched_data *q = qdisc_priv(sch);
409 	struct nlattr *opts = NULL;
410 	struct tc_red_qopt opt = {
411 		.limit		= q->limit,
412 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
413 				  q->userbits,
414 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
415 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
416 		.Wlog		= q->parms.Wlog,
417 		.Plog		= q->parms.Plog,
418 		.Scell_log	= q->parms.Scell_log,
419 	};
420 	int err;
421 
422 	err = red_dump_offload_stats(sch);
423 	if (err)
424 		goto nla_put_failure;
425 
426 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
427 	if (opts == NULL)
428 		goto nla_put_failure;
429 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
430 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
431 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
432 			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
433 	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
434 	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
435 		goto nla_put_failure;
436 	return nla_nest_end(skb, opts);
437 
438 nla_put_failure:
439 	nla_nest_cancel(skb, opts);
440 	return -EMSGSIZE;
441 }
442 
443 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
444 {
445 	struct red_sched_data *q = qdisc_priv(sch);
446 	struct net_device *dev = qdisc_dev(sch);
447 	struct tc_red_xstats st = {0};
448 
449 	if (sch->flags & TCQ_F_OFFLOADED) {
450 		struct tc_red_qopt_offload hw_stats_request = {
451 			.command = TC_RED_XSTATS,
452 			.handle = sch->handle,
453 			.parent = sch->parent,
454 			{
455 				.xstats = &q->stats,
456 			},
457 		};
458 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
459 					      &hw_stats_request);
460 	}
461 	st.early = q->stats.prob_drop + q->stats.forced_drop;
462 	st.pdrop = q->stats.pdrop;
463 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
464 
465 	return gnet_stats_copy_app(d, &st, sizeof(st));
466 }
467 
468 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
469 			  struct sk_buff *skb, struct tcmsg *tcm)
470 {
471 	struct red_sched_data *q = qdisc_priv(sch);
472 
473 	tcm->tcm_handle |= TC_H_MIN(1);
474 	tcm->tcm_info = q->qdisc->handle;
475 	return 0;
476 }
477 
478 static void red_graft_offload(struct Qdisc *sch,
479 			      struct Qdisc *new, struct Qdisc *old,
480 			      struct netlink_ext_ack *extack)
481 {
482 	struct tc_red_qopt_offload graft_offload = {
483 		.handle		= sch->handle,
484 		.parent		= sch->parent,
485 		.child_handle	= new->handle,
486 		.command	= TC_RED_GRAFT,
487 	};
488 
489 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
490 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
491 }
492 
493 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
494 		     struct Qdisc **old, struct netlink_ext_ack *extack)
495 {
496 	struct red_sched_data *q = qdisc_priv(sch);
497 
498 	if (new == NULL)
499 		new = &noop_qdisc;
500 
501 	*old = qdisc_replace(sch, new, &q->qdisc);
502 
503 	red_graft_offload(sch, new, *old, extack);
504 	return 0;
505 }
506 
507 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
508 {
509 	struct red_sched_data *q = qdisc_priv(sch);
510 	return q->qdisc;
511 }
512 
513 static unsigned long red_find(struct Qdisc *sch, u32 classid)
514 {
515 	return 1;
516 }
517 
518 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
519 {
520 	if (!walker->stop) {
521 		tc_qdisc_stats_dump(sch, 1, walker);
522 	}
523 }
524 
525 static const struct Qdisc_class_ops red_class_ops = {
526 	.graft		=	red_graft,
527 	.leaf		=	red_leaf,
528 	.find		=	red_find,
529 	.walk		=	red_walk,
530 	.dump		=	red_dump_class,
531 };
532 
533 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
534 	.id		=	"red",
535 	.priv_size	=	sizeof(struct red_sched_data),
536 	.cl_ops		=	&red_class_ops,
537 	.enqueue	=	red_enqueue,
538 	.dequeue	=	red_dequeue,
539 	.peek		=	red_peek,
540 	.init		=	red_init,
541 	.reset		=	red_reset,
542 	.destroy	=	red_destroy,
543 	.change		=	red_change,
544 	.dump		=	red_dump,
545 	.dump_stats	=	red_dump_stats,
546 	.owner		=	THIS_MODULE,
547 };
548 
549 static int __init red_module_init(void)
550 {
551 	return register_qdisc(&red_qdisc_ops);
552 }
553 
554 static void __exit red_module_exit(void)
555 {
556 	unregister_qdisc(&red_qdisc_ops);
557 }
558 
/* Hook the init/exit functions into module load and unload. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
563