xref: /openbmc/linux/net/sched/sch_choke.c (revision 867e6d38)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sched/sch_choke.c	CHOKE scheduler
4  *
5  * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
6  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
7  */
8 
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/skbuff.h>
13 #include <linux/vmalloc.h>
14 #include <net/pkt_sched.h>
15 #include <net/pkt_cls.h>
16 #include <net/inet_ecn.h>
17 #include <net/red.h>
18 #include <net/flow_dissector.h>
19 
20 /*
21    CHOKe stateless AQM for fair bandwidth allocation
22    =================================================
23 
24    CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
25    unresponsive flows) is a variant of RED that penalizes misbehaving flows but
26    maintains no flow state. The difference from RED is an additional step
27    during the enqueuing process. If average queue size is over the
28    low threshold (qmin), a packet is chosen at random from the queue.
29    If both the new and chosen packet are from the same flow, both
30    are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
31    needs to access packets in queue randomly. It has a minimal class
32    interface to allow overriding the builtin flow classifier with
33    filters.
34 
35    Source:
36    R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
37    Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
38    IEEE INFOCOM, 2000.
39 
40    A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
41    Characteristics", IEEE/ACM Transactions on Networking, 2004
42 
43  */
44 
45 /* Upper bound on size of sk_buff table (packets) */
46 #define CHOKE_MAX_QUEUE	(128*1024 - 1)
47 
48 struct choke_sched_data {
49 /* Parameters */
50 	u32		 limit;
51 	unsigned char	 flags;
52 
53 	struct red_parms parms;
54 
55 /* Variables */
56 	struct red_vars  vars;
57 	struct {
58 		u32	prob_drop;	/* Early probability drops */
59 		u32	prob_mark;	/* Early probability marks */
60 		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
61 		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
62 		u32	pdrop;          /* Drops due to queue limits */
63 		u32	other;          /* Drops due to drop() calls */
64 		u32	matched;	/* Drops to flow match */
65 	} stats;
66 
67 	unsigned int	 head;
68 	unsigned int	 tail;
69 
70 	unsigned int	 tab_mask; /* size - 1 */
71 
72 	struct sk_buff **tab;
73 };
74 
75 /* number of elements in queue including holes */
76 static unsigned int choke_len(const struct choke_sched_data *q)
77 {
78 	return (q->tail - q->head) & q->tab_mask;
79 }
80 
81 /* Is ECN parameter configured */
82 static int use_ecn(const struct choke_sched_data *q)
83 {
84 	return q->flags & TC_RED_ECN;
85 }
86 
87 /* Should packets over max just be dropped (versus marked) */
88 static int use_harddrop(const struct choke_sched_data *q)
89 {
90 	return q->flags & TC_RED_HARDDROP;
91 }
92 
93 /* Move head pointer forward to skip over holes */
94 static void choke_zap_head_holes(struct choke_sched_data *q)
95 {
96 	do {
97 		q->head = (q->head + 1) & q->tab_mask;
98 		if (q->head == q->tail)
99 			break;
100 	} while (q->tab[q->head] == NULL);
101 }
102 
103 /* Move tail pointer backwards to reuse holes */
104 static void choke_zap_tail_holes(struct choke_sched_data *q)
105 {
106 	do {
107 		q->tail = (q->tail - 1) & q->tab_mask;
108 		if (q->head == q->tail)
109 			break;
110 	} while (q->tab[q->tail] == NULL);
111 }
112 
113 /* Drop packet from queue array by creating a "hole" */
114 static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
115 			      struct sk_buff **to_free)
116 {
117 	struct choke_sched_data *q = qdisc_priv(sch);
118 	struct sk_buff *skb = q->tab[idx];
119 
120 	q->tab[idx] = NULL;
121 
122 	if (idx == q->head)
123 		choke_zap_head_holes(q);
124 	if (idx == q->tail)
125 		choke_zap_tail_holes(q);
126 
127 	qdisc_qstats_backlog_dec(sch, skb);
128 	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
129 	qdisc_drop(skb, sch, to_free);
130 	--sch->q.qlen;
131 }
132 
133 struct choke_skb_cb {
134 	u8			keys_valid;
135 	struct			flow_keys_digest keys;
136 };
137 
138 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
139 {
140 	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
141 	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
142 }
143 
144 /*
145  * Compare flow of two packets
146  *  Returns true only if source and destination address and port match.
147  *          false for special cases
148  */
149 static bool choke_match_flow(struct sk_buff *skb1,
150 			     struct sk_buff *skb2)
151 {
152 	struct flow_keys temp;
153 
154 	if (skb1->protocol != skb2->protocol)
155 		return false;
156 
157 	if (!choke_skb_cb(skb1)->keys_valid) {
158 		choke_skb_cb(skb1)->keys_valid = 1;
159 		skb_flow_dissect_flow_keys(skb1, &temp, 0);
160 		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
161 	}
162 
163 	if (!choke_skb_cb(skb2)->keys_valid) {
164 		choke_skb_cb(skb2)->keys_valid = 1;
165 		skb_flow_dissect_flow_keys(skb2, &temp, 0);
166 		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
167 	}
168 
169 	return !memcmp(&choke_skb_cb(skb1)->keys,
170 		       &choke_skb_cb(skb2)->keys,
171 		       sizeof(choke_skb_cb(skb1)->keys));
172 }
173 
174 /*
175  * Select a packet at random from queue
176  * HACK: since queue can have holes from previous deletion; retry several
177  *   times to find a random skb but then just give up and return the head
178  * Will return NULL if queue is empty (q->head == q->tail)
179  */
180 static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
181 					 unsigned int *pidx)
182 {
183 	struct sk_buff *skb;
184 	int retrys = 3;
185 
186 	do {
187 		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
188 		skb = q->tab[*pidx];
189 		if (skb)
190 			return skb;
191 	} while (--retrys > 0);
192 
193 	return q->tab[*pidx = q->head];
194 }
195 
196 /*
197  * Compare new packet with random packet in queue
198  * returns true if matched and sets *pidx
199  */
200 static bool choke_match_random(const struct choke_sched_data *q,
201 			       struct sk_buff *nskb,
202 			       unsigned int *pidx)
203 {
204 	struct sk_buff *oskb;
205 
206 	if (q->head == q->tail)
207 		return false;
208 
209 	oskb = choke_peek_random(q, pidx);
210 	return choke_match_flow(oskb, nskb);
211 }
212 
213 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
214 			 struct sk_buff **to_free)
215 {
216 	struct choke_sched_data *q = qdisc_priv(sch);
217 	const struct red_parms *p = &q->parms;
218 
219 	choke_skb_cb(skb)->keys_valid = 0;
220 	/* Compute average queue usage (see RED) */
221 	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
222 	if (red_is_idling(&q->vars))
223 		red_end_of_idle_period(&q->vars);
224 
225 	/* Is queue small? */
226 	if (q->vars.qavg <= p->qth_min)
227 		q->vars.qcount = -1;
228 	else {
229 		unsigned int idx;
230 
231 		/* Draw a packet at random from queue and compare flow */
232 		if (choke_match_random(q, skb, &idx)) {
233 			q->stats.matched++;
234 			choke_drop_by_idx(sch, idx, to_free);
235 			goto congestion_drop;
236 		}
237 
238 		/* Queue is large, always mark/drop */
239 		if (q->vars.qavg > p->qth_max) {
240 			q->vars.qcount = -1;
241 
242 			qdisc_qstats_overlimit(sch);
243 			if (use_harddrop(q) || !use_ecn(q) ||
244 			    !INET_ECN_set_ce(skb)) {
245 				q->stats.forced_drop++;
246 				goto congestion_drop;
247 			}
248 
249 			q->stats.forced_mark++;
250 		} else if (++q->vars.qcount) {
251 			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
252 				q->vars.qcount = 0;
253 				q->vars.qR = red_random(p);
254 
255 				qdisc_qstats_overlimit(sch);
256 				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
257 					q->stats.prob_drop++;
258 					goto congestion_drop;
259 				}
260 
261 				q->stats.prob_mark++;
262 			}
263 		} else
264 			q->vars.qR = red_random(p);
265 	}
266 
267 	/* Admit new packet */
268 	if (sch->q.qlen < q->limit) {
269 		q->tab[q->tail] = skb;
270 		q->tail = (q->tail + 1) & q->tab_mask;
271 		++sch->q.qlen;
272 		qdisc_qstats_backlog_inc(sch, skb);
273 		return NET_XMIT_SUCCESS;
274 	}
275 
276 	q->stats.pdrop++;
277 	return qdisc_drop(skb, sch, to_free);
278 
279 congestion_drop:
280 	qdisc_drop(skb, sch, to_free);
281 	return NET_XMIT_CN;
282 }
283 
284 static struct sk_buff *choke_dequeue(struct Qdisc *sch)
285 {
286 	struct choke_sched_data *q = qdisc_priv(sch);
287 	struct sk_buff *skb;
288 
289 	if (q->head == q->tail) {
290 		if (!red_is_idling(&q->vars))
291 			red_start_of_idle_period(&q->vars);
292 		return NULL;
293 	}
294 
295 	skb = q->tab[q->head];
296 	q->tab[q->head] = NULL;
297 	choke_zap_head_holes(q);
298 	--sch->q.qlen;
299 	qdisc_qstats_backlog_dec(sch, skb);
300 	qdisc_bstats_update(sch, skb);
301 
302 	return skb;
303 }
304 
305 static void choke_reset(struct Qdisc *sch)
306 {
307 	struct choke_sched_data *q = qdisc_priv(sch);
308 
309 	while (q->head != q->tail) {
310 		struct sk_buff *skb = q->tab[q->head];
311 
312 		q->head = (q->head + 1) & q->tab_mask;
313 		if (!skb)
314 			continue;
315 		rtnl_qdisc_drop(skb, sch);
316 	}
317 
318 	sch->q.qlen = 0;
319 	sch->qstats.backlog = 0;
320 	if (q->tab)
321 		memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
322 	q->head = q->tail = 0;
323 	red_restart(&q->vars);
324 }
325 
326 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
327 	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
328 	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
329 	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
330 };
331 
332 
333 static void choke_free(void *addr)
334 {
335 	kvfree(addr);
336 }
337 
338 static int choke_change(struct Qdisc *sch, struct nlattr *opt,
339 			struct netlink_ext_ack *extack)
340 {
341 	struct choke_sched_data *q = qdisc_priv(sch);
342 	struct nlattr *tb[TCA_CHOKE_MAX + 1];
343 	const struct tc_red_qopt *ctl;
344 	int err;
345 	struct sk_buff **old = NULL;
346 	unsigned int mask;
347 	u32 max_P;
348 	u8 *stab;
349 
350 	if (opt == NULL)
351 		return -EINVAL;
352 
353 	err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
354 					  choke_policy, NULL);
355 	if (err < 0)
356 		return err;
357 
358 	if (tb[TCA_CHOKE_PARMS] == NULL ||
359 	    tb[TCA_CHOKE_STAB] == NULL)
360 		return -EINVAL;
361 
362 	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
363 
364 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
365 	stab = nla_data(tb[TCA_CHOKE_STAB]);
366 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
367 		return -EINVAL;
368 
369 	if (ctl->limit > CHOKE_MAX_QUEUE)
370 		return -EINVAL;
371 
372 	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
373 	if (mask != q->tab_mask) {
374 		struct sk_buff **ntab;
375 
376 		ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
377 		if (!ntab)
378 			return -ENOMEM;
379 
380 		sch_tree_lock(sch);
381 		old = q->tab;
382 		if (old) {
383 			unsigned int oqlen = sch->q.qlen, tail = 0;
384 			unsigned dropped = 0;
385 
386 			while (q->head != q->tail) {
387 				struct sk_buff *skb = q->tab[q->head];
388 
389 				q->head = (q->head + 1) & q->tab_mask;
390 				if (!skb)
391 					continue;
392 				if (tail < mask) {
393 					ntab[tail++] = skb;
394 					continue;
395 				}
396 				dropped += qdisc_pkt_len(skb);
397 				qdisc_qstats_backlog_dec(sch, skb);
398 				--sch->q.qlen;
399 				rtnl_qdisc_drop(skb, sch);
400 			}
401 			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
402 			q->head = 0;
403 			q->tail = tail;
404 		}
405 
406 		q->tab_mask = mask;
407 		q->tab = ntab;
408 	} else
409 		sch_tree_lock(sch);
410 
411 	q->flags = ctl->flags;
412 	q->limit = ctl->limit;
413 
414 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
415 		      ctl->Plog, ctl->Scell_log,
416 		      stab,
417 		      max_P);
418 	red_set_vars(&q->vars);
419 
420 	if (q->head == q->tail)
421 		red_end_of_idle_period(&q->vars);
422 
423 	sch_tree_unlock(sch);
424 	choke_free(old);
425 	return 0;
426 }
427 
428 static int choke_init(struct Qdisc *sch, struct nlattr *opt,
429 		      struct netlink_ext_ack *extack)
430 {
431 	return choke_change(sch, opt, extack);
432 }
433 
434 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
435 {
436 	struct choke_sched_data *q = qdisc_priv(sch);
437 	struct nlattr *opts = NULL;
438 	struct tc_red_qopt opt = {
439 		.limit		= q->limit,
440 		.flags		= q->flags,
441 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
442 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
443 		.Wlog		= q->parms.Wlog,
444 		.Plog		= q->parms.Plog,
445 		.Scell_log	= q->parms.Scell_log,
446 	};
447 
448 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
449 	if (opts == NULL)
450 		goto nla_put_failure;
451 
452 	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
453 	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
454 		goto nla_put_failure;
455 	return nla_nest_end(skb, opts);
456 
457 nla_put_failure:
458 	nla_nest_cancel(skb, opts);
459 	return -EMSGSIZE;
460 }
461 
462 static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
463 {
464 	struct choke_sched_data *q = qdisc_priv(sch);
465 	struct tc_choke_xstats st = {
466 		.early	= q->stats.prob_drop + q->stats.forced_drop,
467 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
468 		.pdrop	= q->stats.pdrop,
469 		.other	= q->stats.other,
470 		.matched = q->stats.matched,
471 	};
472 
473 	return gnet_stats_copy_app(d, &st, sizeof(st));
474 }
475 
476 static void choke_destroy(struct Qdisc *sch)
477 {
478 	struct choke_sched_data *q = qdisc_priv(sch);
479 
480 	choke_free(q->tab);
481 }
482 
483 static struct sk_buff *choke_peek_head(struct Qdisc *sch)
484 {
485 	struct choke_sched_data *q = qdisc_priv(sch);
486 
487 	return (q->head != q->tail) ? q->tab[q->head] : NULL;
488 }
489 
490 static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
491 	.id		=	"choke",
492 	.priv_size	=	sizeof(struct choke_sched_data),
493 
494 	.enqueue	=	choke_enqueue,
495 	.dequeue	=	choke_dequeue,
496 	.peek		=	choke_peek_head,
497 	.init		=	choke_init,
498 	.destroy	=	choke_destroy,
499 	.reset		=	choke_reset,
500 	.change		=	choke_change,
501 	.dump		=	choke_dump,
502 	.dump_stats	=	choke_dump_stats,
503 	.owner		=	THIS_MODULE,
504 };
505 
506 static int __init choke_module_init(void)
507 {
508 	return register_qdisc(&choke_qdisc_ops);
509 }
510 
511 static void __exit choke_module_exit(void)
512 {
513 	unregister_qdisc(&choke_qdisc_ops);
514 }
515 
516 module_init(choke_module_init)
517 module_exit(choke_module_exit)
518 
519 MODULE_LICENSE("GPL");
520