/*
 * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>:
 *              Init --  EINVAL when opt undefined
 */
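/*
 * Overview:
 *
 * prio is a classful qdisc with up to TCQ_PRIO_BANDS bands.  A packet is
 * mapped to a band either by an attached classifier or by the static
 * prio2band[] table indexed with skb->priority; prio_dequeue() always
 * drains lower-numbered (higher-priority) bands first.  The same
 * implementation is registered a second time as "rr", which differs only
 * in its dequeue hook: rr_dequeue() services the bands round-robin.
 *
 * Illustrative userspace setup (iproute2 syntax shown as an example, not
 * part of this file; the priomap below is the traditional default):
 *
 *   tc qdisc add dev eth0 root handle 1: prio bands 3 \
 *       priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 */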
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
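/*
 * Per-qdisc private data.  prio2band[] maps the priority value derived
 * from skb->priority to a band index; queues[] holds one child qdisc per
 * band, with unused slots pointing at noop_qdisc; mq is set when the
 * bands are being mapped onto the hardware subqueues of a multiqueue
 * device; curband is only used by the "rr" dequeue variant.
 */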
struct prio_sched_data
{
	int bands;
	int curband; /* for round-robin */
	struct tcf_proto *filter_list;
	u8  prio2band[TC_PRIO_MAX+1];
	struct Qdisc *queues[TCQ_PRIO_BANDS];
	int mq;
};
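/*
 * Map an skb to one of the child qdiscs.  If skb->priority does not
 * directly address this qdisc, run the attached classifiers; on a miss
 * (or with no filters attached at all) fall back to the prio2band[]
 * table.  An out-of-range classification result falls back to band
 * prio2band[0].
 */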
static struct Qdisc *
prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	u32 band = skb->priority;
	struct tcf_result res;
	int err;

	*qerr = NET_XMIT_BYPASS;
	if (TC_H_MAJ(skb->priority) != sch->handle) {
		err = tc_classify(skb, q->filter_list, &res);
#ifdef CONFIG_NET_CLS_ACT
		switch (err) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS;
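			/* fall through: like TC_ACT_SHOT the packet is not
			 * enqueued here; only the verdict left in *qerr
			 * differs.
			 */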
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if (!q->filter_list || err < 0) {
			if (TC_H_MAJ(band))
				band = 0;
			band = q->prio2band[band & TC_PRIO_MAX];
			goto out;
		}
		band = res.classid;
	}
	band = TC_H_MIN(band) - 1;
	if (band >= q->bands)
		band = q->prio2band[0];
out:
	if (q->mq)
		skb_set_queue_mapping(skb, band);
	return q->queues[band];
}
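/*
 * Enqueue to the band chosen by prio_classify().  The child's verdict is
 * propagated upwards; the byte/packet counters and qlen are only updated
 * on NET_XMIT_SUCCESS.
 */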
static int
prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct Qdisc *qdisc;
	int ret;

	qdisc = prio_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
	if (qdisc == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
#endif

	ret = qdisc->enqueue(skb, qdisc);
	if (ret == NET_XMIT_SUCCESS) {
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		sch->q.qlen++;
		return NET_XMIT_SUCCESS;
	}
	sch->qstats.drops++;
	return ret;
}
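/*
 * Put a packet back at the head of its band after a failed transmit.
 * Mirrors prio_enqueue(), but goes through the child's ->requeue() and
 * bumps the requeues counter instead of the byte/packet stats.
 */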
static int
prio_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct Qdisc *qdisc;
	int ret;

	qdisc = prio_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
	if (qdisc == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
#endif

	ret = qdisc->ops->requeue(skb, qdisc);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		return NET_XMIT_SUCCESS;
	}
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}
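/*
 * Strict-priority dequeue: scan the bands from 0 (highest priority)
 * upwards and hand back the first packet found.  In multiqueue mode a
 * band whose hardware subqueue is stopped is skipped entirely.
 */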
static struct sk_buff *
prio_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct prio_sched_data *q = qdisc_priv(sch);
	int prio;
	struct Qdisc *qdisc;

	for (prio = 0; prio < q->bands; prio++) {
		/* Check if the target subqueue is available before
		 * pulling an skb.  This way we avoid excessive requeues
		 * for slower queues.
		 */
		if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) {
			qdisc = q->queues[prio];
			skb = qdisc->dequeue(qdisc);
			if (skb) {
				sch->q.qlen--;
				return skb;
			}
		}
	}
	return NULL;
}
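/*
 * Round-robin dequeue, used by the "rr" qdisc.  q->curband remembers
 * where the previous scan stopped, so every band gets a turn instead of
 * band 0 starving the others.
 */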
static struct sk_buff *rr_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct prio_sched_data *q = qdisc_priv(sch);
	struct Qdisc *qdisc;
	int bandcount;

	/* Only take one pass through the queues.  If nothing is available,
	 * return nothing.
	 */
	for (bandcount = 0; bandcount < q->bands; bandcount++) {
		/* Check if the target subqueue is available before
		 * pulling an skb.  This way we avoid excessive requeues
		 * for slower queues.  If the queue is stopped, try the
		 * next queue.
		 */
		if (!__netif_subqueue_stopped(sch->dev,
					    (q->mq ? q->curband : 0))) {
			qdisc = q->queues[q->curband];
			skb = qdisc->dequeue(qdisc);
			if (skb) {
				sch->q.qlen--;
				q->curband++;
				if (q->curband >= q->bands)
					q->curband = 0;
				return skb;
			}
		}
		q->curband++;
		if (q->curband >= q->bands)
			q->curband = 0;
	}
	return NULL;
}
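/*
 * Drop one packet, preferring the lowest-priority (highest-numbered)
 * band that can shed something.
 */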
static unsigned int prio_drop(struct Qdisc *sch)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	int prio;
	unsigned int len;
	struct Qdisc *qdisc;

	for (prio = q->bands - 1; prio >= 0; prio--) {
		qdisc = q->queues[prio];
		if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
			sch->q.qlen--;
			return len;
		}
	}
	return 0;
}

static void
prio_reset(struct Qdisc *sch)
{
	int prio;
	struct prio_sched_data *q = qdisc_priv(sch);

	for (prio = 0; prio < q->bands; prio++)
		qdisc_reset(q->queues[prio]);
	sch->q.qlen = 0;
}

static void
prio_destroy(struct Qdisc *sch)
{
	int prio;
	struct prio_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(q->filter_list);
	for (prio = 0; prio < q->bands; prio++)
		qdisc_destroy(q->queues[prio]);
}
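/*
 * Parse and apply a TCA_OPTIONS payload (struct tc_prio_qopt plus nested
 * attributes).  Validates the band count and priomap, reconciles the
 * band count with the device's subqueue count in multiqueue mode, prunes
 * the child qdiscs of bands that no longer exist and creates default
 * pfifo children for newly enabled bands.
 */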
static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	struct tc_prio_qopt *qopt;
	struct nlattr *tb[TCA_PRIO_MAX + 1];
	int i;

	if (nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
				    sizeof(*qopt)))
		return -EINVAL;
	q->bands = qopt->bands;
	/* If we're multiqueue, make sure the number of incoming bands
	 * matches the number of queues on the device we're associating with.
	 * If the number of bands requested is zero, then set q->bands to
	 * dev->egress_subqueue_count.  Also, the root qdisc must be the
	 * only one that is enabled for multiqueue, since it's the only one
	 * that interacts with the underlying device.
	 */
	q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
	if (q->mq) {
		if (sch->parent != TC_H_ROOT)
			return -EINVAL;
		if (netif_is_multiqueue(sch->dev)) {
			if (q->bands == 0)
				q->bands = sch->dev->egress_subqueue_count;
			else if (q->bands != sch->dev->egress_subqueue_count)
				return -EINVAL;
		} else
			return -EOPNOTSUPP;
	}

	if (q->bands > TCQ_PRIO_BANDS || q->bands < 2)
		return -EINVAL;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (qopt->priomap[i] >= q->bands)
			return -EINVAL;
	}

	sch_tree_lock(sch);
	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);

	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
		if (child != &noop_qdisc) {
			qdisc_tree_decrease_qlen(child, child->q.qlen);
			qdisc_destroy(child);
		}
	}
	sch_tree_unlock(sch);

	for (i = 0; i < q->bands; i++) {
		if (q->queues[i] == &noop_qdisc) {
			struct Qdisc *child;
			child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
						  TC_H_MAKE(sch->handle, i + 1));
			if (child) {
				sch_tree_lock(sch);
				child = xchg(&q->queues[i], child);

				if (child != &noop_qdisc) {
					qdisc_tree_decrease_qlen(child,
								 child->q.qlen);
					qdisc_destroy(child);
				}
				sch_tree_unlock(sch);
			}
		}
	}
	return 0;
}
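/*
 * init: every band starts out as noop_qdisc; configuration is mandatory,
 * so a missing TCA_OPTIONS is rejected with -EINVAL (the 19990609 fix
 * noted in the header) and everything else is delegated to prio_tune().
 */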
static int prio_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	int i;

	for (i = 0; i < TCQ_PRIO_BANDS; i++)
		q->queues[i] = &noop_qdisc;

	if (opt == NULL)
		return -EINVAL;

	return prio_tune(sch, opt);
}
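/*
 * Dump the current configuration in the same compat format that
 * prio_tune() accepts: a struct tc_prio_qopt followed by the optional
 * TCA_PRIO_MQ flag.
 */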
static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;
	struct tc_prio_qopt opt;

	opt.bands = q->bands;
	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);

	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
	if (nest == NULL)
		goto nla_put_failure;
	if (q->mq) {
		if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
			goto nla_put_failure;
	}
	nla_nest_compat_end(skb, nest);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
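/* Attach a new child qdisc to a band, returning the old one in *old. */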
static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		      struct Qdisc **old)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	unsigned long band = arg - 1;

	if (band >= q->bands)
		return -EINVAL;

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->queues[band];
	q->queues[band] = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *
prio_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	unsigned long band = arg - 1;

	if (band >= q->bands)
		return NULL;

	return q->queues[band];
}

static unsigned long prio_get(struct Qdisc *sch, u32 classid)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	unsigned long band = TC_H_MIN(classid);

	if (band - 1 >= q->bands)
		return 0;
	return band;
}

static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
{
	return prio_get(sch, classid);
}

static void prio_put(struct Qdisc *q, unsigned long cl)
{
}
static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg)
{
	unsigned long cl = *arg;
	struct prio_sched_data *q = qdisc_priv(sch);

	if (cl - 1 >= q->bands)
		return -ENOENT;
	return 0;
}
static int prio_delete(struct Qdisc *sch, unsigned long cl)
{
	struct prio_sched_data *q = qdisc_priv(sch);

	if (cl - 1 >= q->bands)
		return -ENOENT;
	return 0;
}
static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
			   struct tcmsg *tcm)
{
	struct prio_sched_data *q = qdisc_priv(sch);

	if (cl - 1 >= q->bands)
		return -ENOENT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	if (q->queues[cl - 1])
		tcm->tcm_info = q->queues[cl - 1]->handle;
	return 0;
}
static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				 struct gnet_dump *d)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	struct Qdisc *cl_q;

	cl_q = q->queues[cl - 1];
	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
		return -1;

	return 0;
}

static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	int prio;

	if (arg->stop)
		return;

	for (prio = 0; prio < q->bands; prio++) {
		if (arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, prio + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct prio_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static const struct Qdisc_class_ops prio_class_ops = {
	.graft		=	prio_graft,
	.leaf		=	prio_leaf,
	.get		=	prio_get,
	.put		=	prio_put,
	.change		=	prio_change,
	.delete		=	prio_delete,
	.walk		=	prio_walk,
	.tcf_chain	=	prio_find_tcf,
	.bind_tcf	=	prio_bind,
	.unbind_tcf	=	prio_put,
	.dump		=	prio_dump_class,
	.dump_stats	=	prio_dump_class_stats,
};
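/*
 * The two qdisc_ops share almost everything: "prio" and "rr" differ only
 * in their dequeue hook (strict priority vs. round-robin).
 */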
static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&prio_class_ops,
	.id		=	"prio",
	.priv_size	=	sizeof(struct prio_sched_data),
	.enqueue	=	prio_enqueue,
	.dequeue	=	prio_dequeue,
	.requeue	=	prio_requeue,
	.drop		=	prio_drop,
	.init		=	prio_init,
	.reset		=	prio_reset,
	.destroy	=	prio_destroy,
	.change		=	prio_tune,
	.dump		=	prio_dump,
	.owner		=	THIS_MODULE,
};

static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&prio_class_ops,
	.id		=	"rr",
	.priv_size	=	sizeof(struct prio_sched_data),
	.enqueue	=	prio_enqueue,
	.dequeue	=	rr_dequeue,
	.requeue	=	prio_requeue,
	.drop		=	prio_drop,
	.init		=	prio_init,
	.reset		=	prio_reset,
	.destroy	=	prio_destroy,
	.change		=	prio_tune,
	.dump		=	prio_dump,
	.owner		=	THIS_MODULE,
};

static int __init prio_module_init(void)
{
	int err;

	err = register_qdisc(&prio_qdisc_ops);
	if (err < 0)
		return err;
	err = register_qdisc(&rr_qdisc_ops);
	if (err < 0)
		unregister_qdisc(&prio_qdisc_ops);
	return err;
}

static void __exit prio_module_exit(void)
{
	unregister_qdisc(&prio_qdisc_ops);
	unregister_qdisc(&rr_qdisc_ops);
}

module_init(prio_module_init)
module_exit(prio_module_exit)

MODULE_LICENSE("GPL");
MODULE_ALIAS("sch_rr");