/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that can
   enqueue packets and dequeue them (when the device is ready to send
   something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs fall into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to perform sanity checks and
   the part of the work that is common to all qdiscs, and to provide
   rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not the
   real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP 	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers and counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
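
/* As an illustration of the enqueue/dequeue contract above, a minimal
 * work-conserving discipline looks roughly like the following sketch
 * (modelled on sch_fifo.c; illustrative only, not a real scheduler):
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);	// NET_XMIT_SUCCESS
 *		return qdisc_drop(skb, sch, to_free);		// NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);	// NULL once q->q.qlen == 0
 *	}
 */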

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
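
/* Typical usage (an illustrative sketch; "my_qdisc" and its callbacks
 * are made-up names): a scheduler module fills in a Qdisc_ops and
 * registers it from its module init path, unregistering on exit:
 *
 *	static struct Qdisc_ops my_qdisc_ops __read_mostly = {
 *		.id		= "my_qdisc",
 *		.priv_size	= sizeof(struct my_qdisc_priv),
 *		.enqueue	= my_qdisc_enqueue,
 *		.dequeue	= my_qdisc_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= my_qdisc_init,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init my_qdisc_module_init(void)
 *	{
 *		return register_qdisc(&my_qdisc_ops);
 *	}
 *
 *	static void __exit my_qdisc_module_exit(void)
 *	{
 *		unregister_qdisc(&my_qdisc_ops);
 *	}
 */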

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
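
/* For example (illustrative), building with CONFIG_DEFAULT_NET_SCH="fq_codel"
 * makes the late initcall above equivalent to qdisc_set_default("fq_codel"),
 * which request_module()s sch_fq_codel if needed and points
 * default_qdisc_ops at it; devices brought up afterwards get that
 * scheduler as their default root qdisc. Any registered scheduler name
 * works the same way.
 */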

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utilities, we detect the linklayer setting by checking whether the
 * rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
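
/* Worked example (illustrative numbers): with mpu = 0 and cell_log = 3,
 * low = roundup(0, 48) = 0, high = roundup(1, 48) = 48, so
 * cell_low = 0 and cell_high = (48 >> 3) - 1 = 5.  On an ATM-aligned
 * table every frame of up to 47 bytes costs the same single 53-byte
 * cell, hence rtab[0] == rtab[5] and TC_LINKLAYER_ATM is returned; an
 * Ethernet table grows with frame size, so those two entries differ.
 */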

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
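
/* Worked example (illustrative numbers): with overhead = 14,
 * cell_align = 0, cell_log = 6 and size_log = 6, an skb of 1000 bytes
 * gives pkt_len = 1014, slot = 1014 >> 6 = 15, and if stab->data[15]
 * holds 16 the accounted length becomes 16 << 6 = 1024 bytes.  Slots
 * beyond tsize are extrapolated from the last entry as coded above.
 */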

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
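
/* Typical watchdog usage (an illustrative sketch): a shaping qdisc
 * initializes the watchdog once, arms it from ->dequeue() when the
 * next packet may not be sent yet, and cancels it on teardown:
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		// in ->init()
 *	qdisc_watchdog_schedule_ns(&q->watchdog, t_next); // in ->dequeue()
 *	qdisc_watchdog_cancel(&q->watchdog);		// in ->reset()/->destroy()
 *
 * When the hrtimer fires, qdisc_watchdog() reschedules the root qdisc
 * so ->dequeue() runs again.
 */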

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from the space managed by the kernel.
 * The possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
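
/* A handle is a 32-bit major:minor pair, with the major number in the
 * upper 16 bits (TC_H_MAJ) and the minor in the lower 16 (TC_H_MIN).
 * The loop above steps the major number by one via
 * TC_H_MAKE(0x10000U, 0), so e.g. the first automatically allocated
 * handle after a fresh boot would be 0x80010000, i.e. "8001:" in tc
 * notation (illustrative).
 */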

void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
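
/* Usage example (illustrative): a child qdisc that throws away three
 * queued packets totalling 4500 bytes, e.g. during a ->change(), calls
 *
 *	qdisc_tree_reduce_backlog(sch, 3, 4500);
 *
 * which walks up the hierarchy, gives each classful parent a chance to
 * deactivate the now shorter class via ->qlen_notify(), and keeps every
 * ancestor's qlen/backlog counters consistent.
 */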

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				refcount_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				refcount_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will call qdisc_lookup_ops() again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_hash_add(sch, false);

		return sch;
	}
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				refcount_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a
				 *   new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor meant
				 *   that the qdisc with handle tcm_handle is
				 *   not expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of a hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the
				 *   existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
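
/* For orientation (illustrative): a command like
 *
 *	tc qdisc add dev eth0 root handle 1: <kind> ...
 *
 * typically arrives here as RTM_NEWQDISC with NLM_F_CREATE|NLM_F_EXCL,
 * tcm_parent = TC_H_ROOT, tcm_handle = 0x00010000 and TCA_KIND naming
 * the scheduler, taking the create_n_graft path above, while a
 * "tc qdisc change ..." arrives without NLM_F_CREATE and ends up in
 * qdisc_change().
 */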

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct tcmsg *tcm = nlmsg_data(nlh);
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl,
					      RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
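
/* For orientation (illustrative): "tc class add dev eth0 parent 1:
 * classid 1:10 <kind> ..." reaches this function as RTM_NEWTCLASS with
 * tcm_parent = 0x00010000 and tcm_handle = 0x00010010 (tc parses the
 * minor as hex); the steps above resolve qid = 1:0, locate that qdisc,
 * and hand the request to its ->change() class operation.
 */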


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      NULL);

	return 0;
}

subsys_initcall(pktsched_init);
1965