1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 
39 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 			struct nlmsghdr *n, u32 clid,
41 			struct Qdisc *old, struct Qdisc *new);
42 static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 			 struct nlmsghdr *n, struct Qdisc *q,
44 			 unsigned long cl, int event);
45 
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is
   able to enqueue packets and to dequeue them (when the device is
   ready to send something) in an order and at times determined by the
   algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to make some sanity checks
   and do the part of the work common to all qdiscs, and to provide
   rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return
   NULL, but that does not mean the queue is empty; it just means the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP 	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears
   all timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
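/*
 * A minimal sketch (hypothetical, not built here) of how a simple FIFO
 * qdisc could satisfy the enqueue/dequeue contract described above;
 * see sch_fifo.c for a real implementation.  example_enqueue() and
 * example_dequeue() are assumed names:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch); // 0 on success
 *		return qdisc_drop(skb, sch, to_free);        // NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		// A NULL return need not mean "empty"; only
 *		// q->q.qlen == 0 means the queue is really empty.
 *		return qdisc_dequeue_head(sch);
 *	}
 */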
123 
/* Protects the list of registered TC modules. It is a pure SMP lock. */
125 static DEFINE_RWLOCK(qdisc_mod_lock);
126 
127 
128 /************************************************
129  *	Queueing disciplines manipulation.	*
130  ************************************************/
131 
132 
133 /* The list of all installed queueing disciplines. */
134 
135 static struct Qdisc_ops *qdisc_base;
136 
137 /* Register/unregister queueing discipline */
138 
139 int register_qdisc(struct Qdisc_ops *qops)
140 {
141 	struct Qdisc_ops *q, **qp;
142 	int rc = -EEXIST;
143 
144 	write_lock(&qdisc_mod_lock);
145 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 		if (!strcmp(qops->id, q->id))
147 			goto out;
148 
149 	if (qops->enqueue == NULL)
150 		qops->enqueue = noop_qdisc_ops.enqueue;
151 	if (qops->peek == NULL) {
152 		if (qops->dequeue == NULL)
153 			qops->peek = noop_qdisc_ops.peek;
154 		else
155 			goto out_einval;
156 	}
157 	if (qops->dequeue == NULL)
158 		qops->dequeue = noop_qdisc_ops.dequeue;
159 
160 	if (qops->cl_ops) {
161 		const struct Qdisc_class_ops *cops = qops->cl_ops;
162 
163 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
164 			goto out_einval;
165 
166 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
167 			goto out_einval;
168 	}
169 
170 	qops->next = NULL;
171 	*qp = qops;
172 	rc = 0;
173 out:
174 	write_unlock(&qdisc_mod_lock);
175 	return rc;
176 
177 out_einval:
178 	rc = -EINVAL;
179 	goto out;
180 }
181 EXPORT_SYMBOL(register_qdisc);
182 
183 int unregister_qdisc(struct Qdisc_ops *qops)
184 {
185 	struct Qdisc_ops *q, **qp;
186 	int err = -ENOENT;
187 
188 	write_lock(&qdisc_mod_lock);
189 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
190 		if (q == qops)
191 			break;
192 	if (q) {
193 		*qp = q->next;
194 		q->next = NULL;
195 		err = 0;
196 	}
197 	write_unlock(&qdisc_mod_lock);
198 	return err;
199 }
200 EXPORT_SYMBOL(unregister_qdisc);
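/*
 * A sketch (with hypothetical names) of how a qdisc module would use
 * this pair, reusing the example_enqueue/example_dequeue callbacks
 * sketched in the comment near the top of this file:
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 */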
201 
202 /* Get default qdisc if not otherwise specified */
203 void qdisc_get_default(char *name, size_t len)
204 {
205 	read_lock(&qdisc_mod_lock);
206 	strlcpy(name, default_qdisc_ops->id, len);
207 	read_unlock(&qdisc_mod_lock);
208 }
209 
210 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211 {
212 	struct Qdisc_ops *q = NULL;
213 
214 	for (q = qdisc_base; q; q = q->next) {
215 		if (!strcmp(name, q->id)) {
216 			if (!try_module_get(q->owner))
217 				q = NULL;
218 			break;
219 		}
220 	}
221 
222 	return q;
223 }
224 
225 /* Set new default qdisc to use */
226 int qdisc_set_default(const char *name)
227 {
228 	const struct Qdisc_ops *ops;
229 
230 	if (!capable(CAP_NET_ADMIN))
231 		return -EPERM;
232 
233 	write_lock(&qdisc_mod_lock);
234 	ops = qdisc_lookup_default(name);
235 	if (!ops) {
236 		/* Not found, drop lock and try to load module */
237 		write_unlock(&qdisc_mod_lock);
238 		request_module("sch_%s", name);
239 		write_lock(&qdisc_mod_lock);
240 
241 		ops = qdisc_lookup_default(name);
242 	}
243 
244 	if (ops) {
245 		/* Set new default */
246 		module_put(default_qdisc_ops->owner);
247 		default_qdisc_ops = ops;
248 	}
249 	write_unlock(&qdisc_mod_lock);
250 
251 	return ops ? 0 : -ENOENT;
252 }
253 
254 #ifdef CONFIG_NET_SCH_DEFAULT
255 /* Set default value from kernel config */
256 static int __init sch_default_qdisc(void)
257 {
258 	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
259 }
260 late_initcall(sch_default_qdisc);
261 #endif
262 
/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children, etc.)
 * Note: the caller either holds the RTNL lock or rcu_read_lock()
 */
267 
268 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
269 {
270 	struct Qdisc *q;
271 
272 	if (!qdisc_dev(root))
273 		return (root->handle == handle ? root : NULL);
274 
275 	if (!(root->flags & TCQ_F_BUILTIN) &&
276 	    root->handle == handle)
277 		return root;
278 
279 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
280 		if (q->handle == handle)
281 			return q;
282 	}
283 	return NULL;
284 }
285 
286 void qdisc_hash_add(struct Qdisc *q, bool invisible)
287 {
288 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
289 		struct Qdisc *root = qdisc_dev(q)->qdisc;
290 
291 		WARN_ON_ONCE(root == &noop_qdisc);
292 		ASSERT_RTNL();
293 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
294 		if (invisible)
295 			q->flags |= TCQ_F_INVISIBLE;
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_add);
299 
300 void qdisc_hash_del(struct Qdisc *q)
301 {
302 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
303 		ASSERT_RTNL();
304 		hash_del_rcu(&q->hash);
305 	}
306 }
307 EXPORT_SYMBOL(qdisc_hash_del);
308 
309 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
310 {
311 	struct Qdisc *q;
312 
313 	q = qdisc_match_from_root(dev->qdisc, handle);
314 	if (q)
315 		goto out;
316 
317 	if (dev_ingress_queue(dev))
318 		q = qdisc_match_from_root(
319 			dev_ingress_queue(dev)->qdisc_sleeping,
320 			handle);
321 out:
322 	return q;
323 }
324 
325 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
326 {
327 	unsigned long cl;
328 	struct Qdisc *leaf;
329 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
330 
331 	if (cops == NULL)
332 		return NULL;
333 	cl = cops->get(p, classid);
334 
335 	if (cl == 0)
336 		return NULL;
337 	leaf = cops->leaf(p, cl);
338 	cops->put(p, cl);
339 	return leaf;
340 }
341 
342 /* Find queueing discipline by name */
343 
344 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
345 {
346 	struct Qdisc_ops *q = NULL;
347 
348 	if (kind) {
349 		read_lock(&qdisc_mod_lock);
350 		for (q = qdisc_base; q; q = q->next) {
351 			if (nla_strcmp(kind, q->id) == 0) {
352 				if (!try_module_get(q->owner))
353 					q = NULL;
354 				break;
355 			}
356 		}
357 		read_unlock(&qdisc_mod_lock);
358 	}
359 	return q;
360 }
361 
362 /* The linklayer setting were not transferred from iproute2, in older
363  * versions, and the rate tables lookup systems have been dropped in
364  * the kernel. To keep backward compatible with older iproute2 tc
365  * utils, we detect the linklayer setting by detecting if the rate
366  * table were modified.
367  *
368  * For linklayer ATM table entries, the rate table will be aligned to
369  * 48 bytes, thus some table entries will contain the same value.  The
370  * mpu (min packet unit) is also encoded into the old rate table, thus
371  * starting from the mpu, we find low and high table entries for
372  * mapping this cell.  If these entries contain the same value, when
373  * the rate tables have been modified for linklayer ATM.
374  *
375  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
376  * and then roundup to the next cell, calc the table entry one below,
377  * and compare.
378  */
379 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
380 {
381 	int low       = roundup(r->mpu, 48);
382 	int high      = roundup(low+1, 48);
383 	int cell_low  = low >> r->cell_log;
384 	int cell_high = (high >> r->cell_log) - 1;
385 
386 	/* rtab is too inaccurate at rates > 100Mbit/s */
387 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
388 		pr_debug("TC linklayer: Giving up ATM detection\n");
389 		return TC_LINKLAYER_ETHERNET;
390 	}
391 
392 	if ((cell_high > cell_low) && (cell_high < 256)
393 	    && (rtab[cell_low] == rtab[cell_high])) {
394 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
395 			 cell_low, cell_high, rtab[cell_high]);
396 		return TC_LINKLAYER_ATM;
397 	}
398 	return TC_LINKLAYER_ETHERNET;
399 }
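/* A worked example under assumed parameters: with mpu = 0 and
 * cell_log = 3, low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48,
 * so cell_low = 0 and cell_high = (48 >> 3) - 1 = 5.  ATM framing is
 * reported iff rtab[0] == rtab[5] (and the rate is low enough for the
 * rtab to be trusted).
 */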
400 
401 static struct qdisc_rate_table *qdisc_rtab_list;
402 
403 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
404 					struct nlattr *tab)
405 {
406 	struct qdisc_rate_table *rtab;
407 
408 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
409 	    nla_len(tab) != TC_RTAB_SIZE)
410 		return NULL;
411 
412 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
413 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
414 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
415 			rtab->refcnt++;
416 			return rtab;
417 		}
418 	}
419 
420 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
421 	if (rtab) {
422 		rtab->rate = *r;
423 		rtab->refcnt = 1;
424 		memcpy(rtab->data, nla_data(tab), 1024);
425 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
426 			r->linklayer = __detect_linklayer(r, rtab->data);
427 		rtab->next = qdisc_rtab_list;
428 		qdisc_rtab_list = rtab;
429 	}
430 	return rtab;
431 }
432 EXPORT_SYMBOL(qdisc_get_rtab);
433 
434 void qdisc_put_rtab(struct qdisc_rate_table *tab)
435 {
436 	struct qdisc_rate_table *rtab, **rtabp;
437 
438 	if (!tab || --tab->refcnt)
439 		return;
440 
441 	for (rtabp = &qdisc_rtab_list;
442 	     (rtab = *rtabp) != NULL;
443 	     rtabp = &rtab->next) {
444 		if (rtab == tab) {
445 			*rtabp = rtab->next;
446 			kfree(rtab);
447 			return;
448 		}
449 	}
450 }
451 EXPORT_SYMBOL(qdisc_put_rtab);
452 
453 static LIST_HEAD(qdisc_stab_list);
454 
455 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
456 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
457 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
458 };
459 
460 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
461 {
462 	struct nlattr *tb[TCA_STAB_MAX + 1];
463 	struct qdisc_size_table *stab;
464 	struct tc_sizespec *s;
465 	unsigned int tsize = 0;
466 	u16 *tab = NULL;
467 	int err;
468 
469 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
470 	if (err < 0)
471 		return ERR_PTR(err);
472 	if (!tb[TCA_STAB_BASE])
473 		return ERR_PTR(-EINVAL);
474 
475 	s = nla_data(tb[TCA_STAB_BASE]);
476 
477 	if (s->tsize > 0) {
478 		if (!tb[TCA_STAB_DATA])
479 			return ERR_PTR(-EINVAL);
480 		tab = nla_data(tb[TCA_STAB_DATA]);
481 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
482 	}
483 
484 	if (tsize != s->tsize || (!tab && tsize > 0))
485 		return ERR_PTR(-EINVAL);
486 
487 	list_for_each_entry(stab, &qdisc_stab_list, list) {
488 		if (memcmp(&stab->szopts, s, sizeof(*s)))
489 			continue;
490 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
491 			continue;
492 		stab->refcnt++;
493 		return stab;
494 	}
495 
496 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
497 	if (!stab)
498 		return ERR_PTR(-ENOMEM);
499 
500 	stab->refcnt = 1;
501 	stab->szopts = *s;
502 	if (tsize > 0)
503 		memcpy(stab->data, tab, tsize * sizeof(u16));
504 
505 	list_add_tail(&stab->list, &qdisc_stab_list);
506 
507 	return stab;
508 }
509 
510 static void stab_kfree_rcu(struct rcu_head *head)
511 {
512 	kfree(container_of(head, struct qdisc_size_table, rcu));
513 }
514 
515 void qdisc_put_stab(struct qdisc_size_table *tab)
516 {
517 	if (!tab)
518 		return;
519 
520 	if (--tab->refcnt == 0) {
521 		list_del(&tab->list);
522 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
523 	}
524 }
525 EXPORT_SYMBOL(qdisc_put_stab);
526 
527 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
528 {
529 	struct nlattr *nest;
530 
531 	nest = nla_nest_start(skb, TCA_STAB);
532 	if (nest == NULL)
533 		goto nla_put_failure;
534 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
535 		goto nla_put_failure;
536 	nla_nest_end(skb, nest);
537 
538 	return skb->len;
539 
540 nla_put_failure:
541 	return -1;
542 }
543 
544 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
545 			       const struct qdisc_size_table *stab)
546 {
547 	int pkt_len, slot;
548 
549 	pkt_len = skb->len + stab->szopts.overhead;
550 	if (unlikely(!stab->szopts.tsize))
551 		goto out;
552 
553 	slot = pkt_len + stab->szopts.cell_align;
554 	if (unlikely(slot < 0))
555 		slot = 0;
556 
557 	slot >>= stab->szopts.cell_log;
558 	if (likely(slot < stab->szopts.tsize))
559 		pkt_len = stab->data[slot];
560 	else
561 		pkt_len = stab->data[stab->szopts.tsize - 1] *
562 				(slot / stab->szopts.tsize) +
563 				stab->data[slot % stab->szopts.tsize];
564 
565 	pkt_len <<= stab->szopts.size_log;
566 out:
567 	if (unlikely(pkt_len < 1))
568 		pkt_len = 1;
569 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
570 }
571 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
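/* A worked example with assumed size-table options: for
 * szopts = { .cell_log = 5, .size_log = 0, .cell_align = 0,
 * .overhead = 0, .tsize = 512 } and skb->len = 100, the slot is
 * (100 + 0) >> 5 = 3, so qdisc_skb_cb(skb)->pkt_len becomes
 * stab->data[3].
 */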
572 
573 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
574 {
575 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
576 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
577 			txt, qdisc->ops->id, qdisc->handle >> 16);
578 		qdisc->flags |= TCQ_F_WARN_NONWC;
579 	}
580 }
581 EXPORT_SYMBOL(qdisc_warn_nonwc);
582 
583 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
584 {
585 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
586 						 timer);
587 
588 	rcu_read_lock();
589 	__netif_schedule(qdisc_root(wd->qdisc));
590 	rcu_read_unlock();
591 
592 	return HRTIMER_NORESTART;
593 }
594 
595 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
596 {
597 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
598 	wd->timer.function = qdisc_watchdog;
599 	wd->qdisc = qdisc;
600 }
601 EXPORT_SYMBOL(qdisc_watchdog_init);
602 
603 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
604 {
605 	if (test_bit(__QDISC_STATE_DEACTIVATED,
606 		     &qdisc_root_sleeping(wd->qdisc)->state))
607 		return;
608 
609 	if (wd->last_expires == expires)
610 		return;
611 
612 	wd->last_expires = expires;
613 	hrtimer_start(&wd->timer,
614 		      ns_to_ktime(expires),
615 		      HRTIMER_MODE_ABS_PINNED);
616 }
617 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
618 
619 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
620 {
621 	hrtimer_cancel(&wd->timer);
622 }
623 EXPORT_SYMBOL(qdisc_watchdog_cancel);
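/*
 * Typical (sketched) use by a shaping qdisc such as sch_tbf: when
 * ->dequeue() finds that the head packet may not be sent yet, it arms
 * the watchdog so the device gets rescheduled once the deadline
 * passes.  "q->watchdog" and "next_send_time" are hypothetical here:
 *
 *	if (next_send_time > now)
 *		qdisc_watchdog_schedule_ns(&q->watchdog, next_send_time);
 */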
624 
625 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
626 {
627 	unsigned int size = n * sizeof(struct hlist_head), i;
628 	struct hlist_head *h;
629 
630 	if (size <= PAGE_SIZE)
631 		h = kmalloc(size, GFP_KERNEL);
632 	else
633 		h = (struct hlist_head *)
634 			__get_free_pages(GFP_KERNEL, get_order(size));
635 
636 	if (h != NULL) {
637 		for (i = 0; i < n; i++)
638 			INIT_HLIST_HEAD(&h[i]);
639 	}
640 	return h;
641 }
642 
643 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
644 {
645 	unsigned int size = n * sizeof(struct hlist_head);
646 
647 	if (size <= PAGE_SIZE)
648 		kfree(h);
649 	else
650 		free_pages((unsigned long)h, get_order(size));
651 }
652 
653 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
654 {
655 	struct Qdisc_class_common *cl;
656 	struct hlist_node *next;
657 	struct hlist_head *nhash, *ohash;
658 	unsigned int nsize, nmask, osize;
659 	unsigned int i, h;
660 
661 	/* Rehash when load factor exceeds 0.75 */
662 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
663 		return;
664 	nsize = clhash->hashsize * 2;
665 	nmask = nsize - 1;
666 	nhash = qdisc_class_hash_alloc(nsize);
667 	if (nhash == NULL)
668 		return;
669 
670 	ohash = clhash->hash;
671 	osize = clhash->hashsize;
672 
673 	sch_tree_lock(sch);
674 	for (i = 0; i < osize; i++) {
675 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
676 			h = qdisc_class_hash(cl->classid, nmask);
677 			hlist_add_head(&cl->hnode, &nhash[h]);
678 		}
679 	}
680 	clhash->hash     = nhash;
681 	clhash->hashsize = nsize;
682 	clhash->hashmask = nmask;
683 	sch_tree_unlock(sch);
684 
685 	qdisc_class_hash_free(ohash, osize);
686 }
687 EXPORT_SYMBOL(qdisc_class_hash_grow);
688 
689 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
690 {
691 	unsigned int size = 4;
692 
693 	clhash->hash = qdisc_class_hash_alloc(size);
694 	if (clhash->hash == NULL)
695 		return -ENOMEM;
696 	clhash->hashsize  = size;
697 	clhash->hashmask  = size - 1;
698 	clhash->hashelems = 0;
699 	return 0;
700 }
701 EXPORT_SYMBOL(qdisc_class_hash_init);
702 
703 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
704 {
705 	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
706 }
707 EXPORT_SYMBOL(qdisc_class_hash_destroy);
708 
709 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
710 			     struct Qdisc_class_common *cl)
711 {
712 	unsigned int h;
713 
714 	INIT_HLIST_NODE(&cl->hnode);
715 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
716 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
717 	clhash->hashelems++;
718 }
719 EXPORT_SYMBOL(qdisc_class_hash_insert);
720 
721 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
722 			     struct Qdisc_class_common *cl)
723 {
724 	hlist_del(&cl->hnode);
725 	clhash->hashelems--;
726 }
727 EXPORT_SYMBOL(qdisc_class_hash_remove);
728 
/* Allocate a unique handle from the space managed by the kernel.
 * The possible range is [8000-FFFF]:0000 (0x8000 values).
 */
732 static u32 qdisc_alloc_handle(struct net_device *dev)
733 {
734 	int i = 0x8000;
735 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
736 
737 	do {
738 		autohandle += TC_H_MAKE(0x10000U, 0);
739 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
740 			autohandle = TC_H_MAKE(0x80000000U, 0);
741 		if (!qdisc_lookup(dev, autohandle))
742 			return autohandle;
743 		cond_resched();
744 	} while	(--i > 0);
745 
746 	return 0;
747 }
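/* Handles are <major>:<minor> pairs packed into a u32 by TC_H_MAKE();
 * e.g. the handle that tc prints as "8001:" is
 * TC_H_MAKE(0x80010000U, 0), which is what the first call here will
 * typically return on an otherwise unconfigured system.
 */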
748 
749 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
750 			       unsigned int len)
751 {
752 	const struct Qdisc_class_ops *cops;
753 	unsigned long cl;
754 	u32 parentid;
755 	int drops;
756 
757 	if (n == 0 && len == 0)
758 		return;
759 	drops = max_t(int, n, 0);
760 	rcu_read_lock();
761 	while ((parentid = sch->parent)) {
762 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
763 			break;
764 
765 		if (sch->flags & TCQ_F_NOPARENT)
766 			break;
767 		/* TODO: perform the search on a per txq basis */
768 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
769 		if (sch == NULL) {
770 			WARN_ON_ONCE(parentid != TC_H_ROOT);
771 			break;
772 		}
773 		cops = sch->ops->cl_ops;
774 		if (cops->qlen_notify) {
775 			cl = cops->get(sch, parentid);
776 			cops->qlen_notify(sch, cl);
777 			cops->put(sch, cl);
778 		}
779 		sch->q.qlen -= n;
780 		sch->qstats.backlog -= len;
781 		__qdisc_qstats_drop(sch, drops);
782 	}
783 	rcu_read_unlock();
784 }
785 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
786 
787 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
788 			       struct nlmsghdr *n, u32 clid,
789 			       struct Qdisc *old, struct Qdisc *new)
790 {
791 	if (new || old)
792 		qdisc_notify(net, skb, n, clid, old, new);
793 
794 	if (old)
795 		qdisc_destroy(old);
796 }
797 
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */
806 
807 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
808 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
809 		       struct Qdisc *new, struct Qdisc *old)
810 {
811 	struct Qdisc *q = old;
812 	struct net *net = dev_net(dev);
813 	int err = 0;
814 
815 	if (parent == NULL) {
816 		unsigned int i, num_q, ingress;
817 
818 		ingress = 0;
819 		num_q = dev->num_tx_queues;
820 		if ((q && q->flags & TCQ_F_INGRESS) ||
821 		    (new && new->flags & TCQ_F_INGRESS)) {
822 			num_q = 1;
823 			ingress = 1;
824 			if (!dev_ingress_queue(dev))
825 				return -ENOENT;
826 		}
827 
828 		if (dev->flags & IFF_UP)
829 			dev_deactivate(dev);
830 
831 		if (new && new->ops->attach)
832 			goto skip;
833 
834 		for (i = 0; i < num_q; i++) {
835 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
836 
837 			if (!ingress)
838 				dev_queue = netdev_get_tx_queue(dev, i);
839 
840 			old = dev_graft_qdisc(dev_queue, new);
841 			if (new && i > 0)
842 				refcount_inc(&new->refcnt);
843 
844 			if (!ingress)
845 				qdisc_destroy(old);
846 		}
847 
848 skip:
849 		if (!ingress) {
850 			notify_and_destroy(net, skb, n, classid,
851 					   dev->qdisc, new);
852 			if (new && !new->ops->attach)
853 				refcount_inc(&new->refcnt);
854 			dev->qdisc = new ? : &noop_qdisc;
855 
856 			if (new && new->ops->attach)
857 				new->ops->attach(new);
858 		} else {
859 			notify_and_destroy(net, skb, n, classid, old, new);
860 		}
861 
862 		if (dev->flags & IFF_UP)
863 			dev_activate(dev);
864 	} else {
865 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
866 
867 		err = -EOPNOTSUPP;
868 		if (cops && cops->graft) {
869 			unsigned long cl = cops->get(parent, classid);
870 			if (cl) {
871 				err = cops->graft(parent, cl, new, &old);
872 				cops->put(parent, cl);
873 			} else
874 				err = -ENOENT;
875 		}
876 		if (!err)
877 			notify_and_destroy(net, skb, n, classid, old, new);
878 	}
879 	return err;
880 }
881 
882 /* lockdep annotation is needed for ingress; egress gets it only for name */
883 static struct lock_class_key qdisc_tx_lock;
884 static struct lock_class_key qdisc_rx_lock;
885 
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */
891 
892 static struct Qdisc *qdisc_create(struct net_device *dev,
893 				  struct netdev_queue *dev_queue,
894 				  struct Qdisc *p, u32 parent, u32 handle,
895 				  struct nlattr **tca, int *errp)
896 {
897 	int err;
898 	struct nlattr *kind = tca[TCA_KIND];
899 	struct Qdisc *sch;
900 	struct Qdisc_ops *ops;
901 	struct qdisc_size_table *stab;
902 
903 	ops = qdisc_lookup_ops(kind);
904 #ifdef CONFIG_MODULES
905 	if (ops == NULL && kind != NULL) {
906 		char name[IFNAMSIZ];
907 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
908 			/* We dropped the RTNL semaphore in order to
909 			 * perform the module load.  So, even if we
910 			 * succeeded in loading the module we have to
911 			 * tell the caller to replay the request.  We
912 			 * indicate this using -EAGAIN.
913 			 * We replay the request because the device may
914 			 * go away in the mean time.
915 			 */
916 			rtnl_unlock();
917 			request_module("sch_%s", name);
918 			rtnl_lock();
919 			ops = qdisc_lookup_ops(kind);
920 			if (ops != NULL) {
921 				/* We will try again qdisc_lookup_ops,
922 				 * so don't keep a reference.
923 				 */
924 				module_put(ops->owner);
925 				err = -EAGAIN;
926 				goto err_out;
927 			}
928 		}
929 	}
930 #endif
931 
932 	err = -ENOENT;
933 	if (ops == NULL)
934 		goto err_out;
935 
936 	sch = qdisc_alloc(dev_queue, ops);
937 	if (IS_ERR(sch)) {
938 		err = PTR_ERR(sch);
939 		goto err_out2;
940 	}
941 
942 	sch->parent = parent;
943 
944 	if (handle == TC_H_INGRESS) {
945 		sch->flags |= TCQ_F_INGRESS;
946 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
947 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
948 	} else {
949 		if (handle == 0) {
950 			handle = qdisc_alloc_handle(dev);
951 			err = -ENOMEM;
952 			if (handle == 0)
953 				goto err_out3;
954 		}
955 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
956 		if (!netif_is_multiqueue(dev))
957 			sch->flags |= TCQ_F_ONETXQUEUE;
958 	}
959 
960 	sch->handle = handle;
961 
	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
968 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
969 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
970 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
971 	}
972 
973 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
974 		if (qdisc_is_percpu_stats(sch)) {
975 			sch->cpu_bstats =
976 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
977 			if (!sch->cpu_bstats)
978 				goto err_out4;
979 
980 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
981 			if (!sch->cpu_qstats)
982 				goto err_out4;
983 		}
984 
985 		if (tca[TCA_STAB]) {
986 			stab = qdisc_get_stab(tca[TCA_STAB]);
987 			if (IS_ERR(stab)) {
988 				err = PTR_ERR(stab);
989 				goto err_out4;
990 			}
991 			rcu_assign_pointer(sch->stab, stab);
992 		}
993 		if (tca[TCA_RATE]) {
994 			seqcount_t *running;
995 
996 			err = -EOPNOTSUPP;
997 			if (sch->flags & TCQ_F_MQROOT)
998 				goto err_out4;
999 
1000 			if ((sch->parent != TC_H_ROOT) &&
1001 			    !(sch->flags & TCQ_F_INGRESS) &&
1002 			    (!p || !(p->flags & TCQ_F_MQROOT)))
1003 				running = qdisc_root_sleeping_running(sch);
1004 			else
1005 				running = &sch->running;
1006 
1007 			err = gen_new_estimator(&sch->bstats,
1008 						sch->cpu_bstats,
1009 						&sch->rate_est,
1010 						NULL,
1011 						running,
1012 						tca[TCA_RATE]);
1013 			if (err)
1014 				goto err_out4;
1015 		}
1016 
1017 		qdisc_hash_add(sch, false);
1018 
1019 		return sch;
1020 	}
1021 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1022 	if (ops->destroy)
1023 		ops->destroy(sch);
1024 err_out3:
1025 	dev_put(dev);
1026 	kfree((char *) sch - sch->padded);
1027 err_out2:
1028 	module_put(ops->owner);
1029 err_out:
1030 	*errp = err;
1031 	return NULL;
1032 
1033 err_out4:
1034 	free_percpu(sch->cpu_bstats);
1035 	free_percpu(sch->cpu_qstats);
1036 	/*
1037 	 * Any broken qdiscs that would require a ops->reset() here?
1038 	 * The qdisc was never in action so it shouldn't be necessary.
1039 	 */
1040 	qdisc_put_stab(rtnl_dereference(sch->stab));
1041 	if (ops->destroy)
1042 		ops->destroy(sch);
1043 	goto err_out3;
1044 }
1045 
1046 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1047 {
1048 	struct qdisc_size_table *ostab, *stab = NULL;
1049 	int err = 0;
1050 
1051 	if (tca[TCA_OPTIONS]) {
1052 		if (sch->ops->change == NULL)
1053 			return -EINVAL;
1054 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1055 		if (err)
1056 			return err;
1057 	}
1058 
1059 	if (tca[TCA_STAB]) {
1060 		stab = qdisc_get_stab(tca[TCA_STAB]);
1061 		if (IS_ERR(stab))
1062 			return PTR_ERR(stab);
1063 	}
1064 
1065 	ostab = rtnl_dereference(sch->stab);
1066 	rcu_assign_pointer(sch->stab, stab);
1067 	qdisc_put_stab(ostab);
1068 
1069 	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		 * because the change can't be undone.
		 */
1072 		if (sch->flags & TCQ_F_MQROOT)
1073 			goto out;
1074 		gen_replace_estimator(&sch->bstats,
1075 				      sch->cpu_bstats,
1076 				      &sch->rate_est,
1077 				      NULL,
1078 				      qdisc_root_sleeping_running(sch),
1079 				      tca[TCA_RATE]);
1080 	}
1081 out:
1082 	return 0;
1083 }
1084 
1085 struct check_loop_arg {
1086 	struct qdisc_walker	w;
1087 	struct Qdisc		*p;
1088 	int			depth;
1089 };
1090 
1091 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1092 			 struct qdisc_walker *w);
1093 
1094 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1095 {
1096 	struct check_loop_arg	arg;
1097 
1098 	if (q->ops->cl_ops == NULL)
1099 		return 0;
1100 
1101 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1102 	arg.w.fn = check_loop_fn;
1103 	arg.depth = depth;
1104 	arg.p = p;
1105 	q->ops->cl_ops->walk(q, &arg.w);
1106 	return arg.w.stop ? -ELOOP : 0;
1107 }
1108 
1109 static int
1110 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1111 {
1112 	struct Qdisc *leaf;
1113 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1114 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1115 
1116 	leaf = cops->leaf(q, cl);
1117 	if (leaf) {
1118 		if (leaf == arg->p || arg->depth > 7)
1119 			return -ELOOP;
1120 		return check_loop(leaf, arg->p, arg->depth + 1);
1121 	}
1122 	return 0;
1123 }
1124 
1125 /*
1126  * Delete/get qdisc.
1127  */
1128 
1129 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1130 			struct netlink_ext_ack *extack)
1131 {
1132 	struct net *net = sock_net(skb->sk);
1133 	struct tcmsg *tcm = nlmsg_data(n);
1134 	struct nlattr *tca[TCA_MAX + 1];
1135 	struct net_device *dev;
1136 	u32 clid;
1137 	struct Qdisc *q = NULL;
1138 	struct Qdisc *p = NULL;
1139 	int err;
1140 
1141 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1142 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1143 		return -EPERM;
1144 
1145 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1146 	if (err < 0)
1147 		return err;
1148 
1149 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1150 	if (!dev)
1151 		return -ENODEV;
1152 
1153 	clid = tcm->tcm_parent;
1154 	if (clid) {
1155 		if (clid != TC_H_ROOT) {
1156 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1157 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1158 				if (!p)
1159 					return -ENOENT;
1160 				q = qdisc_leaf(p, clid);
1161 			} else if (dev_ingress_queue(dev)) {
1162 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1163 			}
1164 		} else {
1165 			q = dev->qdisc;
1166 		}
1167 		if (!q)
1168 			return -ENOENT;
1169 
1170 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1171 			return -EINVAL;
1172 	} else {
1173 		q = qdisc_lookup(dev, tcm->tcm_handle);
1174 		if (!q)
1175 			return -ENOENT;
1176 	}
1177 
1178 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1179 		return -EINVAL;
1180 
1181 	if (n->nlmsg_type == RTM_DELQDISC) {
1182 		if (!clid)
1183 			return -EINVAL;
1184 		if (q->handle == 0)
1185 			return -ENOENT;
1186 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1187 		if (err != 0)
1188 			return err;
1189 	} else {
1190 		qdisc_notify(net, skb, n, clid, NULL, q);
1191 	}
1192 	return 0;
1193 }
1194 
1195 /*
1196  * Create/change qdisc.
1197  */
1198 
1199 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1200 			   struct netlink_ext_ack *extack)
1201 {
1202 	struct net *net = sock_net(skb->sk);
1203 	struct tcmsg *tcm;
1204 	struct nlattr *tca[TCA_MAX + 1];
1205 	struct net_device *dev;
1206 	u32 clid;
1207 	struct Qdisc *q, *p;
1208 	int err;
1209 
1210 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1211 		return -EPERM;
1212 
1213 replay:
1214 	/* Reinit, just in case something touches this. */
1215 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1216 	if (err < 0)
1217 		return err;
1218 
1219 	tcm = nlmsg_data(n);
1220 	clid = tcm->tcm_parent;
1221 	q = p = NULL;
1222 
1223 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1224 	if (!dev)
1225 		return -ENODEV;
1226 
1227 
1228 	if (clid) {
1229 		if (clid != TC_H_ROOT) {
1230 			if (clid != TC_H_INGRESS) {
1231 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1232 				if (!p)
1233 					return -ENOENT;
1234 				q = qdisc_leaf(p, clid);
1235 			} else if (dev_ingress_queue_create(dev)) {
1236 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1237 			}
1238 		} else {
1239 			q = dev->qdisc;
1240 		}
1241 
		/* It may be the default qdisc; ignore it */
1243 		if (q && q->handle == 0)
1244 			q = NULL;
1245 
1246 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1247 			if (tcm->tcm_handle) {
1248 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1249 					return -EEXIST;
1250 				if (TC_H_MIN(tcm->tcm_handle))
1251 					return -EINVAL;
1252 				q = qdisc_lookup(dev, tcm->tcm_handle);
1253 				if (!q)
1254 					goto create_n_graft;
1255 				if (n->nlmsg_flags & NLM_F_EXCL)
1256 					return -EEXIST;
1257 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1258 					return -EINVAL;
1259 				if (q == p ||
1260 				    (p && check_loop(q, p, 0)))
1261 					return -ELOOP;
1262 				refcount_inc(&q->refcnt);
1263 				goto graft;
1264 			} else {
1265 				if (!q)
1266 					goto create_n_graft;
1267 
				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a
				 *   choice: either to change it or to
				 *   create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both the CREATE and REPLACE flags
				 *   are set.
				 *
				 *   2. If EXCL is set, the requestor wanted
				 *   to say that qdisc tcm_handle is not
				 *   expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are
				 *   set.  Alas, it is a sort of hole in the
				 *   API; we cannot decide what to do
				 *   unambiguously.  For now we select
				 *   create/graft if the user gave a KIND
				 *   which does not match the existing one.
				 */
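				/* For example, iproute2's "tc qdisc
				 * replace" sends NLM_F_CREATE |
				 * NLM_F_REPLACE: it grafts a new qdisc
				 * only when the requested KIND differs
				 * from the existing one, and otherwise
				 * falls through to change the existing
				 * qdisc in place.
				 */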
1287 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1288 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1289 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1290 				     (tca[TCA_KIND] &&
1291 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1292 					goto create_n_graft;
1293 			}
1294 		}
1295 	} else {
1296 		if (!tcm->tcm_handle)
1297 			return -EINVAL;
1298 		q = qdisc_lookup(dev, tcm->tcm_handle);
1299 	}
1300 
1301 	/* Change qdisc parameters */
1302 	if (q == NULL)
1303 		return -ENOENT;
1304 	if (n->nlmsg_flags & NLM_F_EXCL)
1305 		return -EEXIST;
1306 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1307 		return -EINVAL;
1308 	err = qdisc_change(q, tca);
1309 	if (err == 0)
1310 		qdisc_notify(net, skb, n, clid, NULL, q);
1311 	return err;
1312 
1313 create_n_graft:
1314 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1315 		return -ENOENT;
1316 	if (clid == TC_H_INGRESS) {
1317 		if (dev_ingress_queue(dev))
1318 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1319 					 tcm->tcm_parent, tcm->tcm_parent,
1320 					 tca, &err);
1321 		else
1322 			err = -ENOENT;
1323 	} else {
1324 		struct netdev_queue *dev_queue;
1325 
1326 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1327 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1328 		else if (p)
1329 			dev_queue = p->dev_queue;
1330 		else
1331 			dev_queue = netdev_get_tx_queue(dev, 0);
1332 
1333 		q = qdisc_create(dev, dev_queue, p,
1334 				 tcm->tcm_parent, tcm->tcm_handle,
1335 				 tca, &err);
1336 	}
1337 	if (q == NULL) {
1338 		if (err == -EAGAIN)
1339 			goto replay;
1340 		return err;
1341 	}
1342 
1343 graft:
1344 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1345 	if (err) {
1346 		if (q)
1347 			qdisc_destroy(q);
1348 		return err;
1349 	}
1350 
1351 	return 0;
1352 }
1353 
1354 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1355 			 u32 portid, u32 seq, u16 flags, int event)
1356 {
1357 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
1358 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1359 	struct tcmsg *tcm;
1360 	struct nlmsghdr  *nlh;
1361 	unsigned char *b = skb_tail_pointer(skb);
1362 	struct gnet_dump d;
1363 	struct qdisc_size_table *stab;
1364 	__u32 qlen;
1365 
1366 	cond_resched();
1367 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1368 	if (!nlh)
1369 		goto out_nlmsg_trim;
1370 	tcm = nlmsg_data(nlh);
1371 	tcm->tcm_family = AF_UNSPEC;
1372 	tcm->tcm__pad1 = 0;
1373 	tcm->tcm__pad2 = 0;
1374 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1375 	tcm->tcm_parent = clid;
1376 	tcm->tcm_handle = q->handle;
1377 	tcm->tcm_info = refcount_read(&q->refcnt);
1378 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1379 		goto nla_put_failure;
1380 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
1381 		goto nla_put_failure;
1382 	qlen = q->q.qlen;
1383 
1384 	stab = rtnl_dereference(q->stab);
1385 	if (stab && qdisc_dump_stab(skb, stab) < 0)
1386 		goto nla_put_failure;
1387 
1388 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1389 					 NULL, &d, TCA_PAD) < 0)
1390 		goto nla_put_failure;
1391 
1392 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1393 		goto nla_put_failure;
1394 
1395 	if (qdisc_is_percpu_stats(q)) {
1396 		cpu_bstats = q->cpu_bstats;
1397 		cpu_qstats = q->cpu_qstats;
1398 	}
1399 
1400 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1401 				  &d, cpu_bstats, &q->bstats) < 0 ||
1402 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1403 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1404 		goto nla_put_failure;
1405 
1406 	if (gnet_stats_finish_copy(&d) < 0)
1407 		goto nla_put_failure;
1408 
1409 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1410 	return skb->len;
1411 
1412 out_nlmsg_trim:
1413 nla_put_failure:
1414 	nlmsg_trim(skb, b);
1415 	return -1;
1416 }
1417 
1418 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
1419 {
1420 	if (q->flags & TCQ_F_BUILTIN)
1421 		return true;
1422 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
1423 		return true;
1424 
1425 	return false;
1426 }
1427 
1428 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1429 			struct nlmsghdr *n, u32 clid,
1430 			struct Qdisc *old, struct Qdisc *new)
1431 {
1432 	struct sk_buff *skb;
1433 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1434 
1435 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1436 	if (!skb)
1437 		return -ENOBUFS;
1438 
1439 	if (old && !tc_qdisc_dump_ignore(old, false)) {
1440 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1441 				  0, RTM_DELQDISC) < 0)
1442 			goto err_out;
1443 	}
1444 	if (new && !tc_qdisc_dump_ignore(new, false)) {
1445 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1446 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1447 			goto err_out;
1448 	}
1449 
1450 	if (skb->len)
1451 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1452 				      n->nlmsg_flags & NLM_F_ECHO);
1453 
1454 err_out:
1455 	kfree_skb(skb);
1456 	return -EINVAL;
1457 }
1458 
1459 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1460 			      struct netlink_callback *cb,
1461 			      int *q_idx_p, int s_q_idx, bool recur,
1462 			      bool dump_invisible)
1463 {
1464 	int ret = 0, q_idx = *q_idx_p;
1465 	struct Qdisc *q;
1466 	int b;
1467 
1468 	if (!root)
1469 		return 0;
1470 
1471 	q = root;
1472 	if (q_idx < s_q_idx) {
1473 		q_idx++;
1474 	} else {
1475 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1476 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1477 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1478 				  RTM_NEWQDISC) <= 0)
1479 			goto done;
1480 		q_idx++;
1481 	}
1482 
	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and
	 * the global qdisc hashtable, we don't want to hit it again.
	 */
1489 	if (!qdisc_dev(root) || !recur)
1490 		goto out;
1491 
1492 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1493 		if (q_idx < s_q_idx) {
1494 			q_idx++;
1495 			continue;
1496 		}
1497 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1498 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1499 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1500 				  RTM_NEWQDISC) <= 0)
1501 			goto done;
1502 		q_idx++;
1503 	}
1504 
1505 out:
1506 	*q_idx_p = q_idx;
1507 	return ret;
1508 done:
1509 	ret = -1;
1510 	goto out;
1511 }
1512 
1513 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1514 {
1515 	struct net *net = sock_net(skb->sk);
1516 	int idx, q_idx;
1517 	int s_idx, s_q_idx;
1518 	struct net_device *dev;
1519 	const struct nlmsghdr *nlh = cb->nlh;
1520 	struct tcmsg *tcm = nlmsg_data(nlh);
1521 	struct nlattr *tca[TCA_MAX + 1];
1522 	int err;
1523 
1524 	s_idx = cb->args[0];
1525 	s_q_idx = q_idx = cb->args[1];
1526 
1527 	idx = 0;
1528 	ASSERT_RTNL();
1529 
1530 	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
1531 	if (err < 0)
1532 		return err;
1533 
1534 	for_each_netdev(net, dev) {
1535 		struct netdev_queue *dev_queue;
1536 
1537 		if (idx < s_idx)
1538 			goto cont;
1539 		if (idx > s_idx)
1540 			s_q_idx = 0;
1541 		q_idx = 0;
1542 
1543 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1544 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1545 			goto done;
1546 
1547 		dev_queue = dev_ingress_queue(dev);
1548 		if (dev_queue &&
1549 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1550 				       &q_idx, s_q_idx, false,
1551 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1552 			goto done;
1553 
1554 cont:
1555 		idx++;
1556 	}
1557 
1558 done:
1559 	cb->args[0] = idx;
1560 	cb->args[1] = q_idx;
1561 
1562 	return skb->len;
1563 }
1564 
1565 
1566 
1567 /************************************************
1568  *	Traffic classes manipulation.		*
1569  ************************************************/
1570 
1571 
1572 
1573 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1574 			 struct netlink_ext_ack *extack)
1575 {
1576 	struct net *net = sock_net(skb->sk);
1577 	struct tcmsg *tcm = nlmsg_data(n);
1578 	struct nlattr *tca[TCA_MAX + 1];
1579 	struct net_device *dev;
1580 	struct Qdisc *q = NULL;
1581 	const struct Qdisc_class_ops *cops;
1582 	unsigned long cl = 0;
1583 	unsigned long new_cl;
1584 	u32 portid;
1585 	u32 clid;
1586 	u32 qid;
1587 	int err;
1588 
1589 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1590 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1591 		return -EPERM;
1592 
1593 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1594 	if (err < 0)
1595 		return err;
1596 
1597 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1598 	if (!dev)
1599 		return -ENODEV;
1600 
	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is the root class.
	   parent == X:Y	 - parent is a node in the hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is the qdisc.

	   handle == 0:0	 - generate a handle from the kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is the qdisc.
	   handle == X:Y	 - fully specified.
	   handle == X:0	 - root class.
	 */
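	/* For example (assuming iproute2 syntax), "tc class change dev
	 * eth0 classid 1:10 ..." arrives with tcm_parent == TC_H_UNSPEC
	 * and tcm_handle == 0x00010010, i.e. handle X:Y where the major
	 * X already names the qdisc 1:0.
	 */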
1613 
1614 	/* Step 1. Determine qdisc handle X:0 */
1615 
1616 	portid = tcm->tcm_parent;
1617 	clid = tcm->tcm_handle;
1618 	qid = TC_H_MAJ(clid);
1619 
1620 	if (portid != TC_H_ROOT) {
1621 		u32 qid1 = TC_H_MAJ(portid);
1622 
1623 		if (qid && qid1) {
1624 			/* If both majors are known, they must be identical. */
1625 			if (qid != qid1)
1626 				return -EINVAL;
1627 		} else if (qid1) {
1628 			qid = qid1;
1629 		} else if (qid == 0)
1630 			qid = dev->qdisc->handle;
1631 
		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
		 */
1637 		if (portid)
1638 			portid = TC_H_MAKE(qid, portid);
1639 	} else {
1640 		if (qid == 0)
1641 			qid = dev->qdisc->handle;
1642 	}
1643 
1644 	/* OK. Locate qdisc */
1645 	q = qdisc_lookup(dev, qid);
1646 	if (!q)
1647 		return -ENOENT;
1648 
	/* And check that it supports classes */
1650 	cops = q->ops->cl_ops;
1651 	if (cops == NULL)
1652 		return -EINVAL;
1653 
1654 	/* Now try to get class */
1655 	if (clid == 0) {
1656 		if (portid == TC_H_ROOT)
1657 			clid = qid;
1658 	} else
1659 		clid = TC_H_MAKE(qid, clid);
1660 
1661 	if (clid)
1662 		cl = cops->get(q, clid);
1663 
1664 	if (cl == 0) {
1665 		err = -ENOENT;
1666 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1667 		    !(n->nlmsg_flags & NLM_F_CREATE))
1668 			goto out;
1669 	} else {
1670 		switch (n->nlmsg_type) {
1671 		case RTM_NEWTCLASS:
1672 			err = -EEXIST;
1673 			if (n->nlmsg_flags & NLM_F_EXCL)
1674 				goto out;
1675 			break;
1676 		case RTM_DELTCLASS:
1677 			err = -EOPNOTSUPP;
1678 			if (cops->delete)
1679 				err = cops->delete(q, cl);
1680 			if (err == 0)
1681 				tclass_notify(net, skb, n, q, cl,
1682 					      RTM_DELTCLASS);
1683 			goto out;
1684 		case RTM_GETTCLASS:
1685 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1686 			goto out;
1687 		default:
1688 			err = -EINVAL;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	new_cl = cl;
1694 	err = -EOPNOTSUPP;
1695 	if (cops->change)
1696 		err = cops->change(q, clid, portid, tca, &new_cl);
1697 	if (err == 0)
1698 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1699 
1700 out:
1701 	if (cl)
1702 		cops->put(q, cl);
1703 
1704 	return err;
1705 }
1706 
1707 
1708 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1709 			  unsigned long cl,
1710 			  u32 portid, u32 seq, u16 flags, int event)
1711 {
1712 	struct tcmsg *tcm;
1713 	struct nlmsghdr  *nlh;
1714 	unsigned char *b = skb_tail_pointer(skb);
1715 	struct gnet_dump d;
1716 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1717 
1718 	cond_resched();
1719 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1720 	if (!nlh)
1721 		goto out_nlmsg_trim;
1722 	tcm = nlmsg_data(nlh);
1723 	tcm->tcm_family = AF_UNSPEC;
1724 	tcm->tcm__pad1 = 0;
1725 	tcm->tcm__pad2 = 0;
1726 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1727 	tcm->tcm_parent = q->handle;
1728 	tcm->tcm_handle = q->handle;
1729 	tcm->tcm_info = 0;
1730 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1731 		goto nla_put_failure;
1732 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1733 		goto nla_put_failure;
1734 
1735 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1736 					 NULL, &d, TCA_PAD) < 0)
1737 		goto nla_put_failure;
1738 
1739 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1740 		goto nla_put_failure;
1741 
1742 	if (gnet_stats_finish_copy(&d) < 0)
1743 		goto nla_put_failure;
1744 
1745 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1746 	return skb->len;
1747 
1748 out_nlmsg_trim:
1749 nla_put_failure:
1750 	nlmsg_trim(skb, b);
1751 	return -1;
1752 }
1753 
1754 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1755 			 struct nlmsghdr *n, struct Qdisc *q,
1756 			 unsigned long cl, int event)
1757 {
1758 	struct sk_buff *skb;
1759 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1760 
1761 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1762 	if (!skb)
1763 		return -ENOBUFS;
1764 
1765 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1766 		kfree_skb(skb);
1767 		return -EINVAL;
1768 	}
1769 
1770 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1771 			      n->nlmsg_flags & NLM_F_ECHO);
1772 }
1773 
1774 struct qdisc_dump_args {
1775 	struct qdisc_walker	w;
1776 	struct sk_buff		*skb;
1777 	struct netlink_callback	*cb;
1778 };
1779 
1780 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1781 			    struct qdisc_walker *arg)
1782 {
1783 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1784 
1785 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1786 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1787 			      RTM_NEWTCLASS);
1788 }
1789 
1790 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1791 				struct tcmsg *tcm, struct netlink_callback *cb,
1792 				int *t_p, int s_t)
1793 {
1794 	struct qdisc_dump_args arg;
1795 
1796 	if (tc_qdisc_dump_ignore(q, false) ||
1797 	    *t_p < s_t || !q->ops->cl_ops ||
1798 	    (tcm->tcm_parent &&
1799 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1800 		(*t_p)++;
1801 		return 0;
1802 	}
1803 	if (*t_p > s_t)
1804 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1805 	arg.w.fn = qdisc_class_dump;
1806 	arg.skb = skb;
1807 	arg.cb = cb;
1808 	arg.w.stop  = 0;
1809 	arg.w.skip = cb->args[1];
1810 	arg.w.count = 0;
1811 	q->ops->cl_ops->walk(q, &arg.w);
1812 	cb->args[1] = arg.w.count;
1813 	if (arg.w.stop)
1814 		return -1;
1815 	(*t_p)++;
1816 	return 0;
1817 }
1818 
1819 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1820 			       struct tcmsg *tcm, struct netlink_callback *cb,
1821 			       int *t_p, int s_t)
1822 {
1823 	struct Qdisc *q;
1824 	int b;
1825 
1826 	if (!root)
1827 		return 0;
1828 
1829 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1830 		return -1;
1831 
1832 	if (!qdisc_dev(root))
1833 		return 0;
1834 
1835 	if (tcm->tcm_parent) {
1836 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
1837 		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1838 			return -1;
1839 		return 0;
1840 	}
1841 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1842 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1843 			return -1;
1844 	}
1845 
1846 	return 0;
1847 }
1848 
1849 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1850 {
1851 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1852 	struct net *net = sock_net(skb->sk);
1853 	struct netdev_queue *dev_queue;
1854 	struct net_device *dev;
1855 	int t, s_t;
1856 
1857 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1858 		return 0;
1859 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1860 	if (!dev)
1861 		return 0;
1862 
1863 	s_t = cb->args[0];
1864 	t = 0;
1865 
1866 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1867 		goto done;
1868 
1869 	dev_queue = dev_ingress_queue(dev);
1870 	if (dev_queue &&
1871 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1872 				&t, s_t) < 0)
1873 		goto done;
1874 
1875 done:
1876 	cb->args[0] = t;
1877 
1878 	dev_put(dev);
1879 	return skb->len;
1880 }
1881 
1882 #ifdef CONFIG_PROC_FS
1883 static int psched_show(struct seq_file *seq, void *v)
1884 {
1885 	seq_printf(seq, "%08x %08x %08x %08x\n",
1886 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1887 		   1000000,
1888 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1889 
1890 	return 0;
1891 }
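/* Example /proc/net/psched output on a system with high-resolution
 * timers (hrtimer_resolution == 1):
 *
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per microsecond, PSCHED_TICKS2NS(1) == 64, and a
 * reported clock resolution of 10^9 ticks per second.
 */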
1892 
1893 static int psched_open(struct inode *inode, struct file *file)
1894 {
1895 	return single_open(file, psched_show, NULL);
1896 }
1897 
1898 static const struct file_operations psched_fops = {
1899 	.owner = THIS_MODULE,
1900 	.open = psched_open,
1901 	.read  = seq_read,
1902 	.llseek = seq_lseek,
1903 	.release = single_release,
1904 };
1905 
1906 static int __net_init psched_net_init(struct net *net)
1907 {
1908 	struct proc_dir_entry *e;
1909 
1910 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1911 	if (e == NULL)
1912 		return -ENOMEM;
1913 
1914 	return 0;
1915 }
1916 
1917 static void __net_exit psched_net_exit(struct net *net)
1918 {
1919 	remove_proc_entry("psched", net->proc_net);
1920 }
1921 #else
1922 static int __net_init psched_net_init(struct net *net)
1923 {
1924 	return 0;
1925 }
1926 
1927 static void __net_exit psched_net_exit(struct net *net)
1928 {
1929 }
1930 #endif
1931 
1932 static struct pernet_operations psched_net_ops = {
1933 	.init = psched_net_init,
1934 	.exit = psched_net_exit,
1935 };
1936 
1937 static int __init pktsched_init(void)
1938 {
1939 	int err;
1940 
1941 	err = register_pernet_subsys(&psched_net_ops);
1942 	if (err) {
		pr_err("pktsched_init: cannot initialize per netns operations\n");
1945 		return err;
1946 	}
1947 
1948 	register_qdisc(&pfifo_fast_ops);
1949 	register_qdisc(&pfifo_qdisc_ops);
1950 	register_qdisc(&bfifo_qdisc_ops);
1951 	register_qdisc(&pfifo_head_drop_qdisc_ops);
1952 	register_qdisc(&mq_qdisc_ops);
1953 	register_qdisc(&noqueue_qdisc_ops);
1954 
1955 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1956 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1957 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
1958 		      NULL);
1959 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1960 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
1961 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
1962 		      NULL);
1963 
1964 	return 0;
1965 }
1966 
1967 subsys_initcall(pktsched_init);
1968