/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the times
   determined by the algorithm hidden inside it.

   Qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform the sanity
   checks and the part of the work which is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty, it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   a real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP 	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
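
/* For illustration only -- not a qdisc registered by this file: a minimal
 * FIFO pair honouring the enqueue/dequeue contract above. This is a sketch
 * (names are hypothetical); a real implementation also fills in a complete
 * struct Qdisc_ops and configures sch->limit via ops->init/ops->change.
 *
 *	static int fifo_sketch_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				       struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);	// 0 == success
 *		return qdisc_drop(skb, sch, to_free);		// NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *fifo_sketch_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);	// NULL when q->q.qlen == 0
 *	}
 */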

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
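
/* A qdisc module typically registers its ops from its module init hook,
 * e.g. (sketch with hypothetical names; compare the real thing in
 * sch_htb.c):
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *	module_init(example_module_init);
 */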

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
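
/* qdisc_set_default() is reached via the net.core.default_qdisc sysctl,
 * e.g. "sysctl -w net.core.default_qdisc=fq_codel"; the chosen ops are
 * then used whenever a default qdisc is attached to a device's tx queues.
 */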

/* We know the handle. Find the qdisc among all qdiscs attached to the device
 * (the root qdisc, all its children, children of children etc.).
 * Note: the caller holds either the rtnl lock or rcu_read_lock().
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		struct Qdisc *root = qdisc_dev(q)->qdisc;

		WARN_ON_ONCE(root == &noop_qdisc);
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To keep backward compatibility with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing the two.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
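
/* Worked example (hypothetical values): with mpu == 0 and cell_log == 3,
 * low == 0 and high == 48, so cell_low == 0 and cell_high == 5.  On an
 * ATM-shaped table every packet of up to 48 payload bytes costs exactly
 * one 53-byte cell on the wire, so rtab[0] .. rtab[5] all hold the same
 * transmission time and ATM is detected.
 */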

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
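
/* Worked example (hypothetical stab): with overhead == 24, cell_align == 0,
 * cell_log == 6 and size_log == 0, a 1000-byte skb gives pkt_len == 1024
 * and slot == 1024 >> 6 == 16; assuming slot < tsize, the qdisc then
 * charges stab->data[16] bytes for this packet instead of skb->len.
 */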

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

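/* The watchdog hrtimer lets a non-work-conserving qdisc (e.g. tbf or
 * netem) have its root rescheduled once enough time has passed for it
 * to be allowed to transmit again.
 */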
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from the space managed by the kernel.
 * The possible range is [8000-FFFF]:0000 (0x8000 values).
 */
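/* For example, the first automatically allocated handle on a device is
 * normally 8001:, since the static cursor below starts at 8000: and is
 * advanced before the first lookup.
 */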
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}

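/* Propagate a decrease of n packets / len bytes up the qdisc tree, so
 * every ancestor's qlen and backlog stay consistent after packets were
 * dropped or flushed below it; classful parents are told via
 * cops->qlen_notify() so they can deactivate now-empty classes.
 */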
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via the netlink attributes in tca
   (TCA_KIND, TCA_OPTIONS etc.).
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again with qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_hash_add(sch);

		return sch;
	}
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	ops->destroy(sch);
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
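
/* Example of what this prevents: grafting qdisc 1: beneath class 2:1 when
 * qdisc 2: is itself a descendant of 1:.  check_loop(q = 1:, p = 2:) walks
 * q's classes (at most 7 levels deep) looking for p among the leaves and,
 * on finding it, the graft fails with -ELOOP.
 */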

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both the CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requester asserted
				 *   that the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of a hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
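
/* For orientation, how iproute2 maps onto this handler (flags as sent by
 * tc's qdisc commands):
 *	tc qdisc add     -> RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc change  -> RTM_NEWQDISC, no flags
 *	tc qdisc replace -> RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc del     -> RTM_DELQDISC (handled by tc_get_qdisc() above)
 */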

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above, the
	 * global qdisc hashtable was covered by the first pass, so we don't
	 * want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();
	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/



static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - class is X:Y; fully specified.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl,
					      RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
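
/* For orientation: "tc class add/change/replace/del dev ..." commands all
 * land here as RTM_NEWTCLASS/RTM_DELTCLASS, with the actual work delegated
 * to the qdisc's cl_ops->change() and cl_ops->delete(); listing classes
 * ("tc class show") goes through tc_dump_tclass() below instead.
 */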


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for the protocol, and asks
 * the specific classifiers.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *old_tp = tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tc_classify);

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
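
/* Example: on a typical system with high-resolution timers,
 * /proc/net/psched reads
 *	000003e8 00000040 000f4240 3b9aca00
 * i.e. 1000 ns per psched microsecond, 64 ns per psched tick
 * (PSCHED_SHIFT == 6), a fixed 1000000 retained for compatibility,
 * and 10^9 timer ticks per second (1 ns hrtimer resolution).
 */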

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      NULL);

	return 0;
}

subsys_initcall(pktsched_init);