xref: /openbmc/linux/net/sched/sch_api.c (revision 6dfcd296)
1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 
39 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 			struct nlmsghdr *n, u32 clid,
41 			struct Qdisc *old, struct Qdisc *new);
42 static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 			 struct nlmsghdr *n, struct Qdisc *q,
44 			 unsigned long cl, int event);
45 
46 /*
47 
48    Short review.
49    -------------
50 
51    This file consists of two interrelated parts:
52 
53    1. queueing disciplines manager frontend.
54    2. traffic classes manager frontend.
55 
56    Generally, a queueing discipline ("qdisc") is a black box
57    that can enqueue packets and dequeue them (when the
58    device is ready to send something), in an order and at times
59    determined by the algorithm hidden inside it.
60 
61    qdiscs are divided into two categories:
62    - "queues", which have no internal structure visible from outside.
63    - "schedulers", which split packets into "traffic classes",
64      using "packet classifiers" (see cls_api.c).
65 
66    In turn, classes may have child qdiscs (as a rule, queues)
67    attached to them, and so on.
68 
69    The goal of the routines in this file is to translate the
70    information supplied by the user in the form of handles into a
71    form more intelligible to the kernel, to perform sanity checks
72    and the parts of the work that are common to all qdiscs,
73    and to provide rtnetlink notifications.
74 
75    All real intelligent work is done inside qdisc modules.
76 
77 
78 
79    Every discipline has two major routines: enqueue and dequeue.
80 
81    ---dequeue
82 
83    dequeue usually returns an skb to send. It is allowed to return NULL,
84    but that does not mean the queue is empty; it just means that the
85    discipline does not want to send anything at this time.
86    The queue is really empty only if q->q.qlen == 0.
87    For complicated disciplines with multiple queues, q->q is not the
88    real packet queue, but q->q.qlen must nevertheless be valid.
89 
90    ---enqueue
91 
92    enqueue returns 0 if the packet was enqueued successfully.
93    If a packet (this one or another one) was dropped, it returns
94    a non-zero error code.
95    NET_XMIT_DROP 	- this packet was dropped
96      Expected action: do not back off, but wait until the queue clears.
97    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
98      Expected action: back off or ignore
99 
100    Auxiliary routines:
101 
102    ---peek
103 
104    like dequeue but without removing a packet from the queue
105 
106    ---reset
107 
108    returns the qdisc to its initial state: purges all buffers, clears
109    all timers and counters (except statistics), etc.
110 
111    ---init
112 
113    initializes a newly created qdisc.
114 
115    ---destroy
116 
117    destroys the resources allocated by init and during the lifetime of the qdisc.
118 
119    ---change
120 
121    changes qdisc parameters.
122  */
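
/* To make the enqueue/dequeue contract above concrete, here is a
 * minimal sketch of a work-conserving FIFO (illustrative only; the
 * "example_fifo" names and the limit check are hypothetical, not part
 * of this file):
 */
#if 0
static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	if (likely(sch->q.qlen < qdisc_dev(sch)->tx_queue_len))
		return qdisc_enqueue_tail(skb, sch);	/* NET_XMIT_SUCCESS */

	return qdisc_drop(skb, sch, to_free);		/* NET_XMIT_DROP */
}

static struct sk_buff *example_fifo_dequeue(struct Qdisc *sch)
{
	/* A shaping qdisc may return NULL here even when q.qlen > 0;
	 * this one is work-conserving, so NULL really means empty.
	 */
	return qdisc_dequeue_head(sch);
}
#endif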
123 
124 /* Protects the list of registered TC modules. It is a pure SMP lock. */
125 static DEFINE_RWLOCK(qdisc_mod_lock);
126 
127 
128 /************************************************
129  *	Queueing disciplines manipulation.	*
130  ************************************************/
131 
132 
133 /* The list of all installed queueing disciplines. */
134 
135 static struct Qdisc_ops *qdisc_base;
136 
137 /* Register/unregister queueing discipline */
138 
139 int register_qdisc(struct Qdisc_ops *qops)
140 {
141 	struct Qdisc_ops *q, **qp;
142 	int rc = -EEXIST;
143 
144 	write_lock(&qdisc_mod_lock);
145 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 		if (!strcmp(qops->id, q->id))
147 			goto out;
148 
149 	if (qops->enqueue == NULL)
150 		qops->enqueue = noop_qdisc_ops.enqueue;
151 	if (qops->peek == NULL) {
152 		if (qops->dequeue == NULL)
153 			qops->peek = noop_qdisc_ops.peek;
154 		else
155 			goto out_einval;
156 	}
157 	if (qops->dequeue == NULL)
158 		qops->dequeue = noop_qdisc_ops.dequeue;
159 
160 	if (qops->cl_ops) {
161 		const struct Qdisc_class_ops *cops = qops->cl_ops;
162 
163 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
164 			goto out_einval;
165 
166 		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
167 			goto out_einval;
168 	}
169 
170 	qops->next = NULL;
171 	*qp = qops;
172 	rc = 0;
173 out:
174 	write_unlock(&qdisc_mod_lock);
175 	return rc;
176 
177 out_einval:
178 	rc = -EINVAL;
179 	goto out;
180 }
181 EXPORT_SYMBOL(register_qdisc);
182 
183 int unregister_qdisc(struct Qdisc_ops *qops)
184 {
185 	struct Qdisc_ops *q, **qp;
186 	int err = -ENOENT;
187 
188 	write_lock(&qdisc_mod_lock);
189 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
190 		if (q == qops)
191 			break;
192 	if (q) {
193 		*qp = q->next;
194 		q->next = NULL;
195 		err = 0;
196 	}
197 	write_unlock(&qdisc_mod_lock);
198 	return err;
199 }
200 EXPORT_SYMBOL(unregister_qdisc);
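
/* A scheduler module pairs the two calls above in its init/exit hooks.
 * A hedged sketch, reusing the hypothetical example_fifo_* functions
 * from the overview comment (most ops fields elided):
 */
#if 0
static struct Qdisc_ops example_qdisc_ops __read_mostly = {
	.id		= "example",
	.enqueue	= example_fifo_enqueue,
	.dequeue	= example_fifo_dequeue,
	.peek		= qdisc_peek_head,	/* required when dequeue is set */
	.owner		= THIS_MODULE,
};

static int __init example_module_init(void)
{
	/* Fails with -EEXIST if another "example" is already registered,
	 * or -EINVAL if mandatory ops are missing (see the checks above).
	 */
	return register_qdisc(&example_qdisc_ops);
}

static void __exit example_module_exit(void)
{
	unregister_qdisc(&example_qdisc_ops);
}
module_init(example_module_init);
module_exit(example_module_exit);
#endif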
201 
202 /* Get default qdisc if not otherwise specified */
203 void qdisc_get_default(char *name, size_t len)
204 {
205 	read_lock(&qdisc_mod_lock);
206 	strlcpy(name, default_qdisc_ops->id, len);
207 	read_unlock(&qdisc_mod_lock);
208 }
209 
210 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211 {
212 	struct Qdisc_ops *q = NULL;
213 
214 	for (q = qdisc_base; q; q = q->next) {
215 		if (!strcmp(name, q->id)) {
216 			if (!try_module_get(q->owner))
217 				q = NULL;
218 			break;
219 		}
220 	}
221 
222 	return q;
223 }
224 
225 /* Set new default qdisc to use */
226 int qdisc_set_default(const char *name)
227 {
228 	const struct Qdisc_ops *ops;
229 
230 	if (!capable(CAP_NET_ADMIN))
231 		return -EPERM;
232 
233 	write_lock(&qdisc_mod_lock);
234 	ops = qdisc_lookup_default(name);
235 	if (!ops) {
236 		/* Not found, drop lock and try to load module */
237 		write_unlock(&qdisc_mod_lock);
238 		request_module("sch_%s", name);
239 		write_lock(&qdisc_mod_lock);
240 
241 		ops = qdisc_lookup_default(name);
242 	}
243 
244 	if (ops) {
245 		/* Set new default */
246 		module_put(default_qdisc_ops->owner);
247 		default_qdisc_ops = ops;
248 	}
249 	write_unlock(&qdisc_mod_lock);
250 
251 	return ops ? 0 : -ENOENT;
252 }
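
/* Note: qdisc_get_default()/qdisc_set_default() back the
 * net.core.default_qdisc sysctl (handled in net/core/sysctl_net_core.c),
 * so e.g. "sysctl -w net.core.default_qdisc=fq" lands here and may
 * autoload the scheduler via request_module("sch_fq").
 */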
253 
254 /* We know the handle. Find the qdisc among all qdiscs attached to the
255  * device (the root qdisc, all its children, children of children, etc.).
256  * Note: the caller either holds the RTNL lock or rcu_read_lock().
257  */
258 
259 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
260 {
261 	struct Qdisc *q;
262 
263 	if (!qdisc_dev(root))
264 		return (root->handle == handle ? root : NULL);
265 
266 	if (!(root->flags & TCQ_F_BUILTIN) &&
267 	    root->handle == handle)
268 		return root;
269 
270 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
271 		if (q->handle == handle)
272 			return q;
273 	}
274 	return NULL;
275 }
276 
277 void qdisc_hash_add(struct Qdisc *q)
278 {
279 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
280 		struct Qdisc *root = qdisc_dev(q)->qdisc;
281 
282 		WARN_ON_ONCE(root == &noop_qdisc);
283 		ASSERT_RTNL();
284 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285 	}
286 }
287 EXPORT_SYMBOL(qdisc_hash_add);
288 
289 void qdisc_hash_del(struct Qdisc *q)
290 {
291 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292 		ASSERT_RTNL();
293 		hash_del_rcu(&q->hash);
294 	}
295 }
296 EXPORT_SYMBOL(qdisc_hash_del);
297 
298 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
299 {
300 	struct Qdisc *q;
301 
302 	q = qdisc_match_from_root(dev->qdisc, handle);
303 	if (q)
304 		goto out;
305 
306 	if (dev_ingress_queue(dev))
307 		q = qdisc_match_from_root(
308 			dev_ingress_queue(dev)->qdisc_sleeping,
309 			handle);
310 out:
311 	return q;
312 }
313 
314 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
315 {
316 	unsigned long cl;
317 	struct Qdisc *leaf;
318 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
319 
320 	if (cops == NULL)
321 		return NULL;
322 	cl = cops->get(p, classid);
323 
324 	if (cl == 0)
325 		return NULL;
326 	leaf = cops->leaf(p, cl);
327 	cops->put(p, cl);
328 	return leaf;
329 }
330 
331 /* Find queueing discipline by name */
332 
333 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
334 {
335 	struct Qdisc_ops *q = NULL;
336 
337 	if (kind) {
338 		read_lock(&qdisc_mod_lock);
339 		for (q = qdisc_base; q; q = q->next) {
340 			if (nla_strcmp(kind, q->id) == 0) {
341 				if (!try_module_get(q->owner))
342 					q = NULL;
343 				break;
344 			}
345 		}
346 		read_unlock(&qdisc_mod_lock);
347 	}
348 	return q;
349 }
350 
351 /* The linklayer setting was not transferred from iproute2 in older
352  * versions, and the rate table lookup system has been dropped from
353  * the kernel. To stay backward compatible with older iproute2 tc
354  * utils, we detect the linklayer setting by checking whether the rate
355  * table was modified.
356  *
357  * For linklayer ATM table entries, the rate table will be aligned to
358  * 48 bytes, thus some table entries will contain the same value.  The
359  * mpu (min packet unit) is also encoded into the old rate table, thus
360  * starting from the mpu, we find the low and high table entries for
361  * mapping this cell.  If these entries contain the same value, then
362  * the rate table has been modified for linklayer ATM.
363  *
364  * This is done by rounding mpu up to the nearest 48-byte cell/entry,
365  * then rounding up to the next cell, calculating the table entry one
366  * below, and comparing.
367  */
368 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
369 {
370 	int low       = roundup(r->mpu, 48);
371 	int high      = roundup(low+1, 48);
372 	int cell_low  = low >> r->cell_log;
373 	int cell_high = (high >> r->cell_log) - 1;
374 
375 	/* rtab is too inaccurate at rates > 100Mbit/s */
376 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
377 		pr_debug("TC linklayer: Giving up ATM detection\n");
378 		return TC_LINKLAYER_ETHERNET;
379 	}
380 
381 	if ((cell_high > cell_low) && (cell_high < 256)
382 	    && (rtab[cell_low] == rtab[cell_high])) {
383 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
384 			 cell_low, cell_high, rtab[cell_high]);
385 		return TC_LINKLAYER_ATM;
386 	}
387 	return TC_LINKLAYER_ETHERNET;
388 }
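
/* Worked example of the detection above, assuming mpu = 0 and
 * cell_log = 3 (one table entry per 8 bytes): low = roundup(0, 48) = 0,
 * high = roundup(1, 48) = 48, so cell_low = 0 and
 * cell_high = (48 >> 3) - 1 = 5.  On an ATM-aligned table every packet
 * size from 1 to 48 bytes costs exactly one 48-byte cell, so
 * rtab[0]..rtab[5] all hold the same value and TC_LINKLAYER_ATM is
 * returned; an Ethernet table grows with each 8-byte bucket, so
 * rtab[0] != rtab[5].
 */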
389 
390 static struct qdisc_rate_table *qdisc_rtab_list;
391 
392 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
393 					struct nlattr *tab)
394 {
395 	struct qdisc_rate_table *rtab;
396 
397 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
398 	    nla_len(tab) != TC_RTAB_SIZE)
399 		return NULL;
400 
401 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
402 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
403 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
404 			rtab->refcnt++;
405 			return rtab;
406 		}
407 	}
408 
409 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
410 	if (rtab) {
411 		rtab->rate = *r;
412 		rtab->refcnt = 1;
413 		memcpy(rtab->data, nla_data(tab), 1024);
414 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
415 			r->linklayer = __detect_linklayer(r, rtab->data);
416 		rtab->next = qdisc_rtab_list;
417 		qdisc_rtab_list = rtab;
418 	}
419 	return rtab;
420 }
421 EXPORT_SYMBOL(qdisc_get_rtab);
422 
423 void qdisc_put_rtab(struct qdisc_rate_table *tab)
424 {
425 	struct qdisc_rate_table *rtab, **rtabp;
426 
427 	if (!tab || --tab->refcnt)
428 		return;
429 
430 	for (rtabp = &qdisc_rtab_list;
431 	     (rtab = *rtabp) != NULL;
432 	     rtabp = &rtab->next) {
433 		if (rtab == tab) {
434 			*rtabp = rtab->next;
435 			kfree(rtab);
436 			return;
437 		}
438 	}
439 }
440 EXPORT_SYMBOL(qdisc_put_rtab);
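
/* Rate tables are shared and reference-counted, so a qdisc using one
 * follows a get/put discipline. A hedged sketch (the qopt/tb names are
 * hypothetical attribute parsing, not from this file):
 */
#if 0
	struct qdisc_rate_table *rtab;

	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_EXAMPLE_RTAB]);
	if (!rtab)
		return -EINVAL;
	/* ... use rtab->data for size->transmission-time lookups ... */
	qdisc_put_rtab(rtab);	/* in ->destroy(), or when replaced */
#endif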
441 
442 static LIST_HEAD(qdisc_stab_list);
443 static DEFINE_SPINLOCK(qdisc_stab_lock);
444 
445 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
446 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
447 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
448 };
449 
450 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
451 {
452 	struct nlattr *tb[TCA_STAB_MAX + 1];
453 	struct qdisc_size_table *stab;
454 	struct tc_sizespec *s;
455 	unsigned int tsize = 0;
456 	u16 *tab = NULL;
457 	int err;
458 
459 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
460 	if (err < 0)
461 		return ERR_PTR(err);
462 	if (!tb[TCA_STAB_BASE])
463 		return ERR_PTR(-EINVAL);
464 
465 	s = nla_data(tb[TCA_STAB_BASE]);
466 
467 	if (s->tsize > 0) {
468 		if (!tb[TCA_STAB_DATA])
469 			return ERR_PTR(-EINVAL);
470 		tab = nla_data(tb[TCA_STAB_DATA]);
471 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
472 	}
473 
474 	if (tsize != s->tsize || (!tab && tsize > 0))
475 		return ERR_PTR(-EINVAL);
476 
477 	spin_lock(&qdisc_stab_lock);
478 
479 	list_for_each_entry(stab, &qdisc_stab_list, list) {
480 		if (memcmp(&stab->szopts, s, sizeof(*s)))
481 			continue;
482 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
483 			continue;
484 		stab->refcnt++;
485 		spin_unlock(&qdisc_stab_lock);
486 		return stab;
487 	}
488 
489 	spin_unlock(&qdisc_stab_lock);
490 
491 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
492 	if (!stab)
493 		return ERR_PTR(-ENOMEM);
494 
495 	stab->refcnt = 1;
496 	stab->szopts = *s;
497 	if (tsize > 0)
498 		memcpy(stab->data, tab, tsize * sizeof(u16));
499 
500 	spin_lock(&qdisc_stab_lock);
501 	list_add_tail(&stab->list, &qdisc_stab_list);
502 	spin_unlock(&qdisc_stab_lock);
503 
504 	return stab;
505 }
506 
507 static void stab_kfree_rcu(struct rcu_head *head)
508 {
509 	kfree(container_of(head, struct qdisc_size_table, rcu));
510 }
511 
512 void qdisc_put_stab(struct qdisc_size_table *tab)
513 {
514 	if (!tab)
515 		return;
516 
517 	spin_lock(&qdisc_stab_lock);
518 
519 	if (--tab->refcnt == 0) {
520 		list_del(&tab->list);
521 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
522 	}
523 
524 	spin_unlock(&qdisc_stab_lock);
525 }
526 EXPORT_SYMBOL(qdisc_put_stab);
527 
528 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
529 {
530 	struct nlattr *nest;
531 
532 	nest = nla_nest_start(skb, TCA_STAB);
533 	if (nest == NULL)
534 		goto nla_put_failure;
535 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
536 		goto nla_put_failure;
537 	nla_nest_end(skb, nest);
538 
539 	return skb->len;
540 
541 nla_put_failure:
542 	return -1;
543 }
544 
545 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
546 			       const struct qdisc_size_table *stab)
547 {
548 	int pkt_len, slot;
549 
550 	pkt_len = skb->len + stab->szopts.overhead;
551 	if (unlikely(!stab->szopts.tsize))
552 		goto out;
553 
554 	slot = pkt_len + stab->szopts.cell_align;
555 	if (unlikely(slot < 0))
556 		slot = 0;
557 
558 	slot >>= stab->szopts.cell_log;
559 	if (likely(slot < stab->szopts.tsize))
560 		pkt_len = stab->data[slot];
561 	else
562 		pkt_len = stab->data[stab->szopts.tsize - 1] *
563 				(slot / stab->szopts.tsize) +
564 				stab->data[slot % stab->szopts.tsize];
565 
566 	pkt_len <<= stab->szopts.size_log;
567 out:
568 	if (unlikely(pkt_len < 1))
569 		pkt_len = 1;
570 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
571 }
572 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
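
/* Worked example of the lookup above, assuming overhead = 0,
 * cell_align = -1, cell_log = 6 and tsize = 512: a 1500-byte skb gives
 * slot = (1500 - 1) >> 6 = 23, hence
 * pkt_len = stab->data[23] << size_log.  Slots at or beyond tsize are
 * extrapolated linearly from the table, which is what the
 * multiply/modulo branch computes.
 */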
573 
574 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
575 {
576 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
577 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
578 			txt, qdisc->ops->id, qdisc->handle >> 16);
579 		qdisc->flags |= TCQ_F_WARN_NONWC;
580 	}
581 }
582 EXPORT_SYMBOL(qdisc_warn_nonwc);
583 
584 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
585 {
586 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
587 						 timer);
588 
589 	rcu_read_lock();
590 	__netif_schedule(qdisc_root(wd->qdisc));
591 	rcu_read_unlock();
592 
593 	return HRTIMER_NORESTART;
594 }
595 
596 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
597 {
598 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
599 	wd->timer.function = qdisc_watchdog;
600 	wd->qdisc = qdisc;
601 }
602 EXPORT_SYMBOL(qdisc_watchdog_init);
603 
604 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
605 {
606 	if (test_bit(__QDISC_STATE_DEACTIVATED,
607 		     &qdisc_root_sleeping(wd->qdisc)->state))
608 		return;
609 
610 	if (wd->last_expires == expires)
611 		return;
612 
613 	wd->last_expires = expires;
614 	hrtimer_start(&wd->timer,
615 		      ns_to_ktime(expires),
616 		      HRTIMER_MODE_ABS_PINNED);
617 }
618 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
619 
620 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
621 {
622 	hrtimer_cancel(&wd->timer);
623 }
624 EXPORT_SYMBOL(qdisc_watchdog_cancel);
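
/* Shaping qdiscs drive this watchdog from their dequeue path. A hedged
 * sketch of the usual pattern (the q->wd field and next_send_time are
 * hypothetical):
 */
#if 0
	/* in ->init() */
	qdisc_watchdog_init(&q->wd, sch);

	/* in ->dequeue(), when the head packet must not be sent yet */
	if (next_send_time > ktime_get_ns()) {
		qdisc_watchdog_schedule_ns(&q->wd, next_send_time);
		return NULL;	/* not empty, just not ready */
	}

	/* in ->reset() / ->destroy() */
	qdisc_watchdog_cancel(&q->wd);
#endif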
625 
626 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
627 {
628 	unsigned int size = n * sizeof(struct hlist_head), i;
629 	struct hlist_head *h;
630 
631 	if (size <= PAGE_SIZE)
632 		h = kmalloc(size, GFP_KERNEL);
633 	else
634 		h = (struct hlist_head *)
635 			__get_free_pages(GFP_KERNEL, get_order(size));
636 
637 	if (h != NULL) {
638 		for (i = 0; i < n; i++)
639 			INIT_HLIST_HEAD(&h[i]);
640 	}
641 	return h;
642 }
643 
644 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
645 {
646 	unsigned int size = n * sizeof(struct hlist_head);
647 
648 	if (size <= PAGE_SIZE)
649 		kfree(h);
650 	else
651 		free_pages((unsigned long)h, get_order(size));
652 }
653 
654 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
655 {
656 	struct Qdisc_class_common *cl;
657 	struct hlist_node *next;
658 	struct hlist_head *nhash, *ohash;
659 	unsigned int nsize, nmask, osize;
660 	unsigned int i, h;
661 
662 	/* Rehash when load factor exceeds 0.75 */
663 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
664 		return;
665 	nsize = clhash->hashsize * 2;
666 	nmask = nsize - 1;
667 	nhash = qdisc_class_hash_alloc(nsize);
668 	if (nhash == NULL)
669 		return;
670 
671 	ohash = clhash->hash;
672 	osize = clhash->hashsize;
673 
674 	sch_tree_lock(sch);
675 	for (i = 0; i < osize; i++) {
676 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
677 			h = qdisc_class_hash(cl->classid, nmask);
678 			hlist_add_head(&cl->hnode, &nhash[h]);
679 		}
680 	}
681 	clhash->hash     = nhash;
682 	clhash->hashsize = nsize;
683 	clhash->hashmask = nmask;
684 	sch_tree_unlock(sch);
685 
686 	qdisc_class_hash_free(ohash, osize);
687 }
688 EXPORT_SYMBOL(qdisc_class_hash_grow);
689 
690 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
691 {
692 	unsigned int size = 4;
693 
694 	clhash->hash = qdisc_class_hash_alloc(size);
695 	if (clhash->hash == NULL)
696 		return -ENOMEM;
697 	clhash->hashsize  = size;
698 	clhash->hashmask  = size - 1;
699 	clhash->hashelems = 0;
700 	return 0;
701 }
702 EXPORT_SYMBOL(qdisc_class_hash_init);
703 
704 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
705 {
706 	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
707 }
708 EXPORT_SYMBOL(qdisc_class_hash_destroy);
709 
710 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
711 			     struct Qdisc_class_common *cl)
712 {
713 	unsigned int h;
714 
715 	INIT_HLIST_NODE(&cl->hnode);
716 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
717 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
718 	clhash->hashelems++;
719 }
720 EXPORT_SYMBOL(qdisc_class_hash_insert);
721 
722 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
723 			     struct Qdisc_class_common *cl)
724 {
725 	hlist_del(&cl->hnode);
726 	clhash->hashelems--;
727 }
728 EXPORT_SYMBOL(qdisc_class_hash_remove);
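
/* A classful qdisc embeds struct Qdisc_class_common at the head of its
 * class type and uses the helpers above roughly as follows (hedged
 * sketch; "example_class" and the q->clhash placement are hypothetical):
 */
#if 0
struct example_class {
	struct Qdisc_class_common common;	/* classid + hash node */
	/* ... per-class state ... */
};

	/* in ->init() */
	err = qdisc_class_hash_init(&q->clhash);

	/* when a class is created */
	cl->common.classid = classid;
	qdisc_class_hash_insert(&q->clhash, &cl->common);
	qdisc_class_hash_grow(sch, &q->clhash);	/* rehashes past 0.75 load */

	/* lookup by classid */
	struct Qdisc_class_common *c = qdisc_class_find(&q->clhash, classid);
#endif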
729 
730 /* Allocate a unique handle from the space managed by the kernel.
731  * Possible range is [8000-FFFF]:0000 (0x8000 values)
732  */
733 static u32 qdisc_alloc_handle(struct net_device *dev)
734 {
735 	int i = 0x8000;
736 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
737 
738 	do {
739 		autohandle += TC_H_MAKE(0x10000U, 0);
740 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
741 			autohandle = TC_H_MAKE(0x80000000U, 0);
742 		if (!qdisc_lookup(dev, autohandle))
743 			return autohandle;
744 		cond_resched();
745 	} while	(--i > 0);
746 
747 	return 0;
748 }
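
/* Worked example: autohandle starts at 8000:, so the loop above hands
 * out majors 8001:, 8002:, ... in order; when the counter would reach
 * ffff: (which is TC_H_MAJ(TC_H_ROOT)) it wraps back to 8000:.  Each
 * candidate is checked with qdisc_lookup(), and after 0x8000 occupied
 * probes the function gives up and returns 0.
 */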
749 
750 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
751 			       unsigned int len)
752 {
753 	const struct Qdisc_class_ops *cops;
754 	unsigned long cl;
755 	u32 parentid;
756 	int drops;
757 
758 	if (n == 0 && len == 0)
759 		return;
760 	drops = max_t(int, n, 0);
761 	rcu_read_lock();
762 	while ((parentid = sch->parent)) {
763 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
764 			break;
765 
766 		if (sch->flags & TCQ_F_NOPARENT)
767 			break;
768 		/* TODO: perform the search on a per txq basis */
769 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
770 		if (sch == NULL) {
771 			WARN_ON_ONCE(parentid != TC_H_ROOT);
772 			break;
773 		}
774 		cops = sch->ops->cl_ops;
775 		if (cops->qlen_notify) {
776 			cl = cops->get(sch, parentid);
777 			cops->qlen_notify(sch, cl);
778 			cops->put(sch, cl);
779 		}
780 		sch->q.qlen -= n;
781 		sch->qstats.backlog -= len;
782 		__qdisc_qstats_drop(sch, drops);
783 	}
784 	rcu_read_unlock();
785 }
786 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
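
/* Callers use this after removing packets from a qdisc outside the
 * normal dequeue path, so that qlen/backlog stay consistent in every
 * ancestor. A hedged sketch of shrinking a queue after a limit change
 * (new_limit is hypothetical):
 */
#if 0
	unsigned int dropped = 0, dropped_len = 0;
	struct sk_buff *skb;

	while (sch->q.qlen > new_limit) {
		skb = qdisc_dequeue_head(sch);	/* updates this qdisc only */
		dropped++;
		dropped_len += qdisc_pkt_len(skb);
		kfree_skb(skb);
	}
	qdisc_tree_reduce_backlog(sch, dropped, dropped_len);
#endif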
787 
788 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
789 			       struct nlmsghdr *n, u32 clid,
790 			       struct Qdisc *old, struct Qdisc *new)
791 {
792 	if (new || old)
793 		qdisc_notify(net, skb, n, clid, old, new);
794 
795 	if (old)
796 		qdisc_destroy(old);
797 }
798 
799 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
800  * to device "dev".
801  *
802  * When appropriate, send a netlink notification using "skb"
803  * and "n".
804  *
805  * On success, destroy the old qdisc.
806  */
807 
808 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
809 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
810 		       struct Qdisc *new, struct Qdisc *old)
811 {
812 	struct Qdisc *q = old;
813 	struct net *net = dev_net(dev);
814 	int err = 0;
815 
816 	if (parent == NULL) {
817 		unsigned int i, num_q, ingress;
818 
819 		ingress = 0;
820 		num_q = dev->num_tx_queues;
821 		if ((q && q->flags & TCQ_F_INGRESS) ||
822 		    (new && new->flags & TCQ_F_INGRESS)) {
823 			num_q = 1;
824 			ingress = 1;
825 			if (!dev_ingress_queue(dev))
826 				return -ENOENT;
827 		}
828 
829 		if (dev->flags & IFF_UP)
830 			dev_deactivate(dev);
831 
832 		if (new && new->ops->attach)
833 			goto skip;
834 
835 		for (i = 0; i < num_q; i++) {
836 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
837 
838 			if (!ingress)
839 				dev_queue = netdev_get_tx_queue(dev, i);
840 
841 			old = dev_graft_qdisc(dev_queue, new);
842 			if (new && i > 0)
843 				atomic_inc(&new->refcnt);
844 
845 			if (!ingress)
846 				qdisc_destroy(old);
847 		}
848 
849 skip:
850 		if (!ingress) {
851 			notify_and_destroy(net, skb, n, classid,
852 					   dev->qdisc, new);
853 			if (new && !new->ops->attach)
854 				atomic_inc(&new->refcnt);
855 			dev->qdisc = new ? : &noop_qdisc;
856 
857 			if (new && new->ops->attach)
858 				new->ops->attach(new);
859 		} else {
860 			notify_and_destroy(net, skb, n, classid, old, new);
861 		}
862 
863 		if (dev->flags & IFF_UP)
864 			dev_activate(dev);
865 	} else {
866 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
867 
868 		err = -EOPNOTSUPP;
869 		if (cops && cops->graft) {
870 			unsigned long cl = cops->get(parent, classid);
871 			if (cl) {
872 				err = cops->graft(parent, cl, new, &old);
873 				cops->put(parent, cl);
874 			} else
875 				err = -ENOENT;
876 		}
877 		if (!err)
878 			notify_and_destroy(net, skb, n, classid, old, new);
879 	}
880 	return err;
881 }
882 
883 /* lockdep annotation is needed for ingress; egress gets it only for name */
884 static struct lock_class_key qdisc_tx_lock;
885 static struct lock_class_key qdisc_rx_lock;
886 
887 /*
888    Allocate and initialize a new qdisc.
889 
890    Parameters are passed via opt.
891  */
892 
893 static struct Qdisc *qdisc_create(struct net_device *dev,
894 				  struct netdev_queue *dev_queue,
895 				  struct Qdisc *p, u32 parent, u32 handle,
896 				  struct nlattr **tca, int *errp)
897 {
898 	int err;
899 	struct nlattr *kind = tca[TCA_KIND];
900 	struct Qdisc *sch;
901 	struct Qdisc_ops *ops;
902 	struct qdisc_size_table *stab;
903 
904 	ops = qdisc_lookup_ops(kind);
905 #ifdef CONFIG_MODULES
906 	if (ops == NULL && kind != NULL) {
907 		char name[IFNAMSIZ];
908 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
909 			/* We dropped the RTNL semaphore in order to
910 			 * perform the module load.  So, even if we
911 			 * succeeded in loading the module we have to
912 			 * tell the caller to replay the request.  We
913 			 * indicate this using -EAGAIN.
914 			 * We replay the request because the device may
915 			 * go away in the meantime.
916 			 */
917 			rtnl_unlock();
918 			request_module("sch_%s", name);
919 			rtnl_lock();
920 			ops = qdisc_lookup_ops(kind);
921 			if (ops != NULL) {
922 			/* We will call qdisc_lookup_ops() again on replay,
923 			 * so don't keep a reference here.
924 				 */
925 				module_put(ops->owner);
926 				err = -EAGAIN;
927 				goto err_out;
928 			}
929 		}
930 	}
931 #endif
932 
933 	err = -ENOENT;
934 	if (ops == NULL)
935 		goto err_out;
936 
937 	sch = qdisc_alloc(dev_queue, ops);
938 	if (IS_ERR(sch)) {
939 		err = PTR_ERR(sch);
940 		goto err_out2;
941 	}
942 
943 	sch->parent = parent;
944 
945 	if (handle == TC_H_INGRESS) {
946 		sch->flags |= TCQ_F_INGRESS;
947 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
948 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
949 	} else {
950 		if (handle == 0) {
951 			handle = qdisc_alloc_handle(dev);
952 			err = -ENOMEM;
953 			if (handle == 0)
954 				goto err_out3;
955 		}
956 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
957 		if (!netif_is_multiqueue(dev))
958 			sch->flags |= TCQ_F_ONETXQUEUE;
959 	}
960 
961 	sch->handle = handle;
962 
963 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
964 		if (qdisc_is_percpu_stats(sch)) {
965 			sch->cpu_bstats =
966 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
967 			if (!sch->cpu_bstats)
968 				goto err_out4;
969 
970 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
971 			if (!sch->cpu_qstats)
972 				goto err_out4;
973 		}
974 
975 		if (tca[TCA_STAB]) {
976 			stab = qdisc_get_stab(tca[TCA_STAB]);
977 			if (IS_ERR(stab)) {
978 				err = PTR_ERR(stab);
979 				goto err_out4;
980 			}
981 			rcu_assign_pointer(sch->stab, stab);
982 		}
983 		if (tca[TCA_RATE]) {
984 			seqcount_t *running;
985 
986 			err = -EOPNOTSUPP;
987 			if (sch->flags & TCQ_F_MQROOT)
988 				goto err_out4;
989 
990 			if ((sch->parent != TC_H_ROOT) &&
991 			    !(sch->flags & TCQ_F_INGRESS) &&
992 			    (!p || !(p->flags & TCQ_F_MQROOT)))
993 				running = qdisc_root_sleeping_running(sch);
994 			else
995 				running = &sch->running;
996 
997 			err = gen_new_estimator(&sch->bstats,
998 						sch->cpu_bstats,
999 						&sch->rate_est,
1000 						NULL,
1001 						running,
1002 						tca[TCA_RATE]);
1003 			if (err)
1004 				goto err_out4;
1005 		}
1006 
1007 		qdisc_hash_add(sch);
1008 
1009 		return sch;
1010 	}
1011 err_out3:
1012 	dev_put(dev);
1013 	kfree((char *) sch - sch->padded);
1014 err_out2:
1015 	module_put(ops->owner);
1016 err_out:
1017 	*errp = err;
1018 	return NULL;
1019 
1020 err_out4:
1021 	free_percpu(sch->cpu_bstats);
1022 	free_percpu(sch->cpu_qstats);
1023 	/*
1024 	 * Any broken qdiscs that would require a ops->reset() here?
1025 	 * The qdisc was never in action so it shouldn't be necessary.
1026 	 */
1027 	qdisc_put_stab(rtnl_dereference(sch->stab));
1028 	if (ops->destroy)
1029 		ops->destroy(sch);
1030 	goto err_out3;
1031 }
1032 
1033 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1034 {
1035 	struct qdisc_size_table *ostab, *stab = NULL;
1036 	int err = 0;
1037 
1038 	if (tca[TCA_OPTIONS]) {
1039 		if (sch->ops->change == NULL)
1040 			return -EINVAL;
1041 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1042 		if (err)
1043 			return err;
1044 	}
1045 
1046 	if (tca[TCA_STAB]) {
1047 		stab = qdisc_get_stab(tca[TCA_STAB]);
1048 		if (IS_ERR(stab))
1049 			return PTR_ERR(stab);
1050 	}
1051 
1052 	ostab = rtnl_dereference(sch->stab);
1053 	rcu_assign_pointer(sch->stab, stab);
1054 	qdisc_put_stab(ostab);
1055 
1056 	if (tca[TCA_RATE]) {
1057 		/* NB: ignores errors from replace_estimator
1058 		   because change can't be undone. */
1059 		if (sch->flags & TCQ_F_MQROOT)
1060 			goto out;
1061 		gen_replace_estimator(&sch->bstats,
1062 				      sch->cpu_bstats,
1063 				      &sch->rate_est,
1064 				      NULL,
1065 				      qdisc_root_sleeping_running(sch),
1066 				      tca[TCA_RATE]);
1067 	}
1068 out:
1069 	return 0;
1070 }
1071 
1072 struct check_loop_arg {
1073 	struct qdisc_walker	w;
1074 	struct Qdisc		*p;
1075 	int			depth;
1076 };
1077 
1078 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1079 			 struct qdisc_walker *w);
1080 
1081 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1082 {
1083 	struct check_loop_arg	arg;
1084 
1085 	if (q->ops->cl_ops == NULL)
1086 		return 0;
1087 
1088 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1089 	arg.w.fn = check_loop_fn;
1090 	arg.depth = depth;
1091 	arg.p = p;
1092 	q->ops->cl_ops->walk(q, &arg.w);
1093 	return arg.w.stop ? -ELOOP : 0;
1094 }
1095 
1096 static int
1097 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1098 {
1099 	struct Qdisc *leaf;
1100 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1101 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1102 
1103 	leaf = cops->leaf(q, cl);
1104 	if (leaf) {
1105 		if (leaf == arg->p || arg->depth > 7)
1106 			return -ELOOP;
1107 		return check_loop(leaf, arg->p, arg->depth + 1);
1108 	}
1109 	return 0;
1110 }
1111 
1112 /*
1113  * Delete/get qdisc.
1114  */
1115 
1116 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1117 {
1118 	struct net *net = sock_net(skb->sk);
1119 	struct tcmsg *tcm = nlmsg_data(n);
1120 	struct nlattr *tca[TCA_MAX + 1];
1121 	struct net_device *dev;
1122 	u32 clid;
1123 	struct Qdisc *q = NULL;
1124 	struct Qdisc *p = NULL;
1125 	int err;
1126 
1127 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1128 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1129 		return -EPERM;
1130 
1131 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1132 	if (err < 0)
1133 		return err;
1134 
1135 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1136 	if (!dev)
1137 		return -ENODEV;
1138 
1139 	clid = tcm->tcm_parent;
1140 	if (clid) {
1141 		if (clid != TC_H_ROOT) {
1142 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1143 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1144 				if (!p)
1145 					return -ENOENT;
1146 				q = qdisc_leaf(p, clid);
1147 			} else if (dev_ingress_queue(dev)) {
1148 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1149 			}
1150 		} else {
1151 			q = dev->qdisc;
1152 		}
1153 		if (!q)
1154 			return -ENOENT;
1155 
1156 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1157 			return -EINVAL;
1158 	} else {
1159 		q = qdisc_lookup(dev, tcm->tcm_handle);
1160 		if (!q)
1161 			return -ENOENT;
1162 	}
1163 
1164 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1165 		return -EINVAL;
1166 
1167 	if (n->nlmsg_type == RTM_DELQDISC) {
1168 		if (!clid)
1169 			return -EINVAL;
1170 		if (q->handle == 0)
1171 			return -ENOENT;
1172 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1173 		if (err != 0)
1174 			return err;
1175 	} else {
1176 		qdisc_notify(net, skb, n, clid, NULL, q);
1177 	}
1178 	return 0;
1179 }
1180 
1181 /*
1182  * Create/change qdisc.
1183  */
1184 
1185 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1186 {
1187 	struct net *net = sock_net(skb->sk);
1188 	struct tcmsg *tcm;
1189 	struct nlattr *tca[TCA_MAX + 1];
1190 	struct net_device *dev;
1191 	u32 clid;
1192 	struct Qdisc *q, *p;
1193 	int err;
1194 
1195 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1196 		return -EPERM;
1197 
1198 replay:
1199 	/* Reinit, just in case something touches this. */
1200 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1201 	if (err < 0)
1202 		return err;
1203 
1204 	tcm = nlmsg_data(n);
1205 	clid = tcm->tcm_parent;
1206 	q = p = NULL;
1207 
1208 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1209 	if (!dev)
1210 		return -ENODEV;
1211 
1212 
1213 	if (clid) {
1214 		if (clid != TC_H_ROOT) {
1215 			if (clid != TC_H_INGRESS) {
1216 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1217 				if (!p)
1218 					return -ENOENT;
1219 				q = qdisc_leaf(p, clid);
1220 			} else if (dev_ingress_queue_create(dev)) {
1221 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1222 			}
1223 		} else {
1224 			q = dev->qdisc;
1225 		}
1226 
1227 		/* It may be the default qdisc; ignore it */
1228 		if (q && q->handle == 0)
1229 			q = NULL;
1230 
1231 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1232 			if (tcm->tcm_handle) {
1233 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1234 					return -EEXIST;
1235 				if (TC_H_MIN(tcm->tcm_handle))
1236 					return -EINVAL;
1237 				q = qdisc_lookup(dev, tcm->tcm_handle);
1238 				if (!q)
1239 					goto create_n_graft;
1240 				if (n->nlmsg_flags & NLM_F_EXCL)
1241 					return -EEXIST;
1242 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1243 					return -EINVAL;
1244 				if (q == p ||
1245 				    (p && check_loop(q, p, 0)))
1246 					return -ELOOP;
1247 				atomic_inc(&q->refcnt);
1248 				goto graft;
1249 			} else {
1250 				if (!q)
1251 					goto create_n_graft;
1252 
1253 				/* This magic test requires explanation.
1254 				 *
1255 				 *   We know that some child q is already
1256 				 *   attached to this parent and have a choice:
1257 				 *   either to change it or to create/graft a new one.
1258 				 *
1259 				 *   1. We are allowed to create/graft only
1260 				 *   if both CREATE and REPLACE flags are set.
1261 				 *
1262 				 *   2. If EXCL is set, the requestor wanted to say
1263 				 *   that the qdisc tcm_handle is not expected
1264 				 *   to exist, so we choose create/graft too.
1265 				 *
1266 				 *   3. The last case is when no flags are set.
1267 				 *   Alas, it is a sort of hole in the API: we
1268 				 *   cannot decide what to do unambiguously.
1269 				 *   For now we select create/graft if the
1270 				 *   user gave a KIND that does not match the existing one.
1271 				 */
1272 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1273 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1274 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1275 				     (tca[TCA_KIND] &&
1276 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1277 					goto create_n_graft;
1278 			}
1279 		}
1280 	} else {
1281 		if (!tcm->tcm_handle)
1282 			return -EINVAL;
1283 		q = qdisc_lookup(dev, tcm->tcm_handle);
1284 	}
1285 
1286 	/* Change qdisc parameters */
1287 	if (q == NULL)
1288 		return -ENOENT;
1289 	if (n->nlmsg_flags & NLM_F_EXCL)
1290 		return -EEXIST;
1291 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1292 		return -EINVAL;
1293 	err = qdisc_change(q, tca);
1294 	if (err == 0)
1295 		qdisc_notify(net, skb, n, clid, NULL, q);
1296 	return err;
1297 
1298 create_n_graft:
1299 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1300 		return -ENOENT;
1301 	if (clid == TC_H_INGRESS) {
1302 		if (dev_ingress_queue(dev))
1303 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1304 					 tcm->tcm_parent, tcm->tcm_parent,
1305 					 tca, &err);
1306 		else
1307 			err = -ENOENT;
1308 	} else {
1309 		struct netdev_queue *dev_queue;
1310 
1311 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1312 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1313 		else if (p)
1314 			dev_queue = p->dev_queue;
1315 		else
1316 			dev_queue = netdev_get_tx_queue(dev, 0);
1317 
1318 		q = qdisc_create(dev, dev_queue, p,
1319 				 tcm->tcm_parent, tcm->tcm_handle,
1320 				 tca, &err);
1321 	}
1322 	if (q == NULL) {
1323 		if (err == -EAGAIN)
1324 			goto replay;
1325 		return err;
1326 	}
1327 
1328 graft:
1329 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1330 	if (err) {
1331 		if (q)
1332 			qdisc_destroy(q);
1333 		return err;
1334 	}
1335 
1336 	return 0;
1337 }
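
/* For reference, iproute2 maps its verbs onto the flag tests above
 * roughly as follows (observed userspace behaviour, not an ABI
 * guarantee): "tc qdisc add" sends NLM_F_CREATE|NLM_F_EXCL, "replace"
 * sends NLM_F_CREATE|NLM_F_REPLACE, and "change" sends no flags, which
 * is why a bare change of an existing qdisc falls through to
 * qdisc_change() instead of create_n_graft.
 */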
1338 
1339 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1340 			 u32 portid, u32 seq, u16 flags, int event)
1341 {
1342 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
1343 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1344 	struct tcmsg *tcm;
1345 	struct nlmsghdr  *nlh;
1346 	unsigned char *b = skb_tail_pointer(skb);
1347 	struct gnet_dump d;
1348 	struct qdisc_size_table *stab;
1349 	__u32 qlen;
1350 
1351 	cond_resched();
1352 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1353 	if (!nlh)
1354 		goto out_nlmsg_trim;
1355 	tcm = nlmsg_data(nlh);
1356 	tcm->tcm_family = AF_UNSPEC;
1357 	tcm->tcm__pad1 = 0;
1358 	tcm->tcm__pad2 = 0;
1359 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1360 	tcm->tcm_parent = clid;
1361 	tcm->tcm_handle = q->handle;
1362 	tcm->tcm_info = atomic_read(&q->refcnt);
1363 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1364 		goto nla_put_failure;
1365 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
1366 		goto nla_put_failure;
1367 	qlen = q->q.qlen;
1368 
1369 	stab = rtnl_dereference(q->stab);
1370 	if (stab && qdisc_dump_stab(skb, stab) < 0)
1371 		goto nla_put_failure;
1372 
1373 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1374 					 NULL, &d, TCA_PAD) < 0)
1375 		goto nla_put_failure;
1376 
1377 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1378 		goto nla_put_failure;
1379 
1380 	if (qdisc_is_percpu_stats(q)) {
1381 		cpu_bstats = q->cpu_bstats;
1382 		cpu_qstats = q->cpu_qstats;
1383 	}
1384 
1385 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1386 				  &d, cpu_bstats, &q->bstats) < 0 ||
1387 	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
1388 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1389 		goto nla_put_failure;
1390 
1391 	if (gnet_stats_finish_copy(&d) < 0)
1392 		goto nla_put_failure;
1393 
1394 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1395 	return skb->len;
1396 
1397 out_nlmsg_trim:
1398 nla_put_failure:
1399 	nlmsg_trim(skb, b);
1400 	return -1;
1401 }
1402 
1403 static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1404 {
1405 	return (q->flags & TCQ_F_BUILTIN) ? true : false;
1406 }
1407 
1408 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1409 			struct nlmsghdr *n, u32 clid,
1410 			struct Qdisc *old, struct Qdisc *new)
1411 {
1412 	struct sk_buff *skb;
1413 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1414 
1415 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1416 	if (!skb)
1417 		return -ENOBUFS;
1418 
1419 	if (old && !tc_qdisc_dump_ignore(old)) {
1420 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1421 				  0, RTM_DELQDISC) < 0)
1422 			goto err_out;
1423 	}
1424 	if (new && !tc_qdisc_dump_ignore(new)) {
1425 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1426 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1427 			goto err_out;
1428 	}
1429 
1430 	if (skb->len)
1431 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1432 				      n->nlmsg_flags & NLM_F_ECHO);
1433 
1434 err_out:
1435 	kfree_skb(skb);
1436 	return -EINVAL;
1437 }
1438 
1439 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1440 			      struct netlink_callback *cb,
1441 			      int *q_idx_p, int s_q_idx, bool recur)
1442 {
1443 	int ret = 0, q_idx = *q_idx_p;
1444 	struct Qdisc *q;
1445 	int b;
1446 
1447 	if (!root)
1448 		return 0;
1449 
1450 	q = root;
1451 	if (q_idx < s_q_idx) {
1452 		q_idx++;
1453 	} else {
1454 		if (!tc_qdisc_dump_ignore(q) &&
1455 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1456 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1457 				  RTM_NEWQDISC) <= 0)
1458 			goto done;
1459 		q_idx++;
1460 	}
1461 
1462 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1463 	 * itself has already been dumped.
1464 	 *
1465 	 * If we've already dumped the top-level (ingress) qdisc above and the
1466 	 * global qdisc hashtable, we don't want to hit it again.
1467 	 */
1468 	if (!qdisc_dev(root) || !recur)
1469 		goto out;
1470 
1471 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1472 		if (q_idx < s_q_idx) {
1473 			q_idx++;
1474 			continue;
1475 		}
1476 		if (!tc_qdisc_dump_ignore(q) &&
1477 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1478 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1479 				  RTM_NEWQDISC) <= 0)
1480 			goto done;
1481 		q_idx++;
1482 	}
1483 
1484 out:
1485 	*q_idx_p = q_idx;
1486 	return ret;
1487 done:
1488 	ret = -1;
1489 	goto out;
1490 }
1491 
1492 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1493 {
1494 	struct net *net = sock_net(skb->sk);
1495 	int idx, q_idx;
1496 	int s_idx, s_q_idx;
1497 	struct net_device *dev;
1498 
1499 	s_idx = cb->args[0];
1500 	s_q_idx = q_idx = cb->args[1];
1501 
1502 	idx = 0;
1503 	ASSERT_RTNL();
1504 	for_each_netdev(net, dev) {
1505 		struct netdev_queue *dev_queue;
1506 
1507 		if (idx < s_idx)
1508 			goto cont;
1509 		if (idx > s_idx)
1510 			s_q_idx = 0;
1511 		q_idx = 0;
1512 
1513 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1514 				       true) < 0)
1515 			goto done;
1516 
1517 		dev_queue = dev_ingress_queue(dev);
1518 		if (dev_queue &&
1519 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1520 				       &q_idx, s_q_idx, false) < 0)
1521 			goto done;
1522 
1523 cont:
1524 		idx++;
1525 	}
1526 
1527 done:
1528 	cb->args[0] = idx;
1529 	cb->args[1] = q_idx;
1530 
1531 	return skb->len;
1532 }
1533 
1534 
1535 
1536 /************************************************
1537  *	Traffic classes manipulation.		*
1538  ************************************************/
1539 
1540 
1541 
1542 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1543 {
1544 	struct net *net = sock_net(skb->sk);
1545 	struct tcmsg *tcm = nlmsg_data(n);
1546 	struct nlattr *tca[TCA_MAX + 1];
1547 	struct net_device *dev;
1548 	struct Qdisc *q = NULL;
1549 	const struct Qdisc_class_ops *cops;
1550 	unsigned long cl = 0;
1551 	unsigned long new_cl;
1552 	u32 portid;
1553 	u32 clid;
1554 	u32 qid;
1555 	int err;
1556 
1557 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1558 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1559 		return -EPERM;
1560 
1561 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1562 	if (err < 0)
1563 		return err;
1564 
1565 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1566 	if (!dev)
1567 		return -ENODEV;
1568 
1569 	/*
1570 	   parent == TC_H_UNSPEC - unspecified parent.
1571 	   parent == TC_H_ROOT   - class is root, which has no parent.
1572 	   parent == X:0	 - parent is root class.
1573 	   parent == X:Y	 - parent is a node in hierarchy.
1574 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1575 
1576 	   handle == 0:0	 - generate handle from kernel pool.
1577 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1578 	   handle == X:Y	 - clear.
1579 	   handle == X:0	 - root class.
1580 	 */
1581 
1582 	/* Step 1. Determine qdisc handle X:0 */
1583 
1584 	portid = tcm->tcm_parent;
1585 	clid = tcm->tcm_handle;
1586 	qid = TC_H_MAJ(clid);
1587 
1588 	if (portid != TC_H_ROOT) {
1589 		u32 qid1 = TC_H_MAJ(portid);
1590 
1591 		if (qid && qid1) {
1592 			/* If both majors are known, they must be identical. */
1593 			if (qid != qid1)
1594 				return -EINVAL;
1595 		} else if (qid1) {
1596 			qid = qid1;
1597 		} else if (qid == 0)
1598 			qid = dev->qdisc->handle;
1599 
1600 		/* Now qid is a genuine qdisc handle consistent
1601 		 * with both parent and child.
1602 		 *
1603 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1604 		 */
1605 		if (portid)
1606 			portid = TC_H_MAKE(qid, portid);
1607 	} else {
1608 		if (qid == 0)
1609 			qid = dev->qdisc->handle;
1610 	}
1611 
1612 	/* OK. Locate qdisc */
1613 	q = qdisc_lookup(dev, qid);
1614 	if (!q)
1615 		return -ENOENT;
1616 
1617 	/* And check that it supports classes */
1618 	cops = q->ops->cl_ops;
1619 	if (cops == NULL)
1620 		return -EINVAL;
1621 
1622 	/* Now try to get class */
1623 	if (clid == 0) {
1624 		if (portid == TC_H_ROOT)
1625 			clid = qid;
1626 	} else
1627 		clid = TC_H_MAKE(qid, clid);
1628 
1629 	if (clid)
1630 		cl = cops->get(q, clid);
1631 
1632 	if (cl == 0) {
1633 		err = -ENOENT;
1634 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1635 		    !(n->nlmsg_flags & NLM_F_CREATE))
1636 			goto out;
1637 	} else {
1638 		switch (n->nlmsg_type) {
1639 		case RTM_NEWTCLASS:
1640 			err = -EEXIST;
1641 			if (n->nlmsg_flags & NLM_F_EXCL)
1642 				goto out;
1643 			break;
1644 		case RTM_DELTCLASS:
1645 			err = -EOPNOTSUPP;
1646 			if (cops->delete)
1647 				err = cops->delete(q, cl);
1648 			if (err == 0)
1649 				tclass_notify(net, skb, n, q, cl,
1650 					      RTM_DELTCLASS);
1651 			goto out;
1652 		case RTM_GETTCLASS:
1653 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1654 			goto out;
1655 		default:
1656 			err = -EINVAL;
1657 			goto out;
1658 		}
1659 	}
1660 
1661 	new_cl = cl;
1662 	err = -EOPNOTSUPP;
1663 	if (cops->change)
1664 		err = cops->change(q, clid, portid, tca, &new_cl);
1665 	if (err == 0)
1666 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1667 
1668 out:
1669 	if (cl)
1670 		cops->put(q, cl);
1671 
1672 	return err;
1673 }
1674 
1675 
1676 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1677 			  unsigned long cl,
1678 			  u32 portid, u32 seq, u16 flags, int event)
1679 {
1680 	struct tcmsg *tcm;
1681 	struct nlmsghdr  *nlh;
1682 	unsigned char *b = skb_tail_pointer(skb);
1683 	struct gnet_dump d;
1684 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1685 
1686 	cond_resched();
1687 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1688 	if (!nlh)
1689 		goto out_nlmsg_trim;
1690 	tcm = nlmsg_data(nlh);
1691 	tcm->tcm_family = AF_UNSPEC;
1692 	tcm->tcm__pad1 = 0;
1693 	tcm->tcm__pad2 = 0;
1694 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1695 	tcm->tcm_parent = q->handle;
1696 	tcm->tcm_handle = q->handle;
1697 	tcm->tcm_info = 0;
1698 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1699 		goto nla_put_failure;
1700 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1701 		goto nla_put_failure;
1702 
1703 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1704 					 NULL, &d, TCA_PAD) < 0)
1705 		goto nla_put_failure;
1706 
1707 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1708 		goto nla_put_failure;
1709 
1710 	if (gnet_stats_finish_copy(&d) < 0)
1711 		goto nla_put_failure;
1712 
1713 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1714 	return skb->len;
1715 
1716 out_nlmsg_trim:
1717 nla_put_failure:
1718 	nlmsg_trim(skb, b);
1719 	return -1;
1720 }
1721 
1722 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1723 			 struct nlmsghdr *n, struct Qdisc *q,
1724 			 unsigned long cl, int event)
1725 {
1726 	struct sk_buff *skb;
1727 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1728 
1729 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1730 	if (!skb)
1731 		return -ENOBUFS;
1732 
1733 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1734 		kfree_skb(skb);
1735 		return -EINVAL;
1736 	}
1737 
1738 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1739 			      n->nlmsg_flags & NLM_F_ECHO);
1740 }
1741 
1742 struct qdisc_dump_args {
1743 	struct qdisc_walker	w;
1744 	struct sk_buff		*skb;
1745 	struct netlink_callback	*cb;
1746 };
1747 
1748 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1749 			    struct qdisc_walker *arg)
1750 {
1751 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1752 
1753 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1754 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1755 			      RTM_NEWTCLASS);
1756 }
1757 
1758 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1759 				struct tcmsg *tcm, struct netlink_callback *cb,
1760 				int *t_p, int s_t)
1761 {
1762 	struct qdisc_dump_args arg;
1763 
1764 	if (tc_qdisc_dump_ignore(q) ||
1765 	    *t_p < s_t || !q->ops->cl_ops ||
1766 	    (tcm->tcm_parent &&
1767 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1768 		(*t_p)++;
1769 		return 0;
1770 	}
1771 	if (*t_p > s_t)
1772 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1773 	arg.w.fn = qdisc_class_dump;
1774 	arg.skb = skb;
1775 	arg.cb = cb;
1776 	arg.w.stop  = 0;
1777 	arg.w.skip = cb->args[1];
1778 	arg.w.count = 0;
1779 	q->ops->cl_ops->walk(q, &arg.w);
1780 	cb->args[1] = arg.w.count;
1781 	if (arg.w.stop)
1782 		return -1;
1783 	(*t_p)++;
1784 	return 0;
1785 }
1786 
1787 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1788 			       struct tcmsg *tcm, struct netlink_callback *cb,
1789 			       int *t_p, int s_t)
1790 {
1791 	struct Qdisc *q;
1792 	int b;
1793 
1794 	if (!root)
1795 		return 0;
1796 
1797 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1798 		return -1;
1799 
1800 	if (!qdisc_dev(root))
1801 		return 0;
1802 
1803 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1804 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1805 			return -1;
1806 	}
1807 
1808 	return 0;
1809 }
1810 
1811 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1812 {
1813 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1814 	struct net *net = sock_net(skb->sk);
1815 	struct netdev_queue *dev_queue;
1816 	struct net_device *dev;
1817 	int t, s_t;
1818 
1819 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1820 		return 0;
1821 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1822 	if (!dev)
1823 		return 0;
1824 
1825 	s_t = cb->args[0];
1826 	t = 0;
1827 
1828 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1829 		goto done;
1830 
1831 	dev_queue = dev_ingress_queue(dev);
1832 	if (dev_queue &&
1833 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1834 				&t, s_t) < 0)
1835 		goto done;
1836 
1837 done:
1838 	cb->args[0] = t;
1839 
1840 	dev_put(dev);
1841 	return skb->len;
1842 }
1843 
1844 /* Main classifier routine: scans the classifier chain attached
1845  * to this qdisc, (optionally) tests for the protocol and asks
1846  * the specific classifiers.
1847  */
1848 int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1849 		struct tcf_result *res, bool compat_mode)
1850 {
1851 	__be16 protocol = tc_skb_protocol(skb);
1852 #ifdef CONFIG_NET_CLS_ACT
1853 	const struct tcf_proto *old_tp = tp;
1854 	int limit = 0;
1855 
1856 reclassify:
1857 #endif
1858 	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1859 		int err;
1860 
1861 		if (tp->protocol != protocol &&
1862 		    tp->protocol != htons(ETH_P_ALL))
1863 			continue;
1864 
1865 		err = tp->classify(skb, tp, res);
1866 #ifdef CONFIG_NET_CLS_ACT
1867 		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
1868 			goto reset;
1869 #endif
1870 		if (err >= 0)
1871 			return err;
1872 	}
1873 
1874 	return TC_ACT_UNSPEC; /* signal: continue lookup */
1875 #ifdef CONFIG_NET_CLS_ACT
1876 reset:
1877 	if (unlikely(limit++ >= MAX_REC_LOOP)) {
1878 		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
1879 				       tp->q->ops->id, tp->prio & 0xffff,
1880 				       ntohs(tp->protocol));
1881 		return TC_ACT_SHOT;
1882 	}
1883 
1884 	tp = old_tp;
1885 	protocol = tc_skb_protocol(skb);
1886 	goto reclassify;
1887 #endif
1888 }
1889 EXPORT_SYMBOL(tc_classify);
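
/* A classful qdisc typically calls tc_classify() from its ->enqueue()
 * to map an skb to a class. A hedged sketch modelled on existing
 * schedulers ("example_*" names are hypothetical):
 */
#if 0
static struct example_class *example_classify(struct sk_buff *skb,
					      struct Qdisc *sch, int *qerr)
{
	struct example_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, rcu_dereference_bh(q->filter_list),
			     &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		return (struct example_class *)res.class;
	}
	return NULL;	/* TC_ACT_UNSPEC: caller picks a default class */
}
#endif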
1890 
1891 bool tcf_destroy(struct tcf_proto *tp, bool force)
1892 {
1893 	if (tp->ops->destroy(tp, force)) {
1894 		module_put(tp->ops->owner);
1895 		kfree_rcu(tp, rcu);
1896 		return true;
1897 	}
1898 
1899 	return false;
1900 }
1901 
1902 void tcf_destroy_chain(struct tcf_proto __rcu **fl)
1903 {
1904 	struct tcf_proto *tp;
1905 
1906 	while ((tp = rtnl_dereference(*fl)) != NULL) {
1907 		RCU_INIT_POINTER(*fl, tp->next);
1908 		tcf_destroy(tp, true);
1909 	}
1910 }
1911 EXPORT_SYMBOL(tcf_destroy_chain);
1912 
1913 #ifdef CONFIG_PROC_FS
1914 static int psched_show(struct seq_file *seq, void *v)
1915 {
1916 	seq_printf(seq, "%08x %08x %08x %08x\n",
1917 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1918 		   1000000,
1919 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1920 
1921 	return 0;
1922 }
1923 
1924 static int psched_open(struct inode *inode, struct file *file)
1925 {
1926 	return single_open(file, psched_show, NULL);
1927 }
1928 
1929 static const struct file_operations psched_fops = {
1930 	.owner = THIS_MODULE,
1931 	.open = psched_open,
1932 	.read  = seq_read,
1933 	.llseek = seq_lseek,
1934 	.release = single_release,
1935 };
1936 
1937 static int __net_init psched_net_init(struct net *net)
1938 {
1939 	struct proc_dir_entry *e;
1940 
1941 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1942 	if (e == NULL)
1943 		return -ENOMEM;
1944 
1945 	return 0;
1946 }
1947 
1948 static void __net_exit psched_net_exit(struct net *net)
1949 {
1950 	remove_proc_entry("psched", net->proc_net);
1951 }
1952 #else
1953 static int __net_init psched_net_init(struct net *net)
1954 {
1955 	return 0;
1956 }
1957 
1958 static void __net_exit psched_net_exit(struct net *net)
1959 {
1960 }
1961 #endif
1962 
1963 static struct pernet_operations psched_net_ops = {
1964 	.init = psched_net_init,
1965 	.exit = psched_net_exit,
1966 };
1967 
1968 static int __init pktsched_init(void)
1969 {
1970 	int err;
1971 
1972 	err = register_pernet_subsys(&psched_net_ops);
1973 	if (err) {
1974 		pr_err("pktsched_init: "
1975 		       "cannot initialize per netns operations\n");
1976 		return err;
1977 	}
1978 
1979 	register_qdisc(&pfifo_fast_ops);
1980 	register_qdisc(&pfifo_qdisc_ops);
1981 	register_qdisc(&bfifo_qdisc_ops);
1982 	register_qdisc(&pfifo_head_drop_qdisc_ops);
1983 	register_qdisc(&mq_qdisc_ops);
1984 	register_qdisc(&noqueue_qdisc_ops);
1985 
1986 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1987 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1988 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
1989 		      NULL);
1990 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1991 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
1992 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
1993 		      NULL);
1994 
1995 	return 0;
1996 }
1997 
1998 subsys_initcall(pktsched_init);
1999