xref: /openbmc/linux/net/sched/sch_api.c (revision e4781421e883340b796da5a724bda7226817990b)
1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 
39 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 			struct nlmsghdr *n, u32 clid,
41 			struct Qdisc *old, struct Qdisc *new);
42 static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 			 struct nlmsghdr *n, struct Qdisc *q,
44 			 unsigned long cl, int event);
45 
46 /*
47 
48    Short review.
49    -------------
50 
51    This file consists of two interrelated parts:
52 
53    1. queueing disciplines manager frontend.
54    2. traffic classes manager frontend.
55 
56    Generally, queueing discipline ("qdisc") is a black box,
57    which is able to enqueue packets and to dequeue them (when
58    device is ready to send something) in order and at times
59    determined by algorithm hidden in it.
60 
   qdiscs fall into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (look at cls_api.c)
65 
66    In turn, classes may have child qdiscs (as rule, queues)
67    attached to them etc. etc. etc.
68 
   The goal of the routines in this file is to translate
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to make some sanity
   checks and do the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.
74 
75    All real intelligent work is done inside qdisc modules.
76 
77 
78 
79    Every discipline has two major routines: enqueue and dequeue.
80 
81    ---dequeue
82 
   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean that the queue is empty; it just means that
   the discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   a real packet queue; however, q->q.qlen must still be valid.
89 
90    ---enqueue
91 
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP 	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore
99 
100    Auxiliary routines:
101 
102    ---peek
103 
104    like dequeue but without removing a packet from the queue
105 
106    ---reset
107 
108    returns qdisc to initial state: purge all buffers, clear all
109    timers, counters (except for statistics) etc.
110 
111    ---init
112 
113    initializes newly created qdisc.
114 
115    ---destroy
116 
117    destroys resources allocated by init and during lifetime of qdisc.
118 
119    ---change
120 
121    changes qdisc parameters.
122  */
123 
/* Pure SMP lock guarding the list of registered TC modules. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* Head of the singly linked list of all installed queueing disciplines. */
static struct Qdisc_ops *qdisc_base;
136 
137 /* Register/unregister queueing discipline */
138 
139 int register_qdisc(struct Qdisc_ops *qops)
140 {
141 	struct Qdisc_ops *q, **qp;
142 	int rc = -EEXIST;
143 
144 	write_lock(&qdisc_mod_lock);
145 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 		if (!strcmp(qops->id, q->id))
147 			goto out;
148 
149 	if (qops->enqueue == NULL)
150 		qops->enqueue = noop_qdisc_ops.enqueue;
151 	if (qops->peek == NULL) {
152 		if (qops->dequeue == NULL)
153 			qops->peek = noop_qdisc_ops.peek;
154 		else
155 			goto out_einval;
156 	}
157 	if (qops->dequeue == NULL)
158 		qops->dequeue = noop_qdisc_ops.dequeue;
159 
160 	if (qops->cl_ops) {
161 		const struct Qdisc_class_ops *cops = qops->cl_ops;
162 
163 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
164 			goto out_einval;
165 
166 		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
167 			goto out_einval;
168 	}
169 
170 	qops->next = NULL;
171 	*qp = qops;
172 	rc = 0;
173 out:
174 	write_unlock(&qdisc_mod_lock);
175 	return rc;
176 
177 out_einval:
178 	rc = -EINVAL;
179 	goto out;
180 }
181 EXPORT_SYMBOL(register_qdisc);
182 
183 int unregister_qdisc(struct Qdisc_ops *qops)
184 {
185 	struct Qdisc_ops *q, **qp;
186 	int err = -ENOENT;
187 
188 	write_lock(&qdisc_mod_lock);
189 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
190 		if (q == qops)
191 			break;
192 	if (q) {
193 		*qp = q->next;
194 		q->next = NULL;
195 		err = 0;
196 	}
197 	write_unlock(&qdisc_mod_lock);
198 	return err;
199 }
200 EXPORT_SYMBOL(unregister_qdisc);
201 
202 /* Get default qdisc if not otherwise specified */
203 void qdisc_get_default(char *name, size_t len)
204 {
205 	read_lock(&qdisc_mod_lock);
206 	strlcpy(name, default_qdisc_ops->id, len);
207 	read_unlock(&qdisc_mod_lock);
208 }
209 
210 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211 {
212 	struct Qdisc_ops *q = NULL;
213 
214 	for (q = qdisc_base; q; q = q->next) {
215 		if (!strcmp(name, q->id)) {
216 			if (!try_module_get(q->owner))
217 				q = NULL;
218 			break;
219 		}
220 	}
221 
222 	return q;
223 }
224 
225 /* Set new default qdisc to use */
226 int qdisc_set_default(const char *name)
227 {
228 	const struct Qdisc_ops *ops;
229 
230 	if (!capable(CAP_NET_ADMIN))
231 		return -EPERM;
232 
233 	write_lock(&qdisc_mod_lock);
234 	ops = qdisc_lookup_default(name);
235 	if (!ops) {
236 		/* Not found, drop lock and try to load module */
237 		write_unlock(&qdisc_mod_lock);
238 		request_module("sch_%s", name);
239 		write_lock(&qdisc_mod_lock);
240 
241 		ops = qdisc_lookup_default(name);
242 	}
243 
244 	if (ops) {
245 		/* Set new default */
246 		module_put(default_qdisc_ops->owner);
247 		default_qdisc_ops = ops;
248 	}
249 	write_unlock(&qdisc_mod_lock);
250 
251 	return ops ? 0 : -ENOENT;
252 }
253 
254 /* We know handle. Find qdisc among all qdisc's attached to device
255  * (root qdisc, all its children, children of children etc.)
256  * Note: caller either uses rtnl or rcu_read_lock()
257  */
258 
259 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
260 {
261 	struct Qdisc *q;
262 
263 	if (!qdisc_dev(root))
264 		return (root->handle == handle ? root : NULL);
265 
266 	if (!(root->flags & TCQ_F_BUILTIN) &&
267 	    root->handle == handle)
268 		return root;
269 
270 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
271 		if (q->handle == handle)
272 			return q;
273 	}
274 	return NULL;
275 }
276 
277 void qdisc_hash_add(struct Qdisc *q)
278 {
279 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
280 		struct Qdisc *root = qdisc_dev(q)->qdisc;
281 
282 		WARN_ON_ONCE(root == &noop_qdisc);
283 		ASSERT_RTNL();
284 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285 	}
286 }
287 EXPORT_SYMBOL(qdisc_hash_add);
288 
289 void qdisc_hash_del(struct Qdisc *q)
290 {
291 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292 		ASSERT_RTNL();
293 		hash_del_rcu(&q->hash);
294 	}
295 }
296 EXPORT_SYMBOL(qdisc_hash_del);
297 
298 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
299 {
300 	struct Qdisc *q;
301 
302 	q = qdisc_match_from_root(dev->qdisc, handle);
303 	if (q)
304 		goto out;
305 
306 	if (dev_ingress_queue(dev))
307 		q = qdisc_match_from_root(
308 			dev_ingress_queue(dev)->qdisc_sleeping,
309 			handle);
310 out:
311 	return q;
312 }
313 
314 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
315 {
316 	unsigned long cl;
317 	struct Qdisc *leaf;
318 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
319 
320 	if (cops == NULL)
321 		return NULL;
322 	cl = cops->get(p, classid);
323 
324 	if (cl == 0)
325 		return NULL;
326 	leaf = cops->leaf(p, cl);
327 	cops->put(p, cl);
328 	return leaf;
329 }
330 
331 /* Find queueing discipline by name */
332 
333 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
334 {
335 	struct Qdisc_ops *q = NULL;
336 
337 	if (kind) {
338 		read_lock(&qdisc_mod_lock);
339 		for (q = qdisc_base; q; q = q->next) {
340 			if (nla_strcmp(kind, q->id) == 0) {
341 				if (!try_module_get(q->owner))
342 					q = NULL;
343 				break;
344 			}
345 		}
346 		read_unlock(&qdisc_mod_lock);
347 	}
348 	return q;
349 }
350 
351 /* The linklayer setting were not transferred from iproute2, in older
352  * versions, and the rate tables lookup systems have been dropped in
353  * the kernel. To keep backward compatible with older iproute2 tc
354  * utils, we detect the linklayer setting by detecting if the rate
355  * table were modified.
356  *
357  * For linklayer ATM table entries, the rate table will be aligned to
358  * 48 bytes, thus some table entries will contain the same value.  The
359  * mpu (min packet unit) is also encoded into the old rate table, thus
360  * starting from the mpu, we find low and high table entries for
361  * mapping this cell.  If these entries contain the same value, when
362  * the rate tables have been modified for linklayer ATM.
363  *
364  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
365  * and then roundup to the next cell, calc the table entry one below,
366  * and compare.
367  */
368 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
369 {
370 	int low       = roundup(r->mpu, 48);
371 	int high      = roundup(low+1, 48);
372 	int cell_low  = low >> r->cell_log;
373 	int cell_high = (high >> r->cell_log) - 1;
374 
375 	/* rtab is too inaccurate at rates > 100Mbit/s */
376 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
377 		pr_debug("TC linklayer: Giving up ATM detection\n");
378 		return TC_LINKLAYER_ETHERNET;
379 	}
380 
381 	if ((cell_high > cell_low) && (cell_high < 256)
382 	    && (rtab[cell_low] == rtab[cell_high])) {
383 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
384 			 cell_low, cell_high, rtab[cell_high]);
385 		return TC_LINKLAYER_ATM;
386 	}
387 	return TC_LINKLAYER_ETHERNET;
388 }
389 
390 static struct qdisc_rate_table *qdisc_rtab_list;
391 
392 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
393 					struct nlattr *tab)
394 {
395 	struct qdisc_rate_table *rtab;
396 
397 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
398 	    nla_len(tab) != TC_RTAB_SIZE)
399 		return NULL;
400 
401 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
402 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
403 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
404 			rtab->refcnt++;
405 			return rtab;
406 		}
407 	}
408 
409 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
410 	if (rtab) {
411 		rtab->rate = *r;
412 		rtab->refcnt = 1;
413 		memcpy(rtab->data, nla_data(tab), 1024);
414 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
415 			r->linklayer = __detect_linklayer(r, rtab->data);
416 		rtab->next = qdisc_rtab_list;
417 		qdisc_rtab_list = rtab;
418 	}
419 	return rtab;
420 }
421 EXPORT_SYMBOL(qdisc_get_rtab);
422 
423 void qdisc_put_rtab(struct qdisc_rate_table *tab)
424 {
425 	struct qdisc_rate_table *rtab, **rtabp;
426 
427 	if (!tab || --tab->refcnt)
428 		return;
429 
430 	for (rtabp = &qdisc_rtab_list;
431 	     (rtab = *rtabp) != NULL;
432 	     rtabp = &rtab->next) {
433 		if (rtab == tab) {
434 			*rtabp = rtab->next;
435 			kfree(rtab);
436 			return;
437 		}
438 	}
439 }
440 EXPORT_SYMBOL(qdisc_put_rtab);
441 
442 static LIST_HEAD(qdisc_stab_list);
443 static DEFINE_SPINLOCK(qdisc_stab_lock);
444 
445 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
446 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
447 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
448 };
449 
450 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
451 {
452 	struct nlattr *tb[TCA_STAB_MAX + 1];
453 	struct qdisc_size_table *stab;
454 	struct tc_sizespec *s;
455 	unsigned int tsize = 0;
456 	u16 *tab = NULL;
457 	int err;
458 
459 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
460 	if (err < 0)
461 		return ERR_PTR(err);
462 	if (!tb[TCA_STAB_BASE])
463 		return ERR_PTR(-EINVAL);
464 
465 	s = nla_data(tb[TCA_STAB_BASE]);
466 
467 	if (s->tsize > 0) {
468 		if (!tb[TCA_STAB_DATA])
469 			return ERR_PTR(-EINVAL);
470 		tab = nla_data(tb[TCA_STAB_DATA]);
471 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
472 	}
473 
474 	if (tsize != s->tsize || (!tab && tsize > 0))
475 		return ERR_PTR(-EINVAL);
476 
477 	spin_lock(&qdisc_stab_lock);
478 
479 	list_for_each_entry(stab, &qdisc_stab_list, list) {
480 		if (memcmp(&stab->szopts, s, sizeof(*s)))
481 			continue;
482 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
483 			continue;
484 		stab->refcnt++;
485 		spin_unlock(&qdisc_stab_lock);
486 		return stab;
487 	}
488 
489 	spin_unlock(&qdisc_stab_lock);
490 
491 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
492 	if (!stab)
493 		return ERR_PTR(-ENOMEM);
494 
495 	stab->refcnt = 1;
496 	stab->szopts = *s;
497 	if (tsize > 0)
498 		memcpy(stab->data, tab, tsize * sizeof(u16));
499 
500 	spin_lock(&qdisc_stab_lock);
501 	list_add_tail(&stab->list, &qdisc_stab_list);
502 	spin_unlock(&qdisc_stab_lock);
503 
504 	return stab;
505 }
506 
507 static void stab_kfree_rcu(struct rcu_head *head)
508 {
509 	kfree(container_of(head, struct qdisc_size_table, rcu));
510 }
511 
512 void qdisc_put_stab(struct qdisc_size_table *tab)
513 {
514 	if (!tab)
515 		return;
516 
517 	spin_lock(&qdisc_stab_lock);
518 
519 	if (--tab->refcnt == 0) {
520 		list_del(&tab->list);
521 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
522 	}
523 
524 	spin_unlock(&qdisc_stab_lock);
525 }
526 EXPORT_SYMBOL(qdisc_put_stab);
527 
528 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
529 {
530 	struct nlattr *nest;
531 
532 	nest = nla_nest_start(skb, TCA_STAB);
533 	if (nest == NULL)
534 		goto nla_put_failure;
535 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
536 		goto nla_put_failure;
537 	nla_nest_end(skb, nest);
538 
539 	return skb->len;
540 
541 nla_put_failure:
542 	return -1;
543 }
544 
545 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
546 			       const struct qdisc_size_table *stab)
547 {
548 	int pkt_len, slot;
549 
550 	pkt_len = skb->len + stab->szopts.overhead;
551 	if (unlikely(!stab->szopts.tsize))
552 		goto out;
553 
554 	slot = pkt_len + stab->szopts.cell_align;
555 	if (unlikely(slot < 0))
556 		slot = 0;
557 
558 	slot >>= stab->szopts.cell_log;
559 	if (likely(slot < stab->szopts.tsize))
560 		pkt_len = stab->data[slot];
561 	else
562 		pkt_len = stab->data[stab->szopts.tsize - 1] *
563 				(slot / stab->szopts.tsize) +
564 				stab->data[slot % stab->szopts.tsize];
565 
566 	pkt_len <<= stab->szopts.size_log;
567 out:
568 	if (unlikely(pkt_len < 1))
569 		pkt_len = 1;
570 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
571 }
572 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
573 
574 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
575 {
576 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
577 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
578 			txt, qdisc->ops->id, qdisc->handle >> 16);
579 		qdisc->flags |= TCQ_F_WARN_NONWC;
580 	}
581 }
582 EXPORT_SYMBOL(qdisc_warn_nonwc);
583 
584 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
585 {
586 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
587 						 timer);
588 
589 	rcu_read_lock();
590 	__netif_schedule(qdisc_root(wd->qdisc));
591 	rcu_read_unlock();
592 
593 	return HRTIMER_NORESTART;
594 }
595 
596 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
597 {
598 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
599 	wd->timer.function = qdisc_watchdog;
600 	wd->qdisc = qdisc;
601 }
602 EXPORT_SYMBOL(qdisc_watchdog_init);
603 
604 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
605 {
606 	if (test_bit(__QDISC_STATE_DEACTIVATED,
607 		     &qdisc_root_sleeping(wd->qdisc)->state))
608 		return;
609 
610 	if (wd->last_expires == expires)
611 		return;
612 
613 	wd->last_expires = expires;
614 	hrtimer_start(&wd->timer,
615 		      ns_to_ktime(expires),
616 		      HRTIMER_MODE_ABS_PINNED);
617 }
618 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
619 
620 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
621 {
622 	hrtimer_cancel(&wd->timer);
623 }
624 EXPORT_SYMBOL(qdisc_watchdog_cancel);
625 
626 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
627 {
628 	unsigned int size = n * sizeof(struct hlist_head), i;
629 	struct hlist_head *h;
630 
631 	if (size <= PAGE_SIZE)
632 		h = kmalloc(size, GFP_KERNEL);
633 	else
634 		h = (struct hlist_head *)
635 			__get_free_pages(GFP_KERNEL, get_order(size));
636 
637 	if (h != NULL) {
638 		for (i = 0; i < n; i++)
639 			INIT_HLIST_HEAD(&h[i]);
640 	}
641 	return h;
642 }
643 
644 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
645 {
646 	unsigned int size = n * sizeof(struct hlist_head);
647 
648 	if (size <= PAGE_SIZE)
649 		kfree(h);
650 	else
651 		free_pages((unsigned long)h, get_order(size));
652 }
653 
654 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
655 {
656 	struct Qdisc_class_common *cl;
657 	struct hlist_node *next;
658 	struct hlist_head *nhash, *ohash;
659 	unsigned int nsize, nmask, osize;
660 	unsigned int i, h;
661 
662 	/* Rehash when load factor exceeds 0.75 */
663 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
664 		return;
665 	nsize = clhash->hashsize * 2;
666 	nmask = nsize - 1;
667 	nhash = qdisc_class_hash_alloc(nsize);
668 	if (nhash == NULL)
669 		return;
670 
671 	ohash = clhash->hash;
672 	osize = clhash->hashsize;
673 
674 	sch_tree_lock(sch);
675 	for (i = 0; i < osize; i++) {
676 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
677 			h = qdisc_class_hash(cl->classid, nmask);
678 			hlist_add_head(&cl->hnode, &nhash[h]);
679 		}
680 	}
681 	clhash->hash     = nhash;
682 	clhash->hashsize = nsize;
683 	clhash->hashmask = nmask;
684 	sch_tree_unlock(sch);
685 
686 	qdisc_class_hash_free(ohash, osize);
687 }
688 EXPORT_SYMBOL(qdisc_class_hash_grow);
689 
690 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
691 {
692 	unsigned int size = 4;
693 
694 	clhash->hash = qdisc_class_hash_alloc(size);
695 	if (clhash->hash == NULL)
696 		return -ENOMEM;
697 	clhash->hashsize  = size;
698 	clhash->hashmask  = size - 1;
699 	clhash->hashelems = 0;
700 	return 0;
701 }
702 EXPORT_SYMBOL(qdisc_class_hash_init);
703 
704 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
705 {
706 	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
707 }
708 EXPORT_SYMBOL(qdisc_class_hash_destroy);
709 
710 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
711 			     struct Qdisc_class_common *cl)
712 {
713 	unsigned int h;
714 
715 	INIT_HLIST_NODE(&cl->hnode);
716 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
717 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
718 	clhash->hashelems++;
719 }
720 EXPORT_SYMBOL(qdisc_class_hash_insert);
721 
722 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
723 			     struct Qdisc_class_common *cl)
724 {
725 	hlist_del(&cl->hnode);
726 	clhash->hashelems--;
727 }
728 EXPORT_SYMBOL(qdisc_class_hash_remove);
729 
730 /* Allocate an unique handle from space managed by kernel
731  * Possible range is [8000-FFFF]:0000 (0x8000 values)
732  */
733 static u32 qdisc_alloc_handle(struct net_device *dev)
734 {
735 	int i = 0x8000;
736 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
737 
738 	do {
739 		autohandle += TC_H_MAKE(0x10000U, 0);
740 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
741 			autohandle = TC_H_MAKE(0x80000000U, 0);
742 		if (!qdisc_lookup(dev, autohandle))
743 			return autohandle;
744 		cond_resched();
745 	} while	(--i > 0);
746 
747 	return 0;
748 }
749 
750 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
751 			       unsigned int len)
752 {
753 	const struct Qdisc_class_ops *cops;
754 	unsigned long cl;
755 	u32 parentid;
756 	int drops;
757 
758 	if (n == 0 && len == 0)
759 		return;
760 	drops = max_t(int, n, 0);
761 	rcu_read_lock();
762 	while ((parentid = sch->parent)) {
763 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
764 			break;
765 
766 		if (sch->flags & TCQ_F_NOPARENT)
767 			break;
768 		/* TODO: perform the search on a per txq basis */
769 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
770 		if (sch == NULL) {
771 			WARN_ON_ONCE(parentid != TC_H_ROOT);
772 			break;
773 		}
774 		cops = sch->ops->cl_ops;
775 		if (cops->qlen_notify) {
776 			cl = cops->get(sch, parentid);
777 			cops->qlen_notify(sch, cl);
778 			cops->put(sch, cl);
779 		}
780 		sch->q.qlen -= n;
781 		sch->qstats.backlog -= len;
782 		__qdisc_qstats_drop(sch, drops);
783 	}
784 	rcu_read_unlock();
785 }
786 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
787 
788 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
789 			       struct nlmsghdr *n, u32 clid,
790 			       struct Qdisc *old, struct Qdisc *new)
791 {
792 	if (new || old)
793 		qdisc_notify(net, skb, n, clid, old, new);
794 
795 	if (old)
796 		qdisc_destroy(old);
797 }
798 
799 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
800  * to device "dev".
801  *
802  * When appropriate send a netlink notification using 'skb'
803  * and "n".
804  *
805  * On success, destroy old qdisc.
806  */
807 
808 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
809 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
810 		       struct Qdisc *new, struct Qdisc *old)
811 {
812 	struct Qdisc *q = old;
813 	struct net *net = dev_net(dev);
814 	int err = 0;
815 
816 	if (parent == NULL) {
817 		unsigned int i, num_q, ingress;
818 
819 		ingress = 0;
820 		num_q = dev->num_tx_queues;
821 		if ((q && q->flags & TCQ_F_INGRESS) ||
822 		    (new && new->flags & TCQ_F_INGRESS)) {
823 			num_q = 1;
824 			ingress = 1;
825 			if (!dev_ingress_queue(dev))
826 				return -ENOENT;
827 		}
828 
829 		if (dev->flags & IFF_UP)
830 			dev_deactivate(dev);
831 
832 		if (new && new->ops->attach)
833 			goto skip;
834 
835 		for (i = 0; i < num_q; i++) {
836 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
837 
838 			if (!ingress)
839 				dev_queue = netdev_get_tx_queue(dev, i);
840 
841 			old = dev_graft_qdisc(dev_queue, new);
842 			if (new && i > 0)
843 				atomic_inc(&new->refcnt);
844 
845 			if (!ingress)
846 				qdisc_destroy(old);
847 		}
848 
849 skip:
850 		if (!ingress) {
851 			notify_and_destroy(net, skb, n, classid,
852 					   dev->qdisc, new);
853 			if (new && !new->ops->attach)
854 				atomic_inc(&new->refcnt);
855 			dev->qdisc = new ? : &noop_qdisc;
856 
857 			if (new && new->ops->attach)
858 				new->ops->attach(new);
859 		} else {
860 			notify_and_destroy(net, skb, n, classid, old, new);
861 		}
862 
863 		if (dev->flags & IFF_UP)
864 			dev_activate(dev);
865 	} else {
866 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
867 
868 		err = -EOPNOTSUPP;
869 		if (cops && cops->graft) {
870 			unsigned long cl = cops->get(parent, classid);
871 			if (cl) {
872 				err = cops->graft(parent, cl, new, &old);
873 				cops->put(parent, cl);
874 			} else
875 				err = -ENOENT;
876 		}
877 		if (!err)
878 			notify_and_destroy(net, skb, n, classid, old, new);
879 	}
880 	return err;
881 }
882 
883 /* lockdep annotation is needed for ingress; egress gets it only for name */
884 static struct lock_class_key qdisc_tx_lock;
885 static struct lock_class_key qdisc_rx_lock;
886 
887 /*
888    Allocate and initialize new qdisc.
889 
890    Parameters are passed via opt.
891  */
892 
893 static struct Qdisc *qdisc_create(struct net_device *dev,
894 				  struct netdev_queue *dev_queue,
895 				  struct Qdisc *p, u32 parent, u32 handle,
896 				  struct nlattr **tca, int *errp)
897 {
898 	int err;
899 	struct nlattr *kind = tca[TCA_KIND];
900 	struct Qdisc *sch;
901 	struct Qdisc_ops *ops;
902 	struct qdisc_size_table *stab;
903 
904 	ops = qdisc_lookup_ops(kind);
905 #ifdef CONFIG_MODULES
906 	if (ops == NULL && kind != NULL) {
907 		char name[IFNAMSIZ];
908 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
909 			/* We dropped the RTNL semaphore in order to
910 			 * perform the module load.  So, even if we
911 			 * succeeded in loading the module we have to
912 			 * tell the caller to replay the request.  We
913 			 * indicate this using -EAGAIN.
914 			 * We replay the request because the device may
915 			 * go away in the mean time.
916 			 */
917 			rtnl_unlock();
918 			request_module("sch_%s", name);
919 			rtnl_lock();
920 			ops = qdisc_lookup_ops(kind);
921 			if (ops != NULL) {
922 				/* We will try again qdisc_lookup_ops,
923 				 * so don't keep a reference.
924 				 */
925 				module_put(ops->owner);
926 				err = -EAGAIN;
927 				goto err_out;
928 			}
929 		}
930 	}
931 #endif
932 
933 	err = -ENOENT;
934 	if (ops == NULL)
935 		goto err_out;
936 
937 	sch = qdisc_alloc(dev_queue, ops);
938 	if (IS_ERR(sch)) {
939 		err = PTR_ERR(sch);
940 		goto err_out2;
941 	}
942 
943 	sch->parent = parent;
944 
945 	if (handle == TC_H_INGRESS) {
946 		sch->flags |= TCQ_F_INGRESS;
947 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
948 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
949 	} else {
950 		if (handle == 0) {
951 			handle = qdisc_alloc_handle(dev);
952 			err = -ENOMEM;
953 			if (handle == 0)
954 				goto err_out3;
955 		}
956 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
957 		if (!netif_is_multiqueue(dev))
958 			sch->flags |= TCQ_F_ONETXQUEUE;
959 	}
960 
961 	sch->handle = handle;
962 
963 	/* This exist to keep backward compatible with a userspace
964 	 * loophole, what allowed userspace to get IFF_NO_QUEUE
965 	 * facility on older kernels by setting tx_queue_len=0 (prior
966 	 * to qdisc init), and then forgot to reinit tx_queue_len
967 	 * before again attaching a qdisc.
968 	 */
969 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
970 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
971 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
972 	}
973 
974 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
975 		if (qdisc_is_percpu_stats(sch)) {
976 			sch->cpu_bstats =
977 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
978 			if (!sch->cpu_bstats)
979 				goto err_out4;
980 
981 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
982 			if (!sch->cpu_qstats)
983 				goto err_out4;
984 		}
985 
986 		if (tca[TCA_STAB]) {
987 			stab = qdisc_get_stab(tca[TCA_STAB]);
988 			if (IS_ERR(stab)) {
989 				err = PTR_ERR(stab);
990 				goto err_out4;
991 			}
992 			rcu_assign_pointer(sch->stab, stab);
993 		}
994 		if (tca[TCA_RATE]) {
995 			seqcount_t *running;
996 
997 			err = -EOPNOTSUPP;
998 			if (sch->flags & TCQ_F_MQROOT)
999 				goto err_out4;
1000 
1001 			if ((sch->parent != TC_H_ROOT) &&
1002 			    !(sch->flags & TCQ_F_INGRESS) &&
1003 			    (!p || !(p->flags & TCQ_F_MQROOT)))
1004 				running = qdisc_root_sleeping_running(sch);
1005 			else
1006 				running = &sch->running;
1007 
1008 			err = gen_new_estimator(&sch->bstats,
1009 						sch->cpu_bstats,
1010 						&sch->rate_est,
1011 						NULL,
1012 						running,
1013 						tca[TCA_RATE]);
1014 			if (err)
1015 				goto err_out4;
1016 		}
1017 
1018 		qdisc_hash_add(sch);
1019 
1020 		return sch;
1021 	}
1022 err_out3:
1023 	dev_put(dev);
1024 	kfree((char *) sch - sch->padded);
1025 err_out2:
1026 	module_put(ops->owner);
1027 err_out:
1028 	*errp = err;
1029 	return NULL;
1030 
1031 err_out4:
1032 	free_percpu(sch->cpu_bstats);
1033 	free_percpu(sch->cpu_qstats);
1034 	/*
1035 	 * Any broken qdiscs that would require a ops->reset() here?
1036 	 * The qdisc was never in action so it shouldn't be necessary.
1037 	 */
1038 	qdisc_put_stab(rtnl_dereference(sch->stab));
1039 	if (ops->destroy)
1040 		ops->destroy(sch);
1041 	goto err_out3;
1042 }
1043 
1044 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1045 {
1046 	struct qdisc_size_table *ostab, *stab = NULL;
1047 	int err = 0;
1048 
1049 	if (tca[TCA_OPTIONS]) {
1050 		if (sch->ops->change == NULL)
1051 			return -EINVAL;
1052 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1053 		if (err)
1054 			return err;
1055 	}
1056 
1057 	if (tca[TCA_STAB]) {
1058 		stab = qdisc_get_stab(tca[TCA_STAB]);
1059 		if (IS_ERR(stab))
1060 			return PTR_ERR(stab);
1061 	}
1062 
1063 	ostab = rtnl_dereference(sch->stab);
1064 	rcu_assign_pointer(sch->stab, stab);
1065 	qdisc_put_stab(ostab);
1066 
1067 	if (tca[TCA_RATE]) {
1068 		/* NB: ignores errors from replace_estimator
1069 		   because change can't be undone. */
1070 		if (sch->flags & TCQ_F_MQROOT)
1071 			goto out;
1072 		gen_replace_estimator(&sch->bstats,
1073 				      sch->cpu_bstats,
1074 				      &sch->rate_est,
1075 				      NULL,
1076 				      qdisc_root_sleeping_running(sch),
1077 				      tca[TCA_RATE]);
1078 	}
1079 out:
1080 	return 0;
1081 }
1082 
1083 struct check_loop_arg {
1084 	struct qdisc_walker	w;
1085 	struct Qdisc		*p;
1086 	int			depth;
1087 };
1088 
1089 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1090 			 struct qdisc_walker *w);
1091 
1092 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1093 {
1094 	struct check_loop_arg	arg;
1095 
1096 	if (q->ops->cl_ops == NULL)
1097 		return 0;
1098 
1099 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1100 	arg.w.fn = check_loop_fn;
1101 	arg.depth = depth;
1102 	arg.p = p;
1103 	q->ops->cl_ops->walk(q, &arg.w);
1104 	return arg.w.stop ? -ELOOP : 0;
1105 }
1106 
1107 static int
1108 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1109 {
1110 	struct Qdisc *leaf;
1111 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1112 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1113 
1114 	leaf = cops->leaf(q, cl);
1115 	if (leaf) {
1116 		if (leaf == arg->p || arg->depth > 7)
1117 			return -ELOOP;
1118 		return check_loop(leaf, arg->p, arg->depth + 1);
1119 	}
1120 	return 0;
1121 }
1122 
1123 /*
1124  * Delete/get qdisc.
1125  */
1126 
1127 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1128 {
1129 	struct net *net = sock_net(skb->sk);
1130 	struct tcmsg *tcm = nlmsg_data(n);
1131 	struct nlattr *tca[TCA_MAX + 1];
1132 	struct net_device *dev;
1133 	u32 clid;
1134 	struct Qdisc *q = NULL;
1135 	struct Qdisc *p = NULL;
1136 	int err;
1137 
1138 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1139 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1140 		return -EPERM;
1141 
1142 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1143 	if (err < 0)
1144 		return err;
1145 
1146 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1147 	if (!dev)
1148 		return -ENODEV;
1149 
1150 	clid = tcm->tcm_parent;
1151 	if (clid) {
1152 		if (clid != TC_H_ROOT) {
1153 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1154 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1155 				if (!p)
1156 					return -ENOENT;
1157 				q = qdisc_leaf(p, clid);
1158 			} else if (dev_ingress_queue(dev)) {
1159 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1160 			}
1161 		} else {
1162 			q = dev->qdisc;
1163 		}
1164 		if (!q)
1165 			return -ENOENT;
1166 
1167 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1168 			return -EINVAL;
1169 	} else {
1170 		q = qdisc_lookup(dev, tcm->tcm_handle);
1171 		if (!q)
1172 			return -ENOENT;
1173 	}
1174 
1175 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1176 		return -EINVAL;
1177 
1178 	if (n->nlmsg_type == RTM_DELQDISC) {
1179 		if (!clid)
1180 			return -EINVAL;
1181 		if (q->handle == 0)
1182 			return -ENOENT;
1183 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1184 		if (err != 0)
1185 			return err;
1186 	} else {
1187 		qdisc_notify(net, skb, n, clid, NULL, q);
1188 	}
1189 	return 0;
1190 }
1191 
1192 /*
1193  * Create/change qdisc.
1194  */
1195 
1196 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1197 {
1198 	struct net *net = sock_net(skb->sk);
1199 	struct tcmsg *tcm;
1200 	struct nlattr *tca[TCA_MAX + 1];
1201 	struct net_device *dev;
1202 	u32 clid;
1203 	struct Qdisc *q, *p;
1204 	int err;
1205 
1206 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1207 		return -EPERM;
1208 
1209 replay:
1210 	/* Reinit, just in case something touches this. */
1211 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1212 	if (err < 0)
1213 		return err;
1214 
1215 	tcm = nlmsg_data(n);
1216 	clid = tcm->tcm_parent;
1217 	q = p = NULL;
1218 
1219 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1220 	if (!dev)
1221 		return -ENODEV;
1222 
1223 
1224 	if (clid) {
1225 		if (clid != TC_H_ROOT) {
1226 			if (clid != TC_H_INGRESS) {
1227 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1228 				if (!p)
1229 					return -ENOENT;
1230 				q = qdisc_leaf(p, clid);
1231 			} else if (dev_ingress_queue_create(dev)) {
1232 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1233 			}
1234 		} else {
1235 			q = dev->qdisc;
1236 		}
1237 
1238 		/* It may be default qdisc, ignore it */
1239 		if (q && q->handle == 0)
1240 			q = NULL;
1241 
1242 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1243 			if (tcm->tcm_handle) {
1244 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1245 					return -EEXIST;
1246 				if (TC_H_MIN(tcm->tcm_handle))
1247 					return -EINVAL;
1248 				q = qdisc_lookup(dev, tcm->tcm_handle);
1249 				if (!q)
1250 					goto create_n_graft;
1251 				if (n->nlmsg_flags & NLM_F_EXCL)
1252 					return -EEXIST;
1253 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1254 					return -EINVAL;
1255 				if (q == p ||
1256 				    (p && check_loop(q, p, 0)))
1257 					return -ELOOP;
1258 				atomic_inc(&q->refcnt);
1259 				goto graft;
1260 			} else {
1261 				if (!q)
1262 					goto create_n_graft;
1263 
1264 				/* This magic test requires explanation.
1265 				 *
1266 				 *   We know, that some child q is already
1267 				 *   attached to this parent and have choice:
1268 				 *   either to change it or to create/graft new one.
1269 				 *
1270 				 *   1. We are allowed to create/graft only
1271 				 *   if CREATE and REPLACE flags are set.
1272 				 *
1273 				 *   2. If EXCL is set, requestor wanted to say,
1274 				 *   that qdisc tcm_handle is not expected
1275 				 *   to exist, so that we choose create/graft too.
1276 				 *
1277 				 *   3. The last case is when no flags are set.
1278 				 *   Alas, it is sort of hole in API, we
1279 				 *   cannot decide what to do unambiguously.
1280 				 *   For now we select create/graft, if
1281 				 *   user gave KIND, which does not match existing.
1282 				 */
1283 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1284 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1285 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1286 				     (tca[TCA_KIND] &&
1287 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1288 					goto create_n_graft;
1289 			}
1290 		}
1291 	} else {
1292 		if (!tcm->tcm_handle)
1293 			return -EINVAL;
1294 		q = qdisc_lookup(dev, tcm->tcm_handle);
1295 	}
1296 
1297 	/* Change qdisc parameters */
1298 	if (q == NULL)
1299 		return -ENOENT;
1300 	if (n->nlmsg_flags & NLM_F_EXCL)
1301 		return -EEXIST;
1302 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1303 		return -EINVAL;
1304 	err = qdisc_change(q, tca);
1305 	if (err == 0)
1306 		qdisc_notify(net, skb, n, clid, NULL, q);
1307 	return err;
1308 
1309 create_n_graft:
1310 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1311 		return -ENOENT;
1312 	if (clid == TC_H_INGRESS) {
1313 		if (dev_ingress_queue(dev))
1314 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1315 					 tcm->tcm_parent, tcm->tcm_parent,
1316 					 tca, &err);
1317 		else
1318 			err = -ENOENT;
1319 	} else {
1320 		struct netdev_queue *dev_queue;
1321 
1322 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1323 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1324 		else if (p)
1325 			dev_queue = p->dev_queue;
1326 		else
1327 			dev_queue = netdev_get_tx_queue(dev, 0);
1328 
1329 		q = qdisc_create(dev, dev_queue, p,
1330 				 tcm->tcm_parent, tcm->tcm_handle,
1331 				 tca, &err);
1332 	}
1333 	if (q == NULL) {
1334 		if (err == -EAGAIN)
1335 			goto replay;
1336 		return err;
1337 	}
1338 
1339 graft:
1340 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1341 	if (err) {
1342 		if (q)
1343 			qdisc_destroy(q);
1344 		return err;
1345 	}
1346 
1347 	return 0;
1348 }
1349 
1350 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1351 			 u32 portid, u32 seq, u16 flags, int event)
1352 {
1353 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
1354 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1355 	struct tcmsg *tcm;
1356 	struct nlmsghdr  *nlh;
1357 	unsigned char *b = skb_tail_pointer(skb);
1358 	struct gnet_dump d;
1359 	struct qdisc_size_table *stab;
1360 	__u32 qlen;
1361 
1362 	cond_resched();
1363 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1364 	if (!nlh)
1365 		goto out_nlmsg_trim;
1366 	tcm = nlmsg_data(nlh);
1367 	tcm->tcm_family = AF_UNSPEC;
1368 	tcm->tcm__pad1 = 0;
1369 	tcm->tcm__pad2 = 0;
1370 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1371 	tcm->tcm_parent = clid;
1372 	tcm->tcm_handle = q->handle;
1373 	tcm->tcm_info = atomic_read(&q->refcnt);
1374 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1375 		goto nla_put_failure;
1376 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
1377 		goto nla_put_failure;
1378 	qlen = q->q.qlen;
1379 
1380 	stab = rtnl_dereference(q->stab);
1381 	if (stab && qdisc_dump_stab(skb, stab) < 0)
1382 		goto nla_put_failure;
1383 
1384 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1385 					 NULL, &d, TCA_PAD) < 0)
1386 		goto nla_put_failure;
1387 
1388 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1389 		goto nla_put_failure;
1390 
1391 	if (qdisc_is_percpu_stats(q)) {
1392 		cpu_bstats = q->cpu_bstats;
1393 		cpu_qstats = q->cpu_qstats;
1394 	}
1395 
1396 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1397 				  &d, cpu_bstats, &q->bstats) < 0 ||
1398 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1399 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1400 		goto nla_put_failure;
1401 
1402 	if (gnet_stats_finish_copy(&d) < 0)
1403 		goto nla_put_failure;
1404 
1405 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1406 	return skb->len;
1407 
1408 out_nlmsg_trim:
1409 nla_put_failure:
1410 	nlmsg_trim(skb, b);
1411 	return -1;
1412 }
1413 
1414 static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1415 {
1416 	return (q->flags & TCQ_F_BUILTIN) ? true : false;
1417 }
1418 
1419 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1420 			struct nlmsghdr *n, u32 clid,
1421 			struct Qdisc *old, struct Qdisc *new)
1422 {
1423 	struct sk_buff *skb;
1424 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1425 
1426 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1427 	if (!skb)
1428 		return -ENOBUFS;
1429 
1430 	if (old && !tc_qdisc_dump_ignore(old)) {
1431 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1432 				  0, RTM_DELQDISC) < 0)
1433 			goto err_out;
1434 	}
1435 	if (new && !tc_qdisc_dump_ignore(new)) {
1436 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1437 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1438 			goto err_out;
1439 	}
1440 
1441 	if (skb->len)
1442 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1443 				      n->nlmsg_flags & NLM_F_ECHO);
1444 
1445 err_out:
1446 	kfree_skb(skb);
1447 	return -EINVAL;
1448 }
1449 
1450 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1451 			      struct netlink_callback *cb,
1452 			      int *q_idx_p, int s_q_idx, bool recur)
1453 {
1454 	int ret = 0, q_idx = *q_idx_p;
1455 	struct Qdisc *q;
1456 	int b;
1457 
1458 	if (!root)
1459 		return 0;
1460 
1461 	q = root;
1462 	if (q_idx < s_q_idx) {
1463 		q_idx++;
1464 	} else {
1465 		if (!tc_qdisc_dump_ignore(q) &&
1466 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1467 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1468 				  RTM_NEWQDISC) <= 0)
1469 			goto done;
1470 		q_idx++;
1471 	}
1472 
1473 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1474 	 * itself has already been dumped.
1475 	 *
1476 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1477 	 * qdisc hashtable, we don't want to hit it again
1478 	 */
1479 	if (!qdisc_dev(root) || !recur)
1480 		goto out;
1481 
1482 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1483 		if (q_idx < s_q_idx) {
1484 			q_idx++;
1485 			continue;
1486 		}
1487 		if (!tc_qdisc_dump_ignore(q) &&
1488 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1489 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1490 				  RTM_NEWQDISC) <= 0)
1491 			goto done;
1492 		q_idx++;
1493 	}
1494 
1495 out:
1496 	*q_idx_p = q_idx;
1497 	return ret;
1498 done:
1499 	ret = -1;
1500 	goto out;
1501 }
1502 
1503 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1504 {
1505 	struct net *net = sock_net(skb->sk);
1506 	int idx, q_idx;
1507 	int s_idx, s_q_idx;
1508 	struct net_device *dev;
1509 
1510 	s_idx = cb->args[0];
1511 	s_q_idx = q_idx = cb->args[1];
1512 
1513 	idx = 0;
1514 	ASSERT_RTNL();
1515 	for_each_netdev(net, dev) {
1516 		struct netdev_queue *dev_queue;
1517 
1518 		if (idx < s_idx)
1519 			goto cont;
1520 		if (idx > s_idx)
1521 			s_q_idx = 0;
1522 		q_idx = 0;
1523 
1524 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1525 				       true) < 0)
1526 			goto done;
1527 
1528 		dev_queue = dev_ingress_queue(dev);
1529 		if (dev_queue &&
1530 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1531 				       &q_idx, s_q_idx, false) < 0)
1532 			goto done;
1533 
1534 cont:
1535 		idx++;
1536 	}
1537 
1538 done:
1539 	cb->args[0] = idx;
1540 	cb->args[1] = q_idx;
1541 
1542 	return skb->len;
1543 }
1544 
1545 
1546 
1547 /************************************************
1548  *	Traffic classes manipulation.		*
1549  ************************************************/
1550 
1551 
1552 
1553 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1554 {
1555 	struct net *net = sock_net(skb->sk);
1556 	struct tcmsg *tcm = nlmsg_data(n);
1557 	struct nlattr *tca[TCA_MAX + 1];
1558 	struct net_device *dev;
1559 	struct Qdisc *q = NULL;
1560 	const struct Qdisc_class_ops *cops;
1561 	unsigned long cl = 0;
1562 	unsigned long new_cl;
1563 	u32 portid;
1564 	u32 clid;
1565 	u32 qid;
1566 	int err;
1567 
1568 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1569 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1570 		return -EPERM;
1571 
1572 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1573 	if (err < 0)
1574 		return err;
1575 
1576 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1577 	if (!dev)
1578 		return -ENODEV;
1579 
1580 	/*
1581 	   parent == TC_H_UNSPEC - unspecified parent.
1582 	   parent == TC_H_ROOT   - class is root, which has no parent.
1583 	   parent == X:0	 - parent is root class.
1584 	   parent == X:Y	 - parent is a node in hierarchy.
1585 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1586 
1587 	   handle == 0:0	 - generate handle from kernel pool.
1588 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1589 	   handle == X:Y	 - clear.
1590 	   handle == X:0	 - root class.
1591 	 */
1592 
1593 	/* Step 1. Determine qdisc handle X:0 */
1594 
1595 	portid = tcm->tcm_parent;
1596 	clid = tcm->tcm_handle;
1597 	qid = TC_H_MAJ(clid);
1598 
1599 	if (portid != TC_H_ROOT) {
1600 		u32 qid1 = TC_H_MAJ(portid);
1601 
1602 		if (qid && qid1) {
1603 			/* If both majors are known, they must be identical. */
1604 			if (qid != qid1)
1605 				return -EINVAL;
1606 		} else if (qid1) {
1607 			qid = qid1;
1608 		} else if (qid == 0)
1609 			qid = dev->qdisc->handle;
1610 
1611 		/* Now qid is genuine qdisc handle consistent
1612 		 * both with parent and child.
1613 		 *
1614 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1615 		 */
1616 		if (portid)
1617 			portid = TC_H_MAKE(qid, portid);
1618 	} else {
1619 		if (qid == 0)
1620 			qid = dev->qdisc->handle;
1621 	}
1622 
1623 	/* OK. Locate qdisc */
1624 	q = qdisc_lookup(dev, qid);
1625 	if (!q)
1626 		return -ENOENT;
1627 
1628 	/* An check that it supports classes */
1629 	cops = q->ops->cl_ops;
1630 	if (cops == NULL)
1631 		return -EINVAL;
1632 
1633 	/* Now try to get class */
1634 	if (clid == 0) {
1635 		if (portid == TC_H_ROOT)
1636 			clid = qid;
1637 	} else
1638 		clid = TC_H_MAKE(qid, clid);
1639 
1640 	if (clid)
1641 		cl = cops->get(q, clid);
1642 
1643 	if (cl == 0) {
1644 		err = -ENOENT;
1645 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1646 		    !(n->nlmsg_flags & NLM_F_CREATE))
1647 			goto out;
1648 	} else {
1649 		switch (n->nlmsg_type) {
1650 		case RTM_NEWTCLASS:
1651 			err = -EEXIST;
1652 			if (n->nlmsg_flags & NLM_F_EXCL)
1653 				goto out;
1654 			break;
1655 		case RTM_DELTCLASS:
1656 			err = -EOPNOTSUPP;
1657 			if (cops->delete)
1658 				err = cops->delete(q, cl);
1659 			if (err == 0)
1660 				tclass_notify(net, skb, n, q, cl,
1661 					      RTM_DELTCLASS);
1662 			goto out;
1663 		case RTM_GETTCLASS:
1664 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1665 			goto out;
1666 		default:
1667 			err = -EINVAL;
1668 			goto out;
1669 		}
1670 	}
1671 
1672 	new_cl = cl;
1673 	err = -EOPNOTSUPP;
1674 	if (cops->change)
1675 		err = cops->change(q, clid, portid, tca, &new_cl);
1676 	if (err == 0)
1677 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1678 
1679 out:
1680 	if (cl)
1681 		cops->put(q, cl);
1682 
1683 	return err;
1684 }
1685 
1686 
1687 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1688 			  unsigned long cl,
1689 			  u32 portid, u32 seq, u16 flags, int event)
1690 {
1691 	struct tcmsg *tcm;
1692 	struct nlmsghdr  *nlh;
1693 	unsigned char *b = skb_tail_pointer(skb);
1694 	struct gnet_dump d;
1695 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1696 
1697 	cond_resched();
1698 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1699 	if (!nlh)
1700 		goto out_nlmsg_trim;
1701 	tcm = nlmsg_data(nlh);
1702 	tcm->tcm_family = AF_UNSPEC;
1703 	tcm->tcm__pad1 = 0;
1704 	tcm->tcm__pad2 = 0;
1705 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1706 	tcm->tcm_parent = q->handle;
1707 	tcm->tcm_handle = q->handle;
1708 	tcm->tcm_info = 0;
1709 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1710 		goto nla_put_failure;
1711 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1712 		goto nla_put_failure;
1713 
1714 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1715 					 NULL, &d, TCA_PAD) < 0)
1716 		goto nla_put_failure;
1717 
1718 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1719 		goto nla_put_failure;
1720 
1721 	if (gnet_stats_finish_copy(&d) < 0)
1722 		goto nla_put_failure;
1723 
1724 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1725 	return skb->len;
1726 
1727 out_nlmsg_trim:
1728 nla_put_failure:
1729 	nlmsg_trim(skb, b);
1730 	return -1;
1731 }
1732 
1733 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1734 			 struct nlmsghdr *n, struct Qdisc *q,
1735 			 unsigned long cl, int event)
1736 {
1737 	struct sk_buff *skb;
1738 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1739 
1740 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1741 	if (!skb)
1742 		return -ENOBUFS;
1743 
1744 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1745 		kfree_skb(skb);
1746 		return -EINVAL;
1747 	}
1748 
1749 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1750 			      n->nlmsg_flags & NLM_F_ECHO);
1751 }
1752 
1753 struct qdisc_dump_args {
1754 	struct qdisc_walker	w;
1755 	struct sk_buff		*skb;
1756 	struct netlink_callback	*cb;
1757 };
1758 
1759 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1760 			    struct qdisc_walker *arg)
1761 {
1762 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1763 
1764 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1765 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1766 			      RTM_NEWTCLASS);
1767 }
1768 
1769 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1770 				struct tcmsg *tcm, struct netlink_callback *cb,
1771 				int *t_p, int s_t)
1772 {
1773 	struct qdisc_dump_args arg;
1774 
1775 	if (tc_qdisc_dump_ignore(q) ||
1776 	    *t_p < s_t || !q->ops->cl_ops ||
1777 	    (tcm->tcm_parent &&
1778 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1779 		(*t_p)++;
1780 		return 0;
1781 	}
1782 	if (*t_p > s_t)
1783 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1784 	arg.w.fn = qdisc_class_dump;
1785 	arg.skb = skb;
1786 	arg.cb = cb;
1787 	arg.w.stop  = 0;
1788 	arg.w.skip = cb->args[1];
1789 	arg.w.count = 0;
1790 	q->ops->cl_ops->walk(q, &arg.w);
1791 	cb->args[1] = arg.w.count;
1792 	if (arg.w.stop)
1793 		return -1;
1794 	(*t_p)++;
1795 	return 0;
1796 }
1797 
1798 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1799 			       struct tcmsg *tcm, struct netlink_callback *cb,
1800 			       int *t_p, int s_t)
1801 {
1802 	struct Qdisc *q;
1803 	int b;
1804 
1805 	if (!root)
1806 		return 0;
1807 
1808 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1809 		return -1;
1810 
1811 	if (!qdisc_dev(root))
1812 		return 0;
1813 
1814 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1815 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1816 			return -1;
1817 	}
1818 
1819 	return 0;
1820 }
1821 
1822 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1823 {
1824 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1825 	struct net *net = sock_net(skb->sk);
1826 	struct netdev_queue *dev_queue;
1827 	struct net_device *dev;
1828 	int t, s_t;
1829 
1830 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1831 		return 0;
1832 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1833 	if (!dev)
1834 		return 0;
1835 
1836 	s_t = cb->args[0];
1837 	t = 0;
1838 
1839 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1840 		goto done;
1841 
1842 	dev_queue = dev_ingress_queue(dev);
1843 	if (dev_queue &&
1844 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1845 				&t, s_t) < 0)
1846 		goto done;
1847 
1848 done:
1849 	cb->args[0] = t;
1850 
1851 	dev_put(dev);
1852 	return skb->len;
1853 }
1854 
1855 /* Main classifier routine: scans classifier chain attached
1856  * to this qdisc, (optionally) tests for protocol and asks
1857  * specific classifiers.
1858  */
1859 int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1860 		struct tcf_result *res, bool compat_mode)
1861 {
1862 	__be16 protocol = tc_skb_protocol(skb);
1863 #ifdef CONFIG_NET_CLS_ACT
1864 	const struct tcf_proto *old_tp = tp;
1865 	int limit = 0;
1866 
1867 reclassify:
1868 #endif
1869 	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1870 		int err;
1871 
1872 		if (tp->protocol != protocol &&
1873 		    tp->protocol != htons(ETH_P_ALL))
1874 			continue;
1875 
1876 		err = tp->classify(skb, tp, res);
1877 #ifdef CONFIG_NET_CLS_ACT
1878 		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
1879 			goto reset;
1880 #endif
1881 		if (err >= 0)
1882 			return err;
1883 	}
1884 
1885 	return TC_ACT_UNSPEC; /* signal: continue lookup */
1886 #ifdef CONFIG_NET_CLS_ACT
1887 reset:
1888 	if (unlikely(limit++ >= MAX_REC_LOOP)) {
1889 		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
1890 				       tp->q->ops->id, tp->prio & 0xffff,
1891 				       ntohs(tp->protocol));
1892 		return TC_ACT_SHOT;
1893 	}
1894 
1895 	tp = old_tp;
1896 	protocol = tc_skb_protocol(skb);
1897 	goto reclassify;
1898 #endif
1899 }
1900 EXPORT_SYMBOL(tc_classify);
1901 
1902 bool tcf_destroy(struct tcf_proto *tp, bool force)
1903 {
1904 	if (tp->ops->destroy(tp, force)) {
1905 		module_put(tp->ops->owner);
1906 		kfree_rcu(tp, rcu);
1907 		return true;
1908 	}
1909 
1910 	return false;
1911 }
1912 
1913 void tcf_destroy_chain(struct tcf_proto __rcu **fl)
1914 {
1915 	struct tcf_proto *tp;
1916 
1917 	while ((tp = rtnl_dereference(*fl)) != NULL) {
1918 		RCU_INIT_POINTER(*fl, tp->next);
1919 		tcf_destroy(tp, true);
1920 	}
1921 }
1922 EXPORT_SYMBOL(tcf_destroy_chain);
1923 
1924 #ifdef CONFIG_PROC_FS
1925 static int psched_show(struct seq_file *seq, void *v)
1926 {
1927 	seq_printf(seq, "%08x %08x %08x %08x\n",
1928 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1929 		   1000000,
1930 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1931 
1932 	return 0;
1933 }
1934 
1935 static int psched_open(struct inode *inode, struct file *file)
1936 {
1937 	return single_open(file, psched_show, NULL);
1938 }
1939 
1940 static const struct file_operations psched_fops = {
1941 	.owner = THIS_MODULE,
1942 	.open = psched_open,
1943 	.read  = seq_read,
1944 	.llseek = seq_lseek,
1945 	.release = single_release,
1946 };
1947 
1948 static int __net_init psched_net_init(struct net *net)
1949 {
1950 	struct proc_dir_entry *e;
1951 
1952 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1953 	if (e == NULL)
1954 		return -ENOMEM;
1955 
1956 	return 0;
1957 }
1958 
1959 static void __net_exit psched_net_exit(struct net *net)
1960 {
1961 	remove_proc_entry("psched", net->proc_net);
1962 }
1963 #else
1964 static int __net_init psched_net_init(struct net *net)
1965 {
1966 	return 0;
1967 }
1968 
1969 static void __net_exit psched_net_exit(struct net *net)
1970 {
1971 }
1972 #endif
1973 
1974 static struct pernet_operations psched_net_ops = {
1975 	.init = psched_net_init,
1976 	.exit = psched_net_exit,
1977 };
1978 
1979 static int __init pktsched_init(void)
1980 {
1981 	int err;
1982 
1983 	err = register_pernet_subsys(&psched_net_ops);
1984 	if (err) {
1985 		pr_err("pktsched_init: "
1986 		       "cannot initialize per netns operations\n");
1987 		return err;
1988 	}
1989 
1990 	register_qdisc(&pfifo_fast_ops);
1991 	register_qdisc(&pfifo_qdisc_ops);
1992 	register_qdisc(&bfifo_qdisc_ops);
1993 	register_qdisc(&pfifo_head_drop_qdisc_ops);
1994 	register_qdisc(&mq_qdisc_ops);
1995 	register_qdisc(&noqueue_qdisc_ops);
1996 
1997 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1998 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1999 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2000 		      NULL);
2001 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
2002 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
2003 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2004 		      NULL);
2005 
2006 	return 0;
2007 }
2008 
2009 subsys_initcall(pktsched_init);
2010