xref: /openbmc/linux/net/sched/sch_api.c (revision 1c2dd16a)
1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 
39 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 			struct nlmsghdr *n, u32 clid,
41 			struct Qdisc *old, struct Qdisc *new);
42 static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 			 struct nlmsghdr *n, struct Qdisc *q,
44 			 unsigned long cl, int event);
45 
46 /*
47 
48    Short review.
49    -------------
50 
51    This file consists of two interrelated parts:
52 
53    1. the queueing discipline manager frontend.
54    2. the traffic class manager frontend.
55 
56    Generally, a queueing discipline ("qdisc") is a black box
57    that is able to enqueue packets and to dequeue them (when the
58    device is ready to send something) in an order and at times
59    determined by the algorithm hidden inside it.
60 
61    qdiscs are divided into two categories:
62    - "queues", which have no internal structure visible from outside.
63    - "schedulers", which split all the packets into "traffic classes",
64      using "packet classifiers" (see cls_api.c).
65 
66    In turn, classes may have child qdiscs (as a rule, queues)
67    attached to them, and so on.
68 
69    The goal of the routines in this file is to translate
70    the information supplied by the user in the form of handles
71    into a form more intelligible to the kernel, to perform some
72    sanity checks and the work that is common to all qdiscs,
73    and to provide rtnetlink notifications.
74 
75    All the real intelligent work is done inside the qdisc modules.
76 
77 
78 
79    Every discipline has two major routines: enqueue and dequeue.
80 
81    ---dequeue
82 
83    dequeue usually returns an skb to send. It is allowed to return NULL,
84    but that does not mean the queue is empty; it just means that the
85    discipline does not want to send anything this time.
86    The queue is really empty only if q->q.qlen == 0.
87    For complicated disciplines with multiple queues, q->q is not the
88    real packet queue, but q->q.qlen must nevertheless be valid.
89 
90    ---enqueue
91 
92    enqueue returns 0 if the packet was enqueued successfully.
93    If a packet (this one or another one) was dropped, it returns
94    a non-zero error code:
95    NET_XMIT_DROP 	- this packet was dropped.
96      Expected action: do not back off, but wait until the queue clears.
97    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
98      Expected action: back off or ignore.
99 
100    Auxiliary routines:
101 
102    ---peek
103 
104    like dequeue but without removing a packet from the queue
105 
106    ---reset
107 
108    returns the qdisc to its initial state: purges all buffers, clears
109    all timers and counters (except statistics), etc.
110 
111    ---init
112 
113    initializes a newly created qdisc.
114 
115    ---destroy
116 
117    destroys resources allocated by init and during the lifetime of the qdisc.
118 
119    ---change
120 
121    changes the qdisc's parameters.
122  */
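
/* A minimal sketch of the enqueue/dequeue contract described above,
 * modeled on sch_fifo.  The "example_" names are hypothetical; the
 * qdisc_enqueue_tail()/qdisc_drop()/qdisc_dequeue_head() helpers keep
 * q.qlen and the backlog counters valid as the comment above requires.
 */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			   struct sk_buff **to_free)
{
	if (likely(sch->q.qlen < sch->limit))
		return qdisc_enqueue_tail(skb, sch);	/* NET_XMIT_SUCCESS */

	return qdisc_drop(skb, sch, to_free);		/* NET_XMIT_DROP */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL here does not necessarily mean "empty"; only
	 * q->q.qlen == 0 does.
	 */
	return qdisc_dequeue_head(sch);
}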
123 
124 /* Protects the list of registered TC modules. It is a pure SMP lock. */
125 static DEFINE_RWLOCK(qdisc_mod_lock);
126 
127 
128 /************************************************
129  *	Queueing disciplines manipulation.	*
130  ************************************************/
131 
132 
133 /* The list of all installed queueing disciplines. */
134 
135 static struct Qdisc_ops *qdisc_base;
136 
137 /* Register/unregister queueing discipline */
138 
139 int register_qdisc(struct Qdisc_ops *qops)
140 {
141 	struct Qdisc_ops *q, **qp;
142 	int rc = -EEXIST;
143 
144 	write_lock(&qdisc_mod_lock);
145 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 		if (!strcmp(qops->id, q->id))
147 			goto out;
148 
149 	if (qops->enqueue == NULL)
150 		qops->enqueue = noop_qdisc_ops.enqueue;
151 	if (qops->peek == NULL) {
152 		if (qops->dequeue == NULL)
153 			qops->peek = noop_qdisc_ops.peek;
154 		else
155 			goto out_einval;
156 	}
157 	if (qops->dequeue == NULL)
158 		qops->dequeue = noop_qdisc_ops.dequeue;
159 
160 	if (qops->cl_ops) {
161 		const struct Qdisc_class_ops *cops = qops->cl_ops;
162 
163 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
164 			goto out_einval;
165 
166 		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
167 			goto out_einval;
168 	}
169 
170 	qops->next = NULL;
171 	*qp = qops;
172 	rc = 0;
173 out:
174 	write_unlock(&qdisc_mod_lock);
175 	return rc;
176 
177 out_einval:
178 	rc = -EINVAL;
179 	goto out;
180 }
181 EXPORT_SYMBOL(register_qdisc);
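
/* Sketch of how a qdisc module would typically register itself with
 * the function above (compare pfifo_qdisc_ops in sch_fifo.c); it reuses
 * the hypothetical example_enqueue()/example_dequeue() sketch from the
 * top of this file.
 */
static struct Qdisc_ops example_qdisc_ops __read_mostly = {
	.id		= "example",
	.enqueue	= example_enqueue,
	.dequeue	= example_dequeue,
	.peek		= qdisc_peek_head,	/* required when dequeue is set */
	.owner		= THIS_MODULE,
};

static int __init example_module_init(void)
{
	return register_qdisc(&example_qdisc_ops);
}

static void __exit example_module_exit(void)
{
	unregister_qdisc(&example_qdisc_ops);
}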
182 
183 int unregister_qdisc(struct Qdisc_ops *qops)
184 {
185 	struct Qdisc_ops *q, **qp;
186 	int err = -ENOENT;
187 
188 	write_lock(&qdisc_mod_lock);
189 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
190 		if (q == qops)
191 			break;
192 	if (q) {
193 		*qp = q->next;
194 		q->next = NULL;
195 		err = 0;
196 	}
197 	write_unlock(&qdisc_mod_lock);
198 	return err;
199 }
200 EXPORT_SYMBOL(unregister_qdisc);
201 
202 /* Get default qdisc if not otherwise specified */
203 void qdisc_get_default(char *name, size_t len)
204 {
205 	read_lock(&qdisc_mod_lock);
206 	strlcpy(name, default_qdisc_ops->id, len);
207 	read_unlock(&qdisc_mod_lock);
208 }
209 
210 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211 {
212 	struct Qdisc_ops *q = NULL;
213 
214 	for (q = qdisc_base; q; q = q->next) {
215 		if (!strcmp(name, q->id)) {
216 			if (!try_module_get(q->owner))
217 				q = NULL;
218 			break;
219 		}
220 	}
221 
222 	return q;
223 }
224 
225 /* Set new default qdisc to use */
226 int qdisc_set_default(const char *name)
227 {
228 	const struct Qdisc_ops *ops;
229 
230 	if (!capable(CAP_NET_ADMIN))
231 		return -EPERM;
232 
233 	write_lock(&qdisc_mod_lock);
234 	ops = qdisc_lookup_default(name);
235 	if (!ops) {
236 		/* Not found, drop lock and try to load module */
237 		write_unlock(&qdisc_mod_lock);
238 		request_module("sch_%s", name);
239 		write_lock(&qdisc_mod_lock);
240 
241 		ops = qdisc_lookup_default(name);
242 	}
243 
244 	if (ops) {
245 		/* Set new default */
246 		module_put(default_qdisc_ops->owner);
247 		default_qdisc_ops = ops;
248 	}
249 	write_unlock(&qdisc_mod_lock);
250 
251 	return ops ? 0 : -ENOENT;
252 }
253 
254 #ifdef CONFIG_NET_SCH_DEFAULT
255 /* Set default value from kernel config */
256 static int __init sch_default_qdisc(void)
257 {
258 	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
259 }
260 late_initcall(sch_default_qdisc);
261 #endif
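
/* At runtime the default qdisc is normally selected through the
 * net.core.default_qdisc sysctl, whose handler ends up in
 * qdisc_set_default() above, e.g.:
 *
 *	sysctl -w net.core.default_qdisc=fq_codel
 */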
262 
263 /* We know the handle. Find the qdisc among all qdiscs attached to the
264  * device (the root qdisc, all its children, children of children, etc.).
265  * Note: the caller must hold either the RTNL lock or rcu_read_lock().
266  */
267 
268 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
269 {
270 	struct Qdisc *q;
271 
272 	if (!qdisc_dev(root))
273 		return (root->handle == handle ? root : NULL);
274 
275 	if (!(root->flags & TCQ_F_BUILTIN) &&
276 	    root->handle == handle)
277 		return root;
278 
279 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
280 		if (q->handle == handle)
281 			return q;
282 	}
283 	return NULL;
284 }
285 
286 void qdisc_hash_add(struct Qdisc *q, bool invisible)
287 {
288 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
289 		struct Qdisc *root = qdisc_dev(q)->qdisc;
290 
291 		WARN_ON_ONCE(root == &noop_qdisc);
292 		ASSERT_RTNL();
293 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
294 		if (invisible)
295 			q->flags |= TCQ_F_INVISIBLE;
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_add);
299 
300 void qdisc_hash_del(struct Qdisc *q)
301 {
302 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
303 		ASSERT_RTNL();
304 		hash_del_rcu(&q->hash);
305 	}
306 }
307 EXPORT_SYMBOL(qdisc_hash_del);
308 
309 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
310 {
311 	struct Qdisc *q;
312 
313 	q = qdisc_match_from_root(dev->qdisc, handle);
314 	if (q)
315 		goto out;
316 
317 	if (dev_ingress_queue(dev))
318 		q = qdisc_match_from_root(
319 			dev_ingress_queue(dev)->qdisc_sleeping,
320 			handle);
321 out:
322 	return q;
323 }
324 
325 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
326 {
327 	unsigned long cl;
328 	struct Qdisc *leaf;
329 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
330 
331 	if (cops == NULL)
332 		return NULL;
333 	cl = cops->get(p, classid);
334 
335 	if (cl == 0)
336 		return NULL;
337 	leaf = cops->leaf(p, cl);
338 	cops->put(p, cl);
339 	return leaf;
340 }
341 
342 /* Find queueing discipline by name */
343 
344 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
345 {
346 	struct Qdisc_ops *q = NULL;
347 
348 	if (kind) {
349 		read_lock(&qdisc_mod_lock);
350 		for (q = qdisc_base; q; q = q->next) {
351 			if (nla_strcmp(kind, q->id) == 0) {
352 				if (!try_module_get(q->owner))
353 					q = NULL;
354 				break;
355 			}
356 		}
357 		read_unlock(&qdisc_mod_lock);
358 	}
359 	return q;
360 }
361 
362 /* The linklayer setting was not transferred by older iproute2
363  * versions, and the rate table lookup system has been dropped from
364  * the kernel. To stay backward compatible with older iproute2 tc
365  * utils, we detect the linklayer setting by checking whether the rate
366  * table was modified.
367  *
368  * For linklayer ATM table entries, the rate table will be aligned to
369  * 48 bytes, thus some table entries will contain the same value.  The
370  * mpu (min packet unit) is also encoded into the old rate table, thus
371  * starting from the mpu, we find the low and high table entries
372  * mapping this cell.  If these entries contain the same value, then
373  * the rate table has been modified for linklayer ATM.
374  *
375  * This is done by rounding the mpu up to the nearest 48-byte
376  * cell/entry, then rounding up to the next cell, calculating the
377  * table entry one below, and comparing the two.
378  */
379 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
380 {
381 	int low       = roundup(r->mpu, 48);
382 	int high      = roundup(low+1, 48);
383 	int cell_low  = low >> r->cell_log;
384 	int cell_high = (high >> r->cell_log) - 1;
385 
386 	/* rtab is too inaccurate at rates > 100Mbit/s */
387 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
388 		pr_debug("TC linklayer: Giving up ATM detection\n");
389 		return TC_LINKLAYER_ETHERNET;
390 	}
391 
392 	if ((cell_high > cell_low) && (cell_high < 256)
393 	    && (rtab[cell_low] == rtab[cell_high])) {
394 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
395 			 cell_low, cell_high, rtab[cell_high]);
396 		return TC_LINKLAYER_ATM;
397 	}
398 	return TC_LINKLAYER_ETHERNET;
399 }
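
/* Worked example with illustrative numbers: for mpu = 0 and
 * cell_log = 3, low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48,
 * so cell_low = 0 and cell_high = (48 >> 3) - 1 = 5.  Entries 0..5 all
 * map to packet sizes of at most one 48-byte ATM cell, so an
 * ATM-adjusted table has rtab[0] == rtab[5] and we return
 * TC_LINKLAYER_ATM, while an Ethernet table grows with every entry.
 */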
400 
401 static struct qdisc_rate_table *qdisc_rtab_list;
402 
403 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
404 					struct nlattr *tab)
405 {
406 	struct qdisc_rate_table *rtab;
407 
408 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
409 	    nla_len(tab) != TC_RTAB_SIZE)
410 		return NULL;
411 
412 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
413 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
414 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
415 			rtab->refcnt++;
416 			return rtab;
417 		}
418 	}
419 
420 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
421 	if (rtab) {
422 		rtab->rate = *r;
423 		rtab->refcnt = 1;
424 		memcpy(rtab->data, nla_data(tab), 1024);
425 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
426 			r->linklayer = __detect_linklayer(r, rtab->data);
427 		rtab->next = qdisc_rtab_list;
428 		qdisc_rtab_list = rtab;
429 	}
430 	return rtab;
431 }
432 EXPORT_SYMBOL(qdisc_get_rtab);
433 
434 void qdisc_put_rtab(struct qdisc_rate_table *tab)
435 {
436 	struct qdisc_rate_table *rtab, **rtabp;
437 
438 	if (!tab || --tab->refcnt)
439 		return;
440 
441 	for (rtabp = &qdisc_rtab_list;
442 	     (rtab = *rtabp) != NULL;
443 	     rtabp = &rtab->next) {
444 		if (rtab == tab) {
445 			*rtabp = rtab->next;
446 			kfree(rtab);
447 			return;
448 		}
449 	}
450 }
451 EXPORT_SYMBOL(qdisc_put_rtab);
452 
453 static LIST_HEAD(qdisc_stab_list);
454 
455 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
456 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
457 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
458 };
459 
460 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
461 {
462 	struct nlattr *tb[TCA_STAB_MAX + 1];
463 	struct qdisc_size_table *stab;
464 	struct tc_sizespec *s;
465 	unsigned int tsize = 0;
466 	u16 *tab = NULL;
467 	int err;
468 
469 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
470 	if (err < 0)
471 		return ERR_PTR(err);
472 	if (!tb[TCA_STAB_BASE])
473 		return ERR_PTR(-EINVAL);
474 
475 	s = nla_data(tb[TCA_STAB_BASE]);
476 
477 	if (s->tsize > 0) {
478 		if (!tb[TCA_STAB_DATA])
479 			return ERR_PTR(-EINVAL);
480 		tab = nla_data(tb[TCA_STAB_DATA]);
481 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
482 	}
483 
484 	if (tsize != s->tsize || (!tab && tsize > 0))
485 		return ERR_PTR(-EINVAL);
486 
487 	list_for_each_entry(stab, &qdisc_stab_list, list) {
488 		if (memcmp(&stab->szopts, s, sizeof(*s)))
489 			continue;
490 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
491 			continue;
492 		stab->refcnt++;
493 		return stab;
494 	}
495 
496 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
497 	if (!stab)
498 		return ERR_PTR(-ENOMEM);
499 
500 	stab->refcnt = 1;
501 	stab->szopts = *s;
502 	if (tsize > 0)
503 		memcpy(stab->data, tab, tsize * sizeof(u16));
504 
505 	list_add_tail(&stab->list, &qdisc_stab_list);
506 
507 	return stab;
508 }
509 
510 static void stab_kfree_rcu(struct rcu_head *head)
511 {
512 	kfree(container_of(head, struct qdisc_size_table, rcu));
513 }
514 
515 void qdisc_put_stab(struct qdisc_size_table *tab)
516 {
517 	if (!tab)
518 		return;
519 
520 	if (--tab->refcnt == 0) {
521 		list_del(&tab->list);
522 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
523 	}
524 }
525 EXPORT_SYMBOL(qdisc_put_stab);
526 
527 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
528 {
529 	struct nlattr *nest;
530 
531 	nest = nla_nest_start(skb, TCA_STAB);
532 	if (nest == NULL)
533 		goto nla_put_failure;
534 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
535 		goto nla_put_failure;
536 	nla_nest_end(skb, nest);
537 
538 	return skb->len;
539 
540 nla_put_failure:
541 	return -1;
542 }
543 
544 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
545 			       const struct qdisc_size_table *stab)
546 {
547 	int pkt_len, slot;
548 
549 	pkt_len = skb->len + stab->szopts.overhead;
550 	if (unlikely(!stab->szopts.tsize))
551 		goto out;
552 
553 	slot = pkt_len + stab->szopts.cell_align;
554 	if (unlikely(slot < 0))
555 		slot = 0;
556 
557 	slot >>= stab->szopts.cell_log;
558 	if (likely(slot < stab->szopts.tsize))
559 		pkt_len = stab->data[slot];
560 	else
561 		pkt_len = stab->data[stab->szopts.tsize - 1] *
562 				(slot / stab->szopts.tsize) +
563 				stab->data[slot % stab->szopts.tsize];
564 
565 	pkt_len <<= stab->szopts.size_log;
566 out:
567 	if (unlikely(pkt_len < 1))
568 		pkt_len = 1;
569 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
570 }
571 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
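
/* Worked example with a hypothetical size table: for
 * szopts = { .cell_log = 6, .size_log = 0, .overhead = 24, .tsize = 512 }
 * and a 1000-byte skb, pkt_len = 1024, slot = 1024 >> 6 = 16, and the
 * packet is accounted as stab->data[16] << 0 bytes.  Slots beyond tsize
 * are extrapolated from the last entry plus a modulo lookup, as in the
 * else branch above.
 */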
572 
573 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
574 {
575 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
576 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
577 			txt, qdisc->ops->id, qdisc->handle >> 16);
578 		qdisc->flags |= TCQ_F_WARN_NONWC;
579 	}
580 }
581 EXPORT_SYMBOL(qdisc_warn_nonwc);
582 
583 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
584 {
585 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
586 						 timer);
587 
588 	rcu_read_lock();
589 	__netif_schedule(qdisc_root(wd->qdisc));
590 	rcu_read_unlock();
591 
592 	return HRTIMER_NORESTART;
593 }
594 
595 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
596 {
597 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
598 	wd->timer.function = qdisc_watchdog;
599 	wd->qdisc = qdisc;
600 }
601 EXPORT_SYMBOL(qdisc_watchdog_init);
602 
603 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
604 {
605 	if (test_bit(__QDISC_STATE_DEACTIVATED,
606 		     &qdisc_root_sleeping(wd->qdisc)->state))
607 		return;
608 
609 	if (wd->last_expires == expires)
610 		return;
611 
612 	wd->last_expires = expires;
613 	hrtimer_start(&wd->timer,
614 		      ns_to_ktime(expires),
615 		      HRTIMER_MODE_ABS_PINNED);
616 }
617 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
618 
619 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
620 {
621 	hrtimer_cancel(&wd->timer);
622 }
623 EXPORT_SYMBOL(qdisc_watchdog_cancel);
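
/* Sketch of the usual watchdog pattern in a shaping qdisc (compare
 * sch_tbf): qdisc_watchdog_init() is called from ->init(), ->dequeue()
 * arms the timer when the head packet may not leave yet, and
 * ->reset()/->destroy() call qdisc_watchdog_cancel().  The private
 * struct and function below are hypothetical.
 */
struct example_shaper_data {
	struct qdisc_watchdog	watchdog;
	u64			next_send_ns;
};

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
	struct example_shaper_data *q = qdisc_priv(sch);

	if (ktime_get_ns() < q->next_send_ns) {
		/* Gate still closed: reschedule ourselves and return
		 * NULL, which (see the comment at the top of this
		 * file) does not mean the queue is empty.
		 */
		qdisc_watchdog_schedule_ns(&q->watchdog, q->next_send_ns);
		return NULL;
	}
	return qdisc_dequeue_head(sch);
}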
624 
625 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
626 {
627 	unsigned int size = n * sizeof(struct hlist_head), i;
628 	struct hlist_head *h;
629 
630 	if (size <= PAGE_SIZE)
631 		h = kmalloc(size, GFP_KERNEL);
632 	else
633 		h = (struct hlist_head *)
634 			__get_free_pages(GFP_KERNEL, get_order(size));
635 
636 	if (h != NULL) {
637 		for (i = 0; i < n; i++)
638 			INIT_HLIST_HEAD(&h[i]);
639 	}
640 	return h;
641 }
642 
643 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
644 {
645 	unsigned int size = n * sizeof(struct hlist_head);
646 
647 	if (size <= PAGE_SIZE)
648 		kfree(h);
649 	else
650 		free_pages((unsigned long)h, get_order(size));
651 }
652 
653 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
654 {
655 	struct Qdisc_class_common *cl;
656 	struct hlist_node *next;
657 	struct hlist_head *nhash, *ohash;
658 	unsigned int nsize, nmask, osize;
659 	unsigned int i, h;
660 
661 	/* Rehash when load factor exceeds 0.75 */
662 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
663 		return;
664 	nsize = clhash->hashsize * 2;
665 	nmask = nsize - 1;
666 	nhash = qdisc_class_hash_alloc(nsize);
667 	if (nhash == NULL)
668 		return;
669 
670 	ohash = clhash->hash;
671 	osize = clhash->hashsize;
672 
673 	sch_tree_lock(sch);
674 	for (i = 0; i < osize; i++) {
675 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
676 			h = qdisc_class_hash(cl->classid, nmask);
677 			hlist_add_head(&cl->hnode, &nhash[h]);
678 		}
679 	}
680 	clhash->hash     = nhash;
681 	clhash->hashsize = nsize;
682 	clhash->hashmask = nmask;
683 	sch_tree_unlock(sch);
684 
685 	qdisc_class_hash_free(ohash, osize);
686 }
687 EXPORT_SYMBOL(qdisc_class_hash_grow);
688 
689 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
690 {
691 	unsigned int size = 4;
692 
693 	clhash->hash = qdisc_class_hash_alloc(size);
694 	if (clhash->hash == NULL)
695 		return -ENOMEM;
696 	clhash->hashsize  = size;
697 	clhash->hashmask  = size - 1;
698 	clhash->hashelems = 0;
699 	return 0;
700 }
701 EXPORT_SYMBOL(qdisc_class_hash_init);
702 
703 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
704 {
705 	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
706 }
707 EXPORT_SYMBOL(qdisc_class_hash_destroy);
708 
709 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
710 			     struct Qdisc_class_common *cl)
711 {
712 	unsigned int h;
713 
714 	INIT_HLIST_NODE(&cl->hnode);
715 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
716 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
717 	clhash->hashelems++;
718 }
719 EXPORT_SYMBOL(qdisc_class_hash_insert);
720 
721 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
722 			     struct Qdisc_class_common *cl)
723 {
724 	hlist_del(&cl->hnode);
725 	clhash->hashelems--;
726 }
727 EXPORT_SYMBOL(qdisc_class_hash_remove);
728 
729 /* Allocate a unique handle from the space managed by the kernel.
730  * The possible range is [8000-FFFF]:0000 (0x8000 values).
731  */
732 static u32 qdisc_alloc_handle(struct net_device *dev)
733 {
734 	int i = 0x8000;
735 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
736 
737 	do {
738 		autohandle += TC_H_MAKE(0x10000U, 0);
739 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
740 			autohandle = TC_H_MAKE(0x80000000U, 0);
741 		if (!qdisc_lookup(dev, autohandle))
742 			return autohandle;
743 		cond_resched();
744 	} while	(--i > 0);
745 
746 	return 0;
747 }
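
/* A handle is 32 bits: TC_H_MAJ() is the upper 16 bits (what tc prints
 * in hex before the colon) and TC_H_MIN() the lower 16 (zero for a
 * qdisc, nonzero for a class).  The first handle the allocator above
 * hands out is therefore TC_H_MAKE(0x80010000U, 0), shown by tc as
 * "8001:".
 */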
748 
749 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
750 			       unsigned int len)
751 {
752 	const struct Qdisc_class_ops *cops;
753 	unsigned long cl;
754 	u32 parentid;
755 	int drops;
756 
757 	if (n == 0 && len == 0)
758 		return;
759 	drops = max_t(int, n, 0);
760 	rcu_read_lock();
761 	while ((parentid = sch->parent)) {
762 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
763 			break;
764 
765 		if (sch->flags & TCQ_F_NOPARENT)
766 			break;
767 		/* TODO: perform the search on a per txq basis */
768 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
769 		if (sch == NULL) {
770 			WARN_ON_ONCE(parentid != TC_H_ROOT);
771 			break;
772 		}
773 		cops = sch->ops->cl_ops;
774 		if (cops->qlen_notify) {
775 			cl = cops->get(sch, parentid);
776 			cops->qlen_notify(sch, cl);
777 			cops->put(sch, cl);
778 		}
779 		sch->q.qlen -= n;
780 		sch->qstats.backlog -= len;
781 		__qdisc_qstats_drop(sch, drops);
782 	}
783 	rcu_read_unlock();
784 }
785 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
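
/* Typical caller pattern (hypothetical drop path in a child qdisc):
 * when a packet leaves a child other than through the parent's dequeue,
 * every ancestor's qlen/backlog must be adjusted:
 *
 *	unsigned int len = qdisc_pkt_len(skb);
 *
 *	...drop skb...
 *	qdisc_tree_reduce_backlog(sch, 1, len);
 */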
786 
787 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
788 			       struct nlmsghdr *n, u32 clid,
789 			       struct Qdisc *old, struct Qdisc *new)
790 {
791 	if (new || old)
792 		qdisc_notify(net, skb, n, clid, old, new);
793 
794 	if (old)
795 		qdisc_destroy(old);
796 }
797 
798 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
799  * to device "dev".
800  *
801  * When appropriate, send a netlink notification using "skb"
802  * and "n".
803  *
804  * On success, destroy the old qdisc.
805  */
806 
807 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
808 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
809 		       struct Qdisc *new, struct Qdisc *old)
810 {
811 	struct Qdisc *q = old;
812 	struct net *net = dev_net(dev);
813 	int err = 0;
814 
815 	if (parent == NULL) {
816 		unsigned int i, num_q, ingress;
817 
818 		ingress = 0;
819 		num_q = dev->num_tx_queues;
820 		if ((q && q->flags & TCQ_F_INGRESS) ||
821 		    (new && new->flags & TCQ_F_INGRESS)) {
822 			num_q = 1;
823 			ingress = 1;
824 			if (!dev_ingress_queue(dev))
825 				return -ENOENT;
826 		}
827 
828 		if (dev->flags & IFF_UP)
829 			dev_deactivate(dev);
830 
831 		if (new && new->ops->attach)
832 			goto skip;
833 
834 		for (i = 0; i < num_q; i++) {
835 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
836 
837 			if (!ingress)
838 				dev_queue = netdev_get_tx_queue(dev, i);
839 
840 			old = dev_graft_qdisc(dev_queue, new);
841 			if (new && i > 0)
842 				atomic_inc(&new->refcnt);
843 
844 			if (!ingress)
845 				qdisc_destroy(old);
846 		}
847 
848 skip:
849 		if (!ingress) {
850 			notify_and_destroy(net, skb, n, classid,
851 					   dev->qdisc, new);
852 			if (new && !new->ops->attach)
853 				atomic_inc(&new->refcnt);
854 			dev->qdisc = new ? : &noop_qdisc;
855 
856 			if (new && new->ops->attach)
857 				new->ops->attach(new);
858 		} else {
859 			notify_and_destroy(net, skb, n, classid, old, new);
860 		}
861 
862 		if (dev->flags & IFF_UP)
863 			dev_activate(dev);
864 	} else {
865 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
866 
867 		err = -EOPNOTSUPP;
868 		if (cops && cops->graft) {
869 			unsigned long cl = cops->get(parent, classid);
870 			if (cl) {
871 				err = cops->graft(parent, cl, new, &old);
872 				cops->put(parent, cl);
873 			} else
874 				err = -ENOENT;
875 		}
876 		if (!err)
877 			notify_and_destroy(net, skb, n, classid, old, new);
878 	}
879 	return err;
880 }
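
/* In tc(8) terms (illustrative commands), the two branches above are:
 *
 *	tc qdisc replace dev eth0 root htb		- parent == NULL,
 *		graft onto the device itself (one qdisc per tx queue
 *		unless the new qdisc provides ->attach())
 *	tc qdisc add dev eth0 parent 1:1 handle 10: sfq	- classful path,
 *		cops->graft() grafts into class 1:1 of qdisc 1:
 */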
881 
882 /* lockdep annotation is needed for ingress; egress gets it only for name */
883 static struct lock_class_key qdisc_tx_lock;
884 static struct lock_class_key qdisc_rx_lock;
885 
886 /*
887    Allocate and initialize a new qdisc.
888 
889    Parameters are passed via the tca netlink attributes.
890  */
891 
892 static struct Qdisc *qdisc_create(struct net_device *dev,
893 				  struct netdev_queue *dev_queue,
894 				  struct Qdisc *p, u32 parent, u32 handle,
895 				  struct nlattr **tca, int *errp)
896 {
897 	int err;
898 	struct nlattr *kind = tca[TCA_KIND];
899 	struct Qdisc *sch;
900 	struct Qdisc_ops *ops;
901 	struct qdisc_size_table *stab;
902 
903 	ops = qdisc_lookup_ops(kind);
904 #ifdef CONFIG_MODULES
905 	if (ops == NULL && kind != NULL) {
906 		char name[IFNAMSIZ];
907 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
908 			/* We dropped the RTNL semaphore in order to
909 			 * perform the module load.  So, even if we
910 			 * succeeded in loading the module we have to
911 			 * tell the caller to replay the request.  We
912 			 * indicate this using -EAGAIN.
913 			 * We replay the request because the device may
914 			 * go away in the mean time.
915 			 * go away in the meantime.
916 			rtnl_unlock();
917 			request_module("sch_%s", name);
918 			rtnl_lock();
919 			ops = qdisc_lookup_ops(kind);
920 			if (ops != NULL) {
921 				/* We will try qdisc_lookup_ops() again,
922 				 * so don't keep a reference.
923 				 */
924 				module_put(ops->owner);
925 				err = -EAGAIN;
926 				goto err_out;
927 			}
928 		}
929 	}
930 #endif
931 
932 	err = -ENOENT;
933 	if (ops == NULL)
934 		goto err_out;
935 
936 	sch = qdisc_alloc(dev_queue, ops);
937 	if (IS_ERR(sch)) {
938 		err = PTR_ERR(sch);
939 		goto err_out2;
940 	}
941 
942 	sch->parent = parent;
943 
944 	if (handle == TC_H_INGRESS) {
945 		sch->flags |= TCQ_F_INGRESS;
946 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
947 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
948 	} else {
949 		if (handle == 0) {
950 			handle = qdisc_alloc_handle(dev);
951 			err = -ENOMEM;
952 			if (handle == 0)
953 				goto err_out3;
954 		}
955 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
956 		if (!netif_is_multiqueue(dev))
957 			sch->flags |= TCQ_F_ONETXQUEUE;
958 	}
959 
960 	sch->handle = handle;
961 
962 	/* This exists to stay backward compatible with a userspace
963 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
964 	 * facility on older kernels by setting tx_queue_len=0 (prior
965 	 * to qdisc init), and then forgetting to reinit tx_queue_len
966 	 * before attaching a qdisc again.
967 	 */
968 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
969 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
970 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
971 	}
972 
973 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
974 		if (qdisc_is_percpu_stats(sch)) {
975 			sch->cpu_bstats =
976 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
977 			if (!sch->cpu_bstats)
978 				goto err_out4;
979 
980 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
981 			if (!sch->cpu_qstats)
982 				goto err_out4;
983 		}
984 
985 		if (tca[TCA_STAB]) {
986 			stab = qdisc_get_stab(tca[TCA_STAB]);
987 			if (IS_ERR(stab)) {
988 				err = PTR_ERR(stab);
989 				goto err_out4;
990 			}
991 			rcu_assign_pointer(sch->stab, stab);
992 		}
993 		if (tca[TCA_RATE]) {
994 			seqcount_t *running;
995 
996 			err = -EOPNOTSUPP;
997 			if (sch->flags & TCQ_F_MQROOT)
998 				goto err_out4;
999 
1000 			if ((sch->parent != TC_H_ROOT) &&
1001 			    !(sch->flags & TCQ_F_INGRESS) &&
1002 			    (!p || !(p->flags & TCQ_F_MQROOT)))
1003 				running = qdisc_root_sleeping_running(sch);
1004 			else
1005 				running = &sch->running;
1006 
1007 			err = gen_new_estimator(&sch->bstats,
1008 						sch->cpu_bstats,
1009 						&sch->rate_est,
1010 						NULL,
1011 						running,
1012 						tca[TCA_RATE]);
1013 			if (err)
1014 				goto err_out4;
1015 		}
1016 
1017 		qdisc_hash_add(sch, false);
1018 
1019 		return sch;
1020 	}
1021 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1022 	ops->destroy(sch);
1023 err_out3:
1024 	dev_put(dev);
1025 	kfree((char *) sch - sch->padded);
1026 err_out2:
1027 	module_put(ops->owner);
1028 err_out:
1029 	*errp = err;
1030 	return NULL;
1031 
1032 err_out4:
1033 	free_percpu(sch->cpu_bstats);
1034 	free_percpu(sch->cpu_qstats);
1035 	/*
1036 	 * Any broken qdiscs that would require an ops->reset() here?
1037 	 * The qdisc was never in action so it shouldn't be necessary.
1038 	 */
1039 	qdisc_put_stab(rtnl_dereference(sch->stab));
1040 	if (ops->destroy)
1041 		ops->destroy(sch);
1042 	goto err_out3;
1043 }
1044 
1045 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1046 {
1047 	struct qdisc_size_table *ostab, *stab = NULL;
1048 	int err = 0;
1049 
1050 	if (tca[TCA_OPTIONS]) {
1051 		if (sch->ops->change == NULL)
1052 			return -EINVAL;
1053 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1054 		if (err)
1055 			return err;
1056 	}
1057 
1058 	if (tca[TCA_STAB]) {
1059 		stab = qdisc_get_stab(tca[TCA_STAB]);
1060 		if (IS_ERR(stab))
1061 			return PTR_ERR(stab);
1062 	}
1063 
1064 	ostab = rtnl_dereference(sch->stab);
1065 	rcu_assign_pointer(sch->stab, stab);
1066 	qdisc_put_stab(ostab);
1067 
1068 	if (tca[TCA_RATE]) {
1069 		/* NB: ignores errors from gen_replace_estimator()
1070 		   because the change can't be undone. */
1071 		if (sch->flags & TCQ_F_MQROOT)
1072 			goto out;
1073 		gen_replace_estimator(&sch->bstats,
1074 				      sch->cpu_bstats,
1075 				      &sch->rate_est,
1076 				      NULL,
1077 				      qdisc_root_sleeping_running(sch),
1078 				      tca[TCA_RATE]);
1079 	}
1080 out:
1081 	return 0;
1082 }
1083 
1084 struct check_loop_arg {
1085 	struct qdisc_walker	w;
1086 	struct Qdisc		*p;
1087 	int			depth;
1088 };
1089 
1090 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1091 			 struct qdisc_walker *w);
1092 
1093 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1094 {
1095 	struct check_loop_arg	arg;
1096 
1097 	if (q->ops->cl_ops == NULL)
1098 		return 0;
1099 
1100 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1101 	arg.w.fn = check_loop_fn;
1102 	arg.depth = depth;
1103 	arg.p = p;
1104 	q->ops->cl_ops->walk(q, &arg.w);
1105 	return arg.w.stop ? -ELOOP : 0;
1106 }
1107 
1108 static int
1109 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1110 {
1111 	struct Qdisc *leaf;
1112 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1113 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1114 
1115 	leaf = cops->leaf(q, cl);
1116 	if (leaf) {
1117 		if (leaf == arg->p || arg->depth > 7)
1118 			return -ELOOP;
1119 		return check_loop(leaf, arg->p, arg->depth + 1);
1120 	}
1121 	return 0;
1122 }
1123 
1124 /*
1125  * Delete/get qdisc.
1126  */
1127 
1128 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1129 			struct netlink_ext_ack *extack)
1130 {
1131 	struct net *net = sock_net(skb->sk);
1132 	struct tcmsg *tcm = nlmsg_data(n);
1133 	struct nlattr *tca[TCA_MAX + 1];
1134 	struct net_device *dev;
1135 	u32 clid;
1136 	struct Qdisc *q = NULL;
1137 	struct Qdisc *p = NULL;
1138 	int err;
1139 
1140 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1141 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1142 		return -EPERM;
1143 
1144 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1145 	if (err < 0)
1146 		return err;
1147 
1148 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1149 	if (!dev)
1150 		return -ENODEV;
1151 
1152 	clid = tcm->tcm_parent;
1153 	if (clid) {
1154 		if (clid != TC_H_ROOT) {
1155 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1156 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1157 				if (!p)
1158 					return -ENOENT;
1159 				q = qdisc_leaf(p, clid);
1160 			} else if (dev_ingress_queue(dev)) {
1161 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1162 			}
1163 		} else {
1164 			q = dev->qdisc;
1165 		}
1166 		if (!q)
1167 			return -ENOENT;
1168 
1169 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1170 			return -EINVAL;
1171 	} else {
1172 		q = qdisc_lookup(dev, tcm->tcm_handle);
1173 		if (!q)
1174 			return -ENOENT;
1175 	}
1176 
1177 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1178 		return -EINVAL;
1179 
1180 	if (n->nlmsg_type == RTM_DELQDISC) {
1181 		if (!clid)
1182 			return -EINVAL;
1183 		if (q->handle == 0)
1184 			return -ENOENT;
1185 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1186 		if (err != 0)
1187 			return err;
1188 	} else {
1189 		qdisc_notify(net, skb, n, clid, NULL, q);
1190 	}
1191 	return 0;
1192 }
1193 
1194 /*
1195  * Create/change qdisc.
1196  */
1197 
1198 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1199 			   struct netlink_ext_ack *extack)
1200 {
1201 	struct net *net = sock_net(skb->sk);
1202 	struct tcmsg *tcm;
1203 	struct nlattr *tca[TCA_MAX + 1];
1204 	struct net_device *dev;
1205 	u32 clid;
1206 	struct Qdisc *q, *p;
1207 	int err;
1208 
1209 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1210 		return -EPERM;
1211 
1212 replay:
1213 	/* Reinit, just in case something touches this. */
1214 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1215 	if (err < 0)
1216 		return err;
1217 
1218 	tcm = nlmsg_data(n);
1219 	clid = tcm->tcm_parent;
1220 	q = p = NULL;
1221 
1222 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1223 	if (!dev)
1224 		return -ENODEV;
1225 
1226 
1227 	if (clid) {
1228 		if (clid != TC_H_ROOT) {
1229 			if (clid != TC_H_INGRESS) {
1230 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1231 				if (!p)
1232 					return -ENOENT;
1233 				q = qdisc_leaf(p, clid);
1234 			} else if (dev_ingress_queue_create(dev)) {
1235 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1236 			}
1237 		} else {
1238 			q = dev->qdisc;
1239 		}
1240 
1241 		/* It may be the default qdisc; ignore it */
1242 		if (q && q->handle == 0)
1243 			q = NULL;
1244 
1245 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1246 			if (tcm->tcm_handle) {
1247 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1248 					return -EEXIST;
1249 				if (TC_H_MIN(tcm->tcm_handle))
1250 					return -EINVAL;
1251 				q = qdisc_lookup(dev, tcm->tcm_handle);
1252 				if (!q)
1253 					goto create_n_graft;
1254 				if (n->nlmsg_flags & NLM_F_EXCL)
1255 					return -EEXIST;
1256 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1257 					return -EINVAL;
1258 				if (q == p ||
1259 				    (p && check_loop(q, p, 0)))
1260 					return -ELOOP;
1261 				atomic_inc(&q->refcnt);
1262 				goto graft;
1263 			} else {
1264 				if (!q)
1265 					goto create_n_graft;
1266 
1267 				/* This magic test requires explanation.
1268 				 *
1269 				 *   We know that some child q is already
1270 				 *   attached to this parent and have a choice:
1271 				 *   either to change it or to create/graft a new one.
1272 				 *
1273 				 *   1. We are allowed to create/graft only
1274 				 *   if both the CREATE and REPLACE flags are set.
1275 				 *
1276 				 *   2. If EXCL is set, the requestor wanted to say
1277 				 *   that the qdisc tcm_handle is not expected
1278 				 *   to exist, so we choose create/graft too.
1279 				 *
1280 				 *   3. The last case is when no flags are set.
1281 				 *   Alas, it is sort of a hole in the API: we
1282 				 *   cannot decide what to do unambiguously.
1283 				 *   For now we select create/graft if the
1284 				 *   user gave a KIND that does not match the existing one.
1285 				 */
1286 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1287 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1288 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1289 				     (tca[TCA_KIND] &&
1290 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1291 					goto create_n_graft;
1292 			}
1293 		}
1294 	} else {
1295 		if (!tcm->tcm_handle)
1296 			return -EINVAL;
1297 		q = qdisc_lookup(dev, tcm->tcm_handle);
1298 	}
1299 
1300 	/* Change qdisc parameters */
1301 	if (q == NULL)
1302 		return -ENOENT;
1303 	if (n->nlmsg_flags & NLM_F_EXCL)
1304 		return -EEXIST;
1305 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1306 		return -EINVAL;
1307 	err = qdisc_change(q, tca);
1308 	if (err == 0)
1309 		qdisc_notify(net, skb, n, clid, NULL, q);
1310 	return err;
1311 
1312 create_n_graft:
1313 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1314 		return -ENOENT;
1315 	if (clid == TC_H_INGRESS) {
1316 		if (dev_ingress_queue(dev))
1317 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1318 					 tcm->tcm_parent, tcm->tcm_parent,
1319 					 tca, &err);
1320 		else
1321 			err = -ENOENT;
1322 	} else {
1323 		struct netdev_queue *dev_queue;
1324 
1325 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1326 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1327 		else if (p)
1328 			dev_queue = p->dev_queue;
1329 		else
1330 			dev_queue = netdev_get_tx_queue(dev, 0);
1331 
1332 		q = qdisc_create(dev, dev_queue, p,
1333 				 tcm->tcm_parent, tcm->tcm_handle,
1334 				 tca, &err);
1335 	}
1336 	if (q == NULL) {
1337 		if (err == -EAGAIN)
1338 			goto replay;
1339 		return err;
1340 	}
1341 
1342 graft:
1343 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1344 	if (err) {
1345 		if (q)
1346 			qdisc_destroy(q);
1347 		return err;
1348 	}
1349 
1350 	return 0;
1351 }
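
/* The flag combinations tested above map onto the tc(8) verbs as
 * generated by iproute2:
 *
 *	tc qdisc add	 -> NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc change	 -> (no flags)
 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc link	 -> NLM_F_REPLACE
 */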
1352 
1353 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1354 			 u32 portid, u32 seq, u16 flags, int event)
1355 {
1356 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
1357 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1358 	struct tcmsg *tcm;
1359 	struct nlmsghdr  *nlh;
1360 	unsigned char *b = skb_tail_pointer(skb);
1361 	struct gnet_dump d;
1362 	struct qdisc_size_table *stab;
1363 	__u32 qlen;
1364 
1365 	cond_resched();
1366 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1367 	if (!nlh)
1368 		goto out_nlmsg_trim;
1369 	tcm = nlmsg_data(nlh);
1370 	tcm->tcm_family = AF_UNSPEC;
1371 	tcm->tcm__pad1 = 0;
1372 	tcm->tcm__pad2 = 0;
1373 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1374 	tcm->tcm_parent = clid;
1375 	tcm->tcm_handle = q->handle;
1376 	tcm->tcm_info = atomic_read(&q->refcnt);
1377 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1378 		goto nla_put_failure;
1379 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
1380 		goto nla_put_failure;
1381 	qlen = q->q.qlen;
1382 
1383 	stab = rtnl_dereference(q->stab);
1384 	if (stab && qdisc_dump_stab(skb, stab) < 0)
1385 		goto nla_put_failure;
1386 
1387 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1388 					 NULL, &d, TCA_PAD) < 0)
1389 		goto nla_put_failure;
1390 
1391 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1392 		goto nla_put_failure;
1393 
1394 	if (qdisc_is_percpu_stats(q)) {
1395 		cpu_bstats = q->cpu_bstats;
1396 		cpu_qstats = q->cpu_qstats;
1397 	}
1398 
1399 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1400 				  &d, cpu_bstats, &q->bstats) < 0 ||
1401 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1402 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1403 		goto nla_put_failure;
1404 
1405 	if (gnet_stats_finish_copy(&d) < 0)
1406 		goto nla_put_failure;
1407 
1408 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1409 	return skb->len;
1410 
1411 out_nlmsg_trim:
1412 nla_put_failure:
1413 	nlmsg_trim(skb, b);
1414 	return -1;
1415 }
1416 
1417 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
1418 {
1419 	if (q->flags & TCQ_F_BUILTIN)
1420 		return true;
1421 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
1422 		return true;
1423 
1424 	return false;
1425 }
1426 
1427 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1428 			struct nlmsghdr *n, u32 clid,
1429 			struct Qdisc *old, struct Qdisc *new)
1430 {
1431 	struct sk_buff *skb;
1432 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1433 
1434 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1435 	if (!skb)
1436 		return -ENOBUFS;
1437 
1438 	if (old && !tc_qdisc_dump_ignore(old, false)) {
1439 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1440 				  0, RTM_DELQDISC) < 0)
1441 			goto err_out;
1442 	}
1443 	if (new && !tc_qdisc_dump_ignore(new, false)) {
1444 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1445 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1446 			goto err_out;
1447 	}
1448 
1449 	if (skb->len)
1450 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1451 				      n->nlmsg_flags & NLM_F_ECHO);
1452 
1453 err_out:
1454 	kfree_skb(skb);
1455 	return -EINVAL;
1456 }
1457 
1458 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1459 			      struct netlink_callback *cb,
1460 			      int *q_idx_p, int s_q_idx, bool recur,
1461 			      bool dump_invisible)
1462 {
1463 	int ret = 0, q_idx = *q_idx_p;
1464 	struct Qdisc *q;
1465 	int b;
1466 
1467 	if (!root)
1468 		return 0;
1469 
1470 	q = root;
1471 	if (q_idx < s_q_idx) {
1472 		q_idx++;
1473 	} else {
1474 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1475 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1476 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1477 				  RTM_NEWQDISC) <= 0)
1478 			goto done;
1479 		q_idx++;
1480 	}
1481 
1482 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1483 	 * itself has already been dumped.
1484 	 *
1485 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1486 	 * qdisc hashtable, we don't want to hit it again.
1487 	 */
1488 	if (!qdisc_dev(root) || !recur)
1489 		goto out;
1490 
1491 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1492 		if (q_idx < s_q_idx) {
1493 			q_idx++;
1494 			continue;
1495 		}
1496 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1497 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1498 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1499 				  RTM_NEWQDISC) <= 0)
1500 			goto done;
1501 		q_idx++;
1502 	}
1503 
1504 out:
1505 	*q_idx_p = q_idx;
1506 	return ret;
1507 done:
1508 	ret = -1;
1509 	goto out;
1510 }
1511 
1512 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1513 {
1514 	struct net *net = sock_net(skb->sk);
1515 	int idx, q_idx;
1516 	int s_idx, s_q_idx;
1517 	struct net_device *dev;
1518 	const struct nlmsghdr *nlh = cb->nlh;
1519 	struct tcmsg *tcm = nlmsg_data(nlh);
1520 	struct nlattr *tca[TCA_MAX + 1];
1521 	int err;
1522 
1523 	s_idx = cb->args[0];
1524 	s_q_idx = q_idx = cb->args[1];
1525 
1526 	idx = 0;
1527 	ASSERT_RTNL();
1528 
1529 	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
1530 	if (err < 0)
1531 		return err;
1532 
1533 	for_each_netdev(net, dev) {
1534 		struct netdev_queue *dev_queue;
1535 
1536 		if (idx < s_idx)
1537 			goto cont;
1538 		if (idx > s_idx)
1539 			s_q_idx = 0;
1540 		q_idx = 0;
1541 
1542 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1543 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1544 			goto done;
1545 
1546 		dev_queue = dev_ingress_queue(dev);
1547 		if (dev_queue &&
1548 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1549 				       &q_idx, s_q_idx, false,
1550 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1551 			goto done;
1552 
1553 cont:
1554 		idx++;
1555 	}
1556 
1557 done:
1558 	cb->args[0] = idx;
1559 	cb->args[1] = q_idx;
1560 
1561 	return skb->len;
1562 }
1563 
1564 
1565 
1566 /************************************************
1567  *	Traffic classes manipulation.		*
1568  ************************************************/
1569 
1570 
1571 
1572 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1573 			 struct netlink_ext_ack *extack)
1574 {
1575 	struct net *net = sock_net(skb->sk);
1576 	struct tcmsg *tcm = nlmsg_data(n);
1577 	struct nlattr *tca[TCA_MAX + 1];
1578 	struct net_device *dev;
1579 	struct Qdisc *q = NULL;
1580 	const struct Qdisc_class_ops *cops;
1581 	unsigned long cl = 0;
1582 	unsigned long new_cl;
1583 	u32 portid;
1584 	u32 clid;
1585 	u32 qid;
1586 	int err;
1587 
1588 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1589 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1590 		return -EPERM;
1591 
1592 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1593 	if (err < 0)
1594 		return err;
1595 
1596 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1597 	if (!dev)
1598 		return -ENODEV;
1599 
1600 	/*
1601 	   parent == TC_H_UNSPEC - unspecified parent.
1602 	   parent == TC_H_ROOT   - class is root, which has no parent.
1603 	   parent == X:0	 - parent is root class.
1604 	   parent == X:Y	 - parent is a node in hierarchy.
1605 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1606 
1607 	   handle == 0:0	 - generate handle from kernel pool.
1608 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1609 	   handle == X:Y	 - class is X:Y (fully specified).
1610 	   handle == X:0	 - root class.
1611 	 */
1612 
1613 	/* Step 1. Determine qdisc handle X:0 */
1614 
1615 	portid = tcm->tcm_parent;
1616 	clid = tcm->tcm_handle;
1617 	qid = TC_H_MAJ(clid);
1618 
1619 	if (portid != TC_H_ROOT) {
1620 		u32 qid1 = TC_H_MAJ(portid);
1621 
1622 		if (qid && qid1) {
1623 			/* If both majors are known, they must be identical. */
1624 			if (qid != qid1)
1625 				return -EINVAL;
1626 		} else if (qid1) {
1627 			qid = qid1;
1628 		} else if (qid == 0)
1629 			qid = dev->qdisc->handle;
1630 
1631 		/* Now qid is a genuine qdisc handle consistent
1632 		 * with both parent and child.
1633 		 *
1634 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
1635 		 */
1636 		if (portid)
1637 			portid = TC_H_MAKE(qid, portid);
1638 	} else {
1639 		if (qid == 0)
1640 			qid = dev->qdisc->handle;
1641 	}
1642 
1643 	/* OK. Locate qdisc */
1644 	q = qdisc_lookup(dev, qid);
1645 	if (!q)
1646 		return -ENOENT;
1647 
1648 	/* And check that it supports classes */
1649 	cops = q->ops->cl_ops;
1650 	if (cops == NULL)
1651 		return -EINVAL;
1652 
1653 	/* Now try to get class */
1654 	if (clid == 0) {
1655 		if (portid == TC_H_ROOT)
1656 			clid = qid;
1657 	} else
1658 		clid = TC_H_MAKE(qid, clid);
1659 
1660 	if (clid)
1661 		cl = cops->get(q, clid);
1662 
1663 	if (cl == 0) {
1664 		err = -ENOENT;
1665 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1666 		    !(n->nlmsg_flags & NLM_F_CREATE))
1667 			goto out;
1668 	} else {
1669 		switch (n->nlmsg_type) {
1670 		case RTM_NEWTCLASS:
1671 			err = -EEXIST;
1672 			if (n->nlmsg_flags & NLM_F_EXCL)
1673 				goto out;
1674 			break;
1675 		case RTM_DELTCLASS:
1676 			err = -EOPNOTSUPP;
1677 			if (cops->delete)
1678 				err = cops->delete(q, cl);
1679 			if (err == 0)
1680 				tclass_notify(net, skb, n, q, cl,
1681 					      RTM_DELTCLASS);
1682 			goto out;
1683 		case RTM_GETTCLASS:
1684 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1685 			goto out;
1686 		default:
1687 			err = -EINVAL;
1688 			goto out;
1689 		}
1690 	}
1691 
1692 	new_cl = cl;
1693 	err = -EOPNOTSUPP;
1694 	if (cops->change)
1695 		err = cops->change(q, clid, portid, tca, &new_cl);
1696 	if (err == 0)
1697 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1698 
1699 out:
1700 	if (cl)
1701 		cops->put(q, cl);
1702 
1703 	return err;
1704 }
1705 
1706 
1707 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1708 			  unsigned long cl,
1709 			  u32 portid, u32 seq, u16 flags, int event)
1710 {
1711 	struct tcmsg *tcm;
1712 	struct nlmsghdr  *nlh;
1713 	unsigned char *b = skb_tail_pointer(skb);
1714 	struct gnet_dump d;
1715 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1716 
1717 	cond_resched();
1718 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1719 	if (!nlh)
1720 		goto out_nlmsg_trim;
1721 	tcm = nlmsg_data(nlh);
1722 	tcm->tcm_family = AF_UNSPEC;
1723 	tcm->tcm__pad1 = 0;
1724 	tcm->tcm__pad2 = 0;
1725 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1726 	tcm->tcm_parent = q->handle;
1727 	tcm->tcm_handle = q->handle;
1728 	tcm->tcm_info = 0;
1729 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1730 		goto nla_put_failure;
1731 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1732 		goto nla_put_failure;
1733 
1734 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1735 					 NULL, &d, TCA_PAD) < 0)
1736 		goto nla_put_failure;
1737 
1738 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1739 		goto nla_put_failure;
1740 
1741 	if (gnet_stats_finish_copy(&d) < 0)
1742 		goto nla_put_failure;
1743 
1744 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1745 	return skb->len;
1746 
1747 out_nlmsg_trim:
1748 nla_put_failure:
1749 	nlmsg_trim(skb, b);
1750 	return -1;
1751 }
1752 
1753 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1754 			 struct nlmsghdr *n, struct Qdisc *q,
1755 			 unsigned long cl, int event)
1756 {
1757 	struct sk_buff *skb;
1758 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1759 
1760 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1761 	if (!skb)
1762 		return -ENOBUFS;
1763 
1764 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1765 		kfree_skb(skb);
1766 		return -EINVAL;
1767 	}
1768 
1769 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1770 			      n->nlmsg_flags & NLM_F_ECHO);
1771 }
1772 
1773 struct qdisc_dump_args {
1774 	struct qdisc_walker	w;
1775 	struct sk_buff		*skb;
1776 	struct netlink_callback	*cb;
1777 };
1778 
1779 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1780 			    struct qdisc_walker *arg)
1781 {
1782 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1783 
1784 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1785 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1786 			      RTM_NEWTCLASS);
1787 }
1788 
1789 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1790 				struct tcmsg *tcm, struct netlink_callback *cb,
1791 				int *t_p, int s_t)
1792 {
1793 	struct qdisc_dump_args arg;
1794 
1795 	if (tc_qdisc_dump_ignore(q, false) ||
1796 	    *t_p < s_t || !q->ops->cl_ops ||
1797 	    (tcm->tcm_parent &&
1798 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1799 		(*t_p)++;
1800 		return 0;
1801 	}
1802 	if (*t_p > s_t)
1803 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1804 	arg.w.fn = qdisc_class_dump;
1805 	arg.skb = skb;
1806 	arg.cb = cb;
1807 	arg.w.stop  = 0;
1808 	arg.w.skip = cb->args[1];
1809 	arg.w.count = 0;
1810 	q->ops->cl_ops->walk(q, &arg.w);
1811 	cb->args[1] = arg.w.count;
1812 	if (arg.w.stop)
1813 		return -1;
1814 	(*t_p)++;
1815 	return 0;
1816 }
1817 
1818 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1819 			       struct tcmsg *tcm, struct netlink_callback *cb,
1820 			       int *t_p, int s_t)
1821 {
1822 	struct Qdisc *q;
1823 	int b;
1824 
1825 	if (!root)
1826 		return 0;
1827 
1828 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1829 		return -1;
1830 
1831 	if (!qdisc_dev(root))
1832 		return 0;
1833 
1834 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1835 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1836 			return -1;
1837 	}
1838 
1839 	return 0;
1840 }
1841 
1842 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1843 {
1844 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1845 	struct net *net = sock_net(skb->sk);
1846 	struct netdev_queue *dev_queue;
1847 	struct net_device *dev;
1848 	int t, s_t;
1849 
1850 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1851 		return 0;
1852 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1853 	if (!dev)
1854 		return 0;
1855 
1856 	s_t = cb->args[0];
1857 	t = 0;
1858 
1859 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1860 		goto done;
1861 
1862 	dev_queue = dev_ingress_queue(dev);
1863 	if (dev_queue &&
1864 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1865 				&t, s_t) < 0)
1866 		goto done;
1867 
1868 done:
1869 	cb->args[0] = t;
1870 
1871 	dev_put(dev);
1872 	return skb->len;
1873 }
1874 
1875 /* Main classifier routine: scans the classifier chain attached
1876  * to this qdisc, (optionally) tests for the protocol and asks
1877  * the specific classifiers.
1878  */
1879 int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1880 		struct tcf_result *res, bool compat_mode)
1881 {
1882 	__be16 protocol = tc_skb_protocol(skb);
1883 #ifdef CONFIG_NET_CLS_ACT
1884 	const int max_reclassify_loop = 4;
1885 	const struct tcf_proto *old_tp = tp;
1886 	int limit = 0;
1887 
1888 reclassify:
1889 #endif
1890 	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1891 		int err;
1892 
1893 		if (tp->protocol != protocol &&
1894 		    tp->protocol != htons(ETH_P_ALL))
1895 			continue;
1896 
1897 		err = tp->classify(skb, tp, res);
1898 #ifdef CONFIG_NET_CLS_ACT
1899 		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
1900 			goto reset;
1901 #endif
1902 		if (err >= 0)
1903 			return err;
1904 	}
1905 
1906 	return TC_ACT_UNSPEC; /* signal: continue lookup */
1907 #ifdef CONFIG_NET_CLS_ACT
1908 reset:
1909 	if (unlikely(limit++ >= max_reclassify_loop)) {
1910 		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
1911 				       tp->q->ops->id, tp->prio & 0xffff,
1912 				       ntohs(tp->protocol));
1913 		return TC_ACT_SHOT;
1914 	}
1915 
1916 	tp = old_tp;
1917 	protocol = tc_skb_protocol(skb);
1918 	goto reclassify;
1919 #endif
1920 }
1921 EXPORT_SYMBOL(tc_classify);
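
/* Sketch of a typical tc_classify() caller in a classful qdisc's
 * ->enqueue() path (compare prio_classify() in sch_prio.c; the filter
 * list pointer is assumed to live in the qdisc's private data):
 */
static u32 example_classify(struct sk_buff *skb, struct Qdisc *sch,
			    struct tcf_proto __rcu **filter_list)
{
	struct tcf_result res;
	int err;

	err = tc_classify(skb, rcu_dereference_bh(*filter_list),
			  &res, false);
#ifdef CONFIG_NET_CLS_ACT
	switch (err) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
		return 0;	/* drop: packet consumed by an action */
	}
#endif
	if (err >= 0)
		return TC_H_MIN(res.classid);	/* matched a filter */
	return 0;				/* no match: use default */
}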
1922 
1923 #ifdef CONFIG_PROC_FS
1924 static int psched_show(struct seq_file *seq, void *v)
1925 {
1926 	seq_printf(seq, "%08x %08x %08x %08x\n",
1927 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1928 		   1000000,
1929 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1930 
1931 	return 0;
1932 }
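
/* With high-resolution timers this typically reads:
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per microsecond, 64 ns per psched tick (PSCHED_SHIFT),
 * a nominal 1 MHz reference and a 1 GHz hrtimer resolution; tc parses
 * these to convert between its time units and kernel ticks.
 */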
1933 
1934 static int psched_open(struct inode *inode, struct file *file)
1935 {
1936 	return single_open(file, psched_show, NULL);
1937 }
1938 
1939 static const struct file_operations psched_fops = {
1940 	.owner = THIS_MODULE,
1941 	.open = psched_open,
1942 	.read  = seq_read,
1943 	.llseek = seq_lseek,
1944 	.release = single_release,
1945 };
1946 
1947 static int __net_init psched_net_init(struct net *net)
1948 {
1949 	struct proc_dir_entry *e;
1950 
1951 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1952 	if (e == NULL)
1953 		return -ENOMEM;
1954 
1955 	return 0;
1956 }
1957 
1958 static void __net_exit psched_net_exit(struct net *net)
1959 {
1960 	remove_proc_entry("psched", net->proc_net);
1961 }
1962 #else
1963 static int __net_init psched_net_init(struct net *net)
1964 {
1965 	return 0;
1966 }
1967 
1968 static void __net_exit psched_net_exit(struct net *net)
1969 {
1970 }
1971 #endif
1972 
1973 static struct pernet_operations psched_net_ops = {
1974 	.init = psched_net_init,
1975 	.exit = psched_net_exit,
1976 };
1977 
1978 static int __init pktsched_init(void)
1979 {
1980 	int err;
1981 
1982 	err = register_pernet_subsys(&psched_net_ops);
1983 	if (err) {
1984 		pr_err("pktsched_init: "
1985 		       "cannot initialize per netns operations\n");
1986 		return err;
1987 	}
1988 
1989 	register_qdisc(&pfifo_fast_ops);
1990 	register_qdisc(&pfifo_qdisc_ops);
1991 	register_qdisc(&bfifo_qdisc_ops);
1992 	register_qdisc(&pfifo_head_drop_qdisc_ops);
1993 	register_qdisc(&mq_qdisc_ops);
1994 	register_qdisc(&noqueue_qdisc_ops);
1995 
1996 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1997 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1998 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
1999 		      NULL);
2000 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
2001 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
2002 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2003 		      NULL);
2004 
2005 	return 0;
2006 }
2007 
2008 subsys_initcall(pktsched_init);
2009