// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to perform sanity checks and
   the part of the work that is common to all qdiscs, and to provide
   rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP 	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing the packet from the queue.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the qdisc's lifetime.

   ---change

   changes qdisc parameters.
 */
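
/*
 * To make the contract above concrete, here is a minimal, purely
 * illustrative sketch of a FIFO-style discipline built on these hooks.
 * The "example" name and its enqueue callback are hypothetical; the
 * helpers (qdisc_enqueue_tail, qdisc_dequeue_head, qdisc_peek_head,
 * qdisc_reset_queue, qdisc_drop) are the ones used by real qdiscs such
 * as sch_fifo:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 *			return qdisc_enqueue_tail(skb, sch);
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= qdisc_dequeue_head,
 *		.peek		= qdisc_peek_head,
 *		.reset		= qdisc_reset_queue,
 *		.owner		= THIS_MODULE,
 *	};
 */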

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

void unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);

	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);
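
/*
 * A scheduler module typically wires these two calls up as follows
 * (sketch; the example_* names are hypothetical, the pattern matches
 * real modules such as sch_fq):
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *	module_init(example_module_init);
 *	module_exit(example_module_exit);
 */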

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strscpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
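
/*
 * Besides CONFIG_DEFAULT_NET_SCH, qdisc_set_default() is also reachable
 * at runtime through the net.core.default_qdisc sysctl, e.g.:
 *
 *	sysctl -w net.core.default_qdisc=fq
 *
 * The new default only applies to qdiscs attached after the write;
 * already-attached qdiscs are left alone.
 */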

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.)
 * Note: the caller either holds rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
					  handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
				struct netlink_ext_ack *extack)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL) {
		NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
		return ERR_PTR(-EOPNOTSUPP);
	}
	cl = cops->find(p, classid);

	if (cl == 0) {
		NL_SET_ERR_MSG(extack, "Specified class not found");
		return ERR_PTR(-ENOENT);
	}
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from older iproute2
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utilities, we detect the linklayer setting by checking whether the
 * rate table was modified.
 *
 * For linklayer ATM, the rate table entries are aligned to 48-byte
 * cells, so some table entries contain the same value.  The mpu (min
 * packet unit) is also encoded into the old rate table, so starting
 * from the mpu we find the low and high table entries that map this
 * cell.  If these entries contain the same value, then the rate table
 * has been modified for linklayer ATM.
 *
 * This is done by rounding the mpu up to the nearest 48-byte
 * cell/entry, then rounding up to the next cell, calculating the
 * table entry one below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
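
/*
 * Worked example with illustrative numbers: for mpu = 64 and cell_log = 3,
 * low = roundup(64, 48) = 96 and high = roundup(97, 48) = 144, giving
 * cell_low = 96 >> 3 = 12 and cell_high = (144 >> 3) - 1 = 17.  Packet
 * sizes 97..144 all occupy three 48-byte ATM cells, so an ATM-aligned
 * table has rtab[12] == rtab[17], while a plain Ethernet table stores
 * strictly increasing values there.
 */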

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 &&
		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
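
/*
 * Worked example with illustrative stab parameters: for skb->len = 1000,
 * overhead = 24, cell_align = 0 and cell_log = 6, pkt_len starts at 1024
 * and slot = 1024 >> 6 = 16, so the accounted length becomes
 * stab->data[16] << size_log.  Slots past the end of the table are
 * extrapolated from the last entry rather than clamped.
 */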

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		u64 softexpires;

		softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (softexpires - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
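
/*
 * Typical watchdog usage in a shaping qdisc (sketch; q->watchdog and the
 * time computation are the caller's own): when ->dequeue finds that the
 * head packet is not yet allowed to be sent, it arms the timer and
 * returns NULL, and the hrtimer fire later reschedules the root qdisc:
 *
 *	if (next_send_time > now) {
 *		qdisc_watchdog_schedule_ns(&q->watchdog, next_send_time);
 *		return NULL;
 *	}
 */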

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
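
/*
 * A classful qdisc typically drives these helpers as follows (sketch):
 * qdisc_class_hash_init() from ->init, qdisc_class_hash_insert() plus
 * qdisc_class_hash_grow() when a class is created, qdisc_class_hash_remove()
 * when one is deleted, and qdisc_class_hash_destroy() from ->destroy.
 * Lookup then walks a single chain; qdisc_class_find() in sch_generic.h
 * is essentially:
 *
 *	hlist_for_each_entry(cl, &clhash->hash[h], hnode)
 *		if (cl->classid == id)
 *			return cl;
 */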

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}
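
/*
 * In tc's maj:min notation these automatic handles print as "8001:",
 * "8002:", and so on (the minor part of a qdisc handle is always zero);
 * e.g. the u32 value 0x80010000 is handle 8001:.
 */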

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (parentid == TC_H_ROOT)
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty. */
		notify = !sch->q.qlen;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* Note that qlen_notify must be idempotent as it may get called
			 * multiple times.
			 */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
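
/*
 * Callers use this to keep ancestor counters coherent when packets leave
 * the tree other than via a normal dequeue; e.g. a child qdisc that drops
 * one queued packet of length len from inside the hierarchy follows it
 * with:
 *
 *	qdisc_tree_reduce_backlog(sch, 1, len);
 */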

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of a destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'
 * and 'n'.
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		if (new &&
		    !(parent->flags & TCQ_F_MQROOT) &&
		    rcu_access_pointer(new->stab)) {
			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
			return -EINVAL;
		}
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module, we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	lockdep_unregister_key(&sch->root_lock_key);
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q, NULL);
	}
	return 0;
}

static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}
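
/*
 * These predicates mirror how iproute2 builds its requests: "tc qdisc add"
 * sets NLM_F_CREATE | NLM_F_EXCL, "tc qdisc replace" sets NLM_F_CREATE |
 * NLM_F_REPLACE, and "tc qdisc change" sets none of the three, which is
 * the req_change() case.
 */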

/*
 * Create/change qdisc.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
				if (IS_ERR(q))
					return PTR_ERR(q);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be the default qdisc; ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (q->parent != tcm->tcm_parent) {
					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
					return -EINVAL;
				}
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   1) change it or 2) create/graft a new one.
				 *   If the requested qdisc kind differs from
				 *   the existing one, then we choose graft.
				 *   If they are the same, then this is a "change"
				 *   operation - just let it fall through.
				 *
				 *   1. We are allowed to create/graft only
				 *   if the request explicitly states
				 *   "please create if it doesn't exist".
				 *
				 *   2. If the request is an exclusive create,
				 *   then the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   This happens when, for example, the tc
				 *   utility issues a "change" command.
				 *   Alas, this is a sort of hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC, NULL) <= 0)
			goto done;
		q_idx++;
	}

1803 	 * itself has already been dumped.
1804 	 *
1805 	 * If the top-level (ingress) qdisc has already been dumped above,
1806 	 * also skip the global qdisc hashtable so we don't hit it again.
1807 	 */
1808 	if (!qdisc_dev(root) || !recur)
1809 		goto out;
1810 
1811 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1812 		if (q_idx < s_q_idx) {
1813 			q_idx++;
1814 			continue;
1815 		}
1816 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1817 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1818 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1819 				  RTM_NEWQDISC, NULL) <= 0)
1820 			goto done;
1821 		q_idx++;
1822 	}
1823 
1824 out:
1825 	*q_idx_p = q_idx;
1826 	return ret;
1827 done:
1828 	ret = -1;
1829 	goto out;
1830 }
1831 
1832 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1833 {
1834 	struct net *net = sock_net(skb->sk);
1835 	int idx, q_idx;
1836 	int s_idx, s_q_idx;
1837 	struct net_device *dev;
1838 	const struct nlmsghdr *nlh = cb->nlh;
1839 	struct nlattr *tca[TCA_MAX + 1];
1840 	int err;
1841 
1842 	s_idx = cb->args[0];
1843 	s_q_idx = q_idx = cb->args[1];
1844 
1845 	idx = 0;
1846 	ASSERT_RTNL();
1847 
1848 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1849 				     rtm_tca_policy, cb->extack);
1850 	if (err < 0)
1851 		return err;
1852 
1853 	for_each_netdev(net, dev) {
1854 		struct netdev_queue *dev_queue;
1855 
1856 		if (idx < s_idx)
1857 			goto cont;
1858 		if (idx > s_idx)
1859 			s_q_idx = 0;
1860 		q_idx = 0;
1861 
1862 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1863 				       skb, cb, &q_idx, s_q_idx,
1864 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1865 			goto done;
1866 
1867 		dev_queue = dev_ingress_queue(dev);
1868 		if (dev_queue &&
1869 		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1870 				       skb, cb, &q_idx, s_q_idx, false,
1871 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1872 			goto done;
1873 
1874 cont:
1875 		idx++;
1876 	}
1877 
1878 done:
1879 	cb->args[0] = idx;
1880 	cb->args[1] = q_idx;
1881 
1882 	return skb->len;
1883 }
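
/* tc_dump_qdisc() above resumes across dump batches with a two-level
 * cursor: cb->args[0] counts devices fully dumped, cb->args[1] counts
 * qdiscs dumped on the device where the previous batch stopped. A
 * minimal, self-contained sketch of the resume pattern used at each
 * level (names here are illustrative, not kernel API):
 */
static int sketch_dump_level(int *idx, int s_idx, int nitems,
			     int (*emit)(int item))
{
	int i;

	for (i = 0; i < nitems; i++) {
		if (*idx < s_idx) {	/* already sent in an earlier batch */
			(*idx)++;
			continue;
		}
		if (emit(i) < 0)	/* buffer full: *idx is the resume point */
			return -1;
		(*idx)++;
	}
	return 0;
}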
1884 
1885 
1886 
1887 /************************************************
1888  *	Traffic classes manipulation.		*
1889  ************************************************/
1890 
1891 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1892 			  unsigned long cl, u32 portid, u32 seq, u16 flags,
1893 			  int event, struct netlink_ext_ack *extack)
1894 {
1895 	struct tcmsg *tcm;
1896 	struct nlmsghdr  *nlh;
1897 	unsigned char *b = skb_tail_pointer(skb);
1898 	struct gnet_dump d;
1899 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1900 
1901 	cond_resched();
1902 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1903 	if (!nlh)
1904 		goto out_nlmsg_trim;
1905 	tcm = nlmsg_data(nlh);
1906 	tcm->tcm_family = AF_UNSPEC;
1907 	tcm->tcm__pad1 = 0;
1908 	tcm->tcm__pad2 = 0;
1909 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1910 	tcm->tcm_parent = q->handle;
1911 	tcm->tcm_handle = q->handle;
1912 	tcm->tcm_info = 0;
1913 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1914 		goto nla_put_failure;
1915 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1916 		goto nla_put_failure;
1917 
1918 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1919 					 NULL, &d, TCA_PAD) < 0)
1920 		goto nla_put_failure;
1921 
1922 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1923 		goto nla_put_failure;
1924 
1925 	if (gnet_stats_finish_copy(&d) < 0)
1926 		goto nla_put_failure;
1927 
1928 	if (extack && extack->_msg &&
1929 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
1930 		goto out_nlmsg_trim;
1931 
1932 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1933 
1934 	return skb->len;
1935 
1936 out_nlmsg_trim:
1937 nla_put_failure:
1938 	nlmsg_trim(skb, b);
1939 	return -1;
1940 }
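
/* tc_fill_tclass() above uses the standard rtnetlink fill idiom: record
 * the tail pointer, emit the header and attributes, and on any failure
 * trim the skb back so a half-built message is never sent. A minimal
 * sketch of the same idiom, using nlmsg_end() instead of computing
 * nlmsg_len by hand (the attribute chosen is illustrative):
 */
static int sketch_fill_msg(struct sk_buff *skb, u32 portid, u32 seq)
{
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWTCLASS,
			sizeof(struct tcmsg), 0);
	if (!nlh)
		goto nla_put_failure;
	if (nla_put_string(skb, TCA_KIND, "sketch"))
		goto nla_put_failure;
	nlmsg_end(skb, nlh);		/* finalizes nlmsg_len */
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);		/* undo the partial message */
	return -1;
}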
1941 
1942 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1943 			 struct nlmsghdr *n, struct Qdisc *q,
1944 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
1945 {
1946 	struct sk_buff *skb;
1947 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1948 
1949 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1950 	if (!skb)
1951 		return -ENOBUFS;
1952 
1953 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
1954 		kfree_skb(skb);
1955 		return -EINVAL;
1956 	}
1957 
1958 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1959 			      n->nlmsg_flags & NLM_F_ECHO);
1960 }
1961 
1962 static int tclass_del_notify(struct net *net,
1963 			     const struct Qdisc_class_ops *cops,
1964 			     struct sk_buff *oskb, struct nlmsghdr *n,
1965 			     struct Qdisc *q, unsigned long cl,
1966 			     struct netlink_ext_ack *extack)
1967 {
1968 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1969 	struct sk_buff *skb;
1970 	int err = 0;
1971 
1972 	if (!cops->delete)
1973 		return -EOPNOTSUPP;
1974 
1975 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1976 	if (!skb)
1977 		return -ENOBUFS;
1978 
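	/* Fill the message while the class still exists: after cops->delete()
	 * below, cl is stale and can no longer be dumped. A fill failure also
	 * aborts the request before anything is destroyed.
	 */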
1979 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1980 			   RTM_DELTCLASS, extack) < 0) {
1981 		kfree_skb(skb);
1982 		return -EINVAL;
1983 	}
1984 
1985 	err = cops->delete(q, cl, extack);
1986 	if (err) {
1987 		kfree_skb(skb);
1988 		return err;
1989 	}
1990 
1991 	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1992 			     n->nlmsg_flags & NLM_F_ECHO);
1993 	return err;
1994 }
1995 
1996 #ifdef CONFIG_NET_CLS
1997 
1998 struct tcf_bind_args {
1999 	struct tcf_walker w;
2000 	unsigned long base;
2001 	unsigned long cl;
2002 	u32 classid;
2003 };
2004 
2005 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2006 {
2007 	struct tcf_bind_args *a = (void *)arg;
2008 
2009 	if (n && tp->ops->bind_class) {
2010 		struct Qdisc *q = tcf_block_q(tp->chain->block);
2011 
2012 		sch_tree_lock(q);
2013 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
2014 		sch_tree_unlock(q);
2015 	}
2016 	return 0;
2017 }
2018 
2019 struct tc_bind_class_args {
2020 	struct qdisc_walker w;
2021 	unsigned long new_cl;
2022 	u32 portid;
2023 	u32 clid;
2024 };
2025 
2026 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2027 				struct qdisc_walker *w)
2028 {
2029 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
2030 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2031 	struct tcf_block *block;
2032 	struct tcf_chain *chain;
2033 
2034 	block = cops->tcf_block(q, cl, NULL);
2035 	if (!block)
2036 		return 0;
2037 	for (chain = tcf_get_next_chain(block, NULL);
2038 	     chain;
2039 	     chain = tcf_get_next_chain(block, chain)) {
2040 		struct tcf_proto *tp;
2041 
2042 		for (tp = tcf_get_next_proto(chain, NULL);
2043 		     tp; tp = tcf_get_next_proto(chain, tp)) {
2044 			struct tcf_bind_args arg = {};
2045 
2046 			arg.w.fn = tcf_node_bind;
2047 			arg.classid = a->clid;
2048 			arg.base = cl;
2049 			arg.cl = a->new_cl;
2050 			tp->ops->walk(tp, &arg.w, true);
2051 		}
2052 	}
2053 
2054 	return 0;
2055 }
2056 
2057 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2058 			   unsigned long new_cl)
2059 {
2060 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2061 	struct tc_bind_class_args args = {};
2062 
2063 	if (!cops->tcf_block)
2064 		return;
2065 	args.portid = portid;
2066 	args.clid = clid;
2067 	args.new_cl = new_cl;
2068 	args.w.fn = tc_bind_class_walker;
2069 	q->ops->cl_ops->walk(q, &args.w);
2070 }
2071 
2072 #else
2073 
2074 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2075 			   unsigned long new_cl)
2076 {
2077 }
2078 
2079 #endif
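
/* tcf_bind_args and tc_bind_class_args above both use the common walker
 * idiom: embed the core walker struct as the *first* member of a larger
 * argument struct, pass the embedded member to the walk, and cast back
 * to the outer type inside the callback. A minimal sketch of the idiom
 * (the names and the counting payload are illustrative):
 */
struct sketch_walk_args {
	struct qdisc_walker w;		/* must be first: callback casts back */
	u32 match_classid;
	int matches;
};

static int sketch_walk_fn(struct Qdisc *q, unsigned long cl,
			  struct qdisc_walker *w)
{
	struct sketch_walk_args *a = (struct sketch_walk_args *)w;

	if (cl == a->match_classid)
		a->matches++;
	return 0;			/* non-zero would stop the walk */
}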
2080 
2081 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2082 			 struct netlink_ext_ack *extack)
2083 {
2084 	struct net *net = sock_net(skb->sk);
2085 	struct tcmsg *tcm = nlmsg_data(n);
2086 	struct nlattr *tca[TCA_MAX + 1];
2087 	struct net_device *dev;
2088 	struct Qdisc *q = NULL;
2089 	const struct Qdisc_class_ops *cops;
2090 	unsigned long cl = 0;
2091 	unsigned long new_cl;
2092 	u32 portid;
2093 	u32 clid;
2094 	u32 qid;
2095 	int err;
2096 
2097 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2098 				     rtm_tca_policy, extack);
2099 	if (err < 0)
2100 		return err;
2101 
2102 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2103 	if (!dev)
2104 		return -ENODEV;
2105 
2106 	/*
2107 	   parent == TC_H_UNSPEC - unspecified parent.
2108 	   parent == TC_H_ROOT   - class is root, which has no parent.
2109 	   parent == X:0	 - parent is root class.
2110 	   parent == X:Y	 - parent is a node in hierarchy.
2111 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2112 
2113 	   handle == 0:0	 - generate handle from kernel pool.
2114 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2115 	   handle == X:Y	 - class is X:Y (unambiguous).
2116 	   handle == X:0	 - root class.
2117 	 */
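
	/*
	 * The 32-bit handles above pack a 16-bit major (qdisc) and a 16-bit
	 * minor (class) number, and the TC_H_* macros reduce to masking.
	 * A worked example for "1:2":
	 *
	 *   TC_H_MAJ(0x00010002)          == 0x00010000   ("1:")
	 *   TC_H_MIN(0x00010002)          == 0x00000002   (":2")
	 *   TC_H_MAKE(0x00010000, 0x0002) == 0x00010002   ("1:2")
	 *
	 * So qid = TC_H_MAJ(clid) below extracts the owning qdisc, and
	 * TC_H_MAKE(qid, ...) completes a handle whose major part was
	 * left unspecified.
	 */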
2118 
2119 	/* Step 1. Determine qdisc handle X:0 */
2120 
2121 	portid = tcm->tcm_parent;
2122 	clid = tcm->tcm_handle;
2123 	qid = TC_H_MAJ(clid);
2124 
2125 	if (portid != TC_H_ROOT) {
2126 		u32 qid1 = TC_H_MAJ(portid);
2127 
2128 		if (qid && qid1) {
2129 			/* If both majors are known, they must be identical. */
2130 			if (qid != qid1)
2131 				return -EINVAL;
2132 		} else if (qid1) {
2133 			qid = qid1;
2134 		} else if (qid == 0)
2135 			qid = rtnl_dereference(dev->qdisc)->handle;
2136 
2137 		/* Now qid is a genuine qdisc handle consistent with
2138 		 * both parent and child.
2139 		 *
2140 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
2141 		 */
2142 		if (portid)
2143 			portid = TC_H_MAKE(qid, portid);
2144 	} else {
2145 		if (qid == 0)
2146 			qid = rtnl_dereference(dev->qdisc)->handle;
2147 	}
2148 
2149 	/* OK. Locate qdisc */
2150 	q = qdisc_lookup(dev, qid);
2151 	if (!q)
2152 		return -ENOENT;
2153 
2154 	/* And check that it supports classes. */
2155 	cops = q->ops->cl_ops;
2156 	if (cops == NULL)
2157 		return -EINVAL;
2158 
2159 	/* Now try to get class */
2160 	if (clid == 0) {
2161 		if (portid == TC_H_ROOT)
2162 			clid = qid;
2163 	} else
2164 		clid = TC_H_MAKE(qid, clid);
2165 
2166 	if (clid)
2167 		cl = cops->find(q, clid);
2168 
2169 	if (cl == 0) {
2170 		err = -ENOENT;
2171 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2172 		    !(n->nlmsg_flags & NLM_F_CREATE))
2173 			goto out;
2174 	} else {
2175 		switch (n->nlmsg_type) {
2176 		case RTM_NEWTCLASS:
2177 			err = -EEXIST;
2178 			if (n->nlmsg_flags & NLM_F_EXCL)
2179 				goto out;
2180 			break;
2181 		case RTM_DELTCLASS:
2182 			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2183 			/* Unbind filters from the deleted class by rebinding them to 0 */
2184 			tc_bind_tclass(q, portid, clid, 0);
2185 			goto out;
2186 		case RTM_GETTCLASS:
2187 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
2188 			goto out;
2189 		default:
2190 			err = -EINVAL;
2191 			goto out;
2192 		}
2193 	}
2194 
2195 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2196 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2197 		return -EOPNOTSUPP;
2198 	}
2199 
2200 	/* Prevent creation of traffic classes with classid TC_H_ROOT */
2201 	if (clid == TC_H_ROOT) {
2202 		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
2203 		return -EINVAL;
2204 	}
2205 
2206 	new_cl = cl;
2207 	err = -EOPNOTSUPP;
2208 	if (cops->change)
2209 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2210 	if (err == 0) {
2211 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
2212 		/* If we just created a new class, do the reverse binding. */
2213 		if (cl != new_cl)
2214 			tc_bind_tclass(q, portid, clid, new_cl);
2215 	}
2216 out:
2217 	return err;
2218 }
2219 
2220 struct qdisc_dump_args {
2221 	struct qdisc_walker	w;
2222 	struct sk_buff		*skb;
2223 	struct netlink_callback	*cb;
2224 };
2225 
2226 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2227 			    struct qdisc_walker *arg)
2228 {
2229 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2230 
2231 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2232 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2233 			      RTM_NEWTCLASS, NULL);
2234 }
2235 
2236 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2237 				struct tcmsg *tcm, struct netlink_callback *cb,
2238 				int *t_p, int s_t)
2239 {
2240 	struct qdisc_dump_args arg;
2241 
2242 	if (tc_qdisc_dump_ignore(q, false) ||
2243 	    *t_p < s_t || !q->ops->cl_ops ||
2244 	    (tcm->tcm_parent &&
2245 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2246 		(*t_p)++;
2247 		return 0;
2248 	}
2249 	if (*t_p > s_t)
2250 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2251 	arg.w.fn = qdisc_class_dump;
2252 	arg.skb = skb;
2253 	arg.cb = cb;
2254 	arg.w.stop  = 0;
2255 	arg.w.skip = cb->args[1];
2256 	arg.w.count = 0;
2257 	q->ops->cl_ops->walk(q, &arg.w);
2258 	cb->args[1] = arg.w.count;
2259 	if (arg.w.stop)
2260 		return -1;
2261 	(*t_p)++;
2262 	return 0;
2263 }
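
/* tc_dump_tclass_qdisc() above drives cl_ops->walk() with the skip/count/
 * stop protocol: the walker skips classes dumped in earlier batches,
 * counts the ones it visits, and sets stop when the callback fails. A
 * typical implementation shape, for a hypothetical qdisc whose private
 * data holds a flat class table (sketch_sched is illustrative):
 */
struct sketch_sched {
	unsigned int nclasses;		/* hypothetical class count */
};

static void sketch_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sketch_sched *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;
	for (i = 0; i < q->nclasses; i++) {
		if (arg->count < arg->skip) {	/* dumped in an earlier batch */
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {	/* 1-based class ids */
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}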
2264 
2265 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2266 			       struct tcmsg *tcm, struct netlink_callback *cb,
2267 			       int *t_p, int s_t, bool recur)
2268 {
2269 	struct Qdisc *q;
2270 	int b;
2271 
2272 	if (!root)
2273 		return 0;
2274 
2275 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2276 		return -1;
2277 
2278 	if (!qdisc_dev(root) || !recur)
2279 		return 0;
2280 
2281 	if (tcm->tcm_parent) {
2282 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2283 		if (q && q != root &&
2284 		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2285 			return -1;
2286 		return 0;
2287 	}
2288 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2289 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2290 			return -1;
2291 	}
2292 
2293 	return 0;
2294 }
2295 
2296 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2297 {
2298 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2299 	struct net *net = sock_net(skb->sk);
2300 	struct netdev_queue *dev_queue;
2301 	struct net_device *dev;
2302 	int t, s_t;
2303 
2304 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2305 		return 0;
2306 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2307 	if (!dev)
2308 		return 0;
2309 
2310 	s_t = cb->args[0];
2311 	t = 0;
2312 
2313 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2314 				skb, tcm, cb, &t, s_t, true) < 0)
2315 		goto done;
2316 
2317 	dev_queue = dev_ingress_queue(dev);
2318 	if (dev_queue &&
2319 	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2320 				skb, tcm, cb, &t, s_t, false) < 0)
2321 		goto done;
2322 
2323 done:
2324 	cb->args[0] = t;
2325 
2326 	dev_put(dev);
2327 	return skb->len;
2328 }
2329 
2330 #ifdef CONFIG_PROC_FS
2331 static int psched_show(struct seq_file *seq, void *v)
2332 {
2333 	seq_printf(seq, "%08x %08x %08x %08x\n",
2334 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2335 		   1000000,
2336 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2337 
2338 	return 0;
2339 }
2340 
2341 static int __net_init psched_net_init(struct net *net)
2342 {
2343 	struct proc_dir_entry *e;
2344 
2345 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2346 	if (e == NULL)
2347 		return -ENOMEM;
2348 
2349 	return 0;
2350 }
2351 
2352 static void __net_exit psched_net_exit(struct net *net)
2353 {
2354 	remove_proc_entry("psched", net->proc_net);
2355 }
2356 #else
2357 static int __net_init psched_net_init(struct net *net)
2358 {
2359 	return 0;
2360 }
2361 
2362 static void __net_exit psched_net_exit(struct net *net)
2363 {
2364 }
2365 #endif
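
/* The four hex words written by psched_show() above are, in order:
 * NSEC_PER_USEC, PSCHED_TICKS2NS(1), the constant 1000000, and
 * NSEC_PER_SEC / hrtimer_resolution. Userspace (e.g. iproute2's tc)
 * reads them to calibrate its tick<->time conversions. A minimal
 * userspace reader might look like this (illustrative sketch):
 *
 *	#include <stdio.h>
 *
 *	int read_psched(unsigned int out[4])
 *	{
 *		FILE *f = fopen("/proc/net/psched", "r");
 *		int n;
 *
 *		if (!f)
 *			return -1;
 *		n = fscanf(f, "%x %x %x %x",
 *			   &out[0], &out[1], &out[2], &out[3]);
 *		fclose(f);
 *		return n == 4 ? 0 : -1;
 *	}
 */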
2366 
2367 static struct pernet_operations psched_net_ops = {
2368 	.init = psched_net_init,
2369 	.exit = psched_net_exit,
2370 };
2371 
2372 #if IS_ENABLED(CONFIG_RETPOLINE)
2373 DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
2374 #endif
2375 
2376 static int __init pktsched_init(void)
2377 {
2378 	int err;
2379 
2380 	err = register_pernet_subsys(&psched_net_ops);
2381 	if (err) {
2382 		pr_err("pktsched_init: "
2383 		       "cannot initialize per netns operations\n");
2384 		return err;
2385 	}
2386 
2387 	register_qdisc(&pfifo_fast_ops);
2388 	register_qdisc(&pfifo_qdisc_ops);
2389 	register_qdisc(&bfifo_qdisc_ops);
2390 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2391 	register_qdisc(&mq_qdisc_ops);
2392 	register_qdisc(&noqueue_qdisc_ops);
2393 
2394 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2395 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2396 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2397 		      0);
2398 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2399 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2400 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2401 		      0);
2402 
2403 	tc_wrapper_init();
2404 
2405 	return 0;
2406 }
2407 
2408 subsys_initcall(pktsched_init);
2409
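
/* pktsched_init() above registers the built-in disciplines directly;
 * additional qdiscs normally live in modules and go through
 * register_qdisc() the same way. A minimal, self-contained sketch of
 * such a module (a separate source file; the one-packet-fifo behaviour
 * and all names are purely illustrative):
 */
#include <linux/module.h>
#include <net/pkt_sched.h>

static int sketch_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	if (likely(sch->q.qlen == 0))
		return qdisc_enqueue_tail(skb, sch);
	return qdisc_drop(skb, sch, to_free);	/* queue full: one packet max */
}

static struct sk_buff *sketch_dequeue(struct Qdisc *sch)
{
	return qdisc_dequeue_head(sch);
}

static struct Qdisc_ops sketch_qdisc_ops __read_mostly = {
	.id		= "sketch",
	.enqueue	= sketch_enqueue,
	.dequeue	= sketch_dequeue,
	.peek		= qdisc_peek_head,
	.owner		= THIS_MODULE,
};

static int __init sketch_module_init(void)
{
	return register_qdisc(&sketch_qdisc_ops);
}

static void __exit sketch_module_exit(void)
{
	unregister_qdisc(&sketch_qdisc_ops);
}

module_init(sketch_module_init);
module_exit(sketch_module_exit);
MODULE_LICENSE("GPL");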