12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * net/sched/sch_api.c Packet scheduler API.
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Fixes:
81da177e4SLinus Torvalds *
91da177e4SLinus Torvalds * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
101da177e4SLinus Torvalds * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
111da177e4SLinus Torvalds * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
121da177e4SLinus Torvalds */
131da177e4SLinus Torvalds
141da177e4SLinus Torvalds #include <linux/module.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <linux/string.h>
181da177e4SLinus Torvalds #include <linux/errno.h>
191da177e4SLinus Torvalds #include <linux/skbuff.h>
201da177e4SLinus Torvalds #include <linux/init.h>
211da177e4SLinus Torvalds #include <linux/proc_fs.h>
221da177e4SLinus Torvalds #include <linux/seq_file.h>
231da177e4SLinus Torvalds #include <linux/kmod.h>
241da177e4SLinus Torvalds #include <linux/list.h>
254179477fSPatrick McHardy #include <linux/hrtimer.h>
265a0e3ad6STejun Heo #include <linux/slab.h>
2759cc1f61SJiri Kosina #include <linux/hashtable.h>
281da177e4SLinus Torvalds
29457c4cbcSEric W. Biederman #include <net/net_namespace.h>
30b854272bSDenis V. Lunev #include <net/sock.h>
31dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
321da177e4SLinus Torvalds #include <net/pkt_sched.h>
3307d79fc7SCong Wang #include <net/pkt_cls.h>
347f0e8102SPedro Tammela #include <net/tc_wrapper.h>
351da177e4SLinus Torvalds
36f5a7833eSCong Wang #include <trace/events/qdisc.h>
37f5a7833eSCong Wang
381da177e4SLinus Torvalds /*
391da177e4SLinus Torvalds
401da177e4SLinus Torvalds Short review.
411da177e4SLinus Torvalds -------------
421da177e4SLinus Torvalds
431da177e4SLinus Torvalds This file consists of two interrelated parts:
441da177e4SLinus Torvalds
451da177e4SLinus Torvalds 1. queueing disciplines manager frontend.
461da177e4SLinus Torvalds 2. traffic classes manager frontend.
471da177e4SLinus Torvalds
481da177e4SLinus Torvalds Generally, queueing discipline ("qdisc") is a black box,
491da177e4SLinus Torvalds which is able to enqueue packets and to dequeue them (when
501da177e4SLinus Torvalds device is ready to send something) in order and at times
511da177e4SLinus Torvalds determined by algorithm hidden in it.
521da177e4SLinus Torvalds
531da177e4SLinus Torvalds qdisc's are divided to two categories:
541da177e4SLinus Torvalds - "queues", which have no internal structure visible from outside.
551da177e4SLinus Torvalds - "schedulers", which split all the packets to "traffic classes",
561da177e4SLinus Torvalds using "packet classifiers" (look at cls_api.c)
571da177e4SLinus Torvalds
581da177e4SLinus Torvalds In turn, classes may have child qdiscs (as rule, queues)
591da177e4SLinus Torvalds attached to them etc. etc. etc.
601da177e4SLinus Torvalds
611da177e4SLinus Torvalds The goal of the routines in this file is to translate
621da177e4SLinus Torvalds information supplied by user in the form of handles
631da177e4SLinus Torvalds to more intelligible for kernel form, to make some sanity
641da177e4SLinus Torvalds checks and part of work, which is common to all qdiscs
651da177e4SLinus Torvalds and to provide rtnetlink notifications.
661da177e4SLinus Torvalds
671da177e4SLinus Torvalds All real intelligent work is done inside qdisc modules.
681da177e4SLinus Torvalds
691da177e4SLinus Torvalds
701da177e4SLinus Torvalds
711da177e4SLinus Torvalds Every discipline has two major routines: enqueue and dequeue.
721da177e4SLinus Torvalds
731da177e4SLinus Torvalds ---dequeue
741da177e4SLinus Torvalds
751da177e4SLinus Torvalds dequeue usually returns a skb to send. It is allowed to return NULL,
761da177e4SLinus Torvalds but it does not mean that queue is empty, it just means that
771da177e4SLinus Torvalds discipline does not want to send anything this time.
781da177e4SLinus Torvalds Queue is really empty if q->q.qlen == 0.
791da177e4SLinus Torvalds For complicated disciplines with multiple queues q->q is not
801da177e4SLinus Torvalds real packet queue, but however q->q.qlen must be valid.
811da177e4SLinus Torvalds
821da177e4SLinus Torvalds ---enqueue
831da177e4SLinus Torvalds
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore
911da177e4SLinus Torvalds
921da177e4SLinus Torvalds Auxiliary routines:
931da177e4SLinus Torvalds
9499c0db26SJarek Poplawski ---peek
9599c0db26SJarek Poplawski
9699c0db26SJarek Poplawski like dequeue but without removing a packet from the queue
9799c0db26SJarek Poplawski
981da177e4SLinus Torvalds ---reset
991da177e4SLinus Torvalds
1001da177e4SLinus Torvalds returns qdisc to initial state: purge all buffers, clear all
1011da177e4SLinus Torvalds timers, counters (except for statistics) etc.
1021da177e4SLinus Torvalds
1031da177e4SLinus Torvalds ---init
1041da177e4SLinus Torvalds
1051da177e4SLinus Torvalds initializes newly created qdisc.
1061da177e4SLinus Torvalds
1071da177e4SLinus Torvalds ---destroy
1081da177e4SLinus Torvalds
1091da177e4SLinus Torvalds destroys resources allocated by init and during lifetime of qdisc.
1101da177e4SLinus Torvalds
1111da177e4SLinus Torvalds ---change
1121da177e4SLinus Torvalds
1131da177e4SLinus Torvalds changes qdisc parameters.
1141da177e4SLinus Torvalds */
1151da177e4SLinus Torvalds
/* Guards the list of registered qdisc ops below. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* Head of the list of all installed queueing disciplines,
 * chained through Qdisc_ops::next.
 */
static struct Qdisc_ops *qdisc_base;
1281da177e4SLinus Torvalds
12921eb2189SZhi Yong Wu /* Register/unregister queueing discipline */
1301da177e4SLinus Torvalds
register_qdisc(struct Qdisc_ops * qops)1311da177e4SLinus Torvalds int register_qdisc(struct Qdisc_ops *qops)
1321da177e4SLinus Torvalds {
1331da177e4SLinus Torvalds struct Qdisc_ops *q, **qp;
1341da177e4SLinus Torvalds int rc = -EEXIST;
1351da177e4SLinus Torvalds
1361da177e4SLinus Torvalds write_lock(&qdisc_mod_lock);
1371da177e4SLinus Torvalds for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1381da177e4SLinus Torvalds if (!strcmp(qops->id, q->id))
1391da177e4SLinus Torvalds goto out;
1401da177e4SLinus Torvalds
1411da177e4SLinus Torvalds if (qops->enqueue == NULL)
1421da177e4SLinus Torvalds qops->enqueue = noop_qdisc_ops.enqueue;
14399c0db26SJarek Poplawski if (qops->peek == NULL) {
14468fd26b5SJarek Poplawski if (qops->dequeue == NULL)
14599c0db26SJarek Poplawski qops->peek = noop_qdisc_ops.peek;
14668fd26b5SJarek Poplawski else
14768fd26b5SJarek Poplawski goto out_einval;
14899c0db26SJarek Poplawski }
1491da177e4SLinus Torvalds if (qops->dequeue == NULL)
1501da177e4SLinus Torvalds qops->dequeue = noop_qdisc_ops.dequeue;
1511da177e4SLinus Torvalds
15268fd26b5SJarek Poplawski if (qops->cl_ops) {
15368fd26b5SJarek Poplawski const struct Qdisc_class_ops *cops = qops->cl_ops;
15468fd26b5SJarek Poplawski
155143976ceSWANG Cong if (!(cops->find && cops->walk && cops->leaf))
15668fd26b5SJarek Poplawski goto out_einval;
15768fd26b5SJarek Poplawski
1586529eabaSJiri Pirko if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
15968fd26b5SJarek Poplawski goto out_einval;
16068fd26b5SJarek Poplawski }
16168fd26b5SJarek Poplawski
1621da177e4SLinus Torvalds qops->next = NULL;
1631da177e4SLinus Torvalds *qp = qops;
1641da177e4SLinus Torvalds rc = 0;
1651da177e4SLinus Torvalds out:
1661da177e4SLinus Torvalds write_unlock(&qdisc_mod_lock);
1671da177e4SLinus Torvalds return rc;
16868fd26b5SJarek Poplawski
16968fd26b5SJarek Poplawski out_einval:
17068fd26b5SJarek Poplawski rc = -EINVAL;
17168fd26b5SJarek Poplawski goto out;
1721da177e4SLinus Torvalds }
17362e3ba1bSPatrick McHardy EXPORT_SYMBOL(register_qdisc);
1741da177e4SLinus Torvalds
unregister_qdisc(struct Qdisc_ops * qops)17552327d2eSZhengchao Shao void unregister_qdisc(struct Qdisc_ops *qops)
1761da177e4SLinus Torvalds {
1771da177e4SLinus Torvalds struct Qdisc_ops *q, **qp;
1781da177e4SLinus Torvalds int err = -ENOENT;
1791da177e4SLinus Torvalds
1801da177e4SLinus Torvalds write_lock(&qdisc_mod_lock);
1811da177e4SLinus Torvalds for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1821da177e4SLinus Torvalds if (q == qops)
1831da177e4SLinus Torvalds break;
1841da177e4SLinus Torvalds if (q) {
1851da177e4SLinus Torvalds *qp = q->next;
1861da177e4SLinus Torvalds q->next = NULL;
1871da177e4SLinus Torvalds err = 0;
1881da177e4SLinus Torvalds }
1891da177e4SLinus Torvalds write_unlock(&qdisc_mod_lock);
19052327d2eSZhengchao Shao
19152327d2eSZhengchao Shao WARN(err, "unregister qdisc(%s) failed\n", qops->id);
1921da177e4SLinus Torvalds }
19362e3ba1bSPatrick McHardy EXPORT_SYMBOL(unregister_qdisc);
1941da177e4SLinus Torvalds
1956da7c8fcSstephen hemminger /* Get default qdisc if not otherwise specified */
qdisc_get_default(char * name,size_t len)1966da7c8fcSstephen hemminger void qdisc_get_default(char *name, size_t len)
1976da7c8fcSstephen hemminger {
1986da7c8fcSstephen hemminger read_lock(&qdisc_mod_lock);
19992f24c6fSWolfram Sang strscpy(name, default_qdisc_ops->id, len);
2006da7c8fcSstephen hemminger read_unlock(&qdisc_mod_lock);
2016da7c8fcSstephen hemminger }
2026da7c8fcSstephen hemminger
qdisc_lookup_default(const char * name)2036da7c8fcSstephen hemminger static struct Qdisc_ops *qdisc_lookup_default(const char *name)
2046da7c8fcSstephen hemminger {
2056da7c8fcSstephen hemminger struct Qdisc_ops *q = NULL;
2066da7c8fcSstephen hemminger
2076da7c8fcSstephen hemminger for (q = qdisc_base; q; q = q->next) {
2086da7c8fcSstephen hemminger if (!strcmp(name, q->id)) {
2096da7c8fcSstephen hemminger if (!try_module_get(q->owner))
2106da7c8fcSstephen hemminger q = NULL;
2116da7c8fcSstephen hemminger break;
2126da7c8fcSstephen hemminger }
2136da7c8fcSstephen hemminger }
2146da7c8fcSstephen hemminger
2156da7c8fcSstephen hemminger return q;
2166da7c8fcSstephen hemminger }
2176da7c8fcSstephen hemminger
2186da7c8fcSstephen hemminger /* Set new default qdisc to use */
qdisc_set_default(const char * name)2196da7c8fcSstephen hemminger int qdisc_set_default(const char *name)
2206da7c8fcSstephen hemminger {
2216da7c8fcSstephen hemminger const struct Qdisc_ops *ops;
2226da7c8fcSstephen hemminger
2236da7c8fcSstephen hemminger if (!capable(CAP_NET_ADMIN))
2246da7c8fcSstephen hemminger return -EPERM;
2256da7c8fcSstephen hemminger
2266da7c8fcSstephen hemminger write_lock(&qdisc_mod_lock);
2276da7c8fcSstephen hemminger ops = qdisc_lookup_default(name);
2286da7c8fcSstephen hemminger if (!ops) {
2296da7c8fcSstephen hemminger /* Not found, drop lock and try to load module */
2306da7c8fcSstephen hemminger write_unlock(&qdisc_mod_lock);
2316da7c8fcSstephen hemminger request_module("sch_%s", name);
2326da7c8fcSstephen hemminger write_lock(&qdisc_mod_lock);
2336da7c8fcSstephen hemminger
2346da7c8fcSstephen hemminger ops = qdisc_lookup_default(name);
2356da7c8fcSstephen hemminger }
2366da7c8fcSstephen hemminger
2376da7c8fcSstephen hemminger if (ops) {
2386da7c8fcSstephen hemminger /* Set new default */
2396da7c8fcSstephen hemminger module_put(default_qdisc_ops->owner);
2406da7c8fcSstephen hemminger default_qdisc_ops = ops;
2416da7c8fcSstephen hemminger }
2426da7c8fcSstephen hemminger write_unlock(&qdisc_mod_lock);
2436da7c8fcSstephen hemminger
2446da7c8fcSstephen hemminger return ops ? 0 : -ENOENT;
2456da7c8fcSstephen hemminger }
2466da7c8fcSstephen hemminger
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: apply the default qdisc chosen in the kernel config
 * (CONFIG_DEFAULT_NET_SCH) and propagate qdisc_set_default()'s result.
 */
static int __init sch_default_qdisc(void)
{
	const char *configured = CONFIG_DEFAULT_NET_SCH;

	return qdisc_set_default(configured);
}
late_initcall(sch_default_qdisc);
#endif
2558ea3e439Sstephen hemminger
2561da177e4SLinus Torvalds /* We know handle. Find qdisc among all qdisc's attached to device
2574eaf3b84SEric Dumazet * (root qdisc, all its children, children of children etc.)
2584eaf3b84SEric Dumazet * Note: caller either uses rtnl or rcu_read_lock()
2591da177e4SLinus Torvalds */
2601da177e4SLinus Torvalds
qdisc_match_from_root(struct Qdisc * root,u32 handle)2616113b748SHannes Eder static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
2628123b421SDavid S. Miller {
2638123b421SDavid S. Miller struct Qdisc *q;
2648123b421SDavid S. Miller
26569012ae4SJiri Kosina if (!qdisc_dev(root))
26669012ae4SJiri Kosina return (root->handle == handle ? root : NULL);
26769012ae4SJiri Kosina
2688123b421SDavid S. Miller if (!(root->flags & TCQ_F_BUILTIN) &&
2698123b421SDavid S. Miller root->handle == handle)
2708123b421SDavid S. Miller return root;
2718123b421SDavid S. Miller
272a8b7b2d0SJiri Pirko hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
273a8b7b2d0SJiri Pirko lockdep_rtnl_is_held()) {
2748123b421SDavid S. Miller if (q->handle == handle)
2758123b421SDavid S. Miller return q;
2768123b421SDavid S. Miller }
2778123b421SDavid S. Miller return NULL;
2788123b421SDavid S. Miller }
2798123b421SDavid S. Miller
/* Insert @q into its device's qdisc hash, keyed by q->handle.
 *
 * Nothing is done for qdiscs whose parent is TC_H_ROOT or which carry
 * TCQ_F_INGRESS. When @invisible is true the hashed qdisc is additionally
 * flagged TCQ_F_INVISIBLE. Asserts that RTNL is held before touching the hash.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if (q->parent == TC_H_ROOT || (q->flags & TCQ_F_INGRESS))
		return;

	ASSERT_RTNL();
	hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);

	if (invisible)
		q->flags |= TCQ_F_INVISIBLE;
}
EXPORT_SYMBOL(qdisc_hash_add);
290f6e0b239SJarek Poplawski
qdisc_hash_del(struct Qdisc * q)29159cc1f61SJiri Kosina void qdisc_hash_del(struct Qdisc *q)
292f6e0b239SJarek Poplawski {
2934eaf3b84SEric Dumazet if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
2944eaf3b84SEric Dumazet ASSERT_RTNL();
29559cc1f61SJiri Kosina hash_del_rcu(&q->hash);
2964eaf3b84SEric Dumazet }
297f6e0b239SJarek Poplawski }
29859cc1f61SJiri Kosina EXPORT_SYMBOL(qdisc_hash_del);
299f6e0b239SJarek Poplawski
/* Look up the qdisc with @handle on @dev, using rtnl_dereference().
 *
 * The device's main qdisc tree is searched first, then (if the device has
 * an ingress queue) the ingress queue's sleeping qdisc. Returns NULL for a
 * zero handle or when nothing matches.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct netdev_queue *ingress;
	struct Qdisc *found;

	if (handle == 0)
		return NULL;

	found = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (found)
		return found;

	ingress = dev_ingress_queue(dev);
	if (!ingress)
		return NULL;

	return qdisc_match_from_root(rtnl_dereference(ingress->qdisc_sleeping),
				     handle);
}
31743effa1eSPatrick McHardy
/* RCU flavour of qdisc_lookup(): same search order, but the qdisc
 * pointers are read with rcu_dereference() and the ingress queue is
 * obtained through dev_ingress_queue_rcu().
 *
 * Returns NULL for a zero handle or when nothing matches.
 */
struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *ingress;
	struct Qdisc *found;

	if (handle == 0)
		return NULL;

	found = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (found)
		return found;

	ingress = dev_ingress_queue_rcu(dev);
	if (!ingress)
		return NULL;

	return qdisc_match_from_root(rcu_dereference(ingress->qdisc_sleeping),
				     handle);
}
3363a7d0d07SVlad Buslov
/* Return the leaf qdisc attached to class @classid of @p.
 *
 * Returns NULL when @p has no class operations or when its find()
 * callback returns 0 for @classid; otherwise returns what the leaf()
 * callback yields.
 */
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
	unsigned long cl;

	if (!cops)
		return NULL;

	cl = cops->find(p, classid);

	return cl ? cops->leaf(p, cl) : NULL;
}
3501da177e4SLinus Torvalds
3511da177e4SLinus Torvalds /* Find queueing discipline by name */
3521da177e4SLinus Torvalds
qdisc_lookup_ops(struct nlattr * kind)3531e90474cSPatrick McHardy static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
3541da177e4SLinus Torvalds {
3551da177e4SLinus Torvalds struct Qdisc_ops *q = NULL;
3561da177e4SLinus Torvalds
3571da177e4SLinus Torvalds if (kind) {
3581da177e4SLinus Torvalds read_lock(&qdisc_mod_lock);
3591da177e4SLinus Torvalds for (q = qdisc_base; q; q = q->next) {
3601e90474cSPatrick McHardy if (nla_strcmp(kind, q->id) == 0) {
3611da177e4SLinus Torvalds if (!try_module_get(q->owner))
3621da177e4SLinus Torvalds q = NULL;
3631da177e4SLinus Torvalds break;
3641da177e4SLinus Torvalds }
3651da177e4SLinus Torvalds }
3661da177e4SLinus Torvalds read_unlock(&qdisc_mod_lock);
3671da177e4SLinus Torvalds }
3681da177e4SLinus Torvalds return q;
3691da177e4SLinus Torvalds }
3701da177e4SLinus Torvalds
/* Older versions of iproute2 did not transfer the linklayer setting,
 * and the rate table lookup systems have been dropped in the kernel.
 * To stay backward compatible with older iproute2 tc utils, we detect
 * the linklayer setting by detecting whether the rate table was
 * modified.
 *
 * For linklayer ATM, the rate table is aligned to 48 bytes, thus some
 * table entries contain the same value. The mpu (min packet unit) is
 * also encoded into the old rate table, thus starting from the mpu we
 * find the low and high table entries that map to the same cell. If
 * these entries contain the same value, then the rate table has been
 * modified for linklayer ATM.
 *
 * This is done by rounding mpu up to a multiple of the 48-byte cell,
 * then rounding up to the next cell, calculating the table entry one
 * below that, and comparing the two entries.
 */
__detect_linklayer(struct tc_ratespec * r,__u32 * rtab)3888a8e3d84SJesper Dangaard Brouer static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
3898a8e3d84SJesper Dangaard Brouer {
3908a8e3d84SJesper Dangaard Brouer int low = roundup(r->mpu, 48);
3918a8e3d84SJesper Dangaard Brouer int high = roundup(low+1, 48);
3928a8e3d84SJesper Dangaard Brouer int cell_low = low >> r->cell_log;
3938a8e3d84SJesper Dangaard Brouer int cell_high = (high >> r->cell_log) - 1;
3948a8e3d84SJesper Dangaard Brouer
3958a8e3d84SJesper Dangaard Brouer /* rtab is too inaccurate at rates > 100Mbit/s */
3968a8e3d84SJesper Dangaard Brouer if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
3978a8e3d84SJesper Dangaard Brouer pr_debug("TC linklayer: Giving up ATM detection\n");
3988a8e3d84SJesper Dangaard Brouer return TC_LINKLAYER_ETHERNET;
3998a8e3d84SJesper Dangaard Brouer }
4008a8e3d84SJesper Dangaard Brouer
4018a8e3d84SJesper Dangaard Brouer if ((cell_high > cell_low) && (cell_high < 256)
4028a8e3d84SJesper Dangaard Brouer && (rtab[cell_low] == rtab[cell_high])) {
4038a8e3d84SJesper Dangaard Brouer pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
4048a8e3d84SJesper Dangaard Brouer cell_low, cell_high, rtab[cell_high]);
4058a8e3d84SJesper Dangaard Brouer return TC_LINKLAYER_ATM;
4068a8e3d84SJesper Dangaard Brouer }
4078a8e3d84SJesper Dangaard Brouer return TC_LINKLAYER_ETHERNET;
4088a8e3d84SJesper Dangaard Brouer }
4098a8e3d84SJesper Dangaard Brouer
4101da177e4SLinus Torvalds static struct qdisc_rate_table *qdisc_rtab_list;
4111da177e4SLinus Torvalds
qdisc_get_rtab(struct tc_ratespec * r,struct nlattr * tab,struct netlink_ext_ack * extack)4125a7a5555SJamal Hadi Salim struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
413e9bc3fa2SAlexander Aring struct nlattr *tab,
414e9bc3fa2SAlexander Aring struct netlink_ext_ack *extack)
4151da177e4SLinus Torvalds {
4161da177e4SLinus Torvalds struct qdisc_rate_table *rtab;
4171da177e4SLinus Torvalds
418e4bedf48SEric Dumazet if (tab == NULL || r->rate == 0 ||
419e4bedf48SEric Dumazet r->cell_log == 0 || r->cell_log >= 32 ||
420e9bc3fa2SAlexander Aring nla_len(tab) != TC_RTAB_SIZE) {
421e9bc3fa2SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
42240edeff6SEric Dumazet return NULL;
423e9bc3fa2SAlexander Aring }
42440edeff6SEric Dumazet
4251da177e4SLinus Torvalds for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
42640edeff6SEric Dumazet if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
42740edeff6SEric Dumazet !memcmp(&rtab->data, nla_data(tab), 1024)) {
4281da177e4SLinus Torvalds rtab->refcnt++;
4291da177e4SLinus Torvalds return rtab;
4301da177e4SLinus Torvalds }
4311da177e4SLinus Torvalds }
4321da177e4SLinus Torvalds
4331da177e4SLinus Torvalds rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
4341da177e4SLinus Torvalds if (rtab) {
4351da177e4SLinus Torvalds rtab->rate = *r;
4361da177e4SLinus Torvalds rtab->refcnt = 1;
4371e90474cSPatrick McHardy memcpy(rtab->data, nla_data(tab), 1024);
4388a8e3d84SJesper Dangaard Brouer if (r->linklayer == TC_LINKLAYER_UNAWARE)
4398a8e3d84SJesper Dangaard Brouer r->linklayer = __detect_linklayer(r, rtab->data);
4401da177e4SLinus Torvalds rtab->next = qdisc_rtab_list;
4411da177e4SLinus Torvalds qdisc_rtab_list = rtab;
442e9bc3fa2SAlexander Aring } else {
443e9bc3fa2SAlexander Aring NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
4441da177e4SLinus Torvalds }
4451da177e4SLinus Torvalds return rtab;
4461da177e4SLinus Torvalds }
44762e3ba1bSPatrick McHardy EXPORT_SYMBOL(qdisc_get_rtab);
4481da177e4SLinus Torvalds
qdisc_put_rtab(struct qdisc_rate_table * tab)4491da177e4SLinus Torvalds void qdisc_put_rtab(struct qdisc_rate_table *tab)
4501da177e4SLinus Torvalds {
4511da177e4SLinus Torvalds struct qdisc_rate_table *rtab, **rtabp;
4521da177e4SLinus Torvalds
4531da177e4SLinus Torvalds if (!tab || --tab->refcnt)
4541da177e4SLinus Torvalds return;
4551da177e4SLinus Torvalds
456cc7ec456SEric Dumazet for (rtabp = &qdisc_rtab_list;
457cc7ec456SEric Dumazet (rtab = *rtabp) != NULL;
458cc7ec456SEric Dumazet rtabp = &rtab->next) {
4591da177e4SLinus Torvalds if (rtab == tab) {
4601da177e4SLinus Torvalds *rtabp = rtab->next;
4611da177e4SLinus Torvalds kfree(rtab);
4621da177e4SLinus Torvalds return;
4631da177e4SLinus Torvalds }
4641da177e4SLinus Torvalds }
4651da177e4SLinus Torvalds }
46662e3ba1bSPatrick McHardy EXPORT_SYMBOL(qdisc_put_rtab);
4671da177e4SLinus Torvalds
468175f9c1bSJussi Kivilinna static LIST_HEAD(qdisc_stab_list);
469175f9c1bSJussi Kivilinna
470175f9c1bSJussi Kivilinna static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
471175f9c1bSJussi Kivilinna [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
472175f9c1bSJussi Kivilinna [TCA_STAB_DATA] = { .type = NLA_BINARY },
473175f9c1bSJussi Kivilinna };
474175f9c1bSJussi Kivilinna
qdisc_get_stab(struct nlattr * opt,struct netlink_ext_ack * extack)47509215598SAlexander Aring static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
47609215598SAlexander Aring struct netlink_ext_ack *extack)
477175f9c1bSJussi Kivilinna {
478175f9c1bSJussi Kivilinna struct nlattr *tb[TCA_STAB_MAX + 1];
479175f9c1bSJussi Kivilinna struct qdisc_size_table *stab;
480175f9c1bSJussi Kivilinna struct tc_sizespec *s;
481175f9c1bSJussi Kivilinna unsigned int tsize = 0;
482175f9c1bSJussi Kivilinna u16 *tab = NULL;
483175f9c1bSJussi Kivilinna int err;
484175f9c1bSJussi Kivilinna
4858cb08174SJohannes Berg err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
4868cb08174SJohannes Berg extack);
487175f9c1bSJussi Kivilinna if (err < 0)
488175f9c1bSJussi Kivilinna return ERR_PTR(err);
48909215598SAlexander Aring if (!tb[TCA_STAB_BASE]) {
49009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
491175f9c1bSJussi Kivilinna return ERR_PTR(-EINVAL);
49209215598SAlexander Aring }
493175f9c1bSJussi Kivilinna
494175f9c1bSJussi Kivilinna s = nla_data(tb[TCA_STAB_BASE]);
495175f9c1bSJussi Kivilinna
496175f9c1bSJussi Kivilinna if (s->tsize > 0) {
49709215598SAlexander Aring if (!tb[TCA_STAB_DATA]) {
49809215598SAlexander Aring NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
499175f9c1bSJussi Kivilinna return ERR_PTR(-EINVAL);
50009215598SAlexander Aring }
501175f9c1bSJussi Kivilinna tab = nla_data(tb[TCA_STAB_DATA]);
502175f9c1bSJussi Kivilinna tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
503175f9c1bSJussi Kivilinna }
504175f9c1bSJussi Kivilinna
50509215598SAlexander Aring if (tsize != s->tsize || (!tab && tsize > 0)) {
50609215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid size of size table");
507175f9c1bSJussi Kivilinna return ERR_PTR(-EINVAL);
50809215598SAlexander Aring }
509175f9c1bSJussi Kivilinna
510175f9c1bSJussi Kivilinna list_for_each_entry(stab, &qdisc_stab_list, list) {
511175f9c1bSJussi Kivilinna if (memcmp(&stab->szopts, s, sizeof(*s)))
512175f9c1bSJussi Kivilinna continue;
51369508d43SGustavo A. R. Silva if (tsize > 0 &&
51469508d43SGustavo A. R. Silva memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
515175f9c1bSJussi Kivilinna continue;
516175f9c1bSJussi Kivilinna stab->refcnt++;
517175f9c1bSJussi Kivilinna return stab;
518175f9c1bSJussi Kivilinna }
519175f9c1bSJussi Kivilinna
520b193e15aS王贇 if (s->size_log > STAB_SIZE_LOG_MAX ||
521b193e15aS王贇 s->cell_log > STAB_SIZE_LOG_MAX) {
522b193e15aS王贇 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
523b193e15aS王贇 return ERR_PTR(-EINVAL);
524b193e15aS王贇 }
525b193e15aS王贇
52669508d43SGustavo A. R. Silva stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
527175f9c1bSJussi Kivilinna if (!stab)
528175f9c1bSJussi Kivilinna return ERR_PTR(-ENOMEM);
529175f9c1bSJussi Kivilinna
530175f9c1bSJussi Kivilinna stab->refcnt = 1;
531175f9c1bSJussi Kivilinna stab->szopts = *s;
532175f9c1bSJussi Kivilinna if (tsize > 0)
53369508d43SGustavo A. R. Silva memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
534175f9c1bSJussi Kivilinna
535175f9c1bSJussi Kivilinna list_add_tail(&stab->list, &qdisc_stab_list);
536175f9c1bSJussi Kivilinna
537175f9c1bSJussi Kivilinna return stab;
538175f9c1bSJussi Kivilinna }
539175f9c1bSJussi Kivilinna
qdisc_put_stab(struct qdisc_size_table * tab)540175f9c1bSJussi Kivilinna void qdisc_put_stab(struct qdisc_size_table *tab)
541175f9c1bSJussi Kivilinna {
542175f9c1bSJussi Kivilinna if (!tab)
543175f9c1bSJussi Kivilinna return;
544175f9c1bSJussi Kivilinna
545175f9c1bSJussi Kivilinna if (--tab->refcnt == 0) {
546175f9c1bSJussi Kivilinna list_del(&tab->list);
5476e07902fSWei Yongjun kfree_rcu(tab, rcu);
548175f9c1bSJussi Kivilinna }
549175f9c1bSJussi Kivilinna }
550175f9c1bSJussi Kivilinna EXPORT_SYMBOL(qdisc_put_stab);
551175f9c1bSJussi Kivilinna
qdisc_dump_stab(struct sk_buff * skb,struct qdisc_size_table * stab)552175f9c1bSJussi Kivilinna static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
553175f9c1bSJussi Kivilinna {
554175f9c1bSJussi Kivilinna struct nlattr *nest;
555175f9c1bSJussi Kivilinna
556ae0be8deSMichal Kubecek nest = nla_nest_start_noflag(skb, TCA_STAB);
5573aa4614dSPatrick McHardy if (nest == NULL)
5583aa4614dSPatrick McHardy goto nla_put_failure;
5591b34ec43SDavid S. Miller if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
5601b34ec43SDavid S. Miller goto nla_put_failure;
561175f9c1bSJussi Kivilinna nla_nest_end(skb, nest);
562175f9c1bSJussi Kivilinna
563175f9c1bSJussi Kivilinna return skb->len;
564175f9c1bSJussi Kivilinna
565175f9c1bSJussi Kivilinna nla_put_failure:
566175f9c1bSJussi Kivilinna return -1;
567175f9c1bSJussi Kivilinna }
568175f9c1bSJussi Kivilinna
__qdisc_calculate_pkt_len(struct sk_buff * skb,const struct qdisc_size_table * stab)5695a7a5555SJamal Hadi Salim void __qdisc_calculate_pkt_len(struct sk_buff *skb,
5705a7a5555SJamal Hadi Salim const struct qdisc_size_table *stab)
571175f9c1bSJussi Kivilinna {
572175f9c1bSJussi Kivilinna int pkt_len, slot;
573175f9c1bSJussi Kivilinna
574175f9c1bSJussi Kivilinna pkt_len = skb->len + stab->szopts.overhead;
575175f9c1bSJussi Kivilinna if (unlikely(!stab->szopts.tsize))
576175f9c1bSJussi Kivilinna goto out;
577175f9c1bSJussi Kivilinna
578175f9c1bSJussi Kivilinna slot = pkt_len + stab->szopts.cell_align;
579175f9c1bSJussi Kivilinna if (unlikely(slot < 0))
580175f9c1bSJussi Kivilinna slot = 0;
581175f9c1bSJussi Kivilinna
582175f9c1bSJussi Kivilinna slot >>= stab->szopts.cell_log;
583175f9c1bSJussi Kivilinna if (likely(slot < stab->szopts.tsize))
584175f9c1bSJussi Kivilinna pkt_len = stab->data[slot];
585175f9c1bSJussi Kivilinna else
586175f9c1bSJussi Kivilinna pkt_len = stab->data[stab->szopts.tsize - 1] *
587175f9c1bSJussi Kivilinna (slot / stab->szopts.tsize) +
588175f9c1bSJussi Kivilinna stab->data[slot % stab->szopts.tsize];
589175f9c1bSJussi Kivilinna
590175f9c1bSJussi Kivilinna pkt_len <<= stab->szopts.size_log;
591175f9c1bSJussi Kivilinna out:
592175f9c1bSJussi Kivilinna if (unlikely(pkt_len < 1))
593175f9c1bSJussi Kivilinna pkt_len = 1;
594175f9c1bSJussi Kivilinna qdisc_skb_cb(skb)->pkt_len = pkt_len;
595175f9c1bSJussi Kivilinna }
596175f9c1bSJussi Kivilinna
qdisc_warn_nonwc(const char * txt,struct Qdisc * qdisc)5976e765a00SFlorian Westphal void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
598b00355dbSJarek Poplawski {
599b00355dbSJarek Poplawski if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
600cc7ec456SEric Dumazet pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
601b00355dbSJarek Poplawski txt, qdisc->ops->id, qdisc->handle >> 16);
602b00355dbSJarek Poplawski qdisc->flags |= TCQ_F_WARN_NONWC;
603b00355dbSJarek Poplawski }
604b00355dbSJarek Poplawski }
605b00355dbSJarek Poplawski EXPORT_SYMBOL(qdisc_warn_nonwc);
606b00355dbSJarek Poplawski
qdisc_watchdog(struct hrtimer * timer)6074179477fSPatrick McHardy static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
6084179477fSPatrick McHardy {
6094179477fSPatrick McHardy struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
6102fbd3da3SDavid S. Miller timer);
6114179477fSPatrick McHardy
6121e203c1aSJohn Fastabend rcu_read_lock();
6138608db03SDavid S. Miller __netif_schedule(qdisc_root(wd->qdisc));
6141e203c1aSJohn Fastabend rcu_read_unlock();
6151936502dSStephen Hemminger
6164179477fSPatrick McHardy return HRTIMER_NORESTART;
6174179477fSPatrick McHardy }
6184179477fSPatrick McHardy
/* Set up watchdog @wd for @qdisc, with its hrtimer running on @clockid.
 *
 * The timer is initialised in HRTIMER_MODE_ABS_PINNED mode and wired to
 * qdisc_watchdog(); it is not started here.
 */
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	struct hrtimer *timer = &wd->timer;

	hrtimer_init(timer, clockid, HRTIMER_MODE_ABS_PINNED);
	timer->function = qdisc_watchdog;

	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
627860b642bSVinicius Costa Gomes
qdisc_watchdog_init(struct qdisc_watchdog * wd,struct Qdisc * qdisc)628860b642bSVinicius Costa Gomes void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
629860b642bSVinicius Costa Gomes {
630860b642bSVinicius Costa Gomes qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
631860b642bSVinicius Costa Gomes }
6324179477fSPatrick McHardy EXPORT_SYMBOL(qdisc_watchdog_init);
6334179477fSPatrick McHardy
/* Arm watchdog @wd to fire in the window [@expires, @expires + @delta_ns].
 *
 * Both values are in nanoseconds; @expires is an absolute time
 * (HRTIMER_MODE_ABS_PINNED).  Nothing is done when the sleeping root of the
 * watched qdisc has __QDISC_STATE_DEACTIVATED set, or when the timer is
 * already queued with a soft expiry inside the requested window.
 */
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	struct hrtimer *timer = &wd->timer;
	bool root_deactivated;

	rcu_read_lock();
	root_deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
				    &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (root_deactivated)
		return;

	if (hrtimer_is_queued(timer)) {
		u64 queued_at = ktime_to_ns(hrtimer_get_softexpires(timer));

		/* Keep the current programming if it already falls in
		 * [expires, expires + delta_ns].  The subtraction is done
		 * in u64, so a soft expiry earlier than @expires wraps to a
		 * large value and normally fails the comparison.
		 */
		if (queued_at - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(timer, ns_to_ktime(expires), delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
6634179477fSPatrick McHardy
qdisc_watchdog_cancel(struct qdisc_watchdog * wd)6644179477fSPatrick McHardy void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
6654179477fSPatrick McHardy {
6662fbd3da3SDavid S. Miller hrtimer_cancel(&wd->timer);
6674179477fSPatrick McHardy }
6684179477fSPatrick McHardy EXPORT_SYMBOL(qdisc_watchdog_cancel);
6691da177e4SLinus Torvalds
qdisc_class_hash_alloc(unsigned int n)670a94f779fSAdrian Bunk static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6716fe1c7a5SPatrick McHardy {
6726fe1c7a5SPatrick McHardy struct hlist_head *h;
6739695fe6fSEric Dumazet unsigned int i;
6746fe1c7a5SPatrick McHardy
6759695fe6fSEric Dumazet h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
6766fe1c7a5SPatrick McHardy
6776fe1c7a5SPatrick McHardy if (h != NULL) {
6786fe1c7a5SPatrick McHardy for (i = 0; i < n; i++)
6796fe1c7a5SPatrick McHardy INIT_HLIST_HEAD(&h[i]);
6806fe1c7a5SPatrick McHardy }
6816fe1c7a5SPatrick McHardy return h;
6826fe1c7a5SPatrick McHardy }
6836fe1c7a5SPatrick McHardy
qdisc_class_hash_grow(struct Qdisc * sch,struct Qdisc_class_hash * clhash)6846fe1c7a5SPatrick McHardy void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
6856fe1c7a5SPatrick McHardy {
6866fe1c7a5SPatrick McHardy struct Qdisc_class_common *cl;
687b67bfe0dSSasha Levin struct hlist_node *next;
6886fe1c7a5SPatrick McHardy struct hlist_head *nhash, *ohash;
6896fe1c7a5SPatrick McHardy unsigned int nsize, nmask, osize;
6906fe1c7a5SPatrick McHardy unsigned int i, h;
6916fe1c7a5SPatrick McHardy
6926fe1c7a5SPatrick McHardy /* Rehash when load factor exceeds 0.75 */
6936fe1c7a5SPatrick McHardy if (clhash->hashelems * 4 <= clhash->hashsize * 3)
6946fe1c7a5SPatrick McHardy return;
6956fe1c7a5SPatrick McHardy nsize = clhash->hashsize * 2;
6966fe1c7a5SPatrick McHardy nmask = nsize - 1;
6976fe1c7a5SPatrick McHardy nhash = qdisc_class_hash_alloc(nsize);
6986fe1c7a5SPatrick McHardy if (nhash == NULL)
6996fe1c7a5SPatrick McHardy return;
7006fe1c7a5SPatrick McHardy
7016fe1c7a5SPatrick McHardy ohash = clhash->hash;
7026fe1c7a5SPatrick McHardy osize = clhash->hashsize;
7036fe1c7a5SPatrick McHardy
7046fe1c7a5SPatrick McHardy sch_tree_lock(sch);
7056fe1c7a5SPatrick McHardy for (i = 0; i < osize; i++) {
706b67bfe0dSSasha Levin hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
7076fe1c7a5SPatrick McHardy h = qdisc_class_hash(cl->classid, nmask);
7086fe1c7a5SPatrick McHardy hlist_add_head(&cl->hnode, &nhash[h]);
7096fe1c7a5SPatrick McHardy }
7106fe1c7a5SPatrick McHardy }
7116fe1c7a5SPatrick McHardy clhash->hash = nhash;
7126fe1c7a5SPatrick McHardy clhash->hashsize = nsize;
7136fe1c7a5SPatrick McHardy clhash->hashmask = nmask;
7146fe1c7a5SPatrick McHardy sch_tree_unlock(sch);
7156fe1c7a5SPatrick McHardy
7169695fe6fSEric Dumazet kvfree(ohash);
7176fe1c7a5SPatrick McHardy }
7186fe1c7a5SPatrick McHardy EXPORT_SYMBOL(qdisc_class_hash_grow);
7196fe1c7a5SPatrick McHardy
qdisc_class_hash_init(struct Qdisc_class_hash * clhash)7206fe1c7a5SPatrick McHardy int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
7216fe1c7a5SPatrick McHardy {
7226fe1c7a5SPatrick McHardy unsigned int size = 4;
7236fe1c7a5SPatrick McHardy
7246fe1c7a5SPatrick McHardy clhash->hash = qdisc_class_hash_alloc(size);
725ac8ef4abSAlexander Aring if (!clhash->hash)
7266fe1c7a5SPatrick McHardy return -ENOMEM;
7276fe1c7a5SPatrick McHardy clhash->hashsize = size;
7286fe1c7a5SPatrick McHardy clhash->hashmask = size - 1;
7296fe1c7a5SPatrick McHardy clhash->hashelems = 0;
7306fe1c7a5SPatrick McHardy return 0;
7316fe1c7a5SPatrick McHardy }
7326fe1c7a5SPatrick McHardy EXPORT_SYMBOL(qdisc_class_hash_init);
7336fe1c7a5SPatrick McHardy
qdisc_class_hash_destroy(struct Qdisc_class_hash * clhash)7346fe1c7a5SPatrick McHardy void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
7356fe1c7a5SPatrick McHardy {
7369695fe6fSEric Dumazet kvfree(clhash->hash);
7376fe1c7a5SPatrick McHardy }
7386fe1c7a5SPatrick McHardy EXPORT_SYMBOL(qdisc_class_hash_destroy);
7396fe1c7a5SPatrick McHardy
qdisc_class_hash_insert(struct Qdisc_class_hash * clhash,struct Qdisc_class_common * cl)7406fe1c7a5SPatrick McHardy void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
7416fe1c7a5SPatrick McHardy struct Qdisc_class_common *cl)
7426fe1c7a5SPatrick McHardy {
7436fe1c7a5SPatrick McHardy unsigned int h;
7446fe1c7a5SPatrick McHardy
7456fe1c7a5SPatrick McHardy INIT_HLIST_NODE(&cl->hnode);
7466fe1c7a5SPatrick McHardy h = qdisc_class_hash(cl->classid, clhash->hashmask);
7476fe1c7a5SPatrick McHardy hlist_add_head(&cl->hnode, &clhash->hash[h]);
7486fe1c7a5SPatrick McHardy clhash->hashelems++;
7496fe1c7a5SPatrick McHardy }
7506fe1c7a5SPatrick McHardy EXPORT_SYMBOL(qdisc_class_hash_insert);
7516fe1c7a5SPatrick McHardy
qdisc_class_hash_remove(struct Qdisc_class_hash * clhash,struct Qdisc_class_common * cl)7526fe1c7a5SPatrick McHardy void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
7536fe1c7a5SPatrick McHardy struct Qdisc_class_common *cl)
7546fe1c7a5SPatrick McHardy {
7556fe1c7a5SPatrick McHardy hlist_del(&cl->hnode);
7566fe1c7a5SPatrick McHardy clhash->hashelems--;
7576fe1c7a5SPatrick McHardy }
7586fe1c7a5SPatrick McHardy EXPORT_SYMBOL(qdisc_class_hash_remove);
7596fe1c7a5SPatrick McHardy
760fa0f5aa7SEric Dumazet /* Allocate an unique handle from space managed by kernel
761fa0f5aa7SEric Dumazet * Possible range is [8000-FFFF]:0000 (0x8000 values)
762fa0f5aa7SEric Dumazet */
qdisc_alloc_handle(struct net_device * dev)7631da177e4SLinus Torvalds static u32 qdisc_alloc_handle(struct net_device *dev)
7641da177e4SLinus Torvalds {
765fa0f5aa7SEric Dumazet int i = 0x8000;
7661da177e4SLinus Torvalds static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
7671da177e4SLinus Torvalds
7681da177e4SLinus Torvalds do {
7691da177e4SLinus Torvalds autohandle += TC_H_MAKE(0x10000U, 0);
7701da177e4SLinus Torvalds if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
7711da177e4SLinus Torvalds autohandle = TC_H_MAKE(0x80000000U, 0);
772fa0f5aa7SEric Dumazet if (!qdisc_lookup(dev, autohandle))
773fa0f5aa7SEric Dumazet return autohandle;
774fa0f5aa7SEric Dumazet cond_resched();
775fa0f5aa7SEric Dumazet } while (--i > 0);
7761da177e4SLinus Torvalds
777fa0f5aa7SEric Dumazet return 0;
7781da177e4SLinus Torvalds }
7791da177e4SLinus Torvalds
qdisc_tree_reduce_backlog(struct Qdisc * sch,int n,int len)7805f2939d9SToke Høiland-Jørgensen void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
78143effa1eSPatrick McHardy {
782fd5ac14aSNogah Frankel bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
78320fea08bSEric Dumazet const struct Qdisc_class_ops *cops;
78443effa1eSPatrick McHardy unsigned long cl;
78543effa1eSPatrick McHardy u32 parentid;
78695946658SKonstantin Khlebnikov bool notify;
7872c8c8e6fSEric Dumazet int drops;
78843effa1eSPatrick McHardy
7892ccccf5fSWANG Cong if (n == 0 && len == 0)
79043effa1eSPatrick McHardy return;
7912c8c8e6fSEric Dumazet drops = max_t(int, n, 0);
7924eaf3b84SEric Dumazet rcu_read_lock();
79343effa1eSPatrick McHardy while ((parentid = sch->parent)) {
794597cf974SPedro Tammela if (parentid == TC_H_ROOT)
7954eaf3b84SEric Dumazet break;
796066a3b5bSJarek Poplawski
7974eaf3b84SEric Dumazet if (sch->flags & TCQ_F_NOPARENT)
7984eaf3b84SEric Dumazet break;
79995946658SKonstantin Khlebnikov /* Notify parent qdisc only if child qdisc becomes empty.
80095946658SKonstantin Khlebnikov *
80195946658SKonstantin Khlebnikov * If child was empty even before update then backlog
80295946658SKonstantin Khlebnikov * counter is screwed and we skip notification because
80395946658SKonstantin Khlebnikov * parent class is already passive.
804fd5ac14aSNogah Frankel *
805fd5ac14aSNogah Frankel * If the original child was offloaded then it is allowed
806fd5ac14aSNogah Frankel * to be seem as empty, so the parent is notified anyway.
80795946658SKonstantin Khlebnikov */
808fd5ac14aSNogah Frankel notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
809fd5ac14aSNogah Frankel !qdisc_is_offloaded);
8104eaf3b84SEric Dumazet /* TODO: perform the search on a per txq basis */
811c040b994SEric Dumazet sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
812ffc8fefaSPatrick McHardy if (sch == NULL) {
8134eaf3b84SEric Dumazet WARN_ON_ONCE(parentid != TC_H_ROOT);
8144eaf3b84SEric Dumazet break;
815ffc8fefaSPatrick McHardy }
81643effa1eSPatrick McHardy cops = sch->ops->cl_ops;
81795946658SKonstantin Khlebnikov if (notify && cops->qlen_notify) {
818143976ceSWANG Cong cl = cops->find(sch, parentid);
81943effa1eSPatrick McHardy cops->qlen_notify(sch, cl);
82043effa1eSPatrick McHardy }
82143effa1eSPatrick McHardy sch->q.qlen -= n;
8222ccccf5fSWANG Cong sch->qstats.backlog -= len;
82325331d6cSJohn Fastabend __qdisc_qstats_drop(sch, drops);
82443effa1eSPatrick McHardy }
8254eaf3b84SEric Dumazet rcu_read_unlock();
82643effa1eSPatrick McHardy }
8272ccccf5fSWANG Cong EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
8281da177e4SLinus Torvalds
qdisc_offload_dump_helper(struct Qdisc * sch,enum tc_setup_type type,void * type_data)829b592843cSJakub Kicinski int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
830b592843cSJakub Kicinski void *type_data)
831b592843cSJakub Kicinski {
832b592843cSJakub Kicinski struct net_device *dev = qdisc_dev(sch);
833b592843cSJakub Kicinski int err;
834b592843cSJakub Kicinski
835b592843cSJakub Kicinski sch->flags &= ~TCQ_F_OFFLOADED;
836b592843cSJakub Kicinski if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
837b592843cSJakub Kicinski return 0;
838b592843cSJakub Kicinski
839b592843cSJakub Kicinski err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
840b592843cSJakub Kicinski if (err == -EOPNOTSUPP)
841b592843cSJakub Kicinski return 0;
842b592843cSJakub Kicinski
843b592843cSJakub Kicinski if (!err)
844b592843cSJakub Kicinski sch->flags |= TCQ_F_OFFLOADED;
845b592843cSJakub Kicinski
846b592843cSJakub Kicinski return err;
847b592843cSJakub Kicinski }
848b592843cSJakub Kicinski EXPORT_SYMBOL(qdisc_offload_dump_helper);
849b592843cSJakub Kicinski
qdisc_offload_graft_helper(struct net_device * dev,struct Qdisc * sch,struct Qdisc * new,struct Qdisc * old,enum tc_setup_type type,void * type_data,struct netlink_ext_ack * extack)850bfaee911SJakub Kicinski void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
851bfaee911SJakub Kicinski struct Qdisc *new, struct Qdisc *old,
852bfaee911SJakub Kicinski enum tc_setup_type type, void *type_data,
853bfaee911SJakub Kicinski struct netlink_ext_ack *extack)
854bfaee911SJakub Kicinski {
855bfaee911SJakub Kicinski bool any_qdisc_is_offloaded;
856bfaee911SJakub Kicinski int err;
857bfaee911SJakub Kicinski
858bfaee911SJakub Kicinski if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
859bfaee911SJakub Kicinski return;
860bfaee911SJakub Kicinski
861bfaee911SJakub Kicinski err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
862bfaee911SJakub Kicinski
863bfaee911SJakub Kicinski /* Don't report error if the graft is part of destroy operation. */
864bfaee911SJakub Kicinski if (!err || !new || new == &noop_qdisc)
865bfaee911SJakub Kicinski return;
866bfaee911SJakub Kicinski
867bfaee911SJakub Kicinski /* Don't report error if the parent, the old child and the new
868bfaee911SJakub Kicinski * one are not offloaded.
869bfaee911SJakub Kicinski */
870bfaee911SJakub Kicinski any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
871bfaee911SJakub Kicinski any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
872bfaee911SJakub Kicinski any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
873bfaee911SJakub Kicinski
874bfaee911SJakub Kicinski if (any_qdisc_is_offloaded)
875bfaee911SJakub Kicinski NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
876bfaee911SJakub Kicinski }
877bfaee911SJakub Kicinski EXPORT_SYMBOL(qdisc_offload_graft_helper);
878bfaee911SJakub Kicinski
/* Ask the driver of @dev for its offload capabilities for qdisc @type.
 *
 * The @caps buffer (@caps_len bytes) is zeroed first and then handed to
 * ndo_setup_tc() with TC_QUERY_CAPS, when the device implements that
 * callback.  The callback's return value is ignored, so without a
 * callback the buffer simply stays zeroed.
 */
void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (!ops->ndo_setup_tc)
		return;

	ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);
895aac4daa8SVladimir Oltean
qdisc_offload_graft_root(struct net_device * dev,struct Qdisc * new,struct Qdisc * old,struct netlink_ext_ack * extack)89698b0e5f6SJakub Kicinski static void qdisc_offload_graft_root(struct net_device *dev,
89798b0e5f6SJakub Kicinski struct Qdisc *new, struct Qdisc *old,
89898b0e5f6SJakub Kicinski struct netlink_ext_ack *extack)
89998b0e5f6SJakub Kicinski {
90098b0e5f6SJakub Kicinski struct tc_root_qopt_offload graft_offload = {
90198b0e5f6SJakub Kicinski .command = TC_ROOT_GRAFT,
90298b0e5f6SJakub Kicinski .handle = new ? new->handle : 0,
90398b0e5f6SJakub Kicinski .ingress = (new && new->flags & TCQ_F_INGRESS) ||
90498b0e5f6SJakub Kicinski (old && old->flags & TCQ_F_INGRESS),
90598b0e5f6SJakub Kicinski };
90698b0e5f6SJakub Kicinski
90798b0e5f6SJakub Kicinski qdisc_offload_graft_helper(dev, NULL, new, old,
90898b0e5f6SJakub Kicinski TC_SETUP_ROOT_QDISC, &graft_offload, extack);
90998b0e5f6SJakub Kicinski }
91098b0e5f6SJakub Kicinski
/* Append one netlink message describing qdisc @q to @skb.
 *
 * The message has type @event and flags @flags, is addressed with
 * @portid / @seq, and reports @clid as the parent.  After the tcmsg header
 * it carries, in this order: TCA_KIND, the optional ingress/egress block
 * indexes, the qdisc's own dump, TCA_HW_OFFLOAD, the size table (if any),
 * the statistics, and the extack message as TCA_EXT_WARN_MSG (if any).
 *
 * Returns skb->len on success.  On any failure the skb is trimmed back to
 * where this message started and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	unsigned char *msg_start = skb_tail_pointer(skb);
	struct qdisc_size_table *stab;
	struct nlmsghdr *nlh;
	struct gnet_dump d;
	struct tcmsg *tcm;
	u32 block_index;
	__u32 qlen;

	cond_resched();

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;

	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);

	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;

	/* Block indexes are reported only when non-zero. */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}

	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;

	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Per-CPU counters are passed along only for per-CPU-stats qdiscs. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	/* Final message length: everything appended since msg_start. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - msg_start;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, msg_start);
	return -1;
}
99427d7f07cSWANG Cong
/* Decide whether qdisc @q must be left out of dumps and notifications.
 *
 * Qdiscs with TCQ_F_BUILTIN are always skipped; qdiscs with
 * TCQ_F_INVISIBLE are skipped unless @dump_invisible is set.
 */
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	return (q->flags & TCQ_F_BUILTIN) ||
	       ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible);
}
100427d7f07cSWANG Cong
/* Send a netlink notification about a qdisc change.
 *
 * One skb is filled with an RTM_DELQDISC message for @old and/or an
 * RTM_NEWQDISC message for @new (flagged NLM_F_REPLACE when @old is also
 * given).  Qdiscs that tc_qdisc_dump_ignore() rejects are left out.  The
 * skb is sent to RTNLGRP_TC, and echoed to the requester when @n has
 * NLM_F_ECHO.  @oskb, when non-NULL, supplies the requester's portid.
 *
 * Returns the result of rtnetlink_send(), -ENOBUFS when no skb could be
 * allocated, or -EINVAL when filling failed or nothing was written.
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false) &&
	    tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
			  0, RTM_DELQDISC, extack) < 0)
		goto err_out;

	if (new && !tc_qdisc_dump_ignore(new, false) &&
	    tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
			  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
		goto err_out;

	/* Both qdiscs were absent or ignored: nothing to send. */
	if (!skb->len)
		goto err_out;

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
103627d7f07cSWANG Cong
/* Announce the replacement of @old by @new, then drop a reference on @old.
 *
 * The notification is attempted when at least one of the two qdiscs is
 * given, and its result is ignored.  @old, when non-NULL, is released
 * with qdisc_put() either way.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (old || new)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (!old)
		return;

	qdisc_put(old);
}
104899194cffSDavid S. Miller
qdisc_clear_nolock(struct Qdisc * sch)10498a53e616SPaolo Abeni static void qdisc_clear_nolock(struct Qdisc *sch)
10508a53e616SPaolo Abeni {
10518a53e616SPaolo Abeni sch->flags &= ~TCQ_F_NOLOCK;
10528a53e616SPaolo Abeni if (!(sch->flags & TCQ_F_CPUSTATS))
10538a53e616SPaolo Abeni return;
10548a53e616SPaolo Abeni
10558a53e616SPaolo Abeni free_percpu(sch->cpu_bstats);
10568a53e616SPaolo Abeni free_percpu(sch->cpu_qstats);
10578a53e616SPaolo Abeni sch->cpu_bstats = NULL;
10588a53e616SPaolo Abeni sch->cpu_qstats = NULL;
10598a53e616SPaolo Abeni sch->flags &= ~TCQ_F_CPUSTATS;
10608a53e616SPaolo Abeni }
10618a53e616SPaolo Abeni
106299194cffSDavid S. Miller /* Graft qdisc "new" to class "classid" of qdisc "parent" or
106399194cffSDavid S. Miller * to device "dev".
106499194cffSDavid S. Miller *
106599194cffSDavid S. Miller * When appropriate send a netlink notification using 'skb'
106699194cffSDavid S. Miller * and "n".
106799194cffSDavid S. Miller *
106899194cffSDavid S. Miller * On success, destroy old qdisc.
10691da177e4SLinus Torvalds */
10701da177e4SLinus Torvalds
qdisc_graft(struct net_device * dev,struct Qdisc * parent,struct sk_buff * skb,struct nlmsghdr * n,u32 classid,struct Qdisc * new,struct Qdisc * old,struct netlink_ext_ack * extack)10711da177e4SLinus Torvalds static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
107299194cffSDavid S. Miller struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
107309215598SAlexander Aring struct Qdisc *new, struct Qdisc *old,
107409215598SAlexander Aring struct netlink_ext_ack *extack)
10751da177e4SLinus Torvalds {
107699194cffSDavid S. Miller struct Qdisc *q = old;
10777316ae88STom Goff struct net *net = dev_net(dev);
10781da177e4SLinus Torvalds
10791da177e4SLinus Torvalds if (parent == NULL) {
108099194cffSDavid S. Miller unsigned int i, num_q, ingress;
10812d5f6a8dSPeilin Ye struct netdev_queue *dev_queue;
108299194cffSDavid S. Miller
108399194cffSDavid S. Miller ingress = 0;
108499194cffSDavid S. Miller num_q = dev->num_tx_queues;
10858d50b53dSDavid S. Miller if ((q && q->flags & TCQ_F_INGRESS) ||
10868d50b53dSDavid S. Miller (new && new->flags & TCQ_F_INGRESS)) {
108799194cffSDavid S. Miller ingress = 1;
108884ad0af0SPeilin Ye dev_queue = dev_ingress_queue(dev);
108984ad0af0SPeilin Ye if (!dev_queue) {
109009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
109124824a09SEric Dumazet return -ENOENT;
10921da177e4SLinus Torvalds }
109384ad0af0SPeilin Ye
109484ad0af0SPeilin Ye q = rtnl_dereference(dev_queue->qdisc_sleeping);
109584ad0af0SPeilin Ye
109684ad0af0SPeilin Ye /* This is the counterpart of that qdisc_refcount_inc_nz() call in
109784ad0af0SPeilin Ye * __tcf_qdisc_find() for filter requests.
109884ad0af0SPeilin Ye */
109984ad0af0SPeilin Ye if (!qdisc_refcount_dec_if_one(q)) {
110084ad0af0SPeilin Ye NL_SET_ERR_MSG(extack,
110184ad0af0SPeilin Ye "Current ingress or clsact Qdisc has ongoing filter requests");
110284ad0af0SPeilin Ye return -EBUSY;
110384ad0af0SPeilin Ye }
110409215598SAlexander Aring }
110599194cffSDavid S. Miller
110699194cffSDavid S. Miller if (dev->flags & IFF_UP)
110799194cffSDavid S. Miller dev_deactivate(dev);
110899194cffSDavid S. Miller
110998b0e5f6SJakub Kicinski qdisc_offload_graft_root(dev, new, old, extack);
111098b0e5f6SJakub Kicinski
1111de2d807bSMaxim Mikityanskiy if (new && new->ops->attach && !ingress)
111286e363dcSWANG Cong goto skip;
11136ec1c69aSDavid S. Miller
11142d5f6a8dSPeilin Ye if (!ingress) {
111599194cffSDavid S. Miller for (i = 0; i < num_q; i++) {
111699194cffSDavid S. Miller dev_queue = netdev_get_tx_queue(dev, i);
111799194cffSDavid S. Miller old = dev_graft_qdisc(dev_queue, new);
11182d5f6a8dSPeilin Ye
111999194cffSDavid S. Miller if (new && i > 0)
1120551143d8SEric Dumazet qdisc_refcount_inc(new);
112186bd446bSVlad Buslov qdisc_put(old);
112299194cffSDavid S. Miller }
11232d5f6a8dSPeilin Ye } else {
112484ad0af0SPeilin Ye old = dev_graft_qdisc(dev_queue, NULL);
112584ad0af0SPeilin Ye
112684ad0af0SPeilin Ye /* {ingress,clsact}_destroy() @old before grafting @new to avoid
112784ad0af0SPeilin Ye * unprotected concurrent accesses to net_device::miniq_{in,e}gress
112884ad0af0SPeilin Ye * pointer(s) in mini_qdisc_pair_swap().
112984ad0af0SPeilin Ye */
113084ad0af0SPeilin Ye qdisc_notify(net, skb, n, classid, old, new, extack);
113184ad0af0SPeilin Ye qdisc_destroy(old);
113284ad0af0SPeilin Ye
113384ad0af0SPeilin Ye dev_graft_qdisc(dev_queue, new);
11342d5f6a8dSPeilin Ye }
113599194cffSDavid S. Miller
113686e363dcSWANG Cong skip:
1137036d6a67SJarek Poplawski if (!ingress) {
1138ebda44daSEric Dumazet old = rtnl_dereference(dev->qdisc);
11396ec1c69aSDavid S. Miller if (new && !new->ops->attach)
1140551143d8SEric Dumazet qdisc_refcount_inc(new);
11415891cd5eSEric Dumazet rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
114286e363dcSWANG Cong
11430349b877SHangbin Liu notify_and_destroy(net, skb, n, classid, old, new, extack);
1144ebda44daSEric Dumazet
114586e363dcSWANG Cong if (new && new->ops->attach)
114686e363dcSWANG Cong new->ops->attach(new);
1147036d6a67SJarek Poplawski }
1148af356afaSPatrick McHardy
114999194cffSDavid S. Miller if (dev->flags & IFF_UP)
115099194cffSDavid S. Miller dev_activate(dev);
11511da177e4SLinus Torvalds } else {
115220fea08bSEric Dumazet const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
11539da93eceSJakub Kicinski unsigned long cl;
11549da93eceSJakub Kicinski int err;
11551da177e4SLinus Torvalds
1156c5ad119fSJohn Fastabend /* Only support running class lockless if parent is lockless */
11578eaf8d99SGaurav Singh if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
11588a53e616SPaolo Abeni qdisc_clear_nolock(new);
1159c5ad119fSJohn Fastabend
11609da93eceSJakub Kicinski if (!cops || !cops->graft)
11619da93eceSJakub Kicinski return -EOPNOTSUPP;
1162143976ceSWANG Cong
11639da93eceSJakub Kicinski cl = cops->find(parent, classid);
11649da93eceSJakub Kicinski if (!cl) {
116509215598SAlexander Aring NL_SET_ERR_MSG(extack, "Specified class not found");
11669da93eceSJakub Kicinski return -ENOENT;
11671da177e4SLinus Torvalds }
11689da93eceSJakub Kicinski
116996398560SFrederick Lawler if (new && new->ops == &noqueue_qdisc_ops) {
117096398560SFrederick Lawler NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
117196398560SFrederick Lawler return -EINVAL;
117296398560SFrederick Lawler }
117396398560SFrederick Lawler
11741edf039eSEric Dumazet if (new &&
11751edf039eSEric Dumazet !(parent->flags & TCQ_F_MQROOT) &&
11761edf039eSEric Dumazet rcu_access_pointer(new->stab)) {
11771edf039eSEric Dumazet NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
11781edf039eSEric Dumazet return -EINVAL;
11791edf039eSEric Dumazet }
11809da93eceSJakub Kicinski err = cops->graft(parent, cl, new, &old, extack);
11819da93eceSJakub Kicinski if (err)
11829da93eceSJakub Kicinski return err;
11830349b877SHangbin Liu notify_and_destroy(net, skb, n, classid, old, new, extack);
11841da177e4SLinus Torvalds }
11859da93eceSJakub Kicinski return 0;
11861da177e4SLinus Torvalds }
11871da177e4SLinus Torvalds
qdisc_block_indexes_set(struct Qdisc * sch,struct nlattr ** tca,struct netlink_ext_ack * extack)1188d47a6b0eSJiri Pirko static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1189d47a6b0eSJiri Pirko struct netlink_ext_ack *extack)
1190d47a6b0eSJiri Pirko {
1191d47a6b0eSJiri Pirko u32 block_index;
1192d47a6b0eSJiri Pirko
1193d47a6b0eSJiri Pirko if (tca[TCA_INGRESS_BLOCK]) {
1194d47a6b0eSJiri Pirko block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1195d47a6b0eSJiri Pirko
1196d47a6b0eSJiri Pirko if (!block_index) {
1197d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1198d47a6b0eSJiri Pirko return -EINVAL;
1199d47a6b0eSJiri Pirko }
1200d47a6b0eSJiri Pirko if (!sch->ops->ingress_block_set) {
1201d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1202d47a6b0eSJiri Pirko return -EOPNOTSUPP;
1203d47a6b0eSJiri Pirko }
1204d47a6b0eSJiri Pirko sch->ops->ingress_block_set(sch, block_index);
1205d47a6b0eSJiri Pirko }
1206d47a6b0eSJiri Pirko if (tca[TCA_EGRESS_BLOCK]) {
1207d47a6b0eSJiri Pirko block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1208d47a6b0eSJiri Pirko
1209d47a6b0eSJiri Pirko if (!block_index) {
1210d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1211d47a6b0eSJiri Pirko return -EINVAL;
1212d47a6b0eSJiri Pirko }
1213d47a6b0eSJiri Pirko if (!sch->ops->egress_block_set) {
1214d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1215d47a6b0eSJiri Pirko return -EOPNOTSUPP;
1216d47a6b0eSJiri Pirko }
1217d47a6b0eSJiri Pirko sch->ops->egress_block_set(sch, block_index);
1218d47a6b0eSJiri Pirko }
1219d47a6b0eSJiri Pirko return 0;
1220d47a6b0eSJiri Pirko }
1221d47a6b0eSJiri Pirko
12221da177e4SLinus Torvalds /*
12231da177e4SLinus Torvalds Allocate and initialize new qdisc.
12241da177e4SLinus Torvalds
12251da177e4SLinus Torvalds Parameters are passed via opt.
12261da177e4SLinus Torvalds */
12271da177e4SLinus Torvalds
qdisc_create(struct net_device * dev,struct netdev_queue * dev_queue,u32 parent,u32 handle,struct nlattr ** tca,int * errp,struct netlink_ext_ack * extack)12285a7a5555SJamal Hadi Salim static struct Qdisc *qdisc_create(struct net_device *dev,
12295a7a5555SJamal Hadi Salim struct netdev_queue *dev_queue,
1230cfc111d5SZhengchao Shao u32 parent, u32 handle,
123109215598SAlexander Aring struct nlattr **tca, int *errp,
123209215598SAlexander Aring struct netlink_ext_ack *extack)
12331da177e4SLinus Torvalds {
12341da177e4SLinus Torvalds int err;
12351e90474cSPatrick McHardy struct nlattr *kind = tca[TCA_KIND];
12361da177e4SLinus Torvalds struct Qdisc *sch;
12371da177e4SLinus Torvalds struct Qdisc_ops *ops;
1238175f9c1bSJussi Kivilinna struct qdisc_size_table *stab;
12391da177e4SLinus Torvalds
12401da177e4SLinus Torvalds ops = qdisc_lookup_ops(kind);
124195a5afcaSJohannes Berg #ifdef CONFIG_MODULES
12421da177e4SLinus Torvalds if (ops == NULL && kind != NULL) {
12431da177e4SLinus Torvalds char name[IFNAMSIZ];
1244872f6903SFrancis Laniel if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
12451da177e4SLinus Torvalds /* We dropped the RTNL semaphore in order to
12461da177e4SLinus Torvalds * perform the module load. So, even if we
12471da177e4SLinus Torvalds * succeeded in loading the module we have to
12481da177e4SLinus Torvalds * tell the caller to replay the request. We
12491da177e4SLinus Torvalds * indicate this using -EAGAIN.
12501da177e4SLinus Torvalds * We replay the request because the device may
12511da177e4SLinus Torvalds * go away in the mean time.
12521da177e4SLinus Torvalds */
12531da177e4SLinus Torvalds rtnl_unlock();
12541da177e4SLinus Torvalds request_module("sch_%s", name);
12551da177e4SLinus Torvalds rtnl_lock();
12561da177e4SLinus Torvalds ops = qdisc_lookup_ops(kind);
12571da177e4SLinus Torvalds if (ops != NULL) {
12581da177e4SLinus Torvalds /* We will try again qdisc_lookup_ops,
12591da177e4SLinus Torvalds * so don't keep a reference.
12601da177e4SLinus Torvalds */
12611da177e4SLinus Torvalds module_put(ops->owner);
12621da177e4SLinus Torvalds err = -EAGAIN;
12631da177e4SLinus Torvalds goto err_out;
12641da177e4SLinus Torvalds }
12651da177e4SLinus Torvalds }
12661da177e4SLinus Torvalds }
12671da177e4SLinus Torvalds #endif
12681da177e4SLinus Torvalds
1269b9e2cc0fSJamal Hadi Salim err = -ENOENT;
127009215598SAlexander Aring if (!ops) {
1271973bf8fdSVictor Nogueira NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
12721da177e4SLinus Torvalds goto err_out;
127309215598SAlexander Aring }
12741da177e4SLinus Torvalds
1275d0bd684dSAlexander Aring sch = qdisc_alloc(dev_queue, ops, extack);
12763d54b82fSThomas Graf if (IS_ERR(sch)) {
12773d54b82fSThomas Graf err = PTR_ERR(sch);
12781da177e4SLinus Torvalds goto err_out2;
12793d54b82fSThomas Graf }
12801da177e4SLinus Torvalds
1281ffc8fefaSPatrick McHardy sch->parent = parent;
1282ffc8fefaSPatrick McHardy
12833d54b82fSThomas Graf if (handle == TC_H_INGRESS) {
1284f85fa45dSPeilin Ye if (!(sch->flags & TCQ_F_INGRESS)) {
1285f85fa45dSPeilin Ye NL_SET_ERR_MSG(extack,
1286f85fa45dSPeilin Ye "Specified parent ID is reserved for ingress and clsact Qdiscs");
1287f85fa45dSPeilin Ye err = -EINVAL;
1288f85fa45dSPeilin Ye goto err_out3;
1289f85fa45dSPeilin Ye }
12903d54b82fSThomas Graf handle = TC_H_MAKE(TC_H_INGRESS, 0);
1291fd44de7cSPatrick McHardy } else {
1292fd44de7cSPatrick McHardy if (handle == 0) {
12931da177e4SLinus Torvalds handle = qdisc_alloc_handle(dev);
1294aaeb1deaSIvan Vecera if (handle == 0) {
1295aaeb1deaSIvan Vecera NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1296aaeb1deaSIvan Vecera err = -ENOSPC;
12971da177e4SLinus Torvalds goto err_out3;
12981da177e4SLinus Torvalds }
1299aaeb1deaSIvan Vecera }
13001abbe139SEric Dumazet if (!netif_is_multiqueue(dev))
1301225734deSEric Dumazet sch->flags |= TCQ_F_ONETXQUEUE;
1302fd44de7cSPatrick McHardy }
13031da177e4SLinus Torvalds
13041da177e4SLinus Torvalds sch->handle = handle;
13051da177e4SLinus Torvalds
130684c46dd8SJesper Dangaard Brouer /* This exist to keep backward compatible with a userspace
130784c46dd8SJesper Dangaard Brouer * loophole, what allowed userspace to get IFF_NO_QUEUE
130884c46dd8SJesper Dangaard Brouer * facility on older kernels by setting tx_queue_len=0 (prior
130984c46dd8SJesper Dangaard Brouer * to qdisc init), and then forgot to reinit tx_queue_len
131084c46dd8SJesper Dangaard Brouer * before again attaching a qdisc.
131184c46dd8SJesper Dangaard Brouer */
131284c46dd8SJesper Dangaard Brouer if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
131384c46dd8SJesper Dangaard Brouer dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
131484c46dd8SJesper Dangaard Brouer netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
131584c46dd8SJesper Dangaard Brouer }
131684c46dd8SJesper Dangaard Brouer
1317d47a6b0eSJiri Pirko err = qdisc_block_indexes_set(sch, tca, extack);
1318d47a6b0eSJiri Pirko if (err)
1319d47a6b0eSJiri Pirko goto err_out3;
1320d47a6b0eSJiri Pirko
1321175f9c1bSJussi Kivilinna if (tca[TCA_STAB]) {
132209215598SAlexander Aring stab = qdisc_get_stab(tca[TCA_STAB], extack);
1323175f9c1bSJussi Kivilinna if (IS_ERR(stab)) {
1324175f9c1bSJussi Kivilinna err = PTR_ERR(stab);
13254fab6412SEric Dumazet goto err_out3;
1326175f9c1bSJussi Kivilinna }
1327a2da570dSEric Dumazet rcu_assign_pointer(sch->stab, stab);
1328175f9c1bSJussi Kivilinna }
13291f62879eSVladimir Oltean
13301f62879eSVladimir Oltean if (ops->init) {
13311f62879eSVladimir Oltean err = ops->init(sch, tca[TCA_OPTIONS], extack);
13321f62879eSVladimir Oltean if (err != 0)
13334fab6412SEric Dumazet goto err_out4;
13341f62879eSVladimir Oltean }
13351f62879eSVladimir Oltean
13361e90474cSPatrick McHardy if (tca[TCA_RATE]) {
133723bcf634SPatrick McHardy err = -EOPNOTSUPP;
133809215598SAlexander Aring if (sch->flags & TCQ_F_MQROOT) {
133909215598SAlexander Aring NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
13404fab6412SEric Dumazet goto err_out4;
134109215598SAlexander Aring }
134223bcf634SPatrick McHardy
134322e0f8b9SJohn Fastabend err = gen_new_estimator(&sch->bstats,
134422e0f8b9SJohn Fastabend sch->cpu_bstats,
134522e0f8b9SJohn Fastabend &sch->rate_est,
1346edb09eb1SEric Dumazet NULL,
134729cbcd85SAhmed S. Darwish true,
134822e0f8b9SJohn Fastabend tca[TCA_RATE]);
134909215598SAlexander Aring if (err) {
135009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
13514fab6412SEric Dumazet goto err_out4;
1352023e09a7SThomas Graf }
135309215598SAlexander Aring }
1354f6e0b239SJarek Poplawski
135549b49971SJiri Kosina qdisc_hash_add(sch, false);
1356f5a7833eSCong Wang trace_qdisc_create(ops, dev, parent);
13571da177e4SLinus Torvalds
13581da177e4SLinus Torvalds return sch;
135954160ef6SAlexander Aring
13601f62879eSVladimir Oltean err_out4:
13614fab6412SEric Dumazet /* Even if ops->init() failed, we call ops->destroy()
13624fab6412SEric Dumazet * like qdisc_create_dflt().
13634fab6412SEric Dumazet */
1364c1a4872eSGao Feng if (ops->destroy)
136587b60cfaSEric Dumazet ops->destroy(sch);
13664fab6412SEric Dumazet qdisc_put_stab(rtnl_dereference(sch->stab));
13671da177e4SLinus Torvalds err_out3:
1368a0893693SDavide Caratti lockdep_unregister_key(&sch->root_lock_key);
1369d62607c3SJakub Kicinski netdev_put(dev, &sch->dev_tracker);
137081d947e2SDaniel Borkmann qdisc_free(sch);
13711da177e4SLinus Torvalds err_out2:
13721da177e4SLinus Torvalds module_put(ops->owner);
13731da177e4SLinus Torvalds err_out:
13741da177e4SLinus Torvalds *errp = err;
13751da177e4SLinus Torvalds return NULL;
13761da177e4SLinus Torvalds }
13771da177e4SLinus Torvalds
qdisc_change(struct Qdisc * sch,struct nlattr ** tca,struct netlink_ext_ack * extack)137809215598SAlexander Aring static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
137909215598SAlexander Aring struct netlink_ext_ack *extack)
13801da177e4SLinus Torvalds {
1381a2da570dSEric Dumazet struct qdisc_size_table *ostab, *stab = NULL;
1382175f9c1bSJussi Kivilinna int err = 0;
13831da177e4SLinus Torvalds
1384175f9c1bSJussi Kivilinna if (tca[TCA_OPTIONS]) {
138509215598SAlexander Aring if (!sch->ops->change) {
138609215598SAlexander Aring NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
13871da177e4SLinus Torvalds return -EINVAL;
138809215598SAlexander Aring }
1389d47a6b0eSJiri Pirko if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1390d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1391d47a6b0eSJiri Pirko return -EOPNOTSUPP;
1392d47a6b0eSJiri Pirko }
13932030721cSAlexander Aring err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
13941da177e4SLinus Torvalds if (err)
13951da177e4SLinus Torvalds return err;
13961da177e4SLinus Torvalds }
1397175f9c1bSJussi Kivilinna
1398175f9c1bSJussi Kivilinna if (tca[TCA_STAB]) {
139909215598SAlexander Aring stab = qdisc_get_stab(tca[TCA_STAB], extack);
1400175f9c1bSJussi Kivilinna if (IS_ERR(stab))
1401175f9c1bSJussi Kivilinna return PTR_ERR(stab);
1402175f9c1bSJussi Kivilinna }
1403175f9c1bSJussi Kivilinna
1404a2da570dSEric Dumazet ostab = rtnl_dereference(sch->stab);
1405a2da570dSEric Dumazet rcu_assign_pointer(sch->stab, stab);
1406a2da570dSEric Dumazet qdisc_put_stab(ostab);
1407175f9c1bSJussi Kivilinna
140823bcf634SPatrick McHardy if (tca[TCA_RATE]) {
140971bcb09aSStephen Hemminger /* NB: ignores errors from replace_estimator
141071bcb09aSStephen Hemminger because change can't be undone. */
141123bcf634SPatrick McHardy if (sch->flags & TCQ_F_MQROOT)
141223bcf634SPatrick McHardy goto out;
141322e0f8b9SJohn Fastabend gen_replace_estimator(&sch->bstats,
141422e0f8b9SJohn Fastabend sch->cpu_bstats,
141522e0f8b9SJohn Fastabend &sch->rate_est,
1416edb09eb1SEric Dumazet NULL,
141729cbcd85SAhmed S. Darwish true,
1418f6f9b93fSJarek Poplawski tca[TCA_RATE]);
141923bcf634SPatrick McHardy }
142023bcf634SPatrick McHardy out:
14211da177e4SLinus Torvalds return 0;
14221da177e4SLinus Torvalds }
14231da177e4SLinus Torvalds
1424cc7ec456SEric Dumazet struct check_loop_arg {
14251da177e4SLinus Torvalds struct qdisc_walker w;
14261da177e4SLinus Torvalds struct Qdisc *p;
14271da177e4SLinus Torvalds int depth;
14281da177e4SLinus Torvalds };
14291da177e4SLinus Torvalds
14305a7a5555SJamal Hadi Salim static int check_loop_fn(struct Qdisc *q, unsigned long cl,
14315a7a5555SJamal Hadi Salim struct qdisc_walker *w);
14321da177e4SLinus Torvalds
check_loop(struct Qdisc * q,struct Qdisc * p,int depth)14331da177e4SLinus Torvalds static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
14341da177e4SLinus Torvalds {
14351da177e4SLinus Torvalds struct check_loop_arg arg;
14361da177e4SLinus Torvalds
14371da177e4SLinus Torvalds if (q->ops->cl_ops == NULL)
14381da177e4SLinus Torvalds return 0;
14391da177e4SLinus Torvalds
14401da177e4SLinus Torvalds arg.w.stop = arg.w.skip = arg.w.count = 0;
14411da177e4SLinus Torvalds arg.w.fn = check_loop_fn;
14421da177e4SLinus Torvalds arg.depth = depth;
14431da177e4SLinus Torvalds arg.p = p;
14441da177e4SLinus Torvalds q->ops->cl_ops->walk(q, &arg.w);
14451da177e4SLinus Torvalds return arg.w.stop ? -ELOOP : 0;
14461da177e4SLinus Torvalds }
14471da177e4SLinus Torvalds
14481da177e4SLinus Torvalds static int
check_loop_fn(struct Qdisc * q,unsigned long cl,struct qdisc_walker * w)14491da177e4SLinus Torvalds check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
14501da177e4SLinus Torvalds {
14511da177e4SLinus Torvalds struct Qdisc *leaf;
145220fea08bSEric Dumazet const struct Qdisc_class_ops *cops = q->ops->cl_ops;
14531da177e4SLinus Torvalds struct check_loop_arg *arg = (struct check_loop_arg *)w;
14541da177e4SLinus Torvalds
14551da177e4SLinus Torvalds leaf = cops->leaf(q, cl);
14561da177e4SLinus Torvalds if (leaf) {
14571da177e4SLinus Torvalds if (leaf == arg->p || arg->depth > 7)
14581da177e4SLinus Torvalds return -ELOOP;
14591da177e4SLinus Torvalds return check_loop(leaf, arg->p, arg->depth + 1);
14601da177e4SLinus Torvalds }
14611da177e4SLinus Torvalds return 0;
14621da177e4SLinus Torvalds }
14631da177e4SLinus Torvalds
14648b4c3cddSDavid Ahern const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
14656f96c3c6SCong Wang [TCA_KIND] = { .type = NLA_STRING },
14668b4c3cddSDavid Ahern [TCA_RATE] = { .type = NLA_BINARY,
14678b4c3cddSDavid Ahern .len = sizeof(struct tc_estimator) },
14688b4c3cddSDavid Ahern [TCA_STAB] = { .type = NLA_NESTED },
14698b4c3cddSDavid Ahern [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
14708b4c3cddSDavid Ahern [TCA_CHAIN] = { .type = NLA_U32 },
14718b4c3cddSDavid Ahern [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
14728b4c3cddSDavid Ahern [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
14738b4c3cddSDavid Ahern };
14748b4c3cddSDavid Ahern
1475e331473fSDavide Caratti /*
1476e331473fSDavide Caratti * Delete/get qdisc.
1477e331473fSDavide Caratti */
1478e331473fSDavide Caratti
tc_get_qdisc(struct sk_buff * skb,struct nlmsghdr * n,struct netlink_ext_ack * extack)1479c21ef3e3SDavid Ahern static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1480c21ef3e3SDavid Ahern struct netlink_ext_ack *extack)
14811da177e4SLinus Torvalds {
14823b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(skb->sk);
148302ef22caSDavid S. Miller struct tcmsg *tcm = nlmsg_data(n);
14841e90474cSPatrick McHardy struct nlattr *tca[TCA_MAX + 1];
14851da177e4SLinus Torvalds struct net_device *dev;
1486de179c8cSHong zhi guo u32 clid;
14871da177e4SLinus Torvalds struct Qdisc *q = NULL;
14881da177e4SLinus Torvalds struct Qdisc *p = NULL;
14891da177e4SLinus Torvalds int err;
14901da177e4SLinus Torvalds
14918cb08174SJohannes Berg err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
14928cb08174SJohannes Berg rtm_tca_policy, extack);
14931e90474cSPatrick McHardy if (err < 0)
14941e90474cSPatrick McHardy return err;
14951e90474cSPatrick McHardy
1496de179c8cSHong zhi guo dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1497de179c8cSHong zhi guo if (!dev)
1498de179c8cSHong zhi guo return -ENODEV;
1499de179c8cSHong zhi guo
1500de179c8cSHong zhi guo clid = tcm->tcm_parent;
15011da177e4SLinus Torvalds if (clid) {
15021da177e4SLinus Torvalds if (clid != TC_H_ROOT) {
15031da177e4SLinus Torvalds if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1504cc7ec456SEric Dumazet p = qdisc_lookup(dev, TC_H_MAJ(clid));
150509215598SAlexander Aring if (!p) {
150609215598SAlexander Aring NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
15071da177e4SLinus Torvalds return -ENOENT;
150809215598SAlexander Aring }
15091da177e4SLinus Torvalds q = qdisc_leaf(p, clid);
1510cc7ec456SEric Dumazet } else if (dev_ingress_queue(dev)) {
1511d636fc5dSEric Dumazet q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
15121da177e4SLinus Torvalds }
15131da177e4SLinus Torvalds } else {
15145891cd5eSEric Dumazet q = rtnl_dereference(dev->qdisc);
15151da177e4SLinus Torvalds }
151609215598SAlexander Aring if (!q) {
151709215598SAlexander Aring NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
15181da177e4SLinus Torvalds return -ENOENT;
15191da177e4SLinus Torvalds }
15201da177e4SLinus Torvalds
152109215598SAlexander Aring if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
152209215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid handle");
15231da177e4SLinus Torvalds return -EINVAL;
152409215598SAlexander Aring }
152509215598SAlexander Aring } else {
152609215598SAlexander Aring q = qdisc_lookup(dev, tcm->tcm_handle);
152709215598SAlexander Aring if (!q) {
152809215598SAlexander Aring NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
152909215598SAlexander Aring return -ENOENT;
153009215598SAlexander Aring }
153109215598SAlexander Aring }
153209215598SAlexander Aring
153309215598SAlexander Aring if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
153409215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid qdisc name");
153509215598SAlexander Aring return -EINVAL;
153609215598SAlexander Aring }
15371da177e4SLinus Torvalds
15381da177e4SLinus Torvalds if (n->nlmsg_type == RTM_DELQDISC) {
153909215598SAlexander Aring if (!clid) {
154009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Classid cannot be zero");
15411da177e4SLinus Torvalds return -EINVAL;
154209215598SAlexander Aring }
154309215598SAlexander Aring if (q->handle == 0) {
154409215598SAlexander Aring NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
15451da177e4SLinus Torvalds return -ENOENT;
154609215598SAlexander Aring }
154709215598SAlexander Aring err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1548cc7ec456SEric Dumazet if (err != 0)
15491da177e4SLinus Torvalds return err;
15501da177e4SLinus Torvalds } else {
15510349b877SHangbin Liu qdisc_notify(net, skb, n, clid, NULL, q, NULL);
15521da177e4SLinus Torvalds }
15531da177e4SLinus Torvalds return 0;
15541da177e4SLinus Torvalds }
15551da177e4SLinus Torvalds
req_create_or_replace(struct nlmsghdr * n)1556da71714eSJamal Hadi Salim static bool req_create_or_replace(struct nlmsghdr *n)
1557da71714eSJamal Hadi Salim {
1558da71714eSJamal Hadi Salim return (n->nlmsg_flags & NLM_F_CREATE &&
1559da71714eSJamal Hadi Salim n->nlmsg_flags & NLM_F_REPLACE);
1560da71714eSJamal Hadi Salim }
1561da71714eSJamal Hadi Salim
req_create_exclusive(struct nlmsghdr * n)1562da71714eSJamal Hadi Salim static bool req_create_exclusive(struct nlmsghdr *n)
1563da71714eSJamal Hadi Salim {
1564da71714eSJamal Hadi Salim return (n->nlmsg_flags & NLM_F_CREATE &&
1565da71714eSJamal Hadi Salim n->nlmsg_flags & NLM_F_EXCL);
1566da71714eSJamal Hadi Salim }
1567da71714eSJamal Hadi Salim
req_change(struct nlmsghdr * n)1568da71714eSJamal Hadi Salim static bool req_change(struct nlmsghdr *n)
1569da71714eSJamal Hadi Salim {
1570da71714eSJamal Hadi Salim return (!(n->nlmsg_flags & NLM_F_CREATE) &&
1571da71714eSJamal Hadi Salim !(n->nlmsg_flags & NLM_F_REPLACE) &&
1572da71714eSJamal Hadi Salim !(n->nlmsg_flags & NLM_F_EXCL));
1573da71714eSJamal Hadi Salim }
1574da71714eSJamal Hadi Salim
15751da177e4SLinus Torvalds /*
1576cc7ec456SEric Dumazet * Create/change qdisc.
15771da177e4SLinus Torvalds */
tc_modify_qdisc(struct sk_buff * skb,struct nlmsghdr * n,struct netlink_ext_ack * extack)1578c21ef3e3SDavid Ahern static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1579c21ef3e3SDavid Ahern struct netlink_ext_ack *extack)
15801da177e4SLinus Torvalds {
15813b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(skb->sk);
15821da177e4SLinus Torvalds struct tcmsg *tcm;
15831e90474cSPatrick McHardy struct nlattr *tca[TCA_MAX + 1];
15841da177e4SLinus Torvalds struct net_device *dev;
15851da177e4SLinus Torvalds u32 clid;
15861da177e4SLinus Torvalds struct Qdisc *q, *p;
15871da177e4SLinus Torvalds int err;
15881da177e4SLinus Torvalds
15891da177e4SLinus Torvalds replay:
15901da177e4SLinus Torvalds /* Reinit, just in case something touches this. */
15918cb08174SJohannes Berg err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
15928cb08174SJohannes Berg rtm_tca_policy, extack);
1593de179c8cSHong zhi guo if (err < 0)
1594de179c8cSHong zhi guo return err;
1595de179c8cSHong zhi guo
159602ef22caSDavid S. Miller tcm = nlmsg_data(n);
15971da177e4SLinus Torvalds clid = tcm->tcm_parent;
15981da177e4SLinus Torvalds q = p = NULL;
15991da177e4SLinus Torvalds
1600cc7ec456SEric Dumazet dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1601cc7ec456SEric Dumazet if (!dev)
16021da177e4SLinus Torvalds return -ENODEV;
16031da177e4SLinus Torvalds
16041e90474cSPatrick McHardy
16051da177e4SLinus Torvalds if (clid) {
16061da177e4SLinus Torvalds if (clid != TC_H_ROOT) {
16071da177e4SLinus Torvalds if (clid != TC_H_INGRESS) {
1608cc7ec456SEric Dumazet p = qdisc_lookup(dev, TC_H_MAJ(clid));
160909215598SAlexander Aring if (!p) {
161009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
16111da177e4SLinus Torvalds return -ENOENT;
161209215598SAlexander Aring }
16131da177e4SLinus Torvalds q = qdisc_leaf(p, clid);
1614cc7ec456SEric Dumazet } else if (dev_ingress_queue_create(dev)) {
1615d636fc5dSEric Dumazet q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
16161da177e4SLinus Torvalds }
16171da177e4SLinus Torvalds } else {
16185891cd5eSEric Dumazet q = rtnl_dereference(dev->qdisc);
16191da177e4SLinus Torvalds }
16201da177e4SLinus Torvalds
16211da177e4SLinus Torvalds /* It may be default qdisc, ignore it */
16221da177e4SLinus Torvalds if (q && q->handle == 0)
16231da177e4SLinus Torvalds q = NULL;
16241da177e4SLinus Torvalds
16251da177e4SLinus Torvalds if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
16261da177e4SLinus Torvalds if (tcm->tcm_handle) {
162709215598SAlexander Aring if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
162809215598SAlexander Aring NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
16291da177e4SLinus Torvalds return -EEXIST;
163009215598SAlexander Aring }
163109215598SAlexander Aring if (TC_H_MIN(tcm->tcm_handle)) {
163209215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid minor handle");
16331da177e4SLinus Torvalds return -EINVAL;
163409215598SAlexander Aring }
1635cc7ec456SEric Dumazet q = qdisc_lookup(dev, tcm->tcm_handle);
16368ec69574SJiri Pirko if (!q)
16371da177e4SLinus Torvalds goto create_n_graft;
16387e2bd8c1SJamal Hadi Salim if (q->parent != tcm->tcm_parent) {
16397e2bd8c1SJamal Hadi Salim NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
16407e2bd8c1SJamal Hadi Salim return -EINVAL;
16417e2bd8c1SJamal Hadi Salim }
164209215598SAlexander Aring if (n->nlmsg_flags & NLM_F_EXCL) {
164309215598SAlexander Aring NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
16441da177e4SLinus Torvalds return -EEXIST;
164509215598SAlexander Aring }
16460ac4bd68SAlexander Aring if (tca[TCA_KIND] &&
164709215598SAlexander Aring nla_strcmp(tca[TCA_KIND], q->ops->id)) {
164809215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid qdisc name");
16491da177e4SLinus Torvalds return -EINVAL;
165009215598SAlexander Aring }
16519de95df5SPeilin Ye if (q->flags & TCQ_F_INGRESS) {
16529de95df5SPeilin Ye NL_SET_ERR_MSG(extack,
16539de95df5SPeilin Ye "Cannot regraft ingress or clsact Qdiscs");
16549de95df5SPeilin Ye return -EINVAL;
16559de95df5SPeilin Ye }
16561da177e4SLinus Torvalds if (q == p ||
165709215598SAlexander Aring (p && check_loop(q, p, 0))) {
165809215598SAlexander Aring NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
16591da177e4SLinus Torvalds return -ELOOP;
166009215598SAlexander Aring }
166136eec020SZhengchao Shao if (clid == TC_H_INGRESS) {
166236eec020SZhengchao Shao NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
166336eec020SZhengchao Shao return -EINVAL;
166436eec020SZhengchao Shao }
1665551143d8SEric Dumazet qdisc_refcount_inc(q);
16661da177e4SLinus Torvalds goto graft;
16671da177e4SLinus Torvalds } else {
1668cc7ec456SEric Dumazet if (!q)
16691da177e4SLinus Torvalds goto create_n_graft;
16701da177e4SLinus Torvalds
16711da177e4SLinus Torvalds /* This magic test requires explanation.
16721da177e4SLinus Torvalds *
16731da177e4SLinus Torvalds * We know, that some child q is already
16741da177e4SLinus Torvalds * attached to this parent and have choice:
1675da71714eSJamal Hadi Salim * 1) change it or 2) create/graft new one.
1676da71714eSJamal Hadi Salim * If the requested qdisc kind is different
1677da71714eSJamal Hadi Salim * than the existing one, then we choose graft.
1678da71714eSJamal Hadi Salim * If they are the same then this is "change"
1679da71714eSJamal Hadi Salim * operation - just let it fallthrough..
16801da177e4SLinus Torvalds *
16811da177e4SLinus Torvalds * 1. We are allowed to create/graft only
1682da71714eSJamal Hadi Salim * if the request is explicitly stating
1683da71714eSJamal Hadi Salim * "please create if it doesn't exist".
16841da177e4SLinus Torvalds *
1685da71714eSJamal Hadi Salim * 2. If the request is to exclusive create
1686da71714eSJamal Hadi Salim * then the qdisc tcm_handle is not expected
16871da177e4SLinus Torvalds * to exist, so that we choose create/graft too.
16881da177e4SLinus Torvalds *
16891da177e4SLinus Torvalds * 3. The last case is when no flags are set.
1690da71714eSJamal Hadi Salim * This will happen when for example tc
1691da71714eSJamal Hadi Salim * utility issues a "change" command.
16921da177e4SLinus Torvalds * Alas, it is sort of hole in API, we
16931da177e4SLinus Torvalds * cannot decide what to do unambiguously.
1694da71714eSJamal Hadi Salim * For now we select create/graft.
16951da177e4SLinus Torvalds */
1696da71714eSJamal Hadi Salim if (tca[TCA_KIND] &&
1697da71714eSJamal Hadi Salim nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1698da71714eSJamal Hadi Salim if (req_create_or_replace(n) ||
1699da71714eSJamal Hadi Salim req_create_exclusive(n))
17001da177e4SLinus Torvalds goto create_n_graft;
1701da71714eSJamal Hadi Salim else if (req_change(n))
1702da71714eSJamal Hadi Salim goto create_n_graft2;
1703da71714eSJamal Hadi Salim }
17041da177e4SLinus Torvalds }
17051da177e4SLinus Torvalds }
17061da177e4SLinus Torvalds } else {
170709215598SAlexander Aring if (!tcm->tcm_handle) {
170809215598SAlexander Aring NL_SET_ERR_MSG(extack, "Handle cannot be zero");
17091da177e4SLinus Torvalds return -EINVAL;
171009215598SAlexander Aring }
17111da177e4SLinus Torvalds q = qdisc_lookup(dev, tcm->tcm_handle);
17121da177e4SLinus Torvalds }
17131da177e4SLinus Torvalds
17141da177e4SLinus Torvalds /* Change qdisc parameters */
171509215598SAlexander Aring if (!q) {
171609215598SAlexander Aring NL_SET_ERR_MSG(extack, "Specified qdisc not found");
17171da177e4SLinus Torvalds return -ENOENT;
171809215598SAlexander Aring }
171909215598SAlexander Aring if (n->nlmsg_flags & NLM_F_EXCL) {
172009215598SAlexander Aring NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
17211da177e4SLinus Torvalds return -EEXIST;
172209215598SAlexander Aring }
172309215598SAlexander Aring if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
172409215598SAlexander Aring NL_SET_ERR_MSG(extack, "Invalid qdisc name");
17251da177e4SLinus Torvalds return -EINVAL;
172609215598SAlexander Aring }
172709215598SAlexander Aring err = qdisc_change(q, tca, extack);
17281da177e4SLinus Torvalds if (err == 0)
17290349b877SHangbin Liu qdisc_notify(net, skb, n, clid, NULL, q, extack);
17301da177e4SLinus Torvalds return err;
17311da177e4SLinus Torvalds
17321da177e4SLinus Torvalds create_n_graft:
173309215598SAlexander Aring if (!(n->nlmsg_flags & NLM_F_CREATE)) {
173409215598SAlexander Aring NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
17351da177e4SLinus Torvalds return -ENOENT;
173609215598SAlexander Aring }
1737da71714eSJamal Hadi Salim create_n_graft2:
173824824a09SEric Dumazet if (clid == TC_H_INGRESS) {
173909215598SAlexander Aring if (dev_ingress_queue(dev)) {
1740cfc111d5SZhengchao Shao q = qdisc_create(dev, dev_ingress_queue(dev),
1741bb949fbdSDavid S. Miller tcm->tcm_parent, tcm->tcm_parent,
174209215598SAlexander Aring tca, &err, extack);
174309215598SAlexander Aring } else {
174409215598SAlexander Aring NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
174524824a09SEric Dumazet err = -ENOENT;
174609215598SAlexander Aring }
174724824a09SEric Dumazet } else {
1748926e61b7SJarek Poplawski struct netdev_queue *dev_queue;
17496ec1c69aSDavid S. Miller
17506ec1c69aSDavid S. Miller if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1751926e61b7SJarek Poplawski dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1752926e61b7SJarek Poplawski else if (p)
1753926e61b7SJarek Poplawski dev_queue = p->dev_queue;
1754926e61b7SJarek Poplawski else
1755926e61b7SJarek Poplawski dev_queue = netdev_get_tx_queue(dev, 0);
17566ec1c69aSDavid S. Miller
1757cfc111d5SZhengchao Shao q = qdisc_create(dev, dev_queue,
1758bb949fbdSDavid S. Miller tcm->tcm_parent, tcm->tcm_handle,
175909215598SAlexander Aring tca, &err, extack);
17606ec1c69aSDavid S. Miller }
17611da177e4SLinus Torvalds if (q == NULL) {
17621da177e4SLinus Torvalds if (err == -EAGAIN)
17631da177e4SLinus Torvalds goto replay;
17641da177e4SLinus Torvalds return err;
17651da177e4SLinus Torvalds }
17661da177e4SLinus Torvalds
17671da177e4SLinus Torvalds graft:
176809215598SAlexander Aring err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
17691da177e4SLinus Torvalds if (err) {
17704d8863a2SDavid S. Miller if (q)
177186bd446bSVlad Buslov qdisc_put(q);
17721da177e4SLinus Torvalds return err;
17731da177e4SLinus Torvalds }
1774e5befbd9SIlpo Järvinen
17751da177e4SLinus Torvalds return 0;
17761da177e4SLinus Torvalds }
17771da177e4SLinus Torvalds
tc_dump_qdisc_root(struct Qdisc * root,struct sk_buff * skb,struct netlink_callback * cb,int * q_idx_p,int s_q_idx,bool recur,bool dump_invisible)177830723673SDavid S. Miller static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
177930723673SDavid S. Miller struct netlink_callback *cb,
178049b49971SJiri Kosina int *q_idx_p, int s_q_idx, bool recur,
178149b49971SJiri Kosina bool dump_invisible)
178230723673SDavid S. Miller {
178330723673SDavid S. Miller int ret = 0, q_idx = *q_idx_p;
178430723673SDavid S. Miller struct Qdisc *q;
178559cc1f61SJiri Kosina int b;
178630723673SDavid S. Miller
178730723673SDavid S. Miller if (!root)
178830723673SDavid S. Miller return 0;
178930723673SDavid S. Miller
179030723673SDavid S. Miller q = root;
179130723673SDavid S. Miller if (q_idx < s_q_idx) {
179230723673SDavid S. Miller q_idx++;
179330723673SDavid S. Miller } else {
179449b49971SJiri Kosina if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
179515e47304SEric W. Biederman tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
17965a7a5555SJamal Hadi Salim cb->nlh->nlmsg_seq, NLM_F_MULTI,
17970349b877SHangbin Liu RTM_NEWQDISC, NULL) <= 0)
179830723673SDavid S. Miller goto done;
179930723673SDavid S. Miller q_idx++;
180030723673SDavid S. Miller }
180169012ae4SJiri Kosina
1802ea327469SJiri Kosina /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1803ea327469SJiri Kosina * itself has already been dumped.
1804ea327469SJiri Kosina *
1805ea327469SJiri Kosina * If we've already dumped the top-level (ingress) qdisc above and the global
1806ea327469SJiri Kosina * qdisc hashtable, we don't want to hit it again
1807ea327469SJiri Kosina */
1808ea327469SJiri Kosina if (!qdisc_dev(root) || !recur)
180969012ae4SJiri Kosina goto out;
181069012ae4SJiri Kosina
181159cc1f61SJiri Kosina hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
181230723673SDavid S. Miller if (q_idx < s_q_idx) {
181330723673SDavid S. Miller q_idx++;
181430723673SDavid S. Miller continue;
181530723673SDavid S. Miller }
181649b49971SJiri Kosina if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
181715e47304SEric W. Biederman tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
18185a7a5555SJamal Hadi Salim cb->nlh->nlmsg_seq, NLM_F_MULTI,
18190349b877SHangbin Liu RTM_NEWQDISC, NULL) <= 0)
182030723673SDavid S. Miller goto done;
182130723673SDavid S. Miller q_idx++;
182230723673SDavid S. Miller }
182330723673SDavid S. Miller
182430723673SDavid S. Miller out:
182530723673SDavid S. Miller *q_idx_p = q_idx;
182630723673SDavid S. Miller return ret;
182730723673SDavid S. Miller done:
182830723673SDavid S. Miller ret = -1;
182930723673SDavid S. Miller goto out;
183030723673SDavid S. Miller }
183130723673SDavid S. Miller
tc_dump_qdisc(struct sk_buff * skb,struct netlink_callback * cb)18321da177e4SLinus Torvalds static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
18331da177e4SLinus Torvalds {
18343b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(skb->sk);
18351da177e4SLinus Torvalds int idx, q_idx;
18361da177e4SLinus Torvalds int s_idx, s_q_idx;
18371da177e4SLinus Torvalds struct net_device *dev;
183849b49971SJiri Kosina const struct nlmsghdr *nlh = cb->nlh;
183949b49971SJiri Kosina struct nlattr *tca[TCA_MAX + 1];
184049b49971SJiri Kosina int err;
18411da177e4SLinus Torvalds
18421da177e4SLinus Torvalds s_idx = cb->args[0];
18431da177e4SLinus Torvalds s_q_idx = q_idx = cb->args[1];
1844f1e9016dSstephen hemminger
18457562f876SPavel Emelianov idx = 0;
184615dc36ebSEric Dumazet ASSERT_RTNL();
184749b49971SJiri Kosina
18488cb08174SJohannes Berg err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1849dac9c979SDavid Ahern rtm_tca_policy, cb->extack);
185049b49971SJiri Kosina if (err < 0)
185149b49971SJiri Kosina return err;
185249b49971SJiri Kosina
185315dc36ebSEric Dumazet for_each_netdev(net, dev) {
185430723673SDavid S. Miller struct netdev_queue *dev_queue;
185530723673SDavid S. Miller
18561da177e4SLinus Torvalds if (idx < s_idx)
18577562f876SPavel Emelianov goto cont;
18581da177e4SLinus Torvalds if (idx > s_idx)
18591da177e4SLinus Torvalds s_q_idx = 0;
18601da177e4SLinus Torvalds q_idx = 0;
186130723673SDavid S. Miller
18625891cd5eSEric Dumazet if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
18635891cd5eSEric Dumazet skb, cb, &q_idx, s_q_idx,
186449b49971SJiri Kosina true, tca[TCA_DUMP_INVISIBLE]) < 0)
18651da177e4SLinus Torvalds goto done;
186630723673SDavid S. Miller
186724824a09SEric Dumazet dev_queue = dev_ingress_queue(dev);
186824824a09SEric Dumazet if (dev_queue &&
1869d636fc5dSEric Dumazet tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1870d636fc5dSEric Dumazet skb, cb, &q_idx, s_q_idx, false,
187149b49971SJiri Kosina tca[TCA_DUMP_INVISIBLE]) < 0)
187230723673SDavid S. Miller goto done;
187330723673SDavid S. Miller
18747562f876SPavel Emelianov cont:
18757562f876SPavel Emelianov idx++;
18761da177e4SLinus Torvalds }
18771da177e4SLinus Torvalds
18781da177e4SLinus Torvalds done:
18791da177e4SLinus Torvalds cb->args[0] = idx;
18801da177e4SLinus Torvalds cb->args[1] = q_idx;
18811da177e4SLinus Torvalds
18821da177e4SLinus Torvalds return skb->len;
18831da177e4SLinus Torvalds }
18841da177e4SLinus Torvalds
18851da177e4SLinus Torvalds
18861da177e4SLinus Torvalds
18871da177e4SLinus Torvalds /************************************************
18881da177e4SLinus Torvalds * Traffic classes manipulation. *
18891da177e4SLinus Torvalds ************************************************/
18901da177e4SLinus Torvalds
/*
 * Append one traffic-class netlink message describing class @cl of qdisc @q
 * to @skb.
 *
 * The message carries a struct tcmsg header, the qdisc kind string, whatever
 * the class ops ->dump() and ->dump_stats() hooks add, and, when @extack
 * holds a message, a TCA_EXT_WARN_MSG string.
 *
 * Returns skb->len on success.  On any failure the skb is trimmed back to
 * where it was on entry and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *ops = q->ops->cl_ops;
	unsigned char *start = skb_tail_pointer(skb);
	struct gnet_dump dump;
	struct nlmsghdr *hdr;
	struct tcmsg *msg;

	cond_resched();

	hdr = nlmsg_put(skb, portid, seq, event, sizeof(*msg), flags);
	if (!hdr)
		goto rollback;

	msg = nlmsg_data(hdr);
	msg->tcm_family = AF_UNSPEC;
	msg->tcm__pad1 = 0;
	msg->tcm__pad2 = 0;
	msg->tcm_ifindex = qdisc_dev(q)->ifindex;
	/* Both fields start as the qdisc handle; ->dump() receives msg too. */
	msg->tcm_parent = q->handle;
	msg->tcm_handle = q->handle;
	msg->tcm_info = 0;

	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto rollback;

	if (ops->dump && ops->dump(q, cl, skb, msg) < 0)
		goto rollback;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &dump, TCA_PAD) < 0)
		goto rollback;

	if (ops->dump_stats && ops->dump_stats(q, cl, &dump) < 0)
		goto rollback;

	if (gnet_stats_finish_copy(&dump) < 0)
		goto rollback;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto rollback;

	/* Final length covers everything appended since entry. */
	hdr->nlmsg_len = skb_tail_pointer(skb) - start;

	return skb->len;

rollback:
	nlmsg_trim(skb, start);
	return -1;
}
194127d7f07cSWANG Cong
tclass_notify(struct net * net,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,int event,struct netlink_ext_ack * extack)194227d7f07cSWANG Cong static int tclass_notify(struct net *net, struct sk_buff *oskb,
194327d7f07cSWANG Cong struct nlmsghdr *n, struct Qdisc *q,
19440349b877SHangbin Liu unsigned long cl, int event, struct netlink_ext_ack *extack)
194527d7f07cSWANG Cong {
194627d7f07cSWANG Cong struct sk_buff *skb;
194727d7f07cSWANG Cong u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
194827d7f07cSWANG Cong
194927d7f07cSWANG Cong skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
195027d7f07cSWANG Cong if (!skb)
195127d7f07cSWANG Cong return -ENOBUFS;
195227d7f07cSWANG Cong
19530349b877SHangbin Liu if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
195427d7f07cSWANG Cong kfree_skb(skb);
195527d7f07cSWANG Cong return -EINVAL;
195627d7f07cSWANG Cong }
195727d7f07cSWANG Cong
1958f79a3bcbSYajun Deng return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
195927d7f07cSWANG Cong n->nlmsg_flags & NLM_F_ECHO);
196027d7f07cSWANG Cong }
19611da177e4SLinus Torvalds
tclass_del_notify(struct net * net,const struct Qdisc_class_ops * cops,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,struct netlink_ext_ack * extack)196214546ba1SWANG Cong static int tclass_del_notify(struct net *net,
196314546ba1SWANG Cong const struct Qdisc_class_ops *cops,
196414546ba1SWANG Cong struct sk_buff *oskb, struct nlmsghdr *n,
19654dd78a73SMaxim Mikityanskiy struct Qdisc *q, unsigned long cl,
19664dd78a73SMaxim Mikityanskiy struct netlink_ext_ack *extack)
196714546ba1SWANG Cong {
196814546ba1SWANG Cong u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
196914546ba1SWANG Cong struct sk_buff *skb;
197014546ba1SWANG Cong int err = 0;
197114546ba1SWANG Cong
197214546ba1SWANG Cong if (!cops->delete)
197314546ba1SWANG Cong return -EOPNOTSUPP;
197414546ba1SWANG Cong
197514546ba1SWANG Cong skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
197614546ba1SWANG Cong if (!skb)
197714546ba1SWANG Cong return -ENOBUFS;
197814546ba1SWANG Cong
197914546ba1SWANG Cong if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
19800349b877SHangbin Liu RTM_DELTCLASS, extack) < 0) {
198114546ba1SWANG Cong kfree_skb(skb);
198214546ba1SWANG Cong return -EINVAL;
198314546ba1SWANG Cong }
198414546ba1SWANG Cong
19854dd78a73SMaxim Mikityanskiy err = cops->delete(q, cl, extack);
198614546ba1SWANG Cong if (err) {
198714546ba1SWANG Cong kfree_skb(skb);
198814546ba1SWANG Cong return err;
198914546ba1SWANG Cong }
199014546ba1SWANG Cong
19915b5f99b1SZhike Wang err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
199214546ba1SWANG Cong n->nlmsg_flags & NLM_F_ECHO);
19935b5f99b1SZhike Wang return err;
199414546ba1SWANG Cong }
199514546ba1SWANG Cong
199607d79fc7SCong Wang #ifdef CONFIG_NET_CLS
199707d79fc7SCong Wang
199807d79fc7SCong Wang struct tcf_bind_args {
199907d79fc7SCong Wang struct tcf_walker w;
20002e24cd75SCong Wang unsigned long base;
200107d79fc7SCong Wang unsigned long cl;
20022e24cd75SCong Wang u32 classid;
200307d79fc7SCong Wang };
200407d79fc7SCong Wang
tcf_node_bind(struct tcf_proto * tp,void * n,struct tcf_walker * arg)200507d79fc7SCong Wang static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
200607d79fc7SCong Wang {
200707d79fc7SCong Wang struct tcf_bind_args *a = (void *)arg;
200807d79fc7SCong Wang
20094e6263ecSZhengchao Shao if (n && tp->ops->bind_class) {
201074e3be60SJiri Pirko struct Qdisc *q = tcf_block_q(tp->chain->block);
201174e3be60SJiri Pirko
201274e3be60SJiri Pirko sch_tree_lock(q);
20132e24cd75SCong Wang tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
201474e3be60SJiri Pirko sch_tree_unlock(q);
201507d79fc7SCong Wang }
201607d79fc7SCong Wang return 0;
201707d79fc7SCong Wang }
201807d79fc7SCong Wang
2019760d228eSCong Wang struct tc_bind_class_args {
2020760d228eSCong Wang struct qdisc_walker w;
2021760d228eSCong Wang unsigned long new_cl;
2022760d228eSCong Wang u32 portid;
2023760d228eSCong Wang u32 clid;
2024760d228eSCong Wang };
2025760d228eSCong Wang
tc_bind_class_walker(struct Qdisc * q,unsigned long cl,struct qdisc_walker * w)2026760d228eSCong Wang static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2027760d228eSCong Wang struct qdisc_walker *w)
202807d79fc7SCong Wang {
2029760d228eSCong Wang struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
203007d79fc7SCong Wang const struct Qdisc_class_ops *cops = q->ops->cl_ops;
203107d79fc7SCong Wang struct tcf_block *block;
203207d79fc7SCong Wang struct tcf_chain *chain;
203307d79fc7SCong Wang
2034cbaacc4eSAlexander Aring block = cops->tcf_block(q, cl, NULL);
203507d79fc7SCong Wang if (!block)
2036760d228eSCong Wang return 0;
2037bbf73830SVlad Buslov for (chain = tcf_get_next_chain(block, NULL);
2038bbf73830SVlad Buslov chain;
2039bbf73830SVlad Buslov chain = tcf_get_next_chain(block, chain)) {
204007d79fc7SCong Wang struct tcf_proto *tp;
204107d79fc7SCong Wang
20420fca55edSVlad Buslov for (tp = tcf_get_next_proto(chain, NULL);
20430fca55edSVlad Buslov tp; tp = tcf_get_next_proto(chain, tp)) {
204407d79fc7SCong Wang struct tcf_bind_args arg = {};
204507d79fc7SCong Wang
204607d79fc7SCong Wang arg.w.fn = tcf_node_bind;
2047760d228eSCong Wang arg.classid = a->clid;
20482e24cd75SCong Wang arg.base = cl;
2049760d228eSCong Wang arg.cl = a->new_cl;
205012db03b6SVlad Buslov tp->ops->walk(tp, &arg.w, true);
205107d79fc7SCong Wang }
205207d79fc7SCong Wang }
2053760d228eSCong Wang
2054760d228eSCong Wang return 0;
2055760d228eSCong Wang }
2056760d228eSCong Wang
/*
 * Walk all classes of @q with tc_bind_class_walker() so that filters are
 * handed @clid together with @new_cl.  tc_ctl_tclass() passes a @new_cl of
 * 0 when deleting a class.
 *
 * Does nothing when the qdisc's class ops provide no ->tcf_block().
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {
		.w.fn = tc_bind_class_walker,
		.new_cl = new_cl,
		.portid = portid,
		.clid = clid,
	};

	if (!cops->tcf_block)
		return;

	cops->walk(q, &args.w);
}
207107d79fc7SCong Wang
207207d79fc7SCong Wang #else
207307d79fc7SCong Wang
/* Built without CONFIG_NET_CLS: nothing to rebind, so this is a no-op. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
207807d79fc7SCong Wang
207907d79fc7SCong Wang #endif
208007d79fc7SCong Wang
tc_ctl_tclass(struct sk_buff * skb,struct nlmsghdr * n,struct netlink_ext_ack * extack)2081c21ef3e3SDavid Ahern static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2082c21ef3e3SDavid Ahern struct netlink_ext_ack *extack)
20831da177e4SLinus Torvalds {
20843b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(skb->sk);
208502ef22caSDavid S. Miller struct tcmsg *tcm = nlmsg_data(n);
20861e90474cSPatrick McHardy struct nlattr *tca[TCA_MAX + 1];
20871da177e4SLinus Torvalds struct net_device *dev;
20881da177e4SLinus Torvalds struct Qdisc *q = NULL;
208920fea08bSEric Dumazet const struct Qdisc_class_ops *cops;
20901da177e4SLinus Torvalds unsigned long cl = 0;
20911da177e4SLinus Torvalds unsigned long new_cl;
2092de179c8cSHong zhi guo u32 portid;
2093de179c8cSHong zhi guo u32 clid;
2094de179c8cSHong zhi guo u32 qid;
20951da177e4SLinus Torvalds int err;
20961da177e4SLinus Torvalds
20978cb08174SJohannes Berg err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
20988cb08174SJohannes Berg rtm_tca_policy, extack);
20991e90474cSPatrick McHardy if (err < 0)
21001e90474cSPatrick McHardy return err;
21011e90474cSPatrick McHardy
2102de179c8cSHong zhi guo dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2103de179c8cSHong zhi guo if (!dev)
2104de179c8cSHong zhi guo return -ENODEV;
2105de179c8cSHong zhi guo
21061da177e4SLinus Torvalds /*
21071da177e4SLinus Torvalds parent == TC_H_UNSPEC - unspecified parent.
21081da177e4SLinus Torvalds parent == TC_H_ROOT - class is root, which has no parent.
21091da177e4SLinus Torvalds parent == X:0 - parent is root class.
21101da177e4SLinus Torvalds parent == X:Y - parent is a node in hierarchy.
21111da177e4SLinus Torvalds parent == 0:Y - parent is X:Y, where X:0 is qdisc.
21121da177e4SLinus Torvalds
21131da177e4SLinus Torvalds handle == 0:0 - generate handle from kernel pool.
21141da177e4SLinus Torvalds handle == 0:Y - class is X:Y, where X:0 is qdisc.
21151da177e4SLinus Torvalds handle == X:Y - clear.
21161da177e4SLinus Torvalds handle == X:0 - root class.
21171da177e4SLinus Torvalds */
21181da177e4SLinus Torvalds
21191da177e4SLinus Torvalds /* Step 1. Determine qdisc handle X:0 */
21201da177e4SLinus Torvalds
2121de179c8cSHong zhi guo portid = tcm->tcm_parent;
2122de179c8cSHong zhi guo clid = tcm->tcm_handle;
2123de179c8cSHong zhi guo qid = TC_H_MAJ(clid);
2124de179c8cSHong zhi guo
212515e47304SEric W. Biederman if (portid != TC_H_ROOT) {
212615e47304SEric W. Biederman u32 qid1 = TC_H_MAJ(portid);
21271da177e4SLinus Torvalds
21281da177e4SLinus Torvalds if (qid && qid1) {
21291da177e4SLinus Torvalds /* If both majors are known, they must be identical. */
21301da177e4SLinus Torvalds if (qid != qid1)
21311da177e4SLinus Torvalds return -EINVAL;
21321da177e4SLinus Torvalds } else if (qid1) {
21331da177e4SLinus Torvalds qid = qid1;
21341da177e4SLinus Torvalds } else if (qid == 0)
21355891cd5eSEric Dumazet qid = rtnl_dereference(dev->qdisc)->handle;
21361da177e4SLinus Torvalds
21371da177e4SLinus Torvalds /* Now qid is genuine qdisc handle consistent
2138cc7ec456SEric Dumazet * both with parent and child.
2139cc7ec456SEric Dumazet *
214015e47304SEric W. Biederman * TC_H_MAJ(portid) still may be unspecified, complete it now.
21411da177e4SLinus Torvalds */
214215e47304SEric W. Biederman if (portid)
214315e47304SEric W. Biederman portid = TC_H_MAKE(qid, portid);
21441da177e4SLinus Torvalds } else {
21451da177e4SLinus Torvalds if (qid == 0)
21465891cd5eSEric Dumazet qid = rtnl_dereference(dev->qdisc)->handle;
21471da177e4SLinus Torvalds }
21481da177e4SLinus Torvalds
21491da177e4SLinus Torvalds /* OK. Locate qdisc */
2150cc7ec456SEric Dumazet q = qdisc_lookup(dev, qid);
2151cc7ec456SEric Dumazet if (!q)
21521da177e4SLinus Torvalds return -ENOENT;
21531da177e4SLinus Torvalds
21541da177e4SLinus Torvalds /* An check that it supports classes */
21551da177e4SLinus Torvalds cops = q->ops->cl_ops;
21561da177e4SLinus Torvalds if (cops == NULL)
21571da177e4SLinus Torvalds return -EINVAL;
21581da177e4SLinus Torvalds
21591da177e4SLinus Torvalds /* Now try to get class */
21601da177e4SLinus Torvalds if (clid == 0) {
216115e47304SEric W. Biederman if (portid == TC_H_ROOT)
21621da177e4SLinus Torvalds clid = qid;
21631da177e4SLinus Torvalds } else
21641da177e4SLinus Torvalds clid = TC_H_MAKE(qid, clid);
21651da177e4SLinus Torvalds
21661da177e4SLinus Torvalds if (clid)
2167143976ceSWANG Cong cl = cops->find(q, clid);
21681da177e4SLinus Torvalds
21691da177e4SLinus Torvalds if (cl == 0) {
21701da177e4SLinus Torvalds err = -ENOENT;
2171cc7ec456SEric Dumazet if (n->nlmsg_type != RTM_NEWTCLASS ||
2172cc7ec456SEric Dumazet !(n->nlmsg_flags & NLM_F_CREATE))
21731da177e4SLinus Torvalds goto out;
21741da177e4SLinus Torvalds } else {
21751da177e4SLinus Torvalds switch (n->nlmsg_type) {
21761da177e4SLinus Torvalds case RTM_NEWTCLASS:
21771da177e4SLinus Torvalds err = -EEXIST;
21781da177e4SLinus Torvalds if (n->nlmsg_flags & NLM_F_EXCL)
21791da177e4SLinus Torvalds goto out;
21801da177e4SLinus Torvalds break;
21811da177e4SLinus Torvalds case RTM_DELTCLASS:
21824dd78a73SMaxim Mikityanskiy err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
218307d79fc7SCong Wang /* Unbind the class with flilters with 0 */
218407d79fc7SCong Wang tc_bind_tclass(q, portid, clid, 0);
21851da177e4SLinus Torvalds goto out;
21861da177e4SLinus Torvalds case RTM_GETTCLASS:
21870349b877SHangbin Liu err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
21881da177e4SLinus Torvalds goto out;
21891da177e4SLinus Torvalds default:
21901da177e4SLinus Torvalds err = -EINVAL;
21911da177e4SLinus Torvalds goto out;
21921da177e4SLinus Torvalds }
21931da177e4SLinus Torvalds }
21941da177e4SLinus Torvalds
2195d47a6b0eSJiri Pirko if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2196d47a6b0eSJiri Pirko NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2197d47a6b0eSJiri Pirko return -EOPNOTSUPP;
2198d47a6b0eSJiri Pirko }
2199d47a6b0eSJiri Pirko
2200*78533c4aSCong Wang /* Prevent creation of traffic classes with classid TC_H_ROOT */
2201*78533c4aSCong Wang if (clid == TC_H_ROOT) {
2202*78533c4aSCong Wang NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
2203*78533c4aSCong Wang return -EINVAL;
2204*78533c4aSCong Wang }
2205*78533c4aSCong Wang
22061da177e4SLinus Torvalds new_cl = cl;
2207de6d5cdfSPatrick McHardy err = -EOPNOTSUPP;
2208de6d5cdfSPatrick McHardy if (cops->change)
2209793d81d6SAlexander Aring err = cops->change(q, clid, portid, tca, &new_cl, extack);
221007d79fc7SCong Wang if (err == 0) {
22110349b877SHangbin Liu tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
221207d79fc7SCong Wang /* We just create a new class, need to do reverse binding. */
221307d79fc7SCong Wang if (cl != new_cl)
221407d79fc7SCong Wang tc_bind_tclass(q, portid, clid, new_cl);
221507d79fc7SCong Wang }
22161da177e4SLinus Torvalds out:
22171da177e4SLinus Torvalds return err;
22181da177e4SLinus Torvalds }
22191da177e4SLinus Torvalds
2220cc7ec456SEric Dumazet struct qdisc_dump_args {
22211da177e4SLinus Torvalds struct qdisc_walker w;
22221da177e4SLinus Torvalds struct sk_buff *skb;
22231da177e4SLinus Torvalds struct netlink_callback *cb;
22241da177e4SLinus Torvalds };
22251da177e4SLinus Torvalds
qdisc_class_dump(struct Qdisc * q,unsigned long cl,struct qdisc_walker * arg)22265a7a5555SJamal Hadi Salim static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
22275a7a5555SJamal Hadi Salim struct qdisc_walker *arg)
22281da177e4SLinus Torvalds {
22291da177e4SLinus Torvalds struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
22301da177e4SLinus Torvalds
223115e47304SEric W. Biederman return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
22325a7a5555SJamal Hadi Salim a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
22330349b877SHangbin Liu RTM_NEWTCLASS, NULL);
22341da177e4SLinus Torvalds }
22351da177e4SLinus Torvalds
tc_dump_tclass_qdisc(struct Qdisc * q,struct sk_buff * skb,struct tcmsg * tcm,struct netlink_callback * cb,int * t_p,int s_t)223630723673SDavid S. Miller static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
223730723673SDavid S. Miller struct tcmsg *tcm, struct netlink_callback *cb,
223830723673SDavid S. Miller int *t_p, int s_t)
223930723673SDavid S. Miller {
224030723673SDavid S. Miller struct qdisc_dump_args arg;
224130723673SDavid S. Miller
224249b49971SJiri Kosina if (tc_qdisc_dump_ignore(q, false) ||
224330723673SDavid S. Miller *t_p < s_t || !q->ops->cl_ops ||
224430723673SDavid S. Miller (tcm->tcm_parent &&
224530723673SDavid S. Miller TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
224630723673SDavid S. Miller (*t_p)++;
224730723673SDavid S. Miller return 0;
224830723673SDavid S. Miller }
224930723673SDavid S. Miller if (*t_p > s_t)
225030723673SDavid S. Miller memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
225130723673SDavid S. Miller arg.w.fn = qdisc_class_dump;
225230723673SDavid S. Miller arg.skb = skb;
225330723673SDavid S. Miller arg.cb = cb;
225430723673SDavid S. Miller arg.w.stop = 0;
225530723673SDavid S. Miller arg.w.skip = cb->args[1];
225630723673SDavid S. Miller arg.w.count = 0;
225730723673SDavid S. Miller q->ops->cl_ops->walk(q, &arg.w);
225830723673SDavid S. Miller cb->args[1] = arg.w.count;
225930723673SDavid S. Miller if (arg.w.stop)
226030723673SDavid S. Miller return -1;
226130723673SDavid S. Miller (*t_p)++;
226230723673SDavid S. Miller return 0;
226330723673SDavid S. Miller }
226430723673SDavid S. Miller
/*
 * Dump the classes of @root and, when @recur is set and @root has a
 * device, of the other qdiscs on that device.
 *
 * If the request names a parent, only the qdisc matching the parent's
 * major number is dumped in addition to @root; otherwise every qdisc in
 * the device's qdisc hash is visited.
 *
 * Returns -1 as soon as tc_dump_tclass_qdisc() reports a negative result,
 * otherwise 0.  A NULL @root is accepted and yields 0.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *sch;
	int bkt;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		sch = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		/* The root itself was already handled above. */
		if (!sch || sch == root)
			return 0;
		if (tc_dump_tclass_qdisc(sch, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}

	hash_for_each(qdisc_dev(root)->qdisc_hash, bkt, sch, hash) {
		if (tc_dump_tclass_qdisc(sch, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
229530723673SDavid S. Miller
tc_dump_tclass(struct sk_buff * skb,struct netlink_callback * cb)22961da177e4SLinus Torvalds static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
22971da177e4SLinus Torvalds {
229802ef22caSDavid S. Miller struct tcmsg *tcm = nlmsg_data(cb->nlh);
229930723673SDavid S. Miller struct net *net = sock_net(skb->sk);
230030723673SDavid S. Miller struct netdev_queue *dev_queue;
230130723673SDavid S. Miller struct net_device *dev;
230230723673SDavid S. Miller int t, s_t;
23031da177e4SLinus Torvalds
2304573ce260SHong zhi guo if (nlmsg_len(cb->nlh) < sizeof(*tcm))
23051da177e4SLinus Torvalds return 0;
2306cc7ec456SEric Dumazet dev = dev_get_by_index(net, tcm->tcm_ifindex);
2307cc7ec456SEric Dumazet if (!dev)
23081da177e4SLinus Torvalds return 0;
23091da177e4SLinus Torvalds
23101da177e4SLinus Torvalds s_t = cb->args[0];
23111da177e4SLinus Torvalds t = 0;
23121da177e4SLinus Torvalds
23135891cd5eSEric Dumazet if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
23145891cd5eSEric Dumazet skb, tcm, cb, &t, s_t, true) < 0)
231530723673SDavid S. Miller goto done;
23161da177e4SLinus Torvalds
231724824a09SEric Dumazet dev_queue = dev_ingress_queue(dev);
231824824a09SEric Dumazet if (dev_queue &&
2319d636fc5dSEric Dumazet tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2320d636fc5dSEric Dumazet skb, tcm, cb, &t, s_t, false) < 0)
232130723673SDavid S. Miller goto done;
232230723673SDavid S. Miller
232330723673SDavid S. Miller done:
23241da177e4SLinus Torvalds cb->args[0] = t;
23251da177e4SLinus Torvalds
23261da177e4SLinus Torvalds dev_put(dev);
23271da177e4SLinus Torvalds return skb->len;
23281da177e4SLinus Torvalds }
23291da177e4SLinus Torvalds
23301da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
psched_show(struct seq_file * seq,void * v)23311da177e4SLinus Torvalds static int psched_show(struct seq_file *seq, void *v)
23321da177e4SLinus Torvalds {
23331da177e4SLinus Torvalds seq_printf(seq, "%08x %08x %08x %08x\n",
2334ca44d6e6SJarek Poplawski (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2335514bca32SPatrick McHardy 1000000,
23361e317688SThomas Gleixner (u32)NSEC_PER_SEC / hrtimer_resolution);
23371da177e4SLinus Torvalds
23381da177e4SLinus Torvalds return 0;
23391da177e4SLinus Torvalds }
23401da177e4SLinus Torvalds
psched_net_init(struct net * net)23417316ae88STom Goff static int __net_init psched_net_init(struct net *net)
23427316ae88STom Goff {
23437316ae88STom Goff struct proc_dir_entry *e;
23447316ae88STom Goff
23453f3942acSChristoph Hellwig e = proc_create_single("psched", 0, net->proc_net, psched_show);
23467316ae88STom Goff if (e == NULL)
23477316ae88STom Goff return -ENOMEM;
23487316ae88STom Goff
23497316ae88STom Goff return 0;
23507316ae88STom Goff }
23517316ae88STom Goff
psched_net_exit(struct net * net)23527316ae88STom Goff static void __net_exit psched_net_exit(struct net *net)
23537316ae88STom Goff {
2354ece31ffdSGao feng remove_proc_entry("psched", net->proc_net);
23557316ae88STom Goff }
23567316ae88STom Goff #else
psched_net_init(struct net * net)23577316ae88STom Goff static int __net_init psched_net_init(struct net *net)
23587316ae88STom Goff {
23597316ae88STom Goff return 0;
23607316ae88STom Goff }
23617316ae88STom Goff
psched_net_exit(struct net * net)23627316ae88STom Goff static void __net_exit psched_net_exit(struct net *net)
23637316ae88STom Goff {
23647316ae88STom Goff }
23651da177e4SLinus Torvalds #endif
23661da177e4SLinus Torvalds
23677316ae88STom Goff static struct pernet_operations psched_net_ops = {
23687316ae88STom Goff .init = psched_net_init,
23697316ae88STom Goff .exit = psched_net_exit,
23707316ae88STom Goff };
23717316ae88STom Goff
23728cde87b0SMin-Hua Chen #if IS_ENABLED(CONFIG_RETPOLINE)
23737f0e8102SPedro Tammela DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
23748cde87b0SMin-Hua Chen #endif
23757f0e8102SPedro Tammela
pktsched_init(void)23761da177e4SLinus Torvalds static int __init pktsched_init(void)
23771da177e4SLinus Torvalds {
23787316ae88STom Goff int err;
23797316ae88STom Goff
23807316ae88STom Goff err = register_pernet_subsys(&psched_net_ops);
23817316ae88STom Goff if (err) {
2382cc7ec456SEric Dumazet pr_err("pktsched_init: "
23837316ae88STom Goff "cannot initialize per netns operations\n");
23847316ae88STom Goff return err;
23857316ae88STom Goff }
23867316ae88STom Goff
23876da7c8fcSstephen hemminger register_qdisc(&pfifo_fast_ops);
23881da177e4SLinus Torvalds register_qdisc(&pfifo_qdisc_ops);
23891da177e4SLinus Torvalds register_qdisc(&bfifo_qdisc_ops);
239057dbb2d8SHagen Paul Pfeifer register_qdisc(&pfifo_head_drop_qdisc_ops);
23916ec1c69aSDavid S. Miller register_qdisc(&mq_qdisc_ops);
2392d66d6c31SPhil Sutter register_qdisc(&noqueue_qdisc_ops);
23931da177e4SLinus Torvalds
2394b97bac64SFlorian Westphal rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2395b97bac64SFlorian Westphal rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
23965a7a5555SJamal Hadi Salim rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2397b97bac64SFlorian Westphal 0);
2398b97bac64SFlorian Westphal rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2399b97bac64SFlorian Westphal rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
24005a7a5555SJamal Hadi Salim rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2401b97bac64SFlorian Westphal 0);
2402be577ddcSThomas Graf
24037f0e8102SPedro Tammela tc_wrapper_init();
24047f0e8102SPedro Tammela
24051da177e4SLinus Torvalds return 0;
24061da177e4SLinus Torvalds }
24071da177e4SLinus Torvalds
24081da177e4SLinus Torvalds subsys_initcall(pktsched_init);
2409