xref: /openbmc/linux/net/sched/sch_mqprio.c (revision 29cbcd85828372333aa87542c51f2b2b0fd4380c)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_mqprio.c
 *
 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
 */
7b8970f0bSJohn Fastabend 
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
19b8970f0bSJohn Fastabend 
20b8970f0bSJohn Fastabend struct mqprio_sched {
21b8970f0bSJohn Fastabend 	struct Qdisc		**qdiscs;
224e8b86c0SAmritha Nambiar 	u16 mode;
234e8b86c0SAmritha Nambiar 	u16 shaper;
242026fecfSAlexander Duyck 	int hw_offload;
254e8b86c0SAmritha Nambiar 	u32 flags;
264e8b86c0SAmritha Nambiar 	u64 min_rate[TC_QOPT_MAX_QUEUE];
274e8b86c0SAmritha Nambiar 	u64 max_rate[TC_QOPT_MAX_QUEUE];
28b8970f0bSJohn Fastabend };
29b8970f0bSJohn Fastabend 
30b8970f0bSJohn Fastabend static void mqprio_destroy(struct Qdisc *sch)
31b8970f0bSJohn Fastabend {
32b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
33b8970f0bSJohn Fastabend 	struct mqprio_sched *priv = qdisc_priv(sch);
34b8970f0bSJohn Fastabend 	unsigned int ntx;
35b8970f0bSJohn Fastabend 
36ac7100baSBen Hutchings 	if (priv->qdiscs) {
37ac7100baSBen Hutchings 		for (ntx = 0;
38ac7100baSBen Hutchings 		     ntx < dev->num_tx_queues && priv->qdiscs[ntx];
39ac7100baSBen Hutchings 		     ntx++)
4086bd446bSVlad Buslov 			qdisc_put(priv->qdiscs[ntx]);
41ac7100baSBen Hutchings 		kfree(priv->qdiscs);
42ac7100baSBen Hutchings 	}
43b8970f0bSJohn Fastabend 
4456f36acdSAmritha Nambiar 	if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
454e8b86c0SAmritha Nambiar 		struct tc_mqprio_qopt_offload mqprio = { { 0 } };
4656f36acdSAmritha Nambiar 
474e8b86c0SAmritha Nambiar 		switch (priv->mode) {
484e8b86c0SAmritha Nambiar 		case TC_MQPRIO_MODE_DCB:
494e8b86c0SAmritha Nambiar 		case TC_MQPRIO_MODE_CHANNEL:
50575ed7d3SNogah Frankel 			dev->netdev_ops->ndo_setup_tc(dev,
51575ed7d3SNogah Frankel 						      TC_SETUP_QDISC_MQPRIO,
524e8b86c0SAmritha Nambiar 						      &mqprio);
534e8b86c0SAmritha Nambiar 			break;
544e8b86c0SAmritha Nambiar 		default:
554e8b86c0SAmritha Nambiar 			return;
564e8b86c0SAmritha Nambiar 		}
5756f36acdSAmritha Nambiar 	} else {
58b8970f0bSJohn Fastabend 		netdev_set_num_tc(dev, 0);
59b8970f0bSJohn Fastabend 	}
6056f36acdSAmritha Nambiar }
61b8970f0bSJohn Fastabend 
62b8970f0bSJohn Fastabend static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
63b8970f0bSJohn Fastabend {
64b8970f0bSJohn Fastabend 	int i, j;
65b8970f0bSJohn Fastabend 
66b8970f0bSJohn Fastabend 	/* Verify num_tc is not out of max range */
67b8970f0bSJohn Fastabend 	if (qopt->num_tc > TC_MAX_QUEUE)
68b8970f0bSJohn Fastabend 		return -EINVAL;
69b8970f0bSJohn Fastabend 
70b8970f0bSJohn Fastabend 	/* Verify priority mapping uses valid tcs */
71b8970f0bSJohn Fastabend 	for (i = 0; i < TC_BITMASK + 1; i++) {
72b8970f0bSJohn Fastabend 		if (qopt->prio_tc_map[i] >= qopt->num_tc)
73b8970f0bSJohn Fastabend 			return -EINVAL;
74b8970f0bSJohn Fastabend 	}
75b8970f0bSJohn Fastabend 
762026fecfSAlexander Duyck 	/* Limit qopt->hw to maximum supported offload value.  Drivers have
772026fecfSAlexander Duyck 	 * the option of overriding this later if they don't support the a
782026fecfSAlexander Duyck 	 * given offload type.
792026fecfSAlexander Duyck 	 */
802026fecfSAlexander Duyck 	if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
812026fecfSAlexander Duyck 		qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
82b8970f0bSJohn Fastabend 
832026fecfSAlexander Duyck 	/* If hardware offload is requested we will leave it to the device
842026fecfSAlexander Duyck 	 * to either populate the queue counts itself or to validate the
852026fecfSAlexander Duyck 	 * provided queue counts.  If ndo_setup_tc is not present then
862026fecfSAlexander Duyck 	 * hardware doesn't support offload and we should return an error.
87b8970f0bSJohn Fastabend 	 */
88b8970f0bSJohn Fastabend 	if (qopt->hw)
892026fecfSAlexander Duyck 		return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
90b8970f0bSJohn Fastabend 
91b8970f0bSJohn Fastabend 	for (i = 0; i < qopt->num_tc; i++) {
92b8970f0bSJohn Fastabend 		unsigned int last = qopt->offset[i] + qopt->count[i];
93b8970f0bSJohn Fastabend 
94b8970f0bSJohn Fastabend 		/* Verify the queue count is in tx range being equal to the
95b8970f0bSJohn Fastabend 		 * real_num_tx_queues indicates the last queue is in use.
96b8970f0bSJohn Fastabend 		 */
97b8970f0bSJohn Fastabend 		if (qopt->offset[i] >= dev->real_num_tx_queues ||
98b8970f0bSJohn Fastabend 		    !qopt->count[i] ||
99b8970f0bSJohn Fastabend 		    last > dev->real_num_tx_queues)
100b8970f0bSJohn Fastabend 			return -EINVAL;
101b8970f0bSJohn Fastabend 
102b8970f0bSJohn Fastabend 		/* Verify that the offset and counts do not overlap */
103b8970f0bSJohn Fastabend 		for (j = i + 1; j < qopt->num_tc; j++) {
104b8970f0bSJohn Fastabend 			if (last > qopt->offset[j])
105b8970f0bSJohn Fastabend 				return -EINVAL;
106b8970f0bSJohn Fastabend 		}
107b8970f0bSJohn Fastabend 	}
108b8970f0bSJohn Fastabend 
109b8970f0bSJohn Fastabend 	return 0;
110b8970f0bSJohn Fastabend }
111b8970f0bSJohn Fastabend 
1124e8b86c0SAmritha Nambiar static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
1134e8b86c0SAmritha Nambiar 	[TCA_MQPRIO_MODE]	= { .len = sizeof(u16) },
1144e8b86c0SAmritha Nambiar 	[TCA_MQPRIO_SHAPER]	= { .len = sizeof(u16) },
1154e8b86c0SAmritha Nambiar 	[TCA_MQPRIO_MIN_RATE64]	= { .type = NLA_NESTED },
1164e8b86c0SAmritha Nambiar 	[TCA_MQPRIO_MAX_RATE64]	= { .type = NLA_NESTED },
1174e8b86c0SAmritha Nambiar };
1184e8b86c0SAmritha Nambiar 
1194e8b86c0SAmritha Nambiar static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
1204e8b86c0SAmritha Nambiar 		      const struct nla_policy *policy, int len)
1214e8b86c0SAmritha Nambiar {
1224e8b86c0SAmritha Nambiar 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
1234e8b86c0SAmritha Nambiar 
1244e8b86c0SAmritha Nambiar 	if (nested_len >= nla_attr_size(0))
1258cb08174SJohannes Berg 		return nla_parse_deprecated(tb, maxtype,
1268cb08174SJohannes Berg 					    nla_data(nla) + NLA_ALIGN(len),
1274e8b86c0SAmritha Nambiar 					    nested_len, policy, NULL);
1284e8b86c0SAmritha Nambiar 
1294e8b86c0SAmritha Nambiar 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
1304e8b86c0SAmritha Nambiar 	return 0;
1314e8b86c0SAmritha Nambiar }
1324e8b86c0SAmritha Nambiar 
133e63d7dfdSAlexander Aring static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
134e63d7dfdSAlexander Aring 		       struct netlink_ext_ack *extack)
135b8970f0bSJohn Fastabend {
136b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
137b8970f0bSJohn Fastabend 	struct mqprio_sched *priv = qdisc_priv(sch);
138b8970f0bSJohn Fastabend 	struct netdev_queue *dev_queue;
139b8970f0bSJohn Fastabend 	struct Qdisc *qdisc;
140b8970f0bSJohn Fastabend 	int i, err = -EOPNOTSUPP;
141b8970f0bSJohn Fastabend 	struct tc_mqprio_qopt *qopt = NULL;
1424e8b86c0SAmritha Nambiar 	struct nlattr *tb[TCA_MQPRIO_MAX + 1];
1434e8b86c0SAmritha Nambiar 	struct nlattr *attr;
1444e8b86c0SAmritha Nambiar 	int rem;
14522ce97feSColin Ian King 	int len;
146b8970f0bSJohn Fastabend 
147b8970f0bSJohn Fastabend 	BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
148b8970f0bSJohn Fastabend 	BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
149b8970f0bSJohn Fastabend 
150b8970f0bSJohn Fastabend 	if (sch->parent != TC_H_ROOT)
151b8970f0bSJohn Fastabend 		return -EOPNOTSUPP;
152b8970f0bSJohn Fastabend 
153b8970f0bSJohn Fastabend 	if (!netif_is_multiqueue(dev))
154b8970f0bSJohn Fastabend 		return -EOPNOTSUPP;
155b8970f0bSJohn Fastabend 
15632302902SAlexander Duyck 	/* make certain can allocate enough classids to handle queues */
15732302902SAlexander Duyck 	if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
15832302902SAlexander Duyck 		return -ENOMEM;
15932302902SAlexander Duyck 
1607838f2ceSThomas Graf 	if (!opt || nla_len(opt) < sizeof(*qopt))
161b8970f0bSJohn Fastabend 		return -EINVAL;
162b8970f0bSJohn Fastabend 
163b8970f0bSJohn Fastabend 	qopt = nla_data(opt);
164b8970f0bSJohn Fastabend 	if (mqprio_parse_opt(dev, qopt))
165b8970f0bSJohn Fastabend 		return -EINVAL;
166b8970f0bSJohn Fastabend 
16722ce97feSColin Ian King 	len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
1684e8b86c0SAmritha Nambiar 	if (len > 0) {
1694e8b86c0SAmritha Nambiar 		err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
1704e8b86c0SAmritha Nambiar 				 sizeof(*qopt));
1714e8b86c0SAmritha Nambiar 		if (err < 0)
1724e8b86c0SAmritha Nambiar 			return err;
1734e8b86c0SAmritha Nambiar 
1744e8b86c0SAmritha Nambiar 		if (!qopt->hw)
1754e8b86c0SAmritha Nambiar 			return -EINVAL;
1764e8b86c0SAmritha Nambiar 
1774e8b86c0SAmritha Nambiar 		if (tb[TCA_MQPRIO_MODE]) {
1784e8b86c0SAmritha Nambiar 			priv->flags |= TC_MQPRIO_F_MODE;
1794e8b86c0SAmritha Nambiar 			priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
1804e8b86c0SAmritha Nambiar 		}
1814e8b86c0SAmritha Nambiar 
1824e8b86c0SAmritha Nambiar 		if (tb[TCA_MQPRIO_SHAPER]) {
1834e8b86c0SAmritha Nambiar 			priv->flags |= TC_MQPRIO_F_SHAPER;
1844e8b86c0SAmritha Nambiar 			priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
1854e8b86c0SAmritha Nambiar 		}
1864e8b86c0SAmritha Nambiar 
1874e8b86c0SAmritha Nambiar 		if (tb[TCA_MQPRIO_MIN_RATE64]) {
1884e8b86c0SAmritha Nambiar 			if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
1894e8b86c0SAmritha Nambiar 				return -EINVAL;
1904e8b86c0SAmritha Nambiar 			i = 0;
1914e8b86c0SAmritha Nambiar 			nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
1924e8b86c0SAmritha Nambiar 					    rem) {
1934e8b86c0SAmritha Nambiar 				if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
1944e8b86c0SAmritha Nambiar 					return -EINVAL;
1954e8b86c0SAmritha Nambiar 				if (i >= qopt->num_tc)
1964e8b86c0SAmritha Nambiar 					break;
1974e8b86c0SAmritha Nambiar 				priv->min_rate[i] = *(u64 *)nla_data(attr);
1984e8b86c0SAmritha Nambiar 				i++;
1994e8b86c0SAmritha Nambiar 			}
2004e8b86c0SAmritha Nambiar 			priv->flags |= TC_MQPRIO_F_MIN_RATE;
2014e8b86c0SAmritha Nambiar 		}
2024e8b86c0SAmritha Nambiar 
2034e8b86c0SAmritha Nambiar 		if (tb[TCA_MQPRIO_MAX_RATE64]) {
2044e8b86c0SAmritha Nambiar 			if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
2054e8b86c0SAmritha Nambiar 				return -EINVAL;
2064e8b86c0SAmritha Nambiar 			i = 0;
2074e8b86c0SAmritha Nambiar 			nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
2084e8b86c0SAmritha Nambiar 					    rem) {
2094e8b86c0SAmritha Nambiar 				if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
2104e8b86c0SAmritha Nambiar 					return -EINVAL;
2114e8b86c0SAmritha Nambiar 				if (i >= qopt->num_tc)
2124e8b86c0SAmritha Nambiar 					break;
2134e8b86c0SAmritha Nambiar 				priv->max_rate[i] = *(u64 *)nla_data(attr);
2144e8b86c0SAmritha Nambiar 				i++;
2154e8b86c0SAmritha Nambiar 			}
2164e8b86c0SAmritha Nambiar 			priv->flags |= TC_MQPRIO_F_MAX_RATE;
2174e8b86c0SAmritha Nambiar 		}
2184e8b86c0SAmritha Nambiar 	}
2194e8b86c0SAmritha Nambiar 
220b8970f0bSJohn Fastabend 	/* pre-allocate qdisc, attachment can't fail */
221b8970f0bSJohn Fastabend 	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
222b8970f0bSJohn Fastabend 			       GFP_KERNEL);
22387b60cfaSEric Dumazet 	if (!priv->qdiscs)
22487b60cfaSEric Dumazet 		return -ENOMEM;
225b8970f0bSJohn Fastabend 
226b8970f0bSJohn Fastabend 	for (i = 0; i < dev->num_tx_queues; i++) {
227b8970f0bSJohn Fastabend 		dev_queue = netdev_get_tx_queue(dev, i);
2281f27cde3SEric Dumazet 		qdisc = qdisc_create_dflt(dev_queue,
2291f27cde3SEric Dumazet 					  get_default_qdisc_ops(dev, i),
230b8970f0bSJohn Fastabend 					  TC_H_MAKE(TC_H_MAJ(sch->handle),
231a38a9882SAlexander Aring 						    TC_H_MIN(i + 1)), extack);
23287b60cfaSEric Dumazet 		if (!qdisc)
23387b60cfaSEric Dumazet 			return -ENOMEM;
23487b60cfaSEric Dumazet 
235b8970f0bSJohn Fastabend 		priv->qdiscs[i] = qdisc;
2364eaf3b84SEric Dumazet 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
237b8970f0bSJohn Fastabend 	}
238b8970f0bSJohn Fastabend 
239b8970f0bSJohn Fastabend 	/* If the mqprio options indicate that hardware should own
240b8970f0bSJohn Fastabend 	 * the queue mapping then run ndo_setup_tc otherwise use the
241b8970f0bSJohn Fastabend 	 * supplied and verified mapping
242b8970f0bSJohn Fastabend 	 */
243b8970f0bSJohn Fastabend 	if (qopt->hw) {
2444e8b86c0SAmritha Nambiar 		struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
24516e5cc64SJohn Fastabend 
2464e8b86c0SAmritha Nambiar 		switch (priv->mode) {
2474e8b86c0SAmritha Nambiar 		case TC_MQPRIO_MODE_DCB:
2484e8b86c0SAmritha Nambiar 			if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
2494e8b86c0SAmritha Nambiar 				return -EINVAL;
2504e8b86c0SAmritha Nambiar 			break;
2514e8b86c0SAmritha Nambiar 		case TC_MQPRIO_MODE_CHANNEL:
2524e8b86c0SAmritha Nambiar 			mqprio.flags = priv->flags;
2534e8b86c0SAmritha Nambiar 			if (priv->flags & TC_MQPRIO_F_MODE)
2544e8b86c0SAmritha Nambiar 				mqprio.mode = priv->mode;
2554e8b86c0SAmritha Nambiar 			if (priv->flags & TC_MQPRIO_F_SHAPER)
2564e8b86c0SAmritha Nambiar 				mqprio.shaper = priv->shaper;
2574e8b86c0SAmritha Nambiar 			if (priv->flags & TC_MQPRIO_F_MIN_RATE)
2584e8b86c0SAmritha Nambiar 				for (i = 0; i < mqprio.qopt.num_tc; i++)
2594e8b86c0SAmritha Nambiar 					mqprio.min_rate[i] = priv->min_rate[i];
2604e8b86c0SAmritha Nambiar 			if (priv->flags & TC_MQPRIO_F_MAX_RATE)
2614e8b86c0SAmritha Nambiar 				for (i = 0; i < mqprio.qopt.num_tc; i++)
2624e8b86c0SAmritha Nambiar 					mqprio.max_rate[i] = priv->max_rate[i];
2634e8b86c0SAmritha Nambiar 			break;
2644e8b86c0SAmritha Nambiar 		default:
2654e8b86c0SAmritha Nambiar 			return -EINVAL;
2664e8b86c0SAmritha Nambiar 		}
2674e8b86c0SAmritha Nambiar 		err = dev->netdev_ops->ndo_setup_tc(dev,
268575ed7d3SNogah Frankel 						    TC_SETUP_QDISC_MQPRIO,
269de4784caSJiri Pirko 						    &mqprio);
270b8970f0bSJohn Fastabend 		if (err)
27187b60cfaSEric Dumazet 			return err;
2722026fecfSAlexander Duyck 
2734e8b86c0SAmritha Nambiar 		priv->hw_offload = mqprio.qopt.hw;
274b8970f0bSJohn Fastabend 	} else {
275b8970f0bSJohn Fastabend 		netdev_set_num_tc(dev, qopt->num_tc);
276b8970f0bSJohn Fastabend 		for (i = 0; i < qopt->num_tc; i++)
277b8970f0bSJohn Fastabend 			netdev_set_tc_queue(dev, i,
278b8970f0bSJohn Fastabend 					    qopt->count[i], qopt->offset[i]);
279b8970f0bSJohn Fastabend 	}
280b8970f0bSJohn Fastabend 
281b8970f0bSJohn Fastabend 	/* Always use supplied priority mappings */
282b8970f0bSJohn Fastabend 	for (i = 0; i < TC_BITMASK + 1; i++)
283b8970f0bSJohn Fastabend 		netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
284b8970f0bSJohn Fastabend 
285b8970f0bSJohn Fastabend 	sch->flags |= TCQ_F_MQROOT;
286b8970f0bSJohn Fastabend 	return 0;
287b8970f0bSJohn Fastabend }
288b8970f0bSJohn Fastabend 
289b8970f0bSJohn Fastabend static void mqprio_attach(struct Qdisc *sch)
290b8970f0bSJohn Fastabend {
291b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
292b8970f0bSJohn Fastabend 	struct mqprio_sched *priv = qdisc_priv(sch);
29395dc1929SEric Dumazet 	struct Qdisc *qdisc, *old;
294b8970f0bSJohn Fastabend 	unsigned int ntx;
295b8970f0bSJohn Fastabend 
296b8970f0bSJohn Fastabend 	/* Attach underlying qdisc */
297b8970f0bSJohn Fastabend 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
298b8970f0bSJohn Fastabend 		qdisc = priv->qdiscs[ntx];
29995dc1929SEric Dumazet 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
30095dc1929SEric Dumazet 		if (old)
30186bd446bSVlad Buslov 			qdisc_put(old);
30295dc1929SEric Dumazet 		if (ntx < dev->real_num_tx_queues)
30349b49971SJiri Kosina 			qdisc_hash_add(qdisc, false);
304b8970f0bSJohn Fastabend 	}
305b8970f0bSJohn Fastabend 	kfree(priv->qdiscs);
306b8970f0bSJohn Fastabend 	priv->qdiscs = NULL;
307b8970f0bSJohn Fastabend }
308b8970f0bSJohn Fastabend 
309b8970f0bSJohn Fastabend static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
310b8970f0bSJohn Fastabend 					     unsigned long cl)
311b8970f0bSJohn Fastabend {
312b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
31332302902SAlexander Duyck 	unsigned long ntx = cl - 1;
314b8970f0bSJohn Fastabend 
315b8970f0bSJohn Fastabend 	if (ntx >= dev->num_tx_queues)
316b8970f0bSJohn Fastabend 		return NULL;
317b8970f0bSJohn Fastabend 	return netdev_get_tx_queue(dev, ntx);
318b8970f0bSJohn Fastabend }
319b8970f0bSJohn Fastabend 
320b8970f0bSJohn Fastabend static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
321653d6fd6SAlexander Aring 			struct Qdisc **old, struct netlink_ext_ack *extack)
322b8970f0bSJohn Fastabend {
323b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
324b8970f0bSJohn Fastabend 	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
325b8970f0bSJohn Fastabend 
326b8970f0bSJohn Fastabend 	if (!dev_queue)
327b8970f0bSJohn Fastabend 		return -EINVAL;
328b8970f0bSJohn Fastabend 
329b8970f0bSJohn Fastabend 	if (dev->flags & IFF_UP)
330b8970f0bSJohn Fastabend 		dev_deactivate(dev);
331b8970f0bSJohn Fastabend 
332b8970f0bSJohn Fastabend 	*old = dev_graft_qdisc(dev_queue, new);
333b8970f0bSJohn Fastabend 
3341abbe139SEric Dumazet 	if (new)
3354eaf3b84SEric Dumazet 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3361abbe139SEric Dumazet 
337b8970f0bSJohn Fastabend 	if (dev->flags & IFF_UP)
338b8970f0bSJohn Fastabend 		dev_activate(dev);
339b8970f0bSJohn Fastabend 
340b8970f0bSJohn Fastabend 	return 0;
341b8970f0bSJohn Fastabend }
342b8970f0bSJohn Fastabend 
3434e8b86c0SAmritha Nambiar static int dump_rates(struct mqprio_sched *priv,
3444e8b86c0SAmritha Nambiar 		      struct tc_mqprio_qopt *opt, struct sk_buff *skb)
3454e8b86c0SAmritha Nambiar {
3464e8b86c0SAmritha Nambiar 	struct nlattr *nest;
3474e8b86c0SAmritha Nambiar 	int i;
3484e8b86c0SAmritha Nambiar 
3494e8b86c0SAmritha Nambiar 	if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
350ae0be8deSMichal Kubecek 		nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64);
3514e8b86c0SAmritha Nambiar 		if (!nest)
3524e8b86c0SAmritha Nambiar 			goto nla_put_failure;
3534e8b86c0SAmritha Nambiar 
3544e8b86c0SAmritha Nambiar 		for (i = 0; i < opt->num_tc; i++) {
3554e8b86c0SAmritha Nambiar 			if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
3564e8b86c0SAmritha Nambiar 				    sizeof(priv->min_rate[i]),
3574e8b86c0SAmritha Nambiar 				    &priv->min_rate[i]))
3584e8b86c0SAmritha Nambiar 				goto nla_put_failure;
3594e8b86c0SAmritha Nambiar 		}
3604e8b86c0SAmritha Nambiar 		nla_nest_end(skb, nest);
3614e8b86c0SAmritha Nambiar 	}
3624e8b86c0SAmritha Nambiar 
3634e8b86c0SAmritha Nambiar 	if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
364ae0be8deSMichal Kubecek 		nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64);
3654e8b86c0SAmritha Nambiar 		if (!nest)
3664e8b86c0SAmritha Nambiar 			goto nla_put_failure;
3674e8b86c0SAmritha Nambiar 
3684e8b86c0SAmritha Nambiar 		for (i = 0; i < opt->num_tc; i++) {
3694e8b86c0SAmritha Nambiar 			if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
3704e8b86c0SAmritha Nambiar 				    sizeof(priv->max_rate[i]),
3714e8b86c0SAmritha Nambiar 				    &priv->max_rate[i]))
3724e8b86c0SAmritha Nambiar 				goto nla_put_failure;
3734e8b86c0SAmritha Nambiar 		}
3744e8b86c0SAmritha Nambiar 		nla_nest_end(skb, nest);
3754e8b86c0SAmritha Nambiar 	}
3764e8b86c0SAmritha Nambiar 	return 0;
3774e8b86c0SAmritha Nambiar 
3784e8b86c0SAmritha Nambiar nla_put_failure:
3794e8b86c0SAmritha Nambiar 	nla_nest_cancel(skb, nest);
3804e8b86c0SAmritha Nambiar 	return -1;
3814e8b86c0SAmritha Nambiar }
3824e8b86c0SAmritha Nambiar 
383b8970f0bSJohn Fastabend static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
384b8970f0bSJohn Fastabend {
385b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
386b8970f0bSJohn Fastabend 	struct mqprio_sched *priv = qdisc_priv(sch);
3874e8b86c0SAmritha Nambiar 	struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
388144ce879SEric Dumazet 	struct tc_mqprio_qopt opt = { 0 };
389b8970f0bSJohn Fastabend 	struct Qdisc *qdisc;
390ce679e8dSJohn Fastabend 	unsigned int ntx, tc;
391b8970f0bSJohn Fastabend 
392b8970f0bSJohn Fastabend 	sch->q.qlen = 0;
39350dc9a85SAhmed S. Darwish 	gnet_stats_basic_sync_init(&sch->bstats);
394b8970f0bSJohn Fastabend 	memset(&sch->qstats, 0, sizeof(sch->qstats));
395b8970f0bSJohn Fastabend 
396ce679e8dSJohn Fastabend 	/* MQ supports lockless qdiscs. However, statistics accounting needs
397ce679e8dSJohn Fastabend 	 * to account for all, none, or a mix of locked and unlocked child
398ce679e8dSJohn Fastabend 	 * qdiscs. Percpu stats are added to counters in-band and locking
399ce679e8dSJohn Fastabend 	 * qdisc totals are added at end.
400ce679e8dSJohn Fastabend 	 */
401ce679e8dSJohn Fastabend 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
402ce679e8dSJohn Fastabend 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
403b8970f0bSJohn Fastabend 		spin_lock_bh(qdisc_lock(qdisc));
404ce679e8dSJohn Fastabend 
405*29cbcd85SAhmed S. Darwish 		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
406*29cbcd85SAhmed S. Darwish 				     &qdisc->bstats, false);
4077361df46SSebastian Andrzej Siewior 		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
4087361df46SSebastian Andrzej Siewior 				     &qdisc->qstats);
4097361df46SSebastian Andrzej Siewior 		sch->q.qlen += qdisc_qlen(qdisc);
410ce679e8dSJohn Fastabend 
411b8970f0bSJohn Fastabend 		spin_unlock_bh(qdisc_lock(qdisc));
412b8970f0bSJohn Fastabend 	}
413b8970f0bSJohn Fastabend 
414b8970f0bSJohn Fastabend 	opt.num_tc = netdev_get_num_tc(dev);
415b8970f0bSJohn Fastabend 	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
4162026fecfSAlexander Duyck 	opt.hw = priv->hw_offload;
417b8970f0bSJohn Fastabend 
418ce679e8dSJohn Fastabend 	for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
419ce679e8dSJohn Fastabend 		opt.count[tc] = dev->tc_to_txq[tc].count;
420ce679e8dSJohn Fastabend 		opt.offset[tc] = dev->tc_to_txq[tc].offset;
421b8970f0bSJohn Fastabend 	}
422b8970f0bSJohn Fastabend 
4239f104c77SVladyslav Tarasiuk 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
4241b34ec43SDavid S. Miller 		goto nla_put_failure;
425b8970f0bSJohn Fastabend 
4264e8b86c0SAmritha Nambiar 	if ((priv->flags & TC_MQPRIO_F_MODE) &&
4274e8b86c0SAmritha Nambiar 	    nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
4284e8b86c0SAmritha Nambiar 		goto nla_put_failure;
4294e8b86c0SAmritha Nambiar 
4304e8b86c0SAmritha Nambiar 	if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
4314e8b86c0SAmritha Nambiar 	    nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
4324e8b86c0SAmritha Nambiar 		goto nla_put_failure;
4334e8b86c0SAmritha Nambiar 
4344e8b86c0SAmritha Nambiar 	if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
4354e8b86c0SAmritha Nambiar 	     priv->flags & TC_MQPRIO_F_MAX_RATE) &&
4364e8b86c0SAmritha Nambiar 	    (dump_rates(priv, &opt, skb) != 0))
4374e8b86c0SAmritha Nambiar 		goto nla_put_failure;
4384e8b86c0SAmritha Nambiar 
4394e8b86c0SAmritha Nambiar 	return nla_nest_end(skb, nla);
440b8970f0bSJohn Fastabend nla_put_failure:
4414e8b86c0SAmritha Nambiar 	nlmsg_trim(skb, nla);
442b8970f0bSJohn Fastabend 	return -1;
443b8970f0bSJohn Fastabend }
444b8970f0bSJohn Fastabend 
445b8970f0bSJohn Fastabend static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
446b8970f0bSJohn Fastabend {
447b8970f0bSJohn Fastabend 	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
448b8970f0bSJohn Fastabend 
449b8970f0bSJohn Fastabend 	if (!dev_queue)
450b8970f0bSJohn Fastabend 		return NULL;
451b8970f0bSJohn Fastabend 
452b8970f0bSJohn Fastabend 	return dev_queue->qdisc_sleeping;
453b8970f0bSJohn Fastabend }
454b8970f0bSJohn Fastabend 
455143976ceSWANG Cong static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
456b8970f0bSJohn Fastabend {
457b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
458b8970f0bSJohn Fastabend 	unsigned int ntx = TC_H_MIN(classid);
459b8970f0bSJohn Fastabend 
46032302902SAlexander Duyck 	/* There are essentially two regions here that have valid classid
46132302902SAlexander Duyck 	 * values. The first region will have a classid value of 1 through
46232302902SAlexander Duyck 	 * num_tx_queues. All of these are backed by actual Qdiscs.
46332302902SAlexander Duyck 	 */
46432302902SAlexander Duyck 	if (ntx < TC_H_MIN_PRIORITY)
46532302902SAlexander Duyck 		return (ntx <= dev->num_tx_queues) ? ntx : 0;
46632302902SAlexander Duyck 
46732302902SAlexander Duyck 	/* The second region represents the hardware traffic classes. These
46832302902SAlexander Duyck 	 * are represented by classid values of TC_H_MIN_PRIORITY through
46932302902SAlexander Duyck 	 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
47032302902SAlexander Duyck 	 */
47132302902SAlexander Duyck 	return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
472b8970f0bSJohn Fastabend }
473b8970f0bSJohn Fastabend 
474b8970f0bSJohn Fastabend static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
475b8970f0bSJohn Fastabend 			 struct sk_buff *skb, struct tcmsg *tcm)
476b8970f0bSJohn Fastabend {
47732302902SAlexander Duyck 	if (cl < TC_H_MIN_PRIORITY) {
47832302902SAlexander Duyck 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
479b8970f0bSJohn Fastabend 		struct net_device *dev = qdisc_dev(sch);
48032302902SAlexander Duyck 		int tc = netdev_txq_to_tc(dev, cl - 1);
481b8970f0bSJohn Fastabend 
48232302902SAlexander Duyck 		tcm->tcm_parent = (tc < 0) ? 0 :
48332302902SAlexander Duyck 			TC_H_MAKE(TC_H_MAJ(sch->handle),
48432302902SAlexander Duyck 				  TC_H_MIN(tc + TC_H_MIN_PRIORITY));
48532302902SAlexander Duyck 		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
48632302902SAlexander Duyck 	} else {
487b8970f0bSJohn Fastabend 		tcm->tcm_parent = TC_H_ROOT;
488b8970f0bSJohn Fastabend 		tcm->tcm_info = 0;
489b8970f0bSJohn Fastabend 	}
490b8970f0bSJohn Fastabend 	tcm->tcm_handle |= TC_H_MIN(cl);
491b8970f0bSJohn Fastabend 	return 0;
492b8970f0bSJohn Fastabend }
493b8970f0bSJohn Fastabend 
494b8970f0bSJohn Fastabend static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
495b8970f0bSJohn Fastabend 				   struct gnet_dump *d)
496ea18fd95Sstephen hemminger 	__releases(d->lock)
497ea18fd95Sstephen hemminger 	__acquires(d->lock)
498b8970f0bSJohn Fastabend {
49932302902SAlexander Duyck 	if (cl >= TC_H_MIN_PRIORITY) {
500b8970f0bSJohn Fastabend 		int i;
5017361df46SSebastian Andrzej Siewior 		__u32 qlen;
502b8970f0bSJohn Fastabend 		struct gnet_stats_queue qstats = {0};
50350dc9a85SAhmed S. Darwish 		struct gnet_stats_basic_sync bstats;
50432302902SAlexander Duyck 		struct net_device *dev = qdisc_dev(sch);
50532302902SAlexander Duyck 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
506b8970f0bSJohn Fastabend 
50750dc9a85SAhmed S. Darwish 		gnet_stats_basic_sync_init(&bstats);
508b8970f0bSJohn Fastabend 		/* Drop lock here it will be reclaimed before touching
509b8970f0bSJohn Fastabend 		 * statistics this is required because the d->lock we
510b8970f0bSJohn Fastabend 		 * hold here is the look on dev_queue->qdisc_sleeping
511b8970f0bSJohn Fastabend 		 * also acquired below.
512b8970f0bSJohn Fastabend 		 */
513edb09eb1SEric Dumazet 		if (d->lock)
514b8970f0bSJohn Fastabend 			spin_unlock_bh(d->lock);
515b8970f0bSJohn Fastabend 
516b8970f0bSJohn Fastabend 		for (i = tc.offset; i < tc.offset + tc.count; i++) {
51746e5da40SJohn Fastabend 			struct netdev_queue *q = netdev_get_tx_queue(dev, i);
518ce679e8dSJohn Fastabend 			struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
51946e5da40SJohn Fastabend 
520b8970f0bSJohn Fastabend 			spin_lock_bh(qdisc_lock(qdisc));
521ce679e8dSJohn Fastabend 
522*29cbcd85SAhmed S. Darwish 			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
523*29cbcd85SAhmed S. Darwish 					     &qdisc->bstats, false);
5247361df46SSebastian Andrzej Siewior 			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
5257361df46SSebastian Andrzej Siewior 					     &qdisc->qstats);
5267361df46SSebastian Andrzej Siewior 			sch->q.qlen += qdisc_qlen(qdisc);
5277361df46SSebastian Andrzej Siewior 
528b8970f0bSJohn Fastabend 			spin_unlock_bh(qdisc_lock(qdisc));
529b8970f0bSJohn Fastabend 		}
5307361df46SSebastian Andrzej Siewior 		qlen = qdisc_qlen(sch) + qstats.qlen;
531ce679e8dSJohn Fastabend 
532b8970f0bSJohn Fastabend 		/* Reclaim root sleeping lock before completing stats */
533edb09eb1SEric Dumazet 		if (d->lock)
534b8970f0bSJohn Fastabend 			spin_lock_bh(d->lock);
535*29cbcd85SAhmed S. Darwish 		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
536b0ab6f92SJohn Fastabend 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
537b8970f0bSJohn Fastabend 			return -1;
538b8970f0bSJohn Fastabend 	} else {
539b8970f0bSJohn Fastabend 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
540b8970f0bSJohn Fastabend 
541b8970f0bSJohn Fastabend 		sch = dev_queue->qdisc_sleeping;
542*29cbcd85SAhmed S. Darwish 		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
543*29cbcd85SAhmed S. Darwish 					  &sch->bstats, true) < 0 ||
5445dd431b6SPaolo Abeni 		    qdisc_qstats_copy(d, sch) < 0)
545b8970f0bSJohn Fastabend 			return -1;
546b8970f0bSJohn Fastabend 	}
547b8970f0bSJohn Fastabend 	return 0;
548b8970f0bSJohn Fastabend }
549b8970f0bSJohn Fastabend 
550b8970f0bSJohn Fastabend static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
551b8970f0bSJohn Fastabend {
552b8970f0bSJohn Fastabend 	struct net_device *dev = qdisc_dev(sch);
553b8970f0bSJohn Fastabend 	unsigned long ntx;
554b8970f0bSJohn Fastabend 
555b8970f0bSJohn Fastabend 	if (arg->stop)
556b8970f0bSJohn Fastabend 		return;
557b8970f0bSJohn Fastabend 
558b8970f0bSJohn Fastabend 	/* Walk hierarchy with a virtual class per tc */
559b8970f0bSJohn Fastabend 	arg->count = arg->skip;
56032302902SAlexander Duyck 	for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
56132302902SAlexander Duyck 		if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
56232302902SAlexander Duyck 			arg->stop = 1;
56332302902SAlexander Duyck 			return;
56432302902SAlexander Duyck 		}
56532302902SAlexander Duyck 		arg->count++;
56632302902SAlexander Duyck 	}
56732302902SAlexander Duyck 
56832302902SAlexander Duyck 	/* Pad the values and skip over unused traffic classes */
56932302902SAlexander Duyck 	if (ntx < TC_MAX_QUEUE) {
57032302902SAlexander Duyck 		arg->count = TC_MAX_QUEUE;
57132302902SAlexander Duyck 		ntx = TC_MAX_QUEUE;
57232302902SAlexander Duyck 	}
57332302902SAlexander Duyck 
57432302902SAlexander Duyck 	/* Reset offset, sort out remaining per-queue qdiscs */
57532302902SAlexander Duyck 	for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
576b8970f0bSJohn Fastabend 		if (arg->fn(sch, ntx + 1, arg) < 0) {
577b8970f0bSJohn Fastabend 			arg->stop = 1;
57832302902SAlexander Duyck 			return;
579b8970f0bSJohn Fastabend 		}
580b8970f0bSJohn Fastabend 		arg->count++;
581b8970f0bSJohn Fastabend 	}
582b8970f0bSJohn Fastabend }
583b8970f0bSJohn Fastabend 
5840f7787b4SJesus Sanchez-Palencia static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
5850f7787b4SJesus Sanchez-Palencia 						struct tcmsg *tcm)
5860f7787b4SJesus Sanchez-Palencia {
5870f7787b4SJesus Sanchez-Palencia 	return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
5880f7787b4SJesus Sanchez-Palencia }
5890f7787b4SJesus Sanchez-Palencia 
590b8970f0bSJohn Fastabend static const struct Qdisc_class_ops mqprio_class_ops = {
591b8970f0bSJohn Fastabend 	.graft		= mqprio_graft,
592b8970f0bSJohn Fastabend 	.leaf		= mqprio_leaf,
593143976ceSWANG Cong 	.find		= mqprio_find,
594b8970f0bSJohn Fastabend 	.walk		= mqprio_walk,
595b8970f0bSJohn Fastabend 	.dump		= mqprio_dump_class,
596b8970f0bSJohn Fastabend 	.dump_stats	= mqprio_dump_class_stats,
5970f7787b4SJesus Sanchez-Palencia 	.select_queue	= mqprio_select_queue,
598b8970f0bSJohn Fastabend };
599b8970f0bSJohn Fastabend 
600ea18fd95Sstephen hemminger static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
601b8970f0bSJohn Fastabend 	.cl_ops		= &mqprio_class_ops,
602b8970f0bSJohn Fastabend 	.id		= "mqprio",
603b8970f0bSJohn Fastabend 	.priv_size	= sizeof(struct mqprio_sched),
604b8970f0bSJohn Fastabend 	.init		= mqprio_init,
605b8970f0bSJohn Fastabend 	.destroy	= mqprio_destroy,
606b8970f0bSJohn Fastabend 	.attach		= mqprio_attach,
607f7116fb4SJakub Kicinski 	.change_real_num_tx = mq_change_real_num_tx,
608b8970f0bSJohn Fastabend 	.dump		= mqprio_dump,
609b8970f0bSJohn Fastabend 	.owner		= THIS_MODULE,
610b8970f0bSJohn Fastabend };
611b8970f0bSJohn Fastabend 
612b8970f0bSJohn Fastabend static int __init mqprio_module_init(void)
613b8970f0bSJohn Fastabend {
614b8970f0bSJohn Fastabend 	return register_qdisc(&mqprio_qdisc_ops);
615b8970f0bSJohn Fastabend }
616b8970f0bSJohn Fastabend 
617b8970f0bSJohn Fastabend static void __exit mqprio_module_exit(void)
618b8970f0bSJohn Fastabend {
619b8970f0bSJohn Fastabend 	unregister_qdisc(&mqprio_qdisc_ops);
620b8970f0bSJohn Fastabend }
621b8970f0bSJohn Fastabend 
622b8970f0bSJohn Fastabend module_init(mqprio_module_init);
623b8970f0bSJohn Fastabend module_exit(mqprio_module_exit);
624b8970f0bSJohn Fastabend 
625b8970f0bSJohn Fastabend MODULE_LICENSE("GPL");
626