// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
 *
 * Description
 * -----------
 *
 * The Enhanced Transmission Selection scheduler is a classful queuing
 * discipline that merges the functionality of the PRIO and DRR qdiscs into
 * one scheduler. ETS makes it easy to configure a set of strict and
 * bandwidth-sharing bands to implement the transmission selection described
 * in 802.1Qaz.
 *
 * Although ETS is technically classful, it's not possible to add and remove
 * classes at will. Instead, one specifies the number of classes, how many are
 * PRIO-like and how many DRR-like, and quanta for the latter.
 *
 * Algorithm
 * ---------
 *
 * The strict classes, if any, are tried for traffic first: band 0 first; if
 * it has no traffic, then band 1, and so on.
 *
 * When there is no traffic in any of the strict queues, the bandwidth-sharing
 * ones are tried next. Each band is assigned a deficit counter, initialized to
 * the "quantum" of that band. ETS maintains a list of active bandwidth-sharing
 * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
 * head of the list if the packet size is smaller than or equal to the deficit
 * counter. If the counter is too small, it is increased by "quantum" and the
 * scheduler moves on to the next band in the active list.
 */
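
/* Illustrative example (hedged; the user-space syntax below belongs to the
 * iproute2 tc-ets front end, not to this file): a three-band scheduler with
 * one strict band and two bandwidth-sharing bands weighted roughly 3:1 could
 * be requested along the lines of
 *
 *   tc qdisc add dev eth0 root handle 1: ets bands 3 strict 1 quanta 3000 1000
 */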

#include <linux/module.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>

struct ets_class {
	struct list_head alist; /* In struct ets_sched.active. */
	struct Qdisc *qdisc;
	u32 quantum;
	u32 deficit;
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_queue qstats;
};

struct ets_sched {
	struct list_head active;
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
	unsigned int nbands;
	unsigned int nstrict;
	u8 prio2band[TC_PRIO_MAX + 1];
	struct ets_class classes[TCQ_ETS_MAX_BANDS];
};

static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
};

static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
};

static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};

static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};

static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
			     unsigned int *quantum,
			     struct netlink_ext_ack *extack)
{
	*quantum = nla_get_u32(attr);
	if (!*quantum) {
		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
		return -EINVAL;
	}
	return 0;
}
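
/* Class "handles" passed in by the tc core are 1-based band numbers; zero
 * means "no class". ets_class_from_arg() maps such a handle back to the band.
 */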
static struct ets_class *
ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
{
	struct ets_sched *q = qdisc_priv(sch);

	if (arg == 0 || arg > q->nbands)
		return NULL;
	return &q->classes[arg - 1];
}

static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band = cl - q->classes;

	return TC_H_MAKE(sch->handle, band + 1);
}

static void ets_offload_change(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct ets_sched *q = qdisc_priv(sch);
	struct tc_ets_qopt_offload qopt;
	unsigned int w_psum_prev = 0;
	unsigned int q_psum = 0;
	unsigned int q_sum = 0;
	unsigned int quantum;
	unsigned int w_psum;
	unsigned int weight;
	unsigned int i;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_ETS_REPLACE;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.replace_params.bands = q->nbands;
	qopt.replace_params.qstats = &sch->qstats;
	memcpy(&qopt.replace_params.priomap,
	       q->prio2band, sizeof(q->prio2band));

	for (i = 0; i < q->nbands; i++)
		q_sum += q->classes[i].quantum;
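
	/* Convert the DRR quanta to per-band weights in percent for the
	 * driver. Using cumulative sums keeps rounding errors from piling up,
	 * so the bandwidth-sharing weights always add up to 100; strict bands
	 * (quantum of zero) get a weight of zero.
	 */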
	for (i = 0; i < q->nbands; i++) {
		quantum = q->classes[i].quantum;
		q_psum += quantum;
		w_psum = quantum ? q_psum * 100 / q_sum : 0;
		weight = w_psum - w_psum_prev;
		w_psum_prev = w_psum;

		qopt.replace_params.quanta[i] = quantum;
		qopt.replace_params.weights[i] = weight;
	}

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
}

static void ets_offload_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_ets_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_ETS_DESTROY;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
}

static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
			      struct Qdisc *old, unsigned long arg,
			      struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_ets_qopt_offload qopt;

	qopt.command = TC_ETS_GRAFT;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.graft_params.band = arg - 1;
	qopt.graft_params.child_handle = new->handle;

	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
				   &qopt, extack);
}

static int ets_offload_dump(struct Qdisc *sch)
{
	struct tc_ets_qopt_offload qopt;

	qopt.command = TC_ETS_STATS;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.stats.bstats = &sch->bstats;
	qopt.stats.qstats = &sch->qstats;

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
}

static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
{
	unsigned int band = cl - q->classes;

	return band < q->nstrict;
}

static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg,
			    struct netlink_ext_ack *extack)
{
	struct ets_class *cl = ets_class_from_arg(sch, *arg);
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int quantum;
	int err;

	/* Classes can be added and removed only through Qdisc_ops.change
	 * interface.
	 */
	if (!cl) {
		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
		return -EOPNOTSUPP;
	}

	if (!opt) {
		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_QUANTA_BAND])
		/* Nothing to configure. */
		return 0;

	if (ets_class_is_strict(q, cl)) {
		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
		return -EINVAL;
	}

	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
				extack);
	if (err)
		return err;

	sch_tree_lock(sch);
	cl->quantum = quantum;
	sch_tree_unlock(sch);

	ets_offload_change(sch);
	return 0;
}

static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
			   struct Qdisc *new, struct Qdisc **old,
			   struct netlink_ext_ack *extack)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);

	if (!new) {
		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					ets_class_id(sch, cl), NULL);
		if (!new)
			new = &noop_qdisc;
		else
			qdisc_hash_add(new, true);
	}

	*old = qdisc_replace(sch, new, &cl->qdisc);
	ets_offload_graft(sch, new, *old, arg, extack);
	return 0;
}

static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);

	return cl->qdisc;
}

static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
{
	unsigned long band = TC_H_MIN(classid);
	struct ets_sched *q = qdisc_priv(sch);

	if (band - 1 >= q->nbands)
		return 0;
	return band;
}

static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct ets_sched *q = qdisc_priv(sch);

	/* We get notified about zero-length child Qdiscs as well if they are
	 * offloaded. Those aren't on the active list though, so don't attempt
	 * to remove them.
	 */
	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
		list_del(&cl->alist);
}

static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *nest;

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = ets_class_id(sch, cl);
	tcm->tcm_info = cl->qdisc->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;
	if (!ets_class_is_strict(q, cl)) {
		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
			goto nla_put_failure;
	}
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct Qdisc *cl_q = cl->qdisc;

	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
	    qdisc_qstats_copy(d, cl_q) < 0)
		return -1;

	return 0;
}

static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct ets_sched *q = qdisc_priv(sch);
	int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->nbands; i++) {
		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
			break;
	}
}

static struct tcf_block *
ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
		    struct netlink_ext_ack *extack)
{
	struct ets_sched *q = qdisc_priv(sch);

	if (cl) {
		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
		return NULL;
	}

	return q->block;
}

static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
					u32 classid)
{
	return ets_class_find(sch, classid);
}

static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
}
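
/* Pick the target band for a packet: an skb->priority whose major number
 * matches this qdisc's handle selects the band directly via the minor number.
 * Otherwise the attached tc filters are consulted, and if they do not resolve
 * to a class, the priomap indexed by skb->priority is used as a fallback.
 */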
static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct ets_sched *q = qdisc_priv(sch);
	u32 band = skb->priority;
	struct tcf_result res;
	struct tcf_proto *fl;
	int err;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	if (TC_H_MAJ(skb->priority) != sch->handle) {
		fl = rcu_dereference_bh(q->filter_list);
		err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
		switch (err) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if (!fl || err < 0) {
			if (TC_H_MAJ(band))
				band = 0;
			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
		}
		band = res.classid;
	}
	band = TC_H_MIN(band) - 1;
	if (band >= q->nbands)
		return &q->classes[q->prio2band[0]];
	return &q->classes[band];
}

static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb);
	struct ets_sched *q = qdisc_priv(sch);
	struct ets_class *cl;
	int err = 0;
	bool first;

	cl = ets_classify(skb, sch, &err);
	if (!cl) {
		if (err & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return err;
	}

	first = !cl->qdisc->q.qlen;
	err = qdisc_enqueue(skb, cl->qdisc, to_free);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			qdisc_qstats_drop(sch);
		}
		return err;
	}
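
	/* A bandwidth-sharing band that just became non-empty joins the tail
	 * of the active list with a full quantum worth of deficit.
	 */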
	if (first && !ets_class_is_strict(q, cl)) {
		list_add_tail(&cl->alist, &q->active);
		cl->deficit = cl->quantum;
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return err;
}

static struct sk_buff *
ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
{
	qdisc_bstats_update(sch, skb);
	qdisc_qstats_backlog_dec(sch, skb);
	sch->q.qlen--;
	return skb;
}
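
/* Dequeue: strict bands 0..nstrict-1 are polled first, in priority order.
 * Only when all of them are empty is the active list of bandwidth-sharing
 * bands serviced, using the deficit round robin described at the top of this
 * file.
 */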
static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	struct ets_class *cl;
	struct sk_buff *skb;
	unsigned int band;
	unsigned int len;

	while (1) {
		for (band = 0; band < q->nstrict; band++) {
			cl = &q->classes[band];
			skb = qdisc_dequeue_peeked(cl->qdisc);
			if (skb)
				return ets_qdisc_dequeue_skb(sch, skb);
		}

		if (list_empty(&q->active))
			goto out;

		cl = list_first_entry(&q->active, struct ets_class, alist);
		skb = cl->qdisc->ops->peek(cl->qdisc);
		if (!skb) {
			qdisc_warn_nonwc(__func__, cl->qdisc);
			goto out;
		}

		len = qdisc_pkt_len(skb);
		if (len <= cl->deficit) {
			cl->deficit -= len;
			skb = qdisc_dequeue_peeked(cl->qdisc);
			if (unlikely(!skb))
				goto out;
			if (cl->qdisc->q.qlen == 0)
				list_del(&cl->alist);
			return ets_qdisc_dequeue_skb(sch, skb);
		}

		cl->deficit += cl->quantum;
		list_move_tail(&cl->alist, &q->active);
	}
out:
	return NULL;
}

static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
				   unsigned int nbands, u8 *priomap,
				   struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int prio = 0;
	u8 band;
	int rem;
	int err;

	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
				    ets_priomap_policy, NL_VALIDATE_STRICT,
				    extack);
	if (err)
		return err;

	nla_for_each_nested(attr, priomap_attr, rem) {
		switch (nla_type(attr)) {
		case TCA_ETS_PRIOMAP_BAND:
			if (prio > TC_PRIO_MAX) {
				NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
				return -EINVAL;
			}
			band = nla_get_u8(attr);
			if (band >= nbands) {
				NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
				return -EINVAL;
			}
			priomap[prio++] = band;
			break;
		default:
			WARN_ON_ONCE(1); /* Validate should have caught this. */
			return -EINVAL;
		}
	}

	return 0;
}

static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
				  unsigned int nbands, unsigned int nstrict,
				  unsigned int *quanta,
				  struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int band = nstrict;
	int rem;
	int err;

	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
				    ets_quanta_policy, NL_VALIDATE_STRICT,
				    extack);
	if (err < 0)
		return err;

	nla_for_each_nested(attr, quanta_attr, rem) {
		switch (nla_type(attr)) {
		case TCA_ETS_QUANTA_BAND:
			if (band >= nbands) {
				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
				return -EINVAL;
			}
			err = ets_quantum_parse(sch, attr, &quanta[band++],
						extack);
			if (err)
				return err;
			break;
		default:
			WARN_ON_ONCE(1); /* Validate should have caught this. */
			return -EINVAL;
		}
	}

	return 0;
}
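
/* Parse and apply a new configuration. Validation and allocation of any new
 * child qdiscs happen before the commit, the switch to the new band layout is
 * done under sch_tree_lock(), and the offload notification plus release of
 * removed child qdiscs happen after the lock is dropped.
 */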
static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
			    struct netlink_ext_ack *extack)
{
	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int oldbands = q->nbands;
	u8 priomap[TC_PRIO_MAX + 1];
	unsigned int nstrict = 0;
	unsigned int nbands;
	unsigned int i;
	int err;

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_NBANDS]) {
		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
		return -EINVAL;
	}
	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
		return -EINVAL;
	}
	/* Unless overridden, traffic goes to the last band. */
	memset(priomap, nbands - 1, sizeof(priomap));

	if (tb[TCA_ETS_NSTRICT]) {
		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
		if (nstrict > nbands) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
			return -EINVAL;
		}
	}

	if (tb[TCA_ETS_PRIOMAP]) {
		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
					      nbands, priomap, extack);
		if (err)
			return err;
	}

	if (tb[TCA_ETS_QUANTA]) {
		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
					     nbands, nstrict, quanta, extack);
		if (err)
			return err;
	}
	/* If there are more bands than strict + quanta provided, the remaining
	 * ones are ETS with quantum of MTU. Initialize the missing values here.
	 */
	for (i = nstrict; i < nbands; i++) {
		if (!quanta[i])
			quanta[i] = psched_mtu(qdisc_dev(sch));
	}

	/* Before commit, make sure we can allocate all new qdiscs */
	for (i = oldbands; i < nbands; i++) {
		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					      ets_class_id(sch, &q->classes[i]),
					      extack);
		if (!queues[i]) {
			while (i > oldbands)
				qdisc_put(queues[--i]);
			return -ENOMEM;
		}
	}

	sch_tree_lock(sch);

	q->nbands = nbands;
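	/* Bands that used to be strict but are bandwidth-sharing after this
	 * change must join the active list if they already hold packets.
	 */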
	for (i = nstrict; i < q->nstrict; i++) {
		if (q->classes[i].qdisc->q.qlen) {
			list_add_tail(&q->classes[i].alist, &q->active);
			q->classes[i].deficit = quanta[i];
		}
	}
	for (i = q->nbands; i < oldbands; i++) {
		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
			list_del(&q->classes[i].alist);
		qdisc_tree_flush_backlog(q->classes[i].qdisc);
	}
	q->nstrict = nstrict;
	memcpy(q->prio2band, priomap, sizeof(priomap));

	for (i = 0; i < q->nbands; i++)
		q->classes[i].quantum = quanta[i];

	for (i = oldbands; i < q->nbands; i++) {
		q->classes[i].qdisc = queues[i];
		if (q->classes[i].qdisc != &noop_qdisc)
			qdisc_hash_add(q->classes[i].qdisc, true);
	}

	sch_tree_unlock(sch);

	ets_offload_change(sch);
	for (i = q->nbands; i < oldbands; i++) {
		qdisc_put(q->classes[i].qdisc);
		q->classes[i].qdisc = NULL;
		q->classes[i].quantum = 0;
		q->classes[i].deficit = 0;
		gnet_stats_basic_sync_init(&q->classes[i].bstats);
		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
	}
	return 0;
}

static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			  struct netlink_ext_ack *extack)
{
	struct ets_sched *q = qdisc_priv(sch);
	int err, i;

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	INIT_LIST_HEAD(&q->active);
	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
		INIT_LIST_HEAD(&q->classes[i].alist);

	return ets_qdisc_change(sch, opt, extack);
}

static void ets_qdisc_reset(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band;

	for (band = q->nstrict; band < q->nbands; band++) {
		if (q->classes[band].qdisc->q.qlen)
			list_del(&q->classes[band].alist);
	}
	for (band = 0; band < q->nbands; band++)
		qdisc_reset(q->classes[band].qdisc);
}

static void ets_qdisc_destroy(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band;

	ets_offload_destroy(sch);
	tcf_block_put(q->block);
	for (band = 0; band < q->nbands; band++)
		qdisc_put(q->classes[band].qdisc);
}

static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *opts;
	struct nlattr *nest;
	int band;
	int prio;
	int err;

	err = ets_offload_dump(sch);
	if (err)
		return err;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!opts)
		goto nla_err;

	if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
		goto nla_err;

	if (q->nstrict &&
	    nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
		goto nla_err;

	if (q->nbands > q->nstrict) {
		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
		if (!nest)
			goto nla_err;

		for (band = q->nstrict; band < q->nbands; band++) {
			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
					q->classes[band].quantum))
				goto nla_err;
		}

		nla_nest_end(skb, nest);
	}

	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
	if (!nest)
		goto nla_err;

	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
			goto nla_err;
	}

	nla_nest_end(skb, nest);

	return nla_nest_end(skb, opts);

nla_err:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static const struct Qdisc_class_ops ets_class_ops = {
	.change		= ets_class_change,
	.graft		= ets_class_graft,
	.leaf		= ets_class_leaf,
	.find		= ets_class_find,
	.qlen_notify	= ets_class_qlen_notify,
	.dump		= ets_class_dump,
	.dump_stats	= ets_class_dump_stats,
	.walk		= ets_qdisc_walk,
	.tcf_block	= ets_qdisc_tcf_block,
	.bind_tcf	= ets_qdisc_bind_tcf,
	.unbind_tcf	= ets_qdisc_unbind_tcf,
};

static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
	.cl_ops		= &ets_class_ops,
	.id		= "ets",
	.priv_size	= sizeof(struct ets_sched),
	.enqueue	= ets_qdisc_enqueue,
	.dequeue	= ets_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.change		= ets_qdisc_change,
	.init		= ets_qdisc_init,
	.reset		= ets_qdisc_reset,
	.destroy	= ets_qdisc_destroy,
	.dump		= ets_qdisc_dump,
	.owner		= THIS_MODULE,
};

static int __init ets_init(void)
{
	return register_qdisc(&ets_qdisc_ops);
}

static void __exit ets_exit(void)
{
	unregister_qdisc(&ets_qdisc_ops);
}

module_init(ets_init);
module_exit(ets_exit);
MODULE_LICENSE("GPL");