xref: /openbmc/linux/net/sched/sch_etf.c (revision 8cb08174)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
4  *
5  * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
6  *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
7  */
8 
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/errqueue.h>
15 #include <linux/rbtree.h>
16 #include <linux/skbuff.h>
17 #include <linux/posix-timers.h>
18 #include <net/netlink.h>
19 #include <net/sch_generic.h>
20 #include <net/pkt_sched.h>
21 #include <net/sock.h>
22 
/* Non-zero when the ETF options pointed to by @x have the deadline-mode
 * flag set.
 */
#define DEADLINE_MODE_IS_ON(x) (TC_ETF_DEADLINE_MODE_ON & (x)->flags)
/* Non-zero when the ETF options pointed to by @x have the offload flag set. */
#define OFFLOAD_IS_ON(x) (TC_ETF_OFFLOAD_ON & (x)->flags)
25 
/* Private state of one ETF qdisc instance (what qdisc_priv() returns). */
struct etf_sched_data {
	bool offload; /* Copied from the TC_ETF_OFFLOAD_ON flag at init. */
	bool deadline_mode; /* Copied from the TC_ETF_DEADLINE_MODE_ON flag at init. */
	int clockid; /* Clock id taken from the qdisc options. */
	int queue; /* Index of this qdisc's tx queue on the netdevice. */
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head; /* Queued skbs, ordered by skb->tstamp. */
	struct qdisc_watchdog watchdog; /* Armed for the earliest queued skb. */
	ktime_t (*get_time)(void); /* Clock reader selected from clockid at init. */
};
37 
/* Netlink attribute policy used by etf_init() when parsing the nested
 * options: TCA_ETF_PARMS carries a struct tc_etf_qopt.
 */
static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};
41 
42 static inline int validate_input_params(struct tc_etf_qopt *qopt,
43 					struct netlink_ext_ack *extack)
44 {
45 	/* Check if params comply to the following rules:
46 	 *	* Clockid and delta must be valid.
47 	 *
48 	 *	* Dynamic clockids are not supported.
49 	 *
50 	 *	* Delta must be a positive integer.
51 	 *
52 	 * Also note that for the HW offload case, we must
53 	 * expect that system clocks have been synchronized to PHC.
54 	 */
55 	if (qopt->clockid < 0) {
56 		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
57 		return -ENOTSUPP;
58 	}
59 
60 	if (qopt->clockid != CLOCK_TAI) {
61 		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
62 		return -EINVAL;
63 	}
64 
65 	if (qopt->delta < 0) {
66 		NL_SET_ERR_MSG(extack, "Delta must be positive");
67 		return -EINVAL;
68 	}
69 
70 	return 0;
71 }
72 
73 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
74 {
75 	struct etf_sched_data *q = qdisc_priv(sch);
76 	ktime_t txtime = nskb->tstamp;
77 	struct sock *sk = nskb->sk;
78 	ktime_t now;
79 
80 	if (!sk)
81 		return false;
82 
83 	if (!sock_flag(sk, SOCK_TXTIME))
84 		return false;
85 
86 	/* We don't perform crosstimestamping.
87 	 * Drop if packet's clockid differs from qdisc's.
88 	 */
89 	if (sk->sk_clockid != q->clockid)
90 		return false;
91 
92 	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
93 		return false;
94 
95 	now = q->get_time();
96 	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
97 		return false;
98 
99 	return true;
100 }
101 
102 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
103 {
104 	struct etf_sched_data *q = qdisc_priv(sch);
105 	struct rb_node *p;
106 
107 	p = rb_first_cached(&q->head);
108 	if (!p)
109 		return NULL;
110 
111 	return rb_to_skb(p);
112 }
113 
114 static void reset_watchdog(struct Qdisc *sch)
115 {
116 	struct etf_sched_data *q = qdisc_priv(sch);
117 	struct sk_buff *skb = etf_peek_timesortedlist(sch);
118 	ktime_t next;
119 
120 	if (!skb) {
121 		qdisc_watchdog_cancel(&q->watchdog);
122 		return;
123 	}
124 
125 	next = ktime_sub_ns(skb->tstamp, q->delta);
126 	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
127 }
128 
129 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
130 {
131 	struct sock_exterr_skb *serr;
132 	struct sk_buff *clone;
133 	ktime_t txtime = skb->tstamp;
134 
135 	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
136 		return;
137 
138 	clone = skb_clone(skb, GFP_ATOMIC);
139 	if (!clone)
140 		return;
141 
142 	serr = SKB_EXT_ERR(clone);
143 	serr->ee.ee_errno = err;
144 	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
145 	serr->ee.ee_type = 0;
146 	serr->ee.ee_code = code;
147 	serr->ee.ee_pad = 0;
148 	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
149 	serr->ee.ee_info = txtime; /* low part of tstamp */
150 
151 	if (sock_queue_err_skb(skb->sk, clone))
152 		kfree_skb(clone);
153 }
154 
155 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
156 				      struct sk_buff **to_free)
157 {
158 	struct etf_sched_data *q = qdisc_priv(sch);
159 	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
160 	ktime_t txtime = nskb->tstamp;
161 	bool leftmost = true;
162 
163 	if (!is_packet_valid(sch, nskb)) {
164 		report_sock_error(nskb, EINVAL,
165 				  SO_EE_CODE_TXTIME_INVALID_PARAM);
166 		return qdisc_drop(nskb, sch, to_free);
167 	}
168 
169 	while (*p) {
170 		struct sk_buff *skb;
171 
172 		parent = *p;
173 		skb = rb_to_skb(parent);
174 		if (ktime_after(txtime, skb->tstamp)) {
175 			p = &parent->rb_right;
176 			leftmost = false;
177 		} else {
178 			p = &parent->rb_left;
179 		}
180 	}
181 	rb_link_node(&nskb->rbnode, parent, p);
182 	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
183 
184 	qdisc_qstats_backlog_inc(sch, nskb);
185 	sch->q.qlen++;
186 
187 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
188 	reset_watchdog(sch);
189 
190 	return NET_XMIT_SUCCESS;
191 }
192 
193 static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
194 				ktime_t now)
195 {
196 	struct etf_sched_data *q = qdisc_priv(sch);
197 	struct sk_buff *to_free = NULL;
198 	struct sk_buff *tmp = NULL;
199 
200 	skb_rbtree_walk_from_safe(skb, tmp) {
201 		if (ktime_after(skb->tstamp, now))
202 			break;
203 
204 		rb_erase_cached(&skb->rbnode, &q->head);
205 
206 		/* The rbnode field in the skb re-uses these fields, now that
207 		 * we are done with the rbnode, reset them.
208 		 */
209 		skb->next = NULL;
210 		skb->prev = NULL;
211 		skb->dev = qdisc_dev(sch);
212 
213 		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
214 
215 		qdisc_qstats_backlog_dec(sch, skb);
216 		qdisc_drop(skb, sch, &to_free);
217 		qdisc_qstats_overlimit(sch);
218 		sch->q.qlen--;
219 	}
220 
221 	kfree_skb_list(to_free);
222 }
223 
224 static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
225 {
226 	struct etf_sched_data *q = qdisc_priv(sch);
227 
228 	rb_erase_cached(&skb->rbnode, &q->head);
229 
230 	/* The rbnode field in the skb re-uses these fields, now that
231 	 * we are done with the rbnode, reset them.
232 	 */
233 	skb->next = NULL;
234 	skb->prev = NULL;
235 	skb->dev = qdisc_dev(sch);
236 
237 	qdisc_qstats_backlog_dec(sch, skb);
238 
239 	qdisc_bstats_update(sch, skb);
240 
241 	q->last = skb->tstamp;
242 
243 	sch->q.qlen--;
244 }
245 
246 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
247 {
248 	struct etf_sched_data *q = qdisc_priv(sch);
249 	struct sk_buff *skb;
250 	ktime_t now, next;
251 
252 	skb = etf_peek_timesortedlist(sch);
253 	if (!skb)
254 		return NULL;
255 
256 	now = q->get_time();
257 
258 	/* Drop if packet has expired while in queue. */
259 	if (ktime_before(skb->tstamp, now)) {
260 		timesortedlist_drop(sch, skb, now);
261 		skb = NULL;
262 		goto out;
263 	}
264 
265 	/* When in deadline mode, dequeue as soon as possible and change the
266 	 * txtime from deadline to (now + delta).
267 	 */
268 	if (q->deadline_mode) {
269 		timesortedlist_remove(sch, skb);
270 		skb->tstamp = now;
271 		goto out;
272 	}
273 
274 	next = ktime_sub_ns(skb->tstamp, q->delta);
275 
276 	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
277 	if (ktime_after(now, next))
278 		timesortedlist_remove(sch, skb);
279 	else
280 		skb = NULL;
281 
282 out:
283 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
284 	reset_watchdog(sch);
285 
286 	return skb;
287 }
288 
289 static void etf_disable_offload(struct net_device *dev,
290 				struct etf_sched_data *q)
291 {
292 	struct tc_etf_qopt_offload etf = { };
293 	const struct net_device_ops *ops;
294 	int err;
295 
296 	if (!q->offload)
297 		return;
298 
299 	ops = dev->netdev_ops;
300 	if (!ops->ndo_setup_tc)
301 		return;
302 
303 	etf.queue = q->queue;
304 	etf.enable = 0;
305 
306 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
307 	if (err < 0)
308 		pr_warn("Couldn't disable ETF offload for queue %d\n",
309 			etf.queue);
310 }
311 
312 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
313 			      struct netlink_ext_ack *extack)
314 {
315 	const struct net_device_ops *ops = dev->netdev_ops;
316 	struct tc_etf_qopt_offload etf = { };
317 	int err;
318 
319 	if (q->offload)
320 		return 0;
321 
322 	if (!ops->ndo_setup_tc) {
323 		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
324 		return -EOPNOTSUPP;
325 	}
326 
327 	etf.queue = q->queue;
328 	etf.enable = 1;
329 
330 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
331 	if (err < 0) {
332 		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
333 		return err;
334 	}
335 
336 	return 0;
337 }
338 
339 static int etf_init(struct Qdisc *sch, struct nlattr *opt,
340 		    struct netlink_ext_ack *extack)
341 {
342 	struct etf_sched_data *q = qdisc_priv(sch);
343 	struct net_device *dev = qdisc_dev(sch);
344 	struct nlattr *tb[TCA_ETF_MAX + 1];
345 	struct tc_etf_qopt *qopt;
346 	int err;
347 
348 	if (!opt) {
349 		NL_SET_ERR_MSG(extack,
350 			       "Missing ETF qdisc options which are mandatory");
351 		return -EINVAL;
352 	}
353 
354 	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
355 					  extack);
356 	if (err < 0)
357 		return err;
358 
359 	if (!tb[TCA_ETF_PARMS]) {
360 		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
361 		return -EINVAL;
362 	}
363 
364 	qopt = nla_data(tb[TCA_ETF_PARMS]);
365 
366 	pr_debug("delta %d clockid %d offload %s deadline %s\n",
367 		 qopt->delta, qopt->clockid,
368 		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
369 		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
370 
371 	err = validate_input_params(qopt, extack);
372 	if (err < 0)
373 		return err;
374 
375 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
376 
377 	if (OFFLOAD_IS_ON(qopt)) {
378 		err = etf_enable_offload(dev, q, extack);
379 		if (err < 0)
380 			return err;
381 	}
382 
383 	/* Everything went OK, save the parameters used. */
384 	q->delta = qopt->delta;
385 	q->clockid = qopt->clockid;
386 	q->offload = OFFLOAD_IS_ON(qopt);
387 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
388 
389 	switch (q->clockid) {
390 	case CLOCK_REALTIME:
391 		q->get_time = ktime_get_real;
392 		break;
393 	case CLOCK_MONOTONIC:
394 		q->get_time = ktime_get;
395 		break;
396 	case CLOCK_BOOTTIME:
397 		q->get_time = ktime_get_boottime;
398 		break;
399 	case CLOCK_TAI:
400 		q->get_time = ktime_get_clocktai;
401 		break;
402 	default:
403 		NL_SET_ERR_MSG(extack, "Clockid is not supported");
404 		return -ENOTSUPP;
405 	}
406 
407 	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
408 
409 	return 0;
410 }
411 
412 static void timesortedlist_clear(struct Qdisc *sch)
413 {
414 	struct etf_sched_data *q = qdisc_priv(sch);
415 	struct rb_node *p = rb_first_cached(&q->head);
416 
417 	while (p) {
418 		struct sk_buff *skb = rb_to_skb(p);
419 
420 		p = rb_next(p);
421 
422 		rb_erase_cached(&skb->rbnode, &q->head);
423 		rtnl_kfree_skbs(skb, skb);
424 		sch->q.qlen--;
425 	}
426 }
427 
428 static void etf_reset(struct Qdisc *sch)
429 {
430 	struct etf_sched_data *q = qdisc_priv(sch);
431 
432 	/* Only cancel watchdog if it's been initialized. */
433 	if (q->watchdog.qdisc == sch)
434 		qdisc_watchdog_cancel(&q->watchdog);
435 
436 	/* No matter which mode we are on, it's safe to clear both lists. */
437 	timesortedlist_clear(sch);
438 	__qdisc_reset_queue(&sch->q);
439 
440 	sch->qstats.backlog = 0;
441 	sch->q.qlen = 0;
442 
443 	q->last = 0;
444 }
445 
446 static void etf_destroy(struct Qdisc *sch)
447 {
448 	struct etf_sched_data *q = qdisc_priv(sch);
449 	struct net_device *dev = qdisc_dev(sch);
450 
451 	/* Only cancel watchdog if it's been initialized. */
452 	if (q->watchdog.qdisc == sch)
453 		qdisc_watchdog_cancel(&q->watchdog);
454 
455 	etf_disable_offload(dev, q);
456 }
457 
458 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
459 {
460 	struct etf_sched_data *q = qdisc_priv(sch);
461 	struct tc_etf_qopt opt = { };
462 	struct nlattr *nest;
463 
464 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
465 	if (!nest)
466 		goto nla_put_failure;
467 
468 	opt.delta = q->delta;
469 	opt.clockid = q->clockid;
470 	if (q->offload)
471 		opt.flags |= TC_ETF_OFFLOAD_ON;
472 
473 	if (q->deadline_mode)
474 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
475 
476 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
477 		goto nla_put_failure;
478 
479 	return nla_nest_end(skb, nest);
480 
481 nla_put_failure:
482 	nla_nest_cancel(skb, nest);
483 	return -1;
484 }
485 
486 static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
487 	.id		=	"etf",
488 	.priv_size	=	sizeof(struct etf_sched_data),
489 	.enqueue	=	etf_enqueue_timesortedlist,
490 	.dequeue	=	etf_dequeue_timesortedlist,
491 	.peek		=	etf_peek_timesortedlist,
492 	.init		=	etf_init,
493 	.reset		=	etf_reset,
494 	.destroy	=	etf_destroy,
495 	.dump		=	etf_dump,
496 	.owner		=	THIS_MODULE,
497 };
498 
/* Module entry point: register the ETF qdisc operations and return the
 * result of register_qdisc().
 */
static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}
503 
/* Module exit point: unregister the ETF qdisc operations. */
static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");
511