xref: /openbmc/linux/net/sched/sch_etf.c (revision f21e49be)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
4  *
5  * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
6  *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
7  */
8 
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/errqueue.h>
15 #include <linux/rbtree.h>
16 #include <linux/skbuff.h>
17 #include <linux/posix-timers.h>
18 #include <net/netlink.h>
19 #include <net/sch_generic.h>
20 #include <net/pkt_sched.h>
21 #include <net/sock.h>
22 
/* Flag tests for the user-supplied options. 'x' is a pointer to an object
 * with a 'flags' member (struct tc_etf_qopt in this file). Each macro
 * expands to the masked flag bits, so the result is non-zero when the flag
 * is set and zero otherwise; it is not normalised to 0/1.
 */
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
26 
/* Private state of one ETF qdisc instance, reached through qdisc_priv(). */
struct etf_sched_data {
	/* Copy of the TC_ETF_OFFLOAD_ON flag, stored by etf_init(). */
	bool offload;
	/* Copy of the TC_ETF_DEADLINE_MODE_ON flag; when set, the head packet
	 * is dequeued as soon as possible instead of waiting for its window.
	 */
	bool deadline_mode;
	/* Copy of the TC_ETF_SKIP_SOCK_CHECK flag; when set, is_packet_valid()
	 * skips every check that looks at the sending socket.
	 */
	bool skip_sock_check;
	/* Clock used for txtimes and for the watchdog. */
	int clockid;
	/* Index of this qdisc's TX queue on the device, handed to the driver
	 * when offload is enabled or disabled.
	 */
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	/* Queued skbs in an rb-tree keyed by skb->tstamp (leftmost is cached). */
	struct rb_root_cached head;
	/* Timer armed at (txtime - delta) of the earliest queued skb. */
	struct qdisc_watchdog watchdog;
	/* Returns the current time of 'clockid'; selected in etf_init(). */
	ktime_t (*get_time)(void);
};
39 
/* Netlink attribute policy used by etf_init() when parsing the nested
 * options. Only TCA_ETF_PARMS has an entry; its length is tied to
 * sizeof(struct tc_etf_qopt).
 */
static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};
43 
44 static inline int validate_input_params(struct tc_etf_qopt *qopt,
45 					struct netlink_ext_ack *extack)
46 {
47 	/* Check if params comply to the following rules:
48 	 *	* Clockid and delta must be valid.
49 	 *
50 	 *	* Dynamic clockids are not supported.
51 	 *
52 	 *	* Delta must be a positive integer.
53 	 *
54 	 * Also note that for the HW offload case, we must
55 	 * expect that system clocks have been synchronized to PHC.
56 	 */
57 	if (qopt->clockid < 0) {
58 		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
59 		return -ENOTSUPP;
60 	}
61 
62 	if (qopt->clockid != CLOCK_TAI) {
63 		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
64 		return -EINVAL;
65 	}
66 
67 	if (qopt->delta < 0) {
68 		NL_SET_ERR_MSG(extack, "Delta must be positive");
69 		return -EINVAL;
70 	}
71 
72 	return 0;
73 }
74 
75 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
76 {
77 	struct etf_sched_data *q = qdisc_priv(sch);
78 	ktime_t txtime = nskb->tstamp;
79 	struct sock *sk = nskb->sk;
80 	ktime_t now;
81 
82 	if (q->skip_sock_check)
83 		goto skip;
84 
85 	if (!sk || !sk_fullsock(sk))
86 		return false;
87 
88 	if (!sock_flag(sk, SOCK_TXTIME))
89 		return false;
90 
91 	/* We don't perform crosstimestamping.
92 	 * Drop if packet's clockid differs from qdisc's.
93 	 */
94 	if (sk->sk_clockid != q->clockid)
95 		return false;
96 
97 	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
98 		return false;
99 
100 skip:
101 	now = q->get_time();
102 	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
103 		return false;
104 
105 	return true;
106 }
107 
108 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
109 {
110 	struct etf_sched_data *q = qdisc_priv(sch);
111 	struct rb_node *p;
112 
113 	p = rb_first_cached(&q->head);
114 	if (!p)
115 		return NULL;
116 
117 	return rb_to_skb(p);
118 }
119 
120 static void reset_watchdog(struct Qdisc *sch)
121 {
122 	struct etf_sched_data *q = qdisc_priv(sch);
123 	struct sk_buff *skb = etf_peek_timesortedlist(sch);
124 	ktime_t next;
125 
126 	if (!skb) {
127 		qdisc_watchdog_cancel(&q->watchdog);
128 		return;
129 	}
130 
131 	next = ktime_sub_ns(skb->tstamp, q->delta);
132 	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
133 }
134 
135 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
136 {
137 	struct sock_exterr_skb *serr;
138 	struct sk_buff *clone;
139 	ktime_t txtime = skb->tstamp;
140 	struct sock *sk = skb->sk;
141 
142 	if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
143 		return;
144 
145 	clone = skb_clone(skb, GFP_ATOMIC);
146 	if (!clone)
147 		return;
148 
149 	serr = SKB_EXT_ERR(clone);
150 	serr->ee.ee_errno = err;
151 	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
152 	serr->ee.ee_type = 0;
153 	serr->ee.ee_code = code;
154 	serr->ee.ee_pad = 0;
155 	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
156 	serr->ee.ee_info = txtime; /* low part of tstamp */
157 
158 	if (sock_queue_err_skb(sk, clone))
159 		kfree_skb(clone);
160 }
161 
162 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
163 				      struct sk_buff **to_free)
164 {
165 	struct etf_sched_data *q = qdisc_priv(sch);
166 	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
167 	ktime_t txtime = nskb->tstamp;
168 	bool leftmost = true;
169 
170 	if (!is_packet_valid(sch, nskb)) {
171 		report_sock_error(nskb, EINVAL,
172 				  SO_EE_CODE_TXTIME_INVALID_PARAM);
173 		return qdisc_drop(nskb, sch, to_free);
174 	}
175 
176 	while (*p) {
177 		struct sk_buff *skb;
178 
179 		parent = *p;
180 		skb = rb_to_skb(parent);
181 		if (ktime_compare(txtime, skb->tstamp) >= 0) {
182 			p = &parent->rb_right;
183 			leftmost = false;
184 		} else {
185 			p = &parent->rb_left;
186 		}
187 	}
188 	rb_link_node(&nskb->rbnode, parent, p);
189 	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
190 
191 	qdisc_qstats_backlog_inc(sch, nskb);
192 	sch->q.qlen++;
193 
194 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
195 	reset_watchdog(sch);
196 
197 	return NET_XMIT_SUCCESS;
198 }
199 
200 static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
201 				ktime_t now)
202 {
203 	struct etf_sched_data *q = qdisc_priv(sch);
204 	struct sk_buff *to_free = NULL;
205 	struct sk_buff *tmp = NULL;
206 
207 	skb_rbtree_walk_from_safe(skb, tmp) {
208 		if (ktime_after(skb->tstamp, now))
209 			break;
210 
211 		rb_erase_cached(&skb->rbnode, &q->head);
212 
213 		/* The rbnode field in the skb re-uses these fields, now that
214 		 * we are done with the rbnode, reset them.
215 		 */
216 		skb->next = NULL;
217 		skb->prev = NULL;
218 		skb->dev = qdisc_dev(sch);
219 
220 		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
221 
222 		qdisc_qstats_backlog_dec(sch, skb);
223 		qdisc_drop(skb, sch, &to_free);
224 		qdisc_qstats_overlimit(sch);
225 		sch->q.qlen--;
226 	}
227 
228 	kfree_skb_list(to_free);
229 }
230 
231 static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
232 {
233 	struct etf_sched_data *q = qdisc_priv(sch);
234 
235 	rb_erase_cached(&skb->rbnode, &q->head);
236 
237 	/* The rbnode field in the skb re-uses these fields, now that
238 	 * we are done with the rbnode, reset them.
239 	 */
240 	skb->next = NULL;
241 	skb->prev = NULL;
242 	skb->dev = qdisc_dev(sch);
243 
244 	qdisc_qstats_backlog_dec(sch, skb);
245 
246 	qdisc_bstats_update(sch, skb);
247 
248 	q->last = skb->tstamp;
249 
250 	sch->q.qlen--;
251 }
252 
253 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
254 {
255 	struct etf_sched_data *q = qdisc_priv(sch);
256 	struct sk_buff *skb;
257 	ktime_t now, next;
258 
259 	skb = etf_peek_timesortedlist(sch);
260 	if (!skb)
261 		return NULL;
262 
263 	now = q->get_time();
264 
265 	/* Drop if packet has expired while in queue. */
266 	if (ktime_before(skb->tstamp, now)) {
267 		timesortedlist_drop(sch, skb, now);
268 		skb = NULL;
269 		goto out;
270 	}
271 
272 	/* When in deadline mode, dequeue as soon as possible and change the
273 	 * txtime from deadline to (now + delta).
274 	 */
275 	if (q->deadline_mode) {
276 		timesortedlist_remove(sch, skb);
277 		skb->tstamp = now;
278 		goto out;
279 	}
280 
281 	next = ktime_sub_ns(skb->tstamp, q->delta);
282 
283 	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
284 	if (ktime_after(now, next))
285 		timesortedlist_remove(sch, skb);
286 	else
287 		skb = NULL;
288 
289 out:
290 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
291 	reset_watchdog(sch);
292 
293 	return skb;
294 }
295 
296 static void etf_disable_offload(struct net_device *dev,
297 				struct etf_sched_data *q)
298 {
299 	struct tc_etf_qopt_offload etf = { };
300 	const struct net_device_ops *ops;
301 	int err;
302 
303 	if (!q->offload)
304 		return;
305 
306 	ops = dev->netdev_ops;
307 	if (!ops->ndo_setup_tc)
308 		return;
309 
310 	etf.queue = q->queue;
311 	etf.enable = 0;
312 
313 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
314 	if (err < 0)
315 		pr_warn("Couldn't disable ETF offload for queue %d\n",
316 			etf.queue);
317 }
318 
319 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
320 			      struct netlink_ext_ack *extack)
321 {
322 	const struct net_device_ops *ops = dev->netdev_ops;
323 	struct tc_etf_qopt_offload etf = { };
324 	int err;
325 
326 	if (q->offload)
327 		return 0;
328 
329 	if (!ops->ndo_setup_tc) {
330 		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
331 		return -EOPNOTSUPP;
332 	}
333 
334 	etf.queue = q->queue;
335 	etf.enable = 1;
336 
337 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
338 	if (err < 0) {
339 		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
340 		return err;
341 	}
342 
343 	return 0;
344 }
345 
346 static int etf_init(struct Qdisc *sch, struct nlattr *opt,
347 		    struct netlink_ext_ack *extack)
348 {
349 	struct etf_sched_data *q = qdisc_priv(sch);
350 	struct net_device *dev = qdisc_dev(sch);
351 	struct nlattr *tb[TCA_ETF_MAX + 1];
352 	struct tc_etf_qopt *qopt;
353 	int err;
354 
355 	if (!opt) {
356 		NL_SET_ERR_MSG(extack,
357 			       "Missing ETF qdisc options which are mandatory");
358 		return -EINVAL;
359 	}
360 
361 	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
362 					  extack);
363 	if (err < 0)
364 		return err;
365 
366 	if (!tb[TCA_ETF_PARMS]) {
367 		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
368 		return -EINVAL;
369 	}
370 
371 	qopt = nla_data(tb[TCA_ETF_PARMS]);
372 
373 	pr_debug("delta %d clockid %d offload %s deadline %s\n",
374 		 qopt->delta, qopt->clockid,
375 		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
376 		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
377 
378 	err = validate_input_params(qopt, extack);
379 	if (err < 0)
380 		return err;
381 
382 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
383 
384 	if (OFFLOAD_IS_ON(qopt)) {
385 		err = etf_enable_offload(dev, q, extack);
386 		if (err < 0)
387 			return err;
388 	}
389 
390 	/* Everything went OK, save the parameters used. */
391 	q->delta = qopt->delta;
392 	q->clockid = qopt->clockid;
393 	q->offload = OFFLOAD_IS_ON(qopt);
394 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
395 	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
396 
397 	switch (q->clockid) {
398 	case CLOCK_REALTIME:
399 		q->get_time = ktime_get_real;
400 		break;
401 	case CLOCK_MONOTONIC:
402 		q->get_time = ktime_get;
403 		break;
404 	case CLOCK_BOOTTIME:
405 		q->get_time = ktime_get_boottime;
406 		break;
407 	case CLOCK_TAI:
408 		q->get_time = ktime_get_clocktai;
409 		break;
410 	default:
411 		NL_SET_ERR_MSG(extack, "Clockid is not supported");
412 		return -ENOTSUPP;
413 	}
414 
415 	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
416 
417 	return 0;
418 }
419 
420 static void timesortedlist_clear(struct Qdisc *sch)
421 {
422 	struct etf_sched_data *q = qdisc_priv(sch);
423 	struct rb_node *p = rb_first_cached(&q->head);
424 
425 	while (p) {
426 		struct sk_buff *skb = rb_to_skb(p);
427 
428 		p = rb_next(p);
429 
430 		rb_erase_cached(&skb->rbnode, &q->head);
431 		rtnl_kfree_skbs(skb, skb);
432 		sch->q.qlen--;
433 	}
434 }
435 
436 static void etf_reset(struct Qdisc *sch)
437 {
438 	struct etf_sched_data *q = qdisc_priv(sch);
439 
440 	/* Only cancel watchdog if it's been initialized. */
441 	if (q->watchdog.qdisc == sch)
442 		qdisc_watchdog_cancel(&q->watchdog);
443 
444 	/* No matter which mode we are on, it's safe to clear both lists. */
445 	timesortedlist_clear(sch);
446 	__qdisc_reset_queue(&sch->q);
447 
448 	sch->qstats.backlog = 0;
449 	sch->q.qlen = 0;
450 
451 	q->last = 0;
452 }
453 
454 static void etf_destroy(struct Qdisc *sch)
455 {
456 	struct etf_sched_data *q = qdisc_priv(sch);
457 	struct net_device *dev = qdisc_dev(sch);
458 
459 	/* Only cancel watchdog if it's been initialized. */
460 	if (q->watchdog.qdisc == sch)
461 		qdisc_watchdog_cancel(&q->watchdog);
462 
463 	etf_disable_offload(dev, q);
464 }
465 
466 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
467 {
468 	struct etf_sched_data *q = qdisc_priv(sch);
469 	struct tc_etf_qopt opt = { };
470 	struct nlattr *nest;
471 
472 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
473 	if (!nest)
474 		goto nla_put_failure;
475 
476 	opt.delta = q->delta;
477 	opt.clockid = q->clockid;
478 	if (q->offload)
479 		opt.flags |= TC_ETF_OFFLOAD_ON;
480 
481 	if (q->deadline_mode)
482 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
483 
484 	if (q->skip_sock_check)
485 		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
486 
487 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
488 		goto nla_put_failure;
489 
490 	return nla_nest_end(skb, nest);
491 
492 nla_put_failure:
493 	nla_nest_cancel(skb, nest);
494 	return -1;
495 }
496 
497 static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
498 	.id		=	"etf",
499 	.priv_size	=	sizeof(struct etf_sched_data),
500 	.enqueue	=	etf_enqueue_timesortedlist,
501 	.dequeue	=	etf_dequeue_timesortedlist,
502 	.peek		=	etf_peek_timesortedlist,
503 	.init		=	etf_init,
504 	.reset		=	etf_reset,
505 	.destroy	=	etf_destroy,
506 	.dump		=	etf_dump,
507 	.owner		=	THIS_MODULE,
508 };
509 
510 static int __init etf_module_init(void)
511 {
512 	return register_qdisc(&etf_qdisc_ops);
513 }
514 
515 static void __exit etf_module_exit(void)
516 {
517 	unregister_qdisc(&etf_qdisc_ops);
518 }
519 module_init(etf_module_init)
520 module_exit(etf_module_exit)
521 MODULE_LICENSE("GPL");
522