1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_etf.c Earliest TxTime First queueing discipline. 4 * 5 * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com> 6 * Vinicius Costa Gomes <vinicius.gomes@intel.com> 7 */ 8 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include <linux/errqueue.h> 15 #include <linux/rbtree.h> 16 #include <linux/skbuff.h> 17 #include <linux/posix-timers.h> 18 #include <net/netlink.h> 19 #include <net/sch_generic.h> 20 #include <net/pkt_sched.h> 21 #include <net/sock.h> 22 23 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON) 24 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON) 25 #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK) 26 27 struct etf_sched_data { 28 bool offload; 29 bool deadline_mode; 30 bool skip_sock_check; 31 int clockid; 32 int queue; 33 s32 delta; /* in ns */ 34 ktime_t last; /* The txtime of the last skb sent to the netdevice. */ 35 struct rb_root_cached head; 36 struct qdisc_watchdog watchdog; 37 ktime_t (*get_time)(void); 38 }; 39 40 static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = { 41 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) }, 42 }; 43 44 static inline int validate_input_params(struct tc_etf_qopt *qopt, 45 struct netlink_ext_ack *extack) 46 { 47 /* Check if params comply to the following rules: 48 * * Clockid and delta must be valid. 49 * 50 * * Dynamic clockids are not supported. 51 * 52 * * Delta must be a positive integer. 53 * 54 * Also note that for the HW offload case, we must 55 * expect that system clocks have been synchronized to PHC. 56 */ 57 if (qopt->clockid < 0) { 58 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported"); 59 return -ENOTSUPP; 60 } 61 62 if (qopt->clockid != CLOCK_TAI) { 63 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used"); 64 return -EINVAL; 65 } 66 67 if (qopt->delta < 0) { 68 NL_SET_ERR_MSG(extack, "Delta must be positive"); 69 return -EINVAL; 70 } 71 72 return 0; 73 } 74 75 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) 76 { 77 struct etf_sched_data *q = qdisc_priv(sch); 78 ktime_t txtime = nskb->tstamp; 79 struct sock *sk = nskb->sk; 80 ktime_t now; 81 82 if (q->skip_sock_check) 83 goto skip; 84 85 if (!sk || !sk_fullsock(sk)) 86 return false; 87 88 if (!sock_flag(sk, SOCK_TXTIME)) 89 return false; 90 91 /* We don't perform crosstimestamping. 92 * Drop if packet's clockid differs from qdisc's. 93 */ 94 if (sk->sk_clockid != q->clockid) 95 return false; 96 97 if (sk->sk_txtime_deadline_mode != q->deadline_mode) 98 return false; 99 100 skip: 101 now = q->get_time(); 102 if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) 103 return false; 104 105 return true; 106 } 107 108 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) 109 { 110 struct etf_sched_data *q = qdisc_priv(sch); 111 struct rb_node *p; 112 113 p = rb_first_cached(&q->head); 114 if (!p) 115 return NULL; 116 117 return rb_to_skb(p); 118 } 119 120 static void reset_watchdog(struct Qdisc *sch) 121 { 122 struct etf_sched_data *q = qdisc_priv(sch); 123 struct sk_buff *skb = etf_peek_timesortedlist(sch); 124 ktime_t next; 125 126 if (!skb) { 127 qdisc_watchdog_cancel(&q->watchdog); 128 return; 129 } 130 131 next = ktime_sub_ns(skb->tstamp, q->delta); 132 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); 133 } 134 135 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) 136 { 137 struct sock_exterr_skb *serr; 138 struct sk_buff *clone; 139 ktime_t txtime = skb->tstamp; 140 struct sock *sk = skb->sk; 141 142 if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors)) 143 return; 144 145 clone = skb_clone(skb, GFP_ATOMIC); 146 if (!clone) 147 return; 148 149 serr = SKB_EXT_ERR(clone); 150 serr->ee.ee_errno = err; 151 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME; 152 serr->ee.ee_type = 0; 153 serr->ee.ee_code = code; 154 serr->ee.ee_pad = 0; 155 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ 156 serr->ee.ee_info = txtime; /* low part of tstamp */ 157 158 if (sock_queue_err_skb(sk, clone)) 159 kfree_skb(clone); 160 } 161 162 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, 163 struct sk_buff **to_free) 164 { 165 struct etf_sched_data *q = qdisc_priv(sch); 166 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; 167 ktime_t txtime = nskb->tstamp; 168 bool leftmost = true; 169 170 if (!is_packet_valid(sch, nskb)) { 171 report_sock_error(nskb, EINVAL, 172 SO_EE_CODE_TXTIME_INVALID_PARAM); 173 return qdisc_drop(nskb, sch, to_free); 174 } 175 176 while (*p) { 177 struct sk_buff *skb; 178 179 parent = *p; 180 skb = rb_to_skb(parent); 181 if (ktime_compare(txtime, skb->tstamp) >= 0) { 182 p = &parent->rb_right; 183 leftmost = false; 184 } else { 185 p = &parent->rb_left; 186 } 187 } 188 rb_link_node(&nskb->rbnode, parent, p); 189 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost); 190 191 qdisc_qstats_backlog_inc(sch, nskb); 192 sch->q.qlen++; 193 194 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 195 reset_watchdog(sch); 196 197 return NET_XMIT_SUCCESS; 198 } 199 200 static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb, 201 ktime_t now) 202 { 203 struct etf_sched_data *q = qdisc_priv(sch); 204 struct sk_buff *to_free = NULL; 205 struct sk_buff *tmp = NULL; 206 207 skb_rbtree_walk_from_safe(skb, tmp) { 208 if (ktime_after(skb->tstamp, now)) 209 break; 210 211 rb_erase_cached(&skb->rbnode, &q->head); 212 213 /* The rbnode field in the skb re-uses these fields, now that 214 * we are done with the rbnode, reset them. 215 */ 216 skb->next = NULL; 217 skb->prev = NULL; 218 skb->dev = qdisc_dev(sch); 219 220 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); 221 222 qdisc_qstats_backlog_dec(sch, skb); 223 qdisc_drop(skb, sch, &to_free); 224 qdisc_qstats_overlimit(sch); 225 sch->q.qlen--; 226 } 227 228 kfree_skb_list(to_free); 229 } 230 231 static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb) 232 { 233 struct etf_sched_data *q = qdisc_priv(sch); 234 235 rb_erase_cached(&skb->rbnode, &q->head); 236 237 /* The rbnode field in the skb re-uses these fields, now that 238 * we are done with the rbnode, reset them. 239 */ 240 skb->next = NULL; 241 skb->prev = NULL; 242 skb->dev = qdisc_dev(sch); 243 244 qdisc_qstats_backlog_dec(sch, skb); 245 246 qdisc_bstats_update(sch, skb); 247 248 q->last = skb->tstamp; 249 250 sch->q.qlen--; 251 } 252 253 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) 254 { 255 struct etf_sched_data *q = qdisc_priv(sch); 256 struct sk_buff *skb; 257 ktime_t now, next; 258 259 skb = etf_peek_timesortedlist(sch); 260 if (!skb) 261 return NULL; 262 263 now = q->get_time(); 264 265 /* Drop if packet has expired while in queue. */ 266 if (ktime_before(skb->tstamp, now)) { 267 timesortedlist_drop(sch, skb, now); 268 skb = NULL; 269 goto out; 270 } 271 272 /* When in deadline mode, dequeue as soon as possible and change the 273 * txtime from deadline to (now + delta). 274 */ 275 if (q->deadline_mode) { 276 timesortedlist_remove(sch, skb); 277 skb->tstamp = now; 278 goto out; 279 } 280 281 next = ktime_sub_ns(skb->tstamp, q->delta); 282 283 /* Dequeue only if now is within the [txtime - delta, txtime] range. */ 284 if (ktime_after(now, next)) 285 timesortedlist_remove(sch, skb); 286 else 287 skb = NULL; 288 289 out: 290 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 291 reset_watchdog(sch); 292 293 return skb; 294 } 295 296 static void etf_disable_offload(struct net_device *dev, 297 struct etf_sched_data *q) 298 { 299 struct tc_etf_qopt_offload etf = { }; 300 const struct net_device_ops *ops; 301 int err; 302 303 if (!q->offload) 304 return; 305 306 ops = dev->netdev_ops; 307 if (!ops->ndo_setup_tc) 308 return; 309 310 etf.queue = q->queue; 311 etf.enable = 0; 312 313 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 314 if (err < 0) 315 pr_warn("Couldn't disable ETF offload for queue %d\n", 316 etf.queue); 317 } 318 319 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, 320 struct netlink_ext_ack *extack) 321 { 322 const struct net_device_ops *ops = dev->netdev_ops; 323 struct tc_etf_qopt_offload etf = { }; 324 int err; 325 326 if (q->offload) 327 return 0; 328 329 if (!ops->ndo_setup_tc) { 330 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); 331 return -EOPNOTSUPP; 332 } 333 334 etf.queue = q->queue; 335 etf.enable = 1; 336 337 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 338 if (err < 0) { 339 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload"); 340 return err; 341 } 342 343 return 0; 344 } 345 346 static int etf_init(struct Qdisc *sch, struct nlattr *opt, 347 struct netlink_ext_ack *extack) 348 { 349 struct etf_sched_data *q = qdisc_priv(sch); 350 struct net_device *dev = qdisc_dev(sch); 351 struct nlattr *tb[TCA_ETF_MAX + 1]; 352 struct tc_etf_qopt *qopt; 353 int err; 354 355 if (!opt) { 356 NL_SET_ERR_MSG(extack, 357 "Missing ETF qdisc options which are mandatory"); 358 return -EINVAL; 359 } 360 361 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, 362 extack); 363 if (err < 0) 364 return err; 365 366 if (!tb[TCA_ETF_PARMS]) { 367 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters"); 368 return -EINVAL; 369 } 370 371 qopt = nla_data(tb[TCA_ETF_PARMS]); 372 373 pr_debug("delta %d clockid %d offload %s deadline %s\n", 374 qopt->delta, qopt->clockid, 375 OFFLOAD_IS_ON(qopt) ? "on" : "off", 376 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off"); 377 378 err = validate_input_params(qopt, extack); 379 if (err < 0) 380 return err; 381 382 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); 383 384 if (OFFLOAD_IS_ON(qopt)) { 385 err = etf_enable_offload(dev, q, extack); 386 if (err < 0) 387 return err; 388 } 389 390 /* Everything went OK, save the parameters used. */ 391 q->delta = qopt->delta; 392 q->clockid = qopt->clockid; 393 q->offload = OFFLOAD_IS_ON(qopt); 394 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); 395 q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt); 396 397 switch (q->clockid) { 398 case CLOCK_REALTIME: 399 q->get_time = ktime_get_real; 400 break; 401 case CLOCK_MONOTONIC: 402 q->get_time = ktime_get; 403 break; 404 case CLOCK_BOOTTIME: 405 q->get_time = ktime_get_boottime; 406 break; 407 case CLOCK_TAI: 408 q->get_time = ktime_get_clocktai; 409 break; 410 default: 411 NL_SET_ERR_MSG(extack, "Clockid is not supported"); 412 return -ENOTSUPP; 413 } 414 415 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); 416 417 return 0; 418 } 419 420 static void timesortedlist_clear(struct Qdisc *sch) 421 { 422 struct etf_sched_data *q = qdisc_priv(sch); 423 struct rb_node *p = rb_first_cached(&q->head); 424 425 while (p) { 426 struct sk_buff *skb = rb_to_skb(p); 427 428 p = rb_next(p); 429 430 rb_erase_cached(&skb->rbnode, &q->head); 431 rtnl_kfree_skbs(skb, skb); 432 sch->q.qlen--; 433 } 434 } 435 436 static void etf_reset(struct Qdisc *sch) 437 { 438 struct etf_sched_data *q = qdisc_priv(sch); 439 440 /* Only cancel watchdog if it's been initialized. */ 441 if (q->watchdog.qdisc == sch) 442 qdisc_watchdog_cancel(&q->watchdog); 443 444 /* No matter which mode we are on, it's safe to clear both lists. */ 445 timesortedlist_clear(sch); 446 __qdisc_reset_queue(&sch->q); 447 448 sch->qstats.backlog = 0; 449 sch->q.qlen = 0; 450 451 q->last = 0; 452 } 453 454 static void etf_destroy(struct Qdisc *sch) 455 { 456 struct etf_sched_data *q = qdisc_priv(sch); 457 struct net_device *dev = qdisc_dev(sch); 458 459 /* Only cancel watchdog if it's been initialized. */ 460 if (q->watchdog.qdisc == sch) 461 qdisc_watchdog_cancel(&q->watchdog); 462 463 etf_disable_offload(dev, q); 464 } 465 466 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) 467 { 468 struct etf_sched_data *q = qdisc_priv(sch); 469 struct tc_etf_qopt opt = { }; 470 struct nlattr *nest; 471 472 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 473 if (!nest) 474 goto nla_put_failure; 475 476 opt.delta = q->delta; 477 opt.clockid = q->clockid; 478 if (q->offload) 479 opt.flags |= TC_ETF_OFFLOAD_ON; 480 481 if (q->deadline_mode) 482 opt.flags |= TC_ETF_DEADLINE_MODE_ON; 483 484 if (q->skip_sock_check) 485 opt.flags |= TC_ETF_SKIP_SOCK_CHECK; 486 487 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) 488 goto nla_put_failure; 489 490 return nla_nest_end(skb, nest); 491 492 nla_put_failure: 493 nla_nest_cancel(skb, nest); 494 return -1; 495 } 496 497 static struct Qdisc_ops etf_qdisc_ops __read_mostly = { 498 .id = "etf", 499 .priv_size = sizeof(struct etf_sched_data), 500 .enqueue = etf_enqueue_timesortedlist, 501 .dequeue = etf_dequeue_timesortedlist, 502 .peek = etf_peek_timesortedlist, 503 .init = etf_init, 504 .reset = etf_reset, 505 .destroy = etf_destroy, 506 .dump = etf_dump, 507 .owner = THIS_MODULE, 508 }; 509 510 static int __init etf_module_init(void) 511 { 512 return register_qdisc(&etf_qdisc_ops); 513 } 514 515 static void __exit etf_module_exit(void) 516 { 517 unregister_qdisc(&etf_qdisc_ops); 518 } 519 module_init(etf_module_init) 520 module_exit(etf_module_exit) 521 MODULE_LICENSE("GPL"); 522