// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)

struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	bool skip_sock_check;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};

static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
					struct netlink_ext_ack *extack)
{
	/* Check if params comply to the following rules:
	 *	* Clockid and delta must be valid.
	 *
	 *	* Dynamic clockids are not supported.
	 *
	 *	* Delta must not be negative (zero is allowed).
	 *
	 * Also note that for the HW offload case, we must
	 * expect that system clocks have been synchronized to PHC.
	 */
	if (qopt->clockid < 0) {
		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
		return -ENOTSUPP;
	}

	if (qopt->clockid != CLOCK_TAI) {
		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
		return -EINVAL;
	}

	if (qopt->delta < 0) {
		NL_SET_ERR_MSG(extack, "Delta must be positive");
		return -EINVAL;
	}

	return 0;
}
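
/* Informational example (see tc-etf(8) for the authoritative syntax):
 * a configuration that satisfies validate_input_params() above uses
 * CLOCK_TAI as the reference clock and a non-negative delta in ns:
 *
 *	tc qdisc replace dev eth0 parent 100:1 etf \
 *		clockid CLOCK_TAI delta 300000 offload
 *
 * "eth0" and the parent handle 100:1 are placeholders for illustration;
 * delta 300000 gives the system 300us of lead time per packet.
 */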
static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	ktime_t txtime = nskb->tstamp;
	struct sock *sk = nskb->sk;
	ktime_t now;

	if (q->skip_sock_check)
		goto skip;

	/* Request and timewait sockets don't carry sk_clockid or the
	 * txtime fields; require a full socket before dereferencing them.
	 */
	if (!sk || !sk_fullsock(sk))
		return false;

	if (!sock_flag(sk, SOCK_TXTIME))
		return false;

	/* We don't perform crosstimestamping.
	 * Drop if packet's clockid differs from qdisc's.
	 */
	if (sk->sk_clockid != q->clockid)
		return false;

	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
		return false;

skip:
	now = q->get_time();
	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
		return false;

	return true;
}

static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	p = rb_first_cached(&q->head);
	if (!p)
		return NULL;

	return rb_to_skb(p);
}

static void reset_watchdog(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = etf_peek_timesortedlist(sch);
	ktime_t next;

	if (!skb) {
		qdisc_watchdog_cancel(&q->watchdog);
		return;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);
	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;
	ktime_t txtime = skb->tstamp;

	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
		return;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return;

	serr = SKB_EXT_ERR(clone);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
	serr->ee.ee_info = txtime; /* low part of tstamp */

	if (sock_queue_err_skb(skb->sk, clone))
		kfree_skb(clone);
}

static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
				      struct sk_buff **to_free)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
	ktime_t txtime = nskb->tstamp;
	bool leftmost = true;

	if (!is_packet_valid(sch, nskb)) {
		report_sock_error(nskb, EINVAL,
				  SO_EE_CODE_TXTIME_INVALID_PARAM);
		return qdisc_drop(nskb, sch, to_free);
	}

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (ktime_compare(txtime, skb->tstamp) >= 0) {
			p = &parent->rb_right;
			leftmost = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);

	qdisc_qstats_backlog_inc(sch, nskb);
	sch->q.qlen++;

	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return NET_XMIT_SUCCESS;
}
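
/* Informational sketch (not part of the qdisc): userspace retrieves the
 * errors queued by report_sock_error() above by setting
 * SOF_TXTIME_REPORT_ERRORS in the SO_TXTIME socket option and reading
 * the socket error queue via recvmsg(fd, &msg, MSG_ERRQUEUE). Matching
 * the split above, ee_data holds the high 32 bits of the txtime and
 * ee_info the low 32 bits, so it is reassembled as:
 *
 *	struct sock_extended_err *ee;
 *	__u64 txtime;
 *
 *	ee = (struct sock_extended_err *)CMSG_DATA(cmsg);
 *	if (ee->ee_origin == SO_EE_ORIGIN_TXTIME)
 *		txtime = ((__u64)ee->ee_data << 32) | ee->ee_info;
 *
 * "fd", "msg" and "cmsg" are placeholders for the usual recvmsg()
 * control-message walk.
 */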
static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
				ktime_t now)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *to_free = NULL;
	struct sk_buff *tmp = NULL;

	skb_rbtree_walk_from_safe(skb, tmp) {
		if (ktime_after(skb->tstamp, now))
			break;

		rb_erase_cached(&skb->rbnode, &q->head);

		/* The rbnode field in the skb re-uses these fields, now that
		 * we are done with the rbnode, reset them.
		 */
		skb->next = NULL;
		skb->prev = NULL;
		skb->dev = qdisc_dev(sch);

		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, &to_free);
		qdisc_qstats_overlimit(sch);
		sch->q.qlen--;
	}

	kfree_skb_list(to_free);
}

static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	rb_erase_cached(&skb->rbnode, &q->head);

	/* The rbnode field in the skb re-uses these fields, now that
	 * we are done with the rbnode, reset them.
	 */
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = qdisc_dev(sch);

	qdisc_qstats_backlog_dec(sch, skb);

	qdisc_bstats_update(sch, skb);

	q->last = skb->tstamp;

	sch->q.qlen--;
}

static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	ktime_t now, next;

	skb = etf_peek_timesortedlist(sch);
	if (!skb)
		return NULL;

	now = q->get_time();

	/* Drop if packet has expired while in queue. */
	if (ktime_before(skb->tstamp, now)) {
		timesortedlist_drop(sch, skb, now);
		skb = NULL;
		goto out;
	}

	/* When in deadline mode, dequeue as soon as possible and set the
	 * txtime to now, i.e. "send immediately".
	 */
	if (q->deadline_mode) {
		timesortedlist_remove(sch, skb);
		skb->tstamp = now;
		goto out;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);

	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
	if (ktime_after(now, next))
		timesortedlist_remove(sch, skb);
	else
		skb = NULL;

out:
	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return skb;
}
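
/* Informational example of the timing above (values are illustrative):
 * with delta = 300000 (300us) and a packet whose txtime is T, the
 * watchdog armed by reset_watchdog() fires at T - 300us, and the packet
 * may be dequeued anywhere in [T - 300us, T]. If now passes T while the
 * packet is still queued, the next dequeue attempt drops it (and any
 * other expired packets) via timesortedlist_drop(), reporting
 * SO_EE_CODE_TXTIME_MISSED to sockets that opted in to error reporting.
 */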
static void etf_disable_offload(struct net_device *dev,
				struct etf_sched_data *q)
{
	struct tc_etf_qopt_offload etf = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	etf.queue = q->queue;
	etf.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0)
		pr_warn("Couldn't disable ETF offload for queue %d\n",
			etf.queue);
}

static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_etf_qopt_offload etf = { };
	int err;

	if (q->offload)
		return 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
		return -EOPNOTSUPP;
	}

	etf.queue = q->queue;
	etf.enable = 1;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
		return err;
	}

	return 0;
}
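
/* Informational sketch of the driver side (not part of this file): a
 * driver advertising ETF offload handles TC_SETUP_QDISC_ETF in its
 * ndo_setup_tc() callback, roughly along these lines (the "foo_" names
 * are hypothetical):
 *
 *	static int foo_setup_tc(struct net_device *dev,
 *				enum tc_setup_type type, void *type_data)
 *	{
 *		switch (type) {
 *		case TC_SETUP_QDISC_ETF:
 *			return foo_offload_etf(dev, type_data);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 *
 * where type_data points to the struct tc_etf_qopt_offload filled in by
 * etf_enable_offload()/etf_disable_offload() above.
 */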
static int etf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_ETF_MAX + 1];
	struct tc_etf_qopt *qopt;
	int err;

	if (!opt) {
		NL_SET_ERR_MSG(extack,
			       "Missing ETF qdisc options which are mandatory");
		return -EINVAL;
	}

	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
					  extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETF_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_ETF_PARMS]);

	pr_debug("delta %d clockid %d offload %s deadline %s\n",
		 qopt->delta, qopt->clockid,
		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

	err = validate_input_params(qopt, extack);
	if (err < 0)
		return err;

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	if (OFFLOAD_IS_ON(qopt)) {
		err = etf_enable_offload(dev, q, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->delta = qopt->delta;
	q->clockid = qopt->clockid;
	q->offload = OFFLOAD_IS_ON(qopt);
	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);

	/* validate_input_params() currently restricts clockid to CLOCK_TAI,
	 * but keep the mapping generic in case more clocks are allowed.
	 */
	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Clockid is not supported");
		return -ENOTSUPP;
	}

	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

	return 0;
}

static void timesortedlist_clear(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first_cached(&q->head);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);

		rb_erase_cached(&skb->rbnode, &q->head);
		rtnl_kfree_skbs(skb, skb);
		sch->q.qlen--;
	}
}

static void etf_reset(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	/* No matter which mode we are on, it's safe to clear both lists. */
	timesortedlist_clear(sch);
	__qdisc_reset_queue(&sch->q);

	sch->qstats.backlog = 0;
	sch->q.qlen = 0;

	q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	etf_disable_offload(dev, q);
}

static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct tc_etf_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.delta = q->delta;
	opt.clockid = q->clockid;
	if (q->offload)
		opt.flags |= TC_ETF_OFFLOAD_ON;

	if (q->deadline_mode)
		opt.flags |= TC_ETF_DEADLINE_MODE_ON;

	if (q->skip_sock_check)
		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;

	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id		= "etf",
	.priv_size	= sizeof(struct etf_sched_data),
	.enqueue	= etf_enqueue_timesortedlist,
	.dequeue	= etf_dequeue_timesortedlist,
	.peek		= etf_peek_timesortedlist,
	.init		= etf_init,
	.reset		= etf_reset,
	.destroy	= etf_destroy,
	.dump		= etf_dump,
	.owner		= THIS_MODULE,
};

static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc");
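
/* Informational sketch (not part of the qdisc): minimal userspace use of
 * SO_TXTIME with this qdisc. The clockid and flags must match the qdisc
 * configuration, as checked by is_packet_valid() above:
 *
 *	struct sock_txtime sk_txtime = {
 *		.clockid = CLOCK_TAI,
 *		.flags = 0,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_TXTIME, &sk_txtime, sizeof(sk_txtime));
 *
 * Flags may include SOF_TXTIME_DEADLINE_MODE and SOF_TXTIME_REPORT_ERRORS.
 * Each packet then carries its absolute transmission time, in CLOCK_TAI
 * nanoseconds, in an SCM_TXTIME control message passed to sendmsg():
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_TXTIME;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(__u64));
 *	*((__u64 *)CMSG_DATA(cmsg)) = txtime_ns;
 *
 * "fd", "cmsg" and "txtime_ns" are placeholders for illustration; see
 * tools/testing/selftests/net/so_txtime.c for a complete example.
 */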