// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)

struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};

static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
					struct netlink_ext_ack *extack)
{
	/* Check if params comply with the following rules:
	 *	* Clockid and delta must be valid.
	 *
	 *	* Dynamic clockids are not supported.
	 *
	 *	* Delta must be a positive integer.
	 *
	 * Also note that for the HW offload case, we must
	 * expect that system clocks have been synchronized to PHC.
	 */
	if (qopt->clockid < 0) {
		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
		return -ENOTSUPP;
	}

	if (qopt->clockid != CLOCK_TAI) {
		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
		return -EINVAL;
	}

	if (qopt->delta < 0) {
		NL_SET_ERR_MSG(extack, "Delta must be positive");
		return -EINVAL;
	}

	return 0;
}

static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	ktime_t txtime = nskb->tstamp;
	struct sock *sk = nskb->sk;
	ktime_t now;

	if (!sk)
		return false;

	if (!sock_flag(sk, SOCK_TXTIME))
		return false;

	/* We don't perform crosstimestamping.
	 * Drop if packet's clockid differs from qdisc's.
	 */
	if (sk->sk_clockid != q->clockid)
		return false;

	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
		return false;

	now = q->get_time();
	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
		return false;

	return true;
}
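
/* Usage sketch (userspace, not part of this module): for its packets to
 * pass is_packet_valid() above, a socket must first opt in to SO_TXTIME
 * with a clockid matching this qdisc's.  "fd" is an assumed socket and
 * error handling is omitted; this is illustrative, not authoritative.
 *
 *	#include <linux/net_tstamp.h>
 *
 *	struct sock_txtime so_txtime = {
 *		.clockid = CLOCK_TAI,
 *		.flags = SOF_TXTIME_REPORT_ERRORS,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_TXTIME, &so_txtime,
 *		   sizeof(so_txtime));
 */
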
static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	p = rb_first_cached(&q->head);
	if (!p)
		return NULL;

	return rb_to_skb(p);
}

static void reset_watchdog(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = etf_peek_timesortedlist(sch);
	ktime_t next;

	if (!skb) {
		qdisc_watchdog_cancel(&q->watchdog);
		return;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);
	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;
	ktime_t txtime = skb->tstamp;

	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
		return;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return;

	serr = SKB_EXT_ERR(clone);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
	serr->ee.ee_info = txtime; /* low part of tstamp */

	if (sock_queue_err_skb(skb->sk, clone))
		kfree_skb(clone);
}

static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
				      struct sk_buff **to_free)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
	ktime_t txtime = nskb->tstamp;
	bool leftmost = true;

	if (!is_packet_valid(sch, nskb)) {
		report_sock_error(nskb, EINVAL,
				  SO_EE_CODE_TXTIME_INVALID_PARAM);
		return qdisc_drop(nskb, sch, to_free);
	}

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (ktime_after(txtime, skb->tstamp)) {
			p = &parent->rb_right;
			leftmost = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);

	qdisc_qstats_backlog_inc(sch, nskb);
	sch->q.qlen++;

	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return NET_XMIT_SUCCESS;
}
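
/* Usage sketch (userspace, not part of this module): the txtime used as
 * the rbtree key above arrives as an SCM_TXTIME control message holding
 * a u64 in nanoseconds on the socket's clock, which the stack copies
 * into skb->tstamp.  "fd", "msg" and "txtime_ns" are assumed to be set
 * up by the caller; error handling is omitted.
 *
 *	char control[CMSG_SPACE(sizeof(__u64))] = {};
 *	struct cmsghdr *cm;
 *
 *	msg.msg_control = control;
 *	msg.msg_controllen = sizeof(control);
 *
 *	cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type = SCM_TXTIME;
 *	cm->cmsg_len = CMSG_LEN(sizeof(__u64));
 *	memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(__u64));
 *
 *	sendmsg(fd, &msg, 0);
 */
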
static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
				ktime_t now)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *to_free = NULL;
	struct sk_buff *tmp = NULL;

	skb_rbtree_walk_from_safe(skb, tmp) {
		if (ktime_after(skb->tstamp, now))
			break;

		rb_erase_cached(&skb->rbnode, &q->head);

		/* The rbnode field in the skb shares space with these
		 * fields; now that we are done with the rbnode, reset them.
		 */
		skb->next = NULL;
		skb->prev = NULL;
		skb->dev = qdisc_dev(sch);

		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, &to_free);
		qdisc_qstats_overlimit(sch);
		sch->q.qlen--;
	}

	kfree_skb_list(to_free);
}

static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	rb_erase_cached(&skb->rbnode, &q->head);

	/* The rbnode field in the skb shares space with these
	 * fields; now that we are done with the rbnode, reset them.
	 */
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = qdisc_dev(sch);

	qdisc_qstats_backlog_dec(sch, skb);

	qdisc_bstats_update(sch, skb);

	q->last = skb->tstamp;

	sch->q.qlen--;
}

static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	ktime_t now, next;

	skb = etf_peek_timesortedlist(sch);
	if (!skb)
		return NULL;

	now = q->get_time();

	/* Drop if packet has expired while in queue. */
	if (ktime_before(skb->tstamp, now)) {
		timesortedlist_drop(sch, skb, now);
		skb = NULL;
		goto out;
	}

	/* When in deadline mode, dequeue as soon as possible and rewrite the
	 * txtime from the deadline to now, the time of release.
	 */
	if (q->deadline_mode) {
		timesortedlist_remove(sch, skb);
		skb->tstamp = now;
		goto out;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);

	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
	if (ktime_after(now, next))
		timesortedlist_remove(sch, skb);
	else
		skb = NULL;

out:
	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return skb;
}
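
/* Worked example of the dequeue window above, with illustrative numbers:
 * given delta = 300000 (300us) and a head packet whose txtime is T, the
 * watchdog armed by reset_watchdog() fires around T - 300us, and the
 * packet is handed to the device only while now lies in [T - 300us, T].
 * Once now is past T, the packet (and any other expired packets behind
 * it) is dropped and reported with ECANCELED / SO_EE_CODE_TXTIME_MISSED.
 * In deadline mode the head packet is instead released on the first
 * dequeue attempt, with its tstamp rewritten to the release time.
 */
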
static void etf_disable_offload(struct net_device *dev,
				struct etf_sched_data *q)
{
	struct tc_etf_qopt_offload etf = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	etf.queue = q->queue;
	etf.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0)
		pr_warn("Couldn't disable ETF offload for queue %d\n",
			etf.queue);
}

static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_etf_qopt_offload etf = { };
	int err;

	if (q->offload)
		return 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
		return -EOPNOTSUPP;
	}

	etf.queue = q->queue;
	etf.enable = 1;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
		return err;
	}

	return 0;
}

static int etf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_ETF_MAX + 1];
	struct tc_etf_qopt *qopt;
	int err;

	if (!opt) {
		NL_SET_ERR_MSG(extack,
			       "Missing ETF qdisc options which are mandatory");
		return -EINVAL;
	}

	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
					  extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETF_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_ETF_PARMS]);

	pr_debug("delta %d clockid %d offload %s deadline %s\n",
		 qopt->delta, qopt->clockid,
		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

	err = validate_input_params(qopt, extack);
	if (err < 0)
		return err;

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	if (OFFLOAD_IS_ON(qopt)) {
		err = etf_enable_offload(dev, q, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->delta = qopt->delta;
	q->clockid = qopt->clockid;
	q->offload = OFFLOAD_IS_ON(qopt);
	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Clockid is not supported");
		return -ENOTSUPP;
	}

	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

	return 0;
}
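
/* Configuration sketch (illustrative, not the only valid setup): etf is
 * typically installed per hardware queue under an mq or mqprio parent,
 * e.g. with iproute2 (device name, parent handle and delta are assumed
 * values for the example):
 *
 *	tc qdisc replace dev eth0 parent 100:1 etf \
 *		clockid CLOCK_TAI delta 300000 offload
 *
 * "offload" sets TC_ETF_OFFLOAD_ON and "deadline_mode" would set
 * TC_ETF_DEADLINE_MODE_ON in the tc_etf_qopt flags parsed above.
 */
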
static void timesortedlist_clear(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first_cached(&q->head);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);

		rb_erase_cached(&skb->rbnode, &q->head);
		rtnl_kfree_skbs(skb, skb);
		sch->q.qlen--;
	}
}

static void etf_reset(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	/* No matter which mode we are in, it's safe to clear both the
	 * rbtree and the qdisc queue.
	 */
	timesortedlist_clear(sch);
	__qdisc_reset_queue(&sch->q);

	sch->qstats.backlog = 0;
	sch->q.qlen = 0;

	q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	etf_disable_offload(dev, q);
}

static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct tc_etf_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.delta = q->delta;
	opt.clockid = q->clockid;
	if (q->offload)
		opt.flags |= TC_ETF_OFFLOAD_ON;

	if (q->deadline_mode)
		opt.flags |= TC_ETF_DEADLINE_MODE_ON;

	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id		= "etf",
	.priv_size	= sizeof(struct etf_sched_data),
	.enqueue	= etf_enqueue_timesortedlist,
	.dequeue	= etf_dequeue_timesortedlist,
	.peek		= etf_peek_timesortedlist,
	.init		= etf_init,
	.reset		= etf_reset,
	.destroy	= etf_destroy,
	.dump		= etf_dump,
	.owner		= THIS_MODULE,
};

static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");
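
/* Usage sketch (userspace, not part of this module): with
 * SOF_TXTIME_REPORT_ERRORS set on the socket, the drops reported via
 * report_sock_error() surface on the error queue; serr->ee_code then
 * distinguishes SO_EE_CODE_TXTIME_INVALID_PARAM from
 * SO_EE_CODE_TXTIME_MISSED.  "fd" and "msg" are assumed to be prepared
 * by the caller, the cmsg level/type checks (e.g. IP_RECVERR) are
 * family specific, and error handling is omitted.
 *
 *	struct sock_extended_err *serr;
 *	struct cmsghdr *cm;
 *	__u64 txtime;
 *
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);
 *
 *	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
 *		serr = (void *)CMSG_DATA(cm);
 *		if (serr->ee_origin != SO_EE_ORIGIN_TXTIME)
 *			continue;
 *
 *		txtime = ((__u64)serr->ee_data << 32) | serr->ee_info;
 *	}
 */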