1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_etf.c Earliest TxTime First queueing discipline. 4 * 5 * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com> 6 * Vinicius Costa Gomes <vinicius.gomes@intel.com> 7 */ 8 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include <linux/errqueue.h> 15 #include <linux/rbtree.h> 16 #include <linux/skbuff.h> 17 #include <linux/posix-timers.h> 18 #include <net/netlink.h> 19 #include <net/sch_generic.h> 20 #include <net/pkt_sched.h> 21 #include <net/sock.h> 22 23 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON) 24 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON) 25 26 struct etf_sched_data { 27 bool offload; 28 bool deadline_mode; 29 int clockid; 30 int queue; 31 s32 delta; /* in ns */ 32 ktime_t last; /* The txtime of the last skb sent to the netdevice. */ 33 struct rb_root head; 34 struct qdisc_watchdog watchdog; 35 ktime_t (*get_time)(void); 36 }; 37 38 static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = { 39 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) }, 40 }; 41 42 static inline int validate_input_params(struct tc_etf_qopt *qopt, 43 struct netlink_ext_ack *extack) 44 { 45 /* Check if params comply to the following rules: 46 * * Clockid and delta must be valid. 47 * 48 * * Dynamic clockids are not supported. 49 * 50 * * Delta must be a positive integer. 51 * 52 * Also note that for the HW offload case, we must 53 * expect that system clocks have been synchronized to PHC. 54 */ 55 if (qopt->clockid < 0) { 56 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported"); 57 return -ENOTSUPP; 58 } 59 60 if (qopt->clockid != CLOCK_TAI) { 61 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used"); 62 return -EINVAL; 63 } 64 65 if (qopt->delta < 0) { 66 NL_SET_ERR_MSG(extack, "Delta must be positive"); 67 return -EINVAL; 68 } 69 70 return 0; 71 } 72 73 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) 74 { 75 struct etf_sched_data *q = qdisc_priv(sch); 76 ktime_t txtime = nskb->tstamp; 77 struct sock *sk = nskb->sk; 78 ktime_t now; 79 80 if (!sk) 81 return false; 82 83 if (!sock_flag(sk, SOCK_TXTIME)) 84 return false; 85 86 /* We don't perform crosstimestamping. 87 * Drop if packet's clockid differs from qdisc's. 88 */ 89 if (sk->sk_clockid != q->clockid) 90 return false; 91 92 if (sk->sk_txtime_deadline_mode != q->deadline_mode) 93 return false; 94 95 now = q->get_time(); 96 if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) 97 return false; 98 99 return true; 100 } 101 102 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) 103 { 104 struct etf_sched_data *q = qdisc_priv(sch); 105 struct rb_node *p; 106 107 p = rb_first(&q->head); 108 if (!p) 109 return NULL; 110 111 return rb_to_skb(p); 112 } 113 114 static void reset_watchdog(struct Qdisc *sch) 115 { 116 struct etf_sched_data *q = qdisc_priv(sch); 117 struct sk_buff *skb = etf_peek_timesortedlist(sch); 118 ktime_t next; 119 120 if (!skb) 121 return; 122 123 next = ktime_sub_ns(skb->tstamp, q->delta); 124 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); 125 } 126 127 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) 128 { 129 struct sock_exterr_skb *serr; 130 struct sk_buff *clone; 131 ktime_t txtime = skb->tstamp; 132 133 if (!skb->sk || !(skb->sk->sk_txtime_report_errors)) 134 return; 135 136 clone = skb_clone(skb, GFP_ATOMIC); 137 if (!clone) 138 return; 139 140 serr = SKB_EXT_ERR(clone); 141 serr->ee.ee_errno = err; 142 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME; 143 serr->ee.ee_type = 0; 144 serr->ee.ee_code = code; 145 serr->ee.ee_pad = 0; 146 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ 147 serr->ee.ee_info = txtime; /* low part of tstamp */ 148 149 if (sock_queue_err_skb(skb->sk, clone)) 150 kfree_skb(clone); 151 } 152 153 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, 154 struct sk_buff **to_free) 155 { 156 struct etf_sched_data *q = qdisc_priv(sch); 157 struct rb_node **p = &q->head.rb_node, *parent = NULL; 158 ktime_t txtime = nskb->tstamp; 159 160 if (!is_packet_valid(sch, nskb)) { 161 report_sock_error(nskb, EINVAL, 162 SO_EE_CODE_TXTIME_INVALID_PARAM); 163 return qdisc_drop(nskb, sch, to_free); 164 } 165 166 while (*p) { 167 struct sk_buff *skb; 168 169 parent = *p; 170 skb = rb_to_skb(parent); 171 if (ktime_after(txtime, skb->tstamp)) 172 p = &parent->rb_right; 173 else 174 p = &parent->rb_left; 175 } 176 rb_link_node(&nskb->rbnode, parent, p); 177 rb_insert_color(&nskb->rbnode, &q->head); 178 179 qdisc_qstats_backlog_inc(sch, nskb); 180 sch->q.qlen++; 181 182 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 183 reset_watchdog(sch); 184 185 return NET_XMIT_SUCCESS; 186 } 187 188 static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb, 189 bool drop) 190 { 191 struct etf_sched_data *q = qdisc_priv(sch); 192 193 rb_erase(&skb->rbnode, &q->head); 194 195 /* The rbnode field in the skb re-uses these fields, now that 196 * we are done with the rbnode, reset them. 197 */ 198 skb->next = NULL; 199 skb->prev = NULL; 200 skb->dev = qdisc_dev(sch); 201 202 qdisc_qstats_backlog_dec(sch, skb); 203 204 if (drop) { 205 struct sk_buff *to_free = NULL; 206 207 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); 208 209 qdisc_drop(skb, sch, &to_free); 210 kfree_skb_list(to_free); 211 qdisc_qstats_overlimit(sch); 212 } else { 213 qdisc_bstats_update(sch, skb); 214 215 q->last = skb->tstamp; 216 } 217 218 sch->q.qlen--; 219 } 220 221 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) 222 { 223 struct etf_sched_data *q = qdisc_priv(sch); 224 struct sk_buff *skb; 225 ktime_t now, next; 226 227 skb = etf_peek_timesortedlist(sch); 228 if (!skb) 229 return NULL; 230 231 now = q->get_time(); 232 233 /* Drop if packet has expired while in queue. */ 234 if (ktime_before(skb->tstamp, now)) { 235 timesortedlist_erase(sch, skb, true); 236 skb = NULL; 237 goto out; 238 } 239 240 /* When in deadline mode, dequeue as soon as possible and change the 241 * txtime from deadline to (now + delta). 242 */ 243 if (q->deadline_mode) { 244 timesortedlist_erase(sch, skb, false); 245 skb->tstamp = now; 246 goto out; 247 } 248 249 next = ktime_sub_ns(skb->tstamp, q->delta); 250 251 /* Dequeue only if now is within the [txtime - delta, txtime] range. */ 252 if (ktime_after(now, next)) 253 timesortedlist_erase(sch, skb, false); 254 else 255 skb = NULL; 256 257 out: 258 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 259 reset_watchdog(sch); 260 261 return skb; 262 } 263 264 static void etf_disable_offload(struct net_device *dev, 265 struct etf_sched_data *q) 266 { 267 struct tc_etf_qopt_offload etf = { }; 268 const struct net_device_ops *ops; 269 int err; 270 271 if (!q->offload) 272 return; 273 274 ops = dev->netdev_ops; 275 if (!ops->ndo_setup_tc) 276 return; 277 278 etf.queue = q->queue; 279 etf.enable = 0; 280 281 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 282 if (err < 0) 283 pr_warn("Couldn't disable ETF offload for queue %d\n", 284 etf.queue); 285 } 286 287 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, 288 struct netlink_ext_ack *extack) 289 { 290 const struct net_device_ops *ops = dev->netdev_ops; 291 struct tc_etf_qopt_offload etf = { }; 292 int err; 293 294 if (q->offload) 295 return 0; 296 297 if (!ops->ndo_setup_tc) { 298 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); 299 return -EOPNOTSUPP; 300 } 301 302 etf.queue = q->queue; 303 etf.enable = 1; 304 305 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 306 if (err < 0) { 307 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload"); 308 return err; 309 } 310 311 return 0; 312 } 313 314 static int etf_init(struct Qdisc *sch, struct nlattr *opt, 315 struct netlink_ext_ack *extack) 316 { 317 struct etf_sched_data *q = qdisc_priv(sch); 318 struct net_device *dev = qdisc_dev(sch); 319 struct nlattr *tb[TCA_ETF_MAX + 1]; 320 struct tc_etf_qopt *qopt; 321 int err; 322 323 if (!opt) { 324 NL_SET_ERR_MSG(extack, 325 "Missing ETF qdisc options which are mandatory"); 326 return -EINVAL; 327 } 328 329 err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack); 330 if (err < 0) 331 return err; 332 333 if (!tb[TCA_ETF_PARMS]) { 334 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters"); 335 return -EINVAL; 336 } 337 338 qopt = nla_data(tb[TCA_ETF_PARMS]); 339 340 pr_debug("delta %d clockid %d offload %s deadline %s\n", 341 qopt->delta, qopt->clockid, 342 OFFLOAD_IS_ON(qopt) ? "on" : "off", 343 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off"); 344 345 err = validate_input_params(qopt, extack); 346 if (err < 0) 347 return err; 348 349 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); 350 351 if (OFFLOAD_IS_ON(qopt)) { 352 err = etf_enable_offload(dev, q, extack); 353 if (err < 0) 354 return err; 355 } 356 357 /* Everything went OK, save the parameters used. */ 358 q->delta = qopt->delta; 359 q->clockid = qopt->clockid; 360 q->offload = OFFLOAD_IS_ON(qopt); 361 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); 362 363 switch (q->clockid) { 364 case CLOCK_REALTIME: 365 q->get_time = ktime_get_real; 366 break; 367 case CLOCK_MONOTONIC: 368 q->get_time = ktime_get; 369 break; 370 case CLOCK_BOOTTIME: 371 q->get_time = ktime_get_boottime; 372 break; 373 case CLOCK_TAI: 374 q->get_time = ktime_get_clocktai; 375 break; 376 default: 377 NL_SET_ERR_MSG(extack, "Clockid is not supported"); 378 return -ENOTSUPP; 379 } 380 381 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); 382 383 return 0; 384 } 385 386 static void timesortedlist_clear(struct Qdisc *sch) 387 { 388 struct etf_sched_data *q = qdisc_priv(sch); 389 struct rb_node *p = rb_first(&q->head); 390 391 while (p) { 392 struct sk_buff *skb = rb_to_skb(p); 393 394 p = rb_next(p); 395 396 rb_erase(&skb->rbnode, &q->head); 397 rtnl_kfree_skbs(skb, skb); 398 sch->q.qlen--; 399 } 400 } 401 402 static void etf_reset(struct Qdisc *sch) 403 { 404 struct etf_sched_data *q = qdisc_priv(sch); 405 406 /* Only cancel watchdog if it's been initialized. */ 407 if (q->watchdog.qdisc == sch) 408 qdisc_watchdog_cancel(&q->watchdog); 409 410 /* No matter which mode we are on, it's safe to clear both lists. */ 411 timesortedlist_clear(sch); 412 __qdisc_reset_queue(&sch->q); 413 414 sch->qstats.backlog = 0; 415 sch->q.qlen = 0; 416 417 q->last = 0; 418 } 419 420 static void etf_destroy(struct Qdisc *sch) 421 { 422 struct etf_sched_data *q = qdisc_priv(sch); 423 struct net_device *dev = qdisc_dev(sch); 424 425 /* Only cancel watchdog if it's been initialized. */ 426 if (q->watchdog.qdisc == sch) 427 qdisc_watchdog_cancel(&q->watchdog); 428 429 etf_disable_offload(dev, q); 430 } 431 432 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) 433 { 434 struct etf_sched_data *q = qdisc_priv(sch); 435 struct tc_etf_qopt opt = { }; 436 struct nlattr *nest; 437 438 nest = nla_nest_start(skb, TCA_OPTIONS); 439 if (!nest) 440 goto nla_put_failure; 441 442 opt.delta = q->delta; 443 opt.clockid = q->clockid; 444 if (q->offload) 445 opt.flags |= TC_ETF_OFFLOAD_ON; 446 447 if (q->deadline_mode) 448 opt.flags |= TC_ETF_DEADLINE_MODE_ON; 449 450 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) 451 goto nla_put_failure; 452 453 return nla_nest_end(skb, nest); 454 455 nla_put_failure: 456 nla_nest_cancel(skb, nest); 457 return -1; 458 } 459 460 static struct Qdisc_ops etf_qdisc_ops __read_mostly = { 461 .id = "etf", 462 .priv_size = sizeof(struct etf_sched_data), 463 .enqueue = etf_enqueue_timesortedlist, 464 .dequeue = etf_dequeue_timesortedlist, 465 .peek = etf_peek_timesortedlist, 466 .init = etf_init, 467 .reset = etf_reset, 468 .destroy = etf_destroy, 469 .dump = etf_dump, 470 .owner = THIS_MODULE, 471 }; 472 473 static int __init etf_module_init(void) 474 { 475 return register_qdisc(&etf_qdisc_ops); 476 } 477 478 static void __exit etf_module_exit(void) 479 { 480 unregister_qdisc(&etf_qdisc_ops); 481 } 482 module_init(etf_module_init) 483 module_exit(etf_module_exit) 484 MODULE_LICENSE("GPL"); 485