// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c	Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);

#define TAPRIO_ALL_GATES_OPEN -1

struct sched_entry {
	struct list_head list;

	/* The instant that this entry "closes" and the next one
	 * should open, the qdisc will make some effort so that no
	 * packet leaves after this time.
	 */
	ktime_t close_time;
	atomic_t budget;
	int index;
	u32 gate_mask;
	u32 interval;
	u8 command;
};

struct taprio_sched {
	struct Qdisc **qdiscs;
	struct Qdisc *root;
	s64 base_time;
	int clockid;
	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
				    * speeds it's sub-nanoseconds per byte
				    */
	size_t num_entries;

	/* Protects the update side of the RCU protected current_entry */
	spinlock_t current_entry_lock;
	struct sched_entry __rcu *current_entry;
	struct list_head entries;
	ktime_t (*get_time)(void);
	struct hrtimer advance_timer;
	struct list_head taprio_list;
};

static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct Qdisc *child;
	int queue;

	queue = skb_get_queue_mapping(skb);

	child = q->qdiscs[queue];
	if (unlikely(!child))
		return qdisc_drop(skb, sch, to_free);

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;

	return qdisc_enqueue(skb, child, to_free);
}

static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		int prio;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			return NULL;

		return skb;
	}

	return NULL;
}

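/* Returns how long it takes to transmit 'len' bytes at the current link
 * speed, in nanoseconds (picos_per_byte is kept in picoseconds).
 */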
static inline int length_to_duration(struct taprio_sched *q, int len)
{
	return (len * atomic64_read(&q->picos_per_byte)) / 1000;
}

static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	if (atomic64_read(&q->picos_per_byte) == -1) {
		WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
		return NULL;
	}

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	/* If there's no entry, it means that the schedule didn't
	 * start yet, so force all gates to be open; this is in
	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
	 * "AdminGateStates"
	 */
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		ktime_t guard;
		int prio;
		int len;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			continue;

		len = qdisc_pkt_len(skb);
		guard = ktime_add_ns(q->get_time(),
				     length_to_duration(q, len));

		/* In the case that there's no gate entry, there's no
		 * guard band ...
		 */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    ktime_after(guard, entry->close_time))
			return NULL;

		/* ... and no budget. */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    atomic_sub_return(len, &entry->budget) < 0)
			return NULL;

		skb = child->ops->dequeue(child);
		if (unlikely(!skb))
			return NULL;

		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;

		return skb;
	}

	return NULL;
}

static bool should_restart_cycle(const struct taprio_sched *q,
				 const struct sched_entry *entry)
{
	WARN_ON(!entry);

	return list_is_last(&entry->list, &q->entries);
}

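/* hrtimer callback that advances the schedule: it picks the next entry
 * (restarting the cycle when the current one is the last), computes its
 * close_time and budget, publishes it via RCU, re-arms the timer and kicks
 * the qdisc so pending packets are re-evaluated against the new gate mask.
 */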
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
	struct taprio_sched *q = container_of(timer, struct taprio_sched,
					      advance_timer);
	struct sched_entry *entry, *next;
	struct Qdisc *sch = q->root;
	ktime_t close_time;

	spin_lock(&q->current_entry_lock);
	entry = rcu_dereference_protected(q->current_entry,
					  lockdep_is_held(&q->current_entry_lock));

	/* This is the case that it's the first time that the schedule
	 * runs, so it only happens once per schedule. The first entry
	 * is pre-calculated during the schedule initialization.
	 */
	if (unlikely(!entry)) {
		next = list_first_entry(&q->entries, struct sched_entry,
					list);
		close_time = next->close_time;
		goto first_run;
	}

	if (should_restart_cycle(q, entry))
		next = list_first_entry(&q->entries, struct sched_entry,
					list);
	else
		next = list_next_entry(entry, list);

	close_time = ktime_add_ns(entry->close_time, next->interval);

	next->close_time = close_time;
	atomic_set(&next->budget,
		   (next->interval * 1000) / atomic64_read(&q->picos_per_byte));

first_run:
	rcu_assign_pointer(q->current_entry, next);
	spin_unlock(&q->current_entry_lock);

	hrtimer_set_expires(&q->advance_timer, close_time);

	rcu_read_lock();
	__netif_schedule(sch);
	rcu_read_unlock();

	return HRTIMER_RESTART;
}

static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};

static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
};

static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
	[TCA_TAPRIO_ATTR_PRIOMAP] = {
		.len = sizeof(struct tc_mqprio_qopt)
	},
	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]   = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]    = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]      = { .type = NLA_S32 },
};

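/* Fills one schedule entry from its parsed netlink attributes; an entry
 * without a non-zero interval is rejected.
 */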
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
			    struct netlink_ext_ack *extack)
{
	u32 interval = 0;

	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
		entry->command = nla_get_u8(
			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
		entry->gate_mask = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
		interval = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);

	if (interval == 0) {
		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
		return -EINVAL;
	}

	entry->interval = interval;

	return 0;
}

static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
			     int index, struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
	int err;

	err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
			       entry_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	entry->index = index;

	return fill_sched_entry(tb, entry, extack);
}

/* Returns the number of entries in case of success */
static int parse_sched_single_entry(struct nlattr *n,
				    struct taprio_sched *q,
				    struct netlink_ext_ack *extack)
{
	struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
	struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
	struct sched_entry *entry;
	bool found = false;
	u32 index;
	int err;

	err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
			       n, entry_list_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
		NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
		return -EINVAL;
	}

	err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
			       tb_list[TCA_TAPRIO_SCHED_ENTRY],
			       entry_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
		NL_SET_ERR_MSG(extack, "Entry must specify an index");
		return -EINVAL;
	}

	index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
	if (index >= q->num_entries) {
		NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
		return -EINVAL;
	}

	list_for_each_entry(entry, &q->entries, list) {
		if (entry->index == index) {
			found = true;
			break;
		}
	}

	if (!found) {
		NL_SET_ERR_MSG(extack, "Could not find entry");
		return -ENOENT;
	}

	err = fill_sched_entry(tb_entry, entry, extack);
	if (err < 0)
		return err;

	return q->num_entries;
}

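/* Parses a full list of schedule entries, appending them to q->entries in
 * the order they were received. Returns the number of entries parsed, or a
 * negative error.
 */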
static int parse_sched_list(struct nlattr *list,
			    struct taprio_sched *q,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *n;
	int err, rem;
	int i = 0;

	if (!list)
		return -EINVAL;

	nla_for_each_nested(n, list, rem) {
		struct sched_entry *entry;

		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
			continue;
		}

		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry) {
			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
			return -ENOMEM;
		}

		err = parse_sched_entry(n, entry, i, extack);
		if (err < 0) {
			kfree(entry);
			return err;
		}

		list_add_tail(&entry->list, &q->entries);
		i++;
	}

	q->num_entries = i;

	return i;
}

/* Returns the number of entries in case of success */
static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
			    struct netlink_ext_ack *extack)
{
	int err = 0;
	int clockid;

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
	    tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
		return -EINVAL;

	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
		return -EINVAL;

	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
		return -EINVAL;

	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
		q->base_time = nla_get_s64(
			tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);

		/* We only support static clockids and we don't allow
		 * for it to be modified after the first init.
		 */
		if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
			return -EINVAL;

		q->clockid = clockid;
	}

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
		err = parse_sched_list(
			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
	else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
		err = parse_sched_single_entry(
			tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);

	/* parse_sched_* return the number of entries in the schedule,
	 * a schedule with zero entries is an error.
	 */
	if (err == 0) {
		NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
		return -EINVAL;
	}

	return err;
}

static int taprio_parse_mqprio_opt(struct net_device *dev,
				   struct tc_mqprio_qopt *qopt,
				   struct netlink_ext_ack *extack)
{
	int i, j;

	if (!qopt) {
		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
		return -EINVAL;
	}

	/* Verify num_tc is not out of max range */
	if (qopt->num_tc > TC_MAX_QUEUE) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
		return -EINVAL;
	}

	/* taprio imposes that traffic classes map 1:n to tx queues */
	if (qopt->num_tc > dev->num_tx_queues) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
		return -EINVAL;
	}

	/* Verify priority mapping uses valid tcs */
	for (i = 0; i < TC_BITMASK + 1; i++) {
		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
			return -EINVAL;
		}
	}

	for (i = 0; i < qopt->num_tc; i++) {
		unsigned int last = qopt->offset[i] + qopt->count[i];

		/* Verify the queue count is in tx range; being equal to the
		 * real_num_tx_queues indicates the last queue is in use.
		 */
		if (qopt->offset[i] >= dev->num_tx_queues ||
		    !qopt->count[i] ||
		    last > dev->real_num_tx_queues) {
			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
			return -EINVAL;
		}

		/* Verify that the offset and counts do not overlap */
		for (j = i + 1; j < qopt->num_tc; j++) {
			if (last > qopt->offset[j]) {
				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
				return -EINVAL;
			}
		}
	}

	return 0;
}

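/* Computes the absolute time at which the schedule should start: base_time
 * if it is still in the future (or the cycle is empty), otherwise the start
 * of the next cycle after 'now'.
 */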
static ktime_t taprio_get_start_time(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_entry *entry;
	ktime_t now, base, cycle;
	s64 n;

	base = ns_to_ktime(q->base_time);
	cycle = 0;

	/* Calculate the cycle_time by summing all the intervals. */
	list_for_each_entry(entry, &q->entries, list)
		cycle = ktime_add_ns(cycle, entry->interval);

	if (!cycle)
		return base;

	now = q->get_time();

	if (ktime_after(base, now))
		return base;

	/* Schedule the start time for the beginning of the next
	 * cycle.
	 */
	n = div64_s64(ktime_sub_ns(now, base), cycle);

	return ktime_add_ns(base, (n + 1) * cycle);
}

static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_entry *first;
	unsigned long flags;

	spin_lock_irqsave(&q->current_entry_lock, flags);

	first = list_first_entry(&q->entries, struct sched_entry,
				 list);

	first->close_time = ktime_add_ns(start, first->interval);
	atomic_set(&first->budget,
		   (first->interval * 1000) /
		   atomic64_read(&q->picos_per_byte));
	rcu_assign_pointer(q->current_entry, NULL);

	spin_unlock_irqrestore(&q->current_entry_lock, flags);

	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

static void taprio_set_picos_per_byte(struct net_device *dev,
				      struct taprio_sched *q)
{
	struct ethtool_link_ksettings ecmd;
	int picos_per_byte = -1;

	if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
	    ecmd.base.speed != SPEED_UNKNOWN)
		picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
					   ecmd.base.speed * 1000 * 1000);

	atomic64_set(&q->picos_per_byte, picos_per_byte);
	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
		   ecmd.base.speed);
}

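/* Link state notifier: when a device that has a taprio instance attached
 * comes up or changes, refresh its picos_per_byte from the new link speed.
 */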
static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
			       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net_device *qdev;
	struct taprio_sched *q;
	bool found = false;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	spin_lock(&taprio_list_lock);
	list_for_each_entry(q, &taprio_list, taprio_list) {
		qdev = qdisc_dev(q->root);
		if (qdev == dev) {
			found = true;
			break;
		}
	}
	spin_unlock(&taprio_list_lock);

	if (found)
		taprio_set_picos_per_byte(dev, q);

	return NOTIFY_DONE;
}

static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	int i, err, size;
	ktime_t start;

	err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
			       taprio_policy, extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	err = taprio_parse_mqprio_opt(dev, mqprio, extack);
	if (err < 0)
		return err;

	/* A schedule with less than one entry is an error */
	size = parse_taprio_opt(tb, q, extack);
	if (size < 0)
		return size;

	hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		return -ENOTSUPP;
	}

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	if (mqprio) {
		netdev_set_num_tc(dev, mqprio->num_tc);
		for (i = 0; i < mqprio->num_tc; i++)
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);

		/* Always use supplied priority mappings */
		for (i = 0; i < TC_BITMASK + 1; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	taprio_set_picos_per_byte(dev, q);
	start = taprio_get_start_time(sch);
	if (!start)
		return 0;

	taprio_start_sched(sch, start);

	return 0;
}

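/* Tears the qdisc down: unlinks it from the global taprio list, cancels the
 * advance timer, releases the per-queue child qdiscs and frees all schedule
 * entries.
 */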
static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry, *n;
	unsigned int i;

	spin_lock(&taprio_list_lock);
	list_del(&q->taprio_list);
	spin_unlock(&taprio_list_lock);

	hrtimer_cancel(&q->advance_timer);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_set_num_tc(dev, 0);

	list_for_each_entry_safe(entry, n, &q->entries, list) {
		list_del(&entry->list);
		kfree(entry);
	}
}

static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	INIT_LIST_HEAD(&q->entries);
	spin_lock_init(&q->current_entry_lock);

	/* We may overwrite the configuration later */
	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;

	if (sch->parent != TC_H_ROOT)
		return -EOPNOTSUPP;

	if (!netif_is_multiqueue(dev))
		return -EOPNOTSUPP;

	/* pre-allocate qdisc, attachment can't fail */
	q->qdiscs = kcalloc(dev->num_tx_queues,
			    sizeof(q->qdiscs[0]),
			    GFP_KERNEL);

	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	spin_lock(&taprio_list_lock);
	list_add(&q->taprio_list, &taprio_list);
	spin_unlock(&taprio_list_lock);

	return taprio_change(sch, opt, extack);
}

static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
					     unsigned long cl)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx = cl - 1;

	if (ntx >= dev->num_tx_queues)
		return NULL;

	return netdev_get_tx_queue(dev, ntx);
}

static int taprio_graft(struct Qdisc *sch, unsigned long cl,
			struct Qdisc *new, struct Qdisc **old,
			struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return -EINVAL;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	*old = q->qdiscs[cl - 1];
	q->qdiscs[cl - 1] = new;

	if (new)
		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return 0;
}

static int dump_entry(struct sk_buff *msg,
		      const struct sched_entry *entry)
{
	struct nlattr *item;

	item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
	if (!item)
		return -ENOSPC;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
		goto nla_put_failure;

	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
			entry->gate_mask))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
			entry->interval))
		goto nla_put_failure;

	return nla_nest_end(msg, item);

nla_put_failure:
	nla_nest_cancel(msg, item);
	return -1;
}

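/* Dumps the current configuration: the mqprio-style priority map, the
 * schedule base time, the clockid and the full list of schedule entries.
 */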
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt opt = { 0 };
	struct nlattr *nest, *entry_list;
	struct sched_entry *entry;
	unsigned int i;

	opt.num_tc = netdev_get_num_tc(dev);
	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));

	for (i = 0; i < netdev_get_num_tc(dev); i++) {
		opt.count[i] = dev->tc_to_txq[i].count;
		opt.offset[i] = dev->tc_to_txq[i].offset;
	}

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (!nest)
		return -ENOSPC;

	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
		goto options_error;

	if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
			q->base_time, TCA_TAPRIO_PAD))
		goto options_error;

	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
		goto options_error;

	entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
	if (!entry_list)
		goto options_error;

	list_for_each_entry(entry, &q->entries, list) {
		if (dump_entry(skb, entry) < 0)
			goto options_error;
	}

	nla_nest_end(skb, entry_list);

	return nla_nest_end(skb, nest);

options_error:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return NULL;

	return dev_queue->qdisc_sleeping;
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
	unsigned int ntx = TC_H_MIN(classid);

	if (!taprio_queue_get(sch, ntx))
		return 0;
	return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
			     struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;

	return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				   struct gnet_dump *d)
	__releases(d->lock)
	__acquires(d->lock)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	sch = dev_queue->qdisc_sleeping;
	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
	    qdisc_qstats_copy(d, sch) < 0)
		return -1;
	return 0;
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx;

	if (arg->stop)
		return;

	arg->count = arg->skip;
	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
		if (arg->fn(sch, ntx + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
						struct tcmsg *tcm)
{
	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
	.graft		= taprio_graft,
	.leaf		= taprio_leaf,
	.find		= taprio_find,
	.walk		= taprio_walk,
	.dump		= taprio_dump_class,
	.dump_stats	= taprio_dump_class_stats,
	.select_queue	= taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
	.cl_ops		= &taprio_class_ops,
	.id		= "taprio",
	.priv_size	= sizeof(struct taprio_sched),
	.init		= taprio_init,
	.destroy	= taprio_destroy,
	.peek		= taprio_peek,
	.dequeue	= taprio_dequeue,
	.enqueue	= taprio_enqueue,
	.dump		= taprio_dump,
	.owner		= THIS_MODULE,
};

static struct notifier_block taprio_device_notifier = {
	.notifier_call = taprio_dev_notifier,
};

static int __init taprio_module_init(void)
{
	int err = register_netdevice_notifier(&taprio_device_notifier);

	if (err)
		return err;

	return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
	unregister_qdisc(&taprio_qdisc_ops);
	unregister_netdevice_notifier(&taprio_device_notifier);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");