1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_taprio.c Time Aware Priority Scheduler 4 * 5 * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> 6 * 7 */ 8 9 #include <linux/types.h> 10 #include <linux/slab.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/list.h> 14 #include <linux/errno.h> 15 #include <linux/skbuff.h> 16 #include <linux/math64.h> 17 #include <linux/module.h> 18 #include <linux/spinlock.h> 19 #include <net/netlink.h> 20 #include <net/pkt_sched.h> 21 #include <net/pkt_cls.h> 22 #include <net/sch_generic.h> 23 24 static LIST_HEAD(taprio_list); 25 static DEFINE_SPINLOCK(taprio_list_lock); 26 27 #define TAPRIO_ALL_GATES_OPEN -1 28 29 struct sched_entry { 30 struct list_head list; 31 32 /* The instant that this entry "closes" and the next one 33 * should open, the qdisc will make some effort so that no 34 * packet leaves after this time. 35 */ 36 ktime_t close_time; 37 atomic_t budget; 38 int index; 39 u32 gate_mask; 40 u32 interval; 41 u8 command; 42 }; 43 44 struct taprio_sched { 45 struct Qdisc **qdiscs; 46 struct Qdisc *root; 47 s64 base_time; 48 int clockid; 49 atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ 50 * speeds it's sub-nanoseconds per byte 51 */ 52 size_t num_entries; 53 54 /* Protects the update side of the RCU protected current_entry */ 55 spinlock_t current_entry_lock; 56 struct sched_entry __rcu *current_entry; 57 struct list_head entries; 58 ktime_t (*get_time)(void); 59 struct hrtimer advance_timer; 60 struct list_head taprio_list; 61 }; 62 63 static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, 64 struct sk_buff **to_free) 65 { 66 struct taprio_sched *q = qdisc_priv(sch); 67 struct Qdisc *child; 68 int queue; 69 70 queue = skb_get_queue_mapping(skb); 71 72 child = q->qdiscs[queue]; 73 if (unlikely(!child)) 74 return qdisc_drop(skb, sch, to_free); 75 76 qdisc_qstats_backlog_inc(sch, skb); 77 sch->q.qlen++; 78 79 return qdisc_enqueue(skb, child, to_free); 80 } 81 82 static struct sk_buff *taprio_peek(struct Qdisc *sch) 83 { 84 struct taprio_sched *q = qdisc_priv(sch); 85 struct net_device *dev = qdisc_dev(sch); 86 struct sched_entry *entry; 87 struct sk_buff *skb; 88 u32 gate_mask; 89 int i; 90 91 rcu_read_lock(); 92 entry = rcu_dereference(q->current_entry); 93 gate_mask = entry ? entry->gate_mask : -1; 94 rcu_read_unlock(); 95 96 if (!gate_mask) 97 return NULL; 98 99 for (i = 0; i < dev->num_tx_queues; i++) { 100 struct Qdisc *child = q->qdiscs[i]; 101 int prio; 102 u8 tc; 103 104 if (unlikely(!child)) 105 continue; 106 107 skb = child->ops->peek(child); 108 if (!skb) 109 continue; 110 111 prio = skb->priority; 112 tc = netdev_get_prio_tc_map(dev, prio); 113 114 if (!(gate_mask & BIT(tc))) 115 return NULL; 116 117 return skb; 118 } 119 120 return NULL; 121 } 122 123 static inline int length_to_duration(struct taprio_sched *q, int len) 124 { 125 return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); 126 } 127 128 static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) 129 { 130 atomic_set(&entry->budget, 131 div64_u64((u64)entry->interval * 1000, 132 atomic64_read(&q->picos_per_byte))); 133 } 134 135 static struct sk_buff *taprio_dequeue(struct Qdisc *sch) 136 { 137 struct taprio_sched *q = qdisc_priv(sch); 138 struct net_device *dev = qdisc_dev(sch); 139 struct sched_entry *entry; 140 struct sk_buff *skb; 141 u32 gate_mask; 142 int i; 143 144 if (atomic64_read(&q->picos_per_byte) == -1) { 145 WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); 146 return NULL; 147 } 148 149 rcu_read_lock(); 150 entry = rcu_dereference(q->current_entry); 151 /* if there's no entry, it means that the schedule didn't 152 * start yet, so force all gates to be open, this is in 153 * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 154 * "AdminGateSates" 155 */ 156 gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 157 rcu_read_unlock(); 158 159 if (!gate_mask) 160 return NULL; 161 162 for (i = 0; i < dev->num_tx_queues; i++) { 163 struct Qdisc *child = q->qdiscs[i]; 164 ktime_t guard; 165 int prio; 166 int len; 167 u8 tc; 168 169 if (unlikely(!child)) 170 continue; 171 172 skb = child->ops->peek(child); 173 if (!skb) 174 continue; 175 176 prio = skb->priority; 177 tc = netdev_get_prio_tc_map(dev, prio); 178 179 if (!(gate_mask & BIT(tc))) 180 continue; 181 182 len = qdisc_pkt_len(skb); 183 guard = ktime_add_ns(q->get_time(), 184 length_to_duration(q, len)); 185 186 /* In the case that there's no gate entry, there's no 187 * guard band ... 188 */ 189 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 190 ktime_after(guard, entry->close_time)) 191 return NULL; 192 193 /* ... and no budget. */ 194 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 195 atomic_sub_return(len, &entry->budget) < 0) 196 return NULL; 197 198 skb = child->ops->dequeue(child); 199 if (unlikely(!skb)) 200 return NULL; 201 202 qdisc_bstats_update(sch, skb); 203 qdisc_qstats_backlog_dec(sch, skb); 204 sch->q.qlen--; 205 206 return skb; 207 } 208 209 return NULL; 210 } 211 212 static bool should_restart_cycle(const struct taprio_sched *q, 213 const struct sched_entry *entry) 214 { 215 WARN_ON(!entry); 216 217 return list_is_last(&entry->list, &q->entries); 218 } 219 220 static enum hrtimer_restart advance_sched(struct hrtimer *timer) 221 { 222 struct taprio_sched *q = container_of(timer, struct taprio_sched, 223 advance_timer); 224 struct sched_entry *entry, *next; 225 struct Qdisc *sch = q->root; 226 ktime_t close_time; 227 228 spin_lock(&q->current_entry_lock); 229 entry = rcu_dereference_protected(q->current_entry, 230 lockdep_is_held(&q->current_entry_lock)); 231 232 /* This is the case that it's the first time that the schedule 233 * runs, so it only happens once per schedule. The first entry 234 * is pre-calculated during the schedule initialization. 235 */ 236 if (unlikely(!entry)) { 237 next = list_first_entry(&q->entries, struct sched_entry, 238 list); 239 close_time = next->close_time; 240 goto first_run; 241 } 242 243 if (should_restart_cycle(q, entry)) 244 next = list_first_entry(&q->entries, struct sched_entry, 245 list); 246 else 247 next = list_next_entry(entry, list); 248 249 close_time = ktime_add_ns(entry->close_time, next->interval); 250 251 next->close_time = close_time; 252 taprio_set_budget(q, next); 253 254 first_run: 255 rcu_assign_pointer(q->current_entry, next); 256 spin_unlock(&q->current_entry_lock); 257 258 hrtimer_set_expires(&q->advance_timer, close_time); 259 260 rcu_read_lock(); 261 __netif_schedule(sch); 262 rcu_read_unlock(); 263 264 return HRTIMER_RESTART; 265 } 266 267 static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { 268 [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, 269 [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, 270 [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, 271 [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, 272 }; 273 274 static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { 275 [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, 276 }; 277 278 static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { 279 [TCA_TAPRIO_ATTR_PRIOMAP] = { 280 .len = sizeof(struct tc_mqprio_qopt) 281 }, 282 [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, 283 [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, 284 [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, 285 [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, 286 }; 287 288 static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, 289 struct netlink_ext_ack *extack) 290 { 291 u32 interval = 0; 292 293 if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) 294 entry->command = nla_get_u8( 295 tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); 296 297 if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) 298 entry->gate_mask = nla_get_u32( 299 tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); 300 301 if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) 302 interval = nla_get_u32( 303 tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); 304 305 if (interval == 0) { 306 NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); 307 return -EINVAL; 308 } 309 310 entry->interval = interval; 311 312 return 0; 313 } 314 315 static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, 316 int index, struct netlink_ext_ack *extack) 317 { 318 struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; 319 int err; 320 321 err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, 322 entry_policy, NULL); 323 if (err < 0) { 324 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 325 return -EINVAL; 326 } 327 328 entry->index = index; 329 330 return fill_sched_entry(tb, entry, extack); 331 } 332 333 /* Returns the number of entries in case of success */ 334 static int parse_sched_single_entry(struct nlattr *n, 335 struct taprio_sched *q, 336 struct netlink_ext_ack *extack) 337 { 338 struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; 339 struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; 340 struct sched_entry *entry; 341 bool found = false; 342 u32 index; 343 int err; 344 345 err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, 346 n, entry_list_policy, NULL); 347 if (err < 0) { 348 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 349 return -EINVAL; 350 } 351 352 if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { 353 NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); 354 return -EINVAL; 355 } 356 357 err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, 358 tb_list[TCA_TAPRIO_SCHED_ENTRY], 359 entry_policy, NULL); 360 if (err < 0) { 361 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 362 return -EINVAL; 363 } 364 365 if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { 366 NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); 367 return -EINVAL; 368 } 369 370 index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); 371 if (index >= q->num_entries) { 372 NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); 373 return -EINVAL; 374 } 375 376 list_for_each_entry(entry, &q->entries, list) { 377 if (entry->index == index) { 378 found = true; 379 break; 380 } 381 } 382 383 if (!found) { 384 NL_SET_ERR_MSG(extack, "Could not find entry"); 385 return -ENOENT; 386 } 387 388 err = fill_sched_entry(tb_entry, entry, extack); 389 if (err < 0) 390 return err; 391 392 return q->num_entries; 393 } 394 395 static int parse_sched_list(struct nlattr *list, 396 struct taprio_sched *q, 397 struct netlink_ext_ack *extack) 398 { 399 struct nlattr *n; 400 int err, rem; 401 int i = 0; 402 403 if (!list) 404 return -EINVAL; 405 406 nla_for_each_nested(n, list, rem) { 407 struct sched_entry *entry; 408 409 if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { 410 NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); 411 continue; 412 } 413 414 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 415 if (!entry) { 416 NL_SET_ERR_MSG(extack, "Not enough memory for entry"); 417 return -ENOMEM; 418 } 419 420 err = parse_sched_entry(n, entry, i, extack); 421 if (err < 0) { 422 kfree(entry); 423 return err; 424 } 425 426 list_add_tail(&entry->list, &q->entries); 427 i++; 428 } 429 430 q->num_entries = i; 431 432 return i; 433 } 434 435 /* Returns the number of entries in case of success */ 436 static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, 437 struct netlink_ext_ack *extack) 438 { 439 int err = 0; 440 int clockid; 441 442 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && 443 tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) 444 return -EINVAL; 445 446 if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) 447 return -EINVAL; 448 449 if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) 450 return -EINVAL; 451 452 if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) 453 q->base_time = nla_get_s64( 454 tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); 455 456 if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { 457 clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); 458 459 /* We only support static clockids and we don't allow 460 * for it to be modified after the first init. 461 */ 462 if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) 463 return -EINVAL; 464 465 q->clockid = clockid; 466 } 467 468 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) 469 err = parse_sched_list( 470 tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); 471 else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) 472 err = parse_sched_single_entry( 473 tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); 474 475 /* parse_sched_* return the number of entries in the schedule, 476 * a schedule with zero entries is an error. 477 */ 478 if (err == 0) { 479 NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); 480 return -EINVAL; 481 } 482 483 return err; 484 } 485 486 static int taprio_parse_mqprio_opt(struct net_device *dev, 487 struct tc_mqprio_qopt *qopt, 488 struct netlink_ext_ack *extack) 489 { 490 int i, j; 491 492 if (!qopt) { 493 NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); 494 return -EINVAL; 495 } 496 497 /* Verify num_tc is not out of max range */ 498 if (qopt->num_tc > TC_MAX_QUEUE) { 499 NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); 500 return -EINVAL; 501 } 502 503 /* taprio imposes that traffic classes map 1:n to tx queues */ 504 if (qopt->num_tc > dev->num_tx_queues) { 505 NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); 506 return -EINVAL; 507 } 508 509 /* Verify priority mapping uses valid tcs */ 510 for (i = 0; i < TC_BITMASK + 1; i++) { 511 if (qopt->prio_tc_map[i] >= qopt->num_tc) { 512 NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); 513 return -EINVAL; 514 } 515 } 516 517 for (i = 0; i < qopt->num_tc; i++) { 518 unsigned int last = qopt->offset[i] + qopt->count[i]; 519 520 /* Verify the queue count is in tx range being equal to the 521 * real_num_tx_queues indicates the last queue is in use. 522 */ 523 if (qopt->offset[i] >= dev->num_tx_queues || 524 !qopt->count[i] || 525 last > dev->real_num_tx_queues) { 526 NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); 527 return -EINVAL; 528 } 529 530 /* Verify that the offset and counts do not overlap */ 531 for (j = i + 1; j < qopt->num_tc; j++) { 532 if (last > qopt->offset[j]) { 533 NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); 534 return -EINVAL; 535 } 536 } 537 } 538 539 return 0; 540 } 541 542 static ktime_t taprio_get_start_time(struct Qdisc *sch) 543 { 544 struct taprio_sched *q = qdisc_priv(sch); 545 struct sched_entry *entry; 546 ktime_t now, base, cycle; 547 s64 n; 548 549 base = ns_to_ktime(q->base_time); 550 cycle = 0; 551 552 /* Calculate the cycle_time, by summing all the intervals. 553 */ 554 list_for_each_entry(entry, &q->entries, list) 555 cycle = ktime_add_ns(cycle, entry->interval); 556 557 if (!cycle) 558 return base; 559 560 now = q->get_time(); 561 562 if (ktime_after(base, now)) 563 return base; 564 565 /* Schedule the start time for the beginning of the next 566 * cycle. 567 */ 568 n = div64_s64(ktime_sub_ns(now, base), cycle); 569 570 return ktime_add_ns(base, (n + 1) * cycle); 571 } 572 573 static void taprio_start_sched(struct Qdisc *sch, ktime_t start) 574 { 575 struct taprio_sched *q = qdisc_priv(sch); 576 struct sched_entry *first; 577 unsigned long flags; 578 579 spin_lock_irqsave(&q->current_entry_lock, flags); 580 581 first = list_first_entry(&q->entries, struct sched_entry, 582 list); 583 584 first->close_time = ktime_add_ns(start, first->interval); 585 taprio_set_budget(q, first); 586 rcu_assign_pointer(q->current_entry, NULL); 587 588 spin_unlock_irqrestore(&q->current_entry_lock, flags); 589 590 hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); 591 } 592 593 static void taprio_set_picos_per_byte(struct net_device *dev, 594 struct taprio_sched *q) 595 { 596 struct ethtool_link_ksettings ecmd; 597 int picos_per_byte = -1; 598 599 if (!__ethtool_get_link_ksettings(dev, &ecmd) && 600 ecmd.base.speed != SPEED_UNKNOWN) 601 picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, 602 ecmd.base.speed * 1000 * 1000); 603 604 atomic64_set(&q->picos_per_byte, picos_per_byte); 605 netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", 606 dev->name, (long long)atomic64_read(&q->picos_per_byte), 607 ecmd.base.speed); 608 } 609 610 static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, 611 void *ptr) 612 { 613 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 614 struct net_device *qdev; 615 struct taprio_sched *q; 616 bool found = false; 617 618 ASSERT_RTNL(); 619 620 if (event != NETDEV_UP && event != NETDEV_CHANGE) 621 return NOTIFY_DONE; 622 623 spin_lock(&taprio_list_lock); 624 list_for_each_entry(q, &taprio_list, taprio_list) { 625 qdev = qdisc_dev(q->root); 626 if (qdev == dev) { 627 found = true; 628 break; 629 } 630 } 631 spin_unlock(&taprio_list_lock); 632 633 if (found) 634 taprio_set_picos_per_byte(dev, q); 635 636 return NOTIFY_DONE; 637 } 638 639 static int taprio_change(struct Qdisc *sch, struct nlattr *opt, 640 struct netlink_ext_ack *extack) 641 { 642 struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; 643 struct taprio_sched *q = qdisc_priv(sch); 644 struct net_device *dev = qdisc_dev(sch); 645 struct tc_mqprio_qopt *mqprio = NULL; 646 int i, err, size; 647 ktime_t start; 648 649 err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, 650 taprio_policy, extack); 651 if (err < 0) 652 return err; 653 654 err = -EINVAL; 655 if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) 656 mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); 657 658 err = taprio_parse_mqprio_opt(dev, mqprio, extack); 659 if (err < 0) 660 return err; 661 662 /* A schedule with less than one entry is an error */ 663 size = parse_taprio_opt(tb, q, extack); 664 if (size < 0) 665 return size; 666 667 hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); 668 q->advance_timer.function = advance_sched; 669 670 switch (q->clockid) { 671 case CLOCK_REALTIME: 672 q->get_time = ktime_get_real; 673 break; 674 case CLOCK_MONOTONIC: 675 q->get_time = ktime_get; 676 break; 677 case CLOCK_BOOTTIME: 678 q->get_time = ktime_get_boottime; 679 break; 680 case CLOCK_TAI: 681 q->get_time = ktime_get_clocktai; 682 break; 683 default: 684 return -ENOTSUPP; 685 } 686 687 for (i = 0; i < dev->num_tx_queues; i++) { 688 struct netdev_queue *dev_queue; 689 struct Qdisc *qdisc; 690 691 dev_queue = netdev_get_tx_queue(dev, i); 692 qdisc = qdisc_create_dflt(dev_queue, 693 &pfifo_qdisc_ops, 694 TC_H_MAKE(TC_H_MAJ(sch->handle), 695 TC_H_MIN(i + 1)), 696 extack); 697 if (!qdisc) 698 return -ENOMEM; 699 700 if (i < dev->real_num_tx_queues) 701 qdisc_hash_add(qdisc, false); 702 703 q->qdiscs[i] = qdisc; 704 } 705 706 if (mqprio) { 707 netdev_set_num_tc(dev, mqprio->num_tc); 708 for (i = 0; i < mqprio->num_tc; i++) 709 netdev_set_tc_queue(dev, i, 710 mqprio->count[i], 711 mqprio->offset[i]); 712 713 /* Always use supplied priority mappings */ 714 for (i = 0; i < TC_BITMASK + 1; i++) 715 netdev_set_prio_tc_map(dev, i, 716 mqprio->prio_tc_map[i]); 717 } 718 719 taprio_set_picos_per_byte(dev, q); 720 start = taprio_get_start_time(sch); 721 if (!start) 722 return 0; 723 724 taprio_start_sched(sch, start); 725 726 return 0; 727 } 728 729 static void taprio_destroy(struct Qdisc *sch) 730 { 731 struct taprio_sched *q = qdisc_priv(sch); 732 struct net_device *dev = qdisc_dev(sch); 733 struct sched_entry *entry, *n; 734 unsigned int i; 735 736 spin_lock(&taprio_list_lock); 737 list_del(&q->taprio_list); 738 spin_unlock(&taprio_list_lock); 739 740 hrtimer_cancel(&q->advance_timer); 741 742 if (q->qdiscs) { 743 for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) 744 qdisc_put(q->qdiscs[i]); 745 746 kfree(q->qdiscs); 747 } 748 q->qdiscs = NULL; 749 750 netdev_set_num_tc(dev, 0); 751 752 list_for_each_entry_safe(entry, n, &q->entries, list) { 753 list_del(&entry->list); 754 kfree(entry); 755 } 756 } 757 758 static int taprio_init(struct Qdisc *sch, struct nlattr *opt, 759 struct netlink_ext_ack *extack) 760 { 761 struct taprio_sched *q = qdisc_priv(sch); 762 struct net_device *dev = qdisc_dev(sch); 763 764 INIT_LIST_HEAD(&q->entries); 765 spin_lock_init(&q->current_entry_lock); 766 767 /* We may overwrite the configuration later */ 768 hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); 769 770 q->root = sch; 771 772 /* We only support static clockids. Use an invalid value as default 773 * and get the valid one on taprio_change(). 774 */ 775 q->clockid = -1; 776 777 if (sch->parent != TC_H_ROOT) 778 return -EOPNOTSUPP; 779 780 if (!netif_is_multiqueue(dev)) 781 return -EOPNOTSUPP; 782 783 /* pre-allocate qdisc, attachment can't fail */ 784 q->qdiscs = kcalloc(dev->num_tx_queues, 785 sizeof(q->qdiscs[0]), 786 GFP_KERNEL); 787 788 if (!q->qdiscs) 789 return -ENOMEM; 790 791 if (!opt) 792 return -EINVAL; 793 794 spin_lock(&taprio_list_lock); 795 list_add(&q->taprio_list, &taprio_list); 796 spin_unlock(&taprio_list_lock); 797 798 return taprio_change(sch, opt, extack); 799 } 800 801 static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, 802 unsigned long cl) 803 { 804 struct net_device *dev = qdisc_dev(sch); 805 unsigned long ntx = cl - 1; 806 807 if (ntx >= dev->num_tx_queues) 808 return NULL; 809 810 return netdev_get_tx_queue(dev, ntx); 811 } 812 813 static int taprio_graft(struct Qdisc *sch, unsigned long cl, 814 struct Qdisc *new, struct Qdisc **old, 815 struct netlink_ext_ack *extack) 816 { 817 struct taprio_sched *q = qdisc_priv(sch); 818 struct net_device *dev = qdisc_dev(sch); 819 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 820 821 if (!dev_queue) 822 return -EINVAL; 823 824 if (dev->flags & IFF_UP) 825 dev_deactivate(dev); 826 827 *old = q->qdiscs[cl - 1]; 828 q->qdiscs[cl - 1] = new; 829 830 if (new) 831 new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 832 833 if (dev->flags & IFF_UP) 834 dev_activate(dev); 835 836 return 0; 837 } 838 839 static int dump_entry(struct sk_buff *msg, 840 const struct sched_entry *entry) 841 { 842 struct nlattr *item; 843 844 item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); 845 if (!item) 846 return -ENOSPC; 847 848 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index)) 849 goto nla_put_failure; 850 851 if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command)) 852 goto nla_put_failure; 853 854 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, 855 entry->gate_mask)) 856 goto nla_put_failure; 857 858 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, 859 entry->interval)) 860 goto nla_put_failure; 861 862 return nla_nest_end(msg, item); 863 864 nla_put_failure: 865 nla_nest_cancel(msg, item); 866 return -1; 867 } 868 869 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) 870 { 871 struct taprio_sched *q = qdisc_priv(sch); 872 struct net_device *dev = qdisc_dev(sch); 873 struct tc_mqprio_qopt opt = { 0 }; 874 struct nlattr *nest, *entry_list; 875 struct sched_entry *entry; 876 unsigned int i; 877 878 opt.num_tc = netdev_get_num_tc(dev); 879 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); 880 881 for (i = 0; i < netdev_get_num_tc(dev); i++) { 882 opt.count[i] = dev->tc_to_txq[i].count; 883 opt.offset[i] = dev->tc_to_txq[i].offset; 884 } 885 886 nest = nla_nest_start(skb, TCA_OPTIONS); 887 if (!nest) 888 return -ENOSPC; 889 890 if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) 891 goto options_error; 892 893 if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, 894 q->base_time, TCA_TAPRIO_PAD)) 895 goto options_error; 896 897 if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) 898 goto options_error; 899 900 entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); 901 if (!entry_list) 902 goto options_error; 903 904 list_for_each_entry(entry, &q->entries, list) { 905 if (dump_entry(skb, entry) < 0) 906 goto options_error; 907 } 908 909 nla_nest_end(skb, entry_list); 910 911 return nla_nest_end(skb, nest); 912 913 options_error: 914 nla_nest_cancel(skb, nest); 915 return -1; 916 } 917 918 static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) 919 { 920 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 921 922 if (!dev_queue) 923 return NULL; 924 925 return dev_queue->qdisc_sleeping; 926 } 927 928 static unsigned long taprio_find(struct Qdisc *sch, u32 classid) 929 { 930 unsigned int ntx = TC_H_MIN(classid); 931 932 if (!taprio_queue_get(sch, ntx)) 933 return 0; 934 return ntx; 935 } 936 937 static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, 938 struct sk_buff *skb, struct tcmsg *tcm) 939 { 940 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 941 942 tcm->tcm_parent = TC_H_ROOT; 943 tcm->tcm_handle |= TC_H_MIN(cl); 944 tcm->tcm_info = dev_queue->qdisc_sleeping->handle; 945 946 return 0; 947 } 948 949 static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, 950 struct gnet_dump *d) 951 __releases(d->lock) 952 __acquires(d->lock) 953 { 954 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 955 956 sch = dev_queue->qdisc_sleeping; 957 if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || 958 qdisc_qstats_copy(d, sch) < 0) 959 return -1; 960 return 0; 961 } 962 963 static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 964 { 965 struct net_device *dev = qdisc_dev(sch); 966 unsigned long ntx; 967 968 if (arg->stop) 969 return; 970 971 arg->count = arg->skip; 972 for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { 973 if (arg->fn(sch, ntx + 1, arg) < 0) { 974 arg->stop = 1; 975 break; 976 } 977 arg->count++; 978 } 979 } 980 981 static struct netdev_queue *taprio_select_queue(struct Qdisc *sch, 982 struct tcmsg *tcm) 983 { 984 return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); 985 } 986 987 static const struct Qdisc_class_ops taprio_class_ops = { 988 .graft = taprio_graft, 989 .leaf = taprio_leaf, 990 .find = taprio_find, 991 .walk = taprio_walk, 992 .dump = taprio_dump_class, 993 .dump_stats = taprio_dump_class_stats, 994 .select_queue = taprio_select_queue, 995 }; 996 997 static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { 998 .cl_ops = &taprio_class_ops, 999 .id = "taprio", 1000 .priv_size = sizeof(struct taprio_sched), 1001 .init = taprio_init, 1002 .destroy = taprio_destroy, 1003 .peek = taprio_peek, 1004 .dequeue = taprio_dequeue, 1005 .enqueue = taprio_enqueue, 1006 .dump = taprio_dump, 1007 .owner = THIS_MODULE, 1008 }; 1009 1010 static struct notifier_block taprio_device_notifier = { 1011 .notifier_call = taprio_dev_notifier, 1012 }; 1013 1014 static int __init taprio_module_init(void) 1015 { 1016 int err = register_netdevice_notifier(&taprio_device_notifier); 1017 1018 if (err) 1019 return err; 1020 1021 return register_qdisc(&taprio_qdisc_ops); 1022 } 1023 1024 static void __exit taprio_module_exit(void) 1025 { 1026 unregister_qdisc(&taprio_qdisc_ops); 1027 unregister_netdevice_notifier(&taprio_device_notifier); 1028 } 1029 1030 module_init(taprio_module_init); 1031 module_exit(taprio_module_exit); 1032 MODULE_LICENSE("GPL"); 1033