1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_taprio.c Time Aware Priority Scheduler 4 * 5 * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> 6 * 7 */ 8 9 #include <linux/ethtool.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/kernel.h> 13 #include <linux/string.h> 14 #include <linux/list.h> 15 #include <linux/errno.h> 16 #include <linux/skbuff.h> 17 #include <linux/math64.h> 18 #include <linux/module.h> 19 #include <linux/spinlock.h> 20 #include <linux/rcupdate.h> 21 #include <linux/time.h> 22 #include <net/netlink.h> 23 #include <net/pkt_sched.h> 24 #include <net/pkt_cls.h> 25 #include <net/sch_generic.h> 26 #include <net/sock.h> 27 #include <net/tcp.h> 28 29 static LIST_HEAD(taprio_list); 30 31 #define TAPRIO_ALL_GATES_OPEN -1 32 33 #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) 34 #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) 35 #define TAPRIO_FLAGS_INVALID U32_MAX 36 37 struct sched_entry { 38 struct list_head list; 39 40 /* The instant that this entry "closes" and the next one 41 * should open, the qdisc will make some effort so that no 42 * packet leaves after this time. 
43 */ 44 ktime_t close_time; 45 ktime_t next_txtime; 46 atomic_t budget; 47 int index; 48 u32 gate_mask; 49 u32 interval; 50 u8 command; 51 }; 52 53 struct sched_gate_list { 54 struct rcu_head rcu; 55 struct list_head entries; 56 size_t num_entries; 57 ktime_t cycle_close_time; 58 s64 cycle_time; 59 s64 cycle_time_extension; 60 s64 base_time; 61 }; 62 63 struct taprio_sched { 64 struct Qdisc **qdiscs; 65 struct Qdisc *root; 66 u32 flags; 67 enum tk_offsets tk_offset; 68 int clockid; 69 bool offloaded; 70 atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ 71 * speeds it's sub-nanoseconds per byte 72 */ 73 74 /* Protects the update side of the RCU protected current_entry */ 75 spinlock_t current_entry_lock; 76 struct sched_entry __rcu *current_entry; 77 struct sched_gate_list __rcu *oper_sched; 78 struct sched_gate_list __rcu *admin_sched; 79 struct hrtimer advance_timer; 80 struct list_head taprio_list; 81 u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ 82 u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */ 83 u32 txtime_delay; 84 }; 85 86 struct __tc_taprio_qopt_offload { 87 refcount_t users; 88 struct tc_taprio_qopt_offload offload; 89 }; 90 91 static ktime_t sched_base_time(const struct sched_gate_list *sched) 92 { 93 if (!sched) 94 return KTIME_MAX; 95 96 return ns_to_ktime(sched->base_time); 97 } 98 99 static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono) 100 { 101 /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */ 102 enum tk_offsets tk_offset = READ_ONCE(q->tk_offset); 103 104 switch (tk_offset) { 105 case TK_OFFS_MAX: 106 return mono; 107 default: 108 return ktime_mono_to_any(mono, tk_offset); 109 } 110 } 111 112 static ktime_t taprio_get_time(const struct taprio_sched *q) 113 { 114 return taprio_mono_to_any(q, ktime_get()); 115 } 116 117 static void taprio_free_sched_cb(struct rcu_head *head) 118 { 119 struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu); 120 struct 
sched_entry *entry, *n; 121 122 list_for_each_entry_safe(entry, n, &sched->entries, list) { 123 list_del(&entry->list); 124 kfree(entry); 125 } 126 127 kfree(sched); 128 } 129 130 static void switch_schedules(struct taprio_sched *q, 131 struct sched_gate_list **admin, 132 struct sched_gate_list **oper) 133 { 134 rcu_assign_pointer(q->oper_sched, *admin); 135 rcu_assign_pointer(q->admin_sched, NULL); 136 137 if (*oper) 138 call_rcu(&(*oper)->rcu, taprio_free_sched_cb); 139 140 *oper = *admin; 141 *admin = NULL; 142 } 143 144 /* Get how much time has been already elapsed in the current cycle. */ 145 static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time) 146 { 147 ktime_t time_since_sched_start; 148 s32 time_elapsed; 149 150 time_since_sched_start = ktime_sub(time, sched->base_time); 151 div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed); 152 153 return time_elapsed; 154 } 155 156 static ktime_t get_interval_end_time(struct sched_gate_list *sched, 157 struct sched_gate_list *admin, 158 struct sched_entry *entry, 159 ktime_t intv_start) 160 { 161 s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start); 162 ktime_t intv_end, cycle_ext_end, cycle_end; 163 164 cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed); 165 intv_end = ktime_add_ns(intv_start, entry->interval); 166 cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension); 167 168 if (ktime_before(intv_end, cycle_end)) 169 return intv_end; 170 else if (admin && admin != sched && 171 ktime_after(admin->base_time, cycle_end) && 172 ktime_before(admin->base_time, cycle_ext_end)) 173 return admin->base_time; 174 else 175 return cycle_end; 176 } 177 178 static int length_to_duration(struct taprio_sched *q, int len) 179 { 180 return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC); 181 } 182 183 /* Returns the entry corresponding to next available interval. 
If 184 * validate_interval is set, it only validates whether the timestamp occurs 185 * when the gate corresponding to the skb's traffic class is open. 186 */ 187 static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb, 188 struct Qdisc *sch, 189 struct sched_gate_list *sched, 190 struct sched_gate_list *admin, 191 ktime_t time, 192 ktime_t *interval_start, 193 ktime_t *interval_end, 194 bool validate_interval) 195 { 196 ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time; 197 ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time; 198 struct sched_entry *entry = NULL, *entry_found = NULL; 199 struct taprio_sched *q = qdisc_priv(sch); 200 struct net_device *dev = qdisc_dev(sch); 201 bool entry_available = false; 202 s32 cycle_elapsed; 203 int tc, n; 204 205 tc = netdev_get_prio_tc_map(dev, skb->priority); 206 packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb)); 207 208 *interval_start = 0; 209 *interval_end = 0; 210 211 if (!sched) 212 return NULL; 213 214 cycle = sched->cycle_time; 215 cycle_elapsed = get_cycle_time_elapsed(sched, time); 216 curr_intv_end = ktime_sub_ns(time, cycle_elapsed); 217 cycle_end = ktime_add_ns(curr_intv_end, cycle); 218 219 list_for_each_entry(entry, &sched->entries, list) { 220 curr_intv_start = curr_intv_end; 221 curr_intv_end = get_interval_end_time(sched, admin, entry, 222 curr_intv_start); 223 224 if (ktime_after(curr_intv_start, cycle_end)) 225 break; 226 227 if (!(entry->gate_mask & BIT(tc)) || 228 packet_transmit_time > entry->interval) 229 continue; 230 231 txtime = entry->next_txtime; 232 233 if (ktime_before(txtime, time) || validate_interval) { 234 transmit_end_time = ktime_add_ns(time, packet_transmit_time); 235 if ((ktime_before(curr_intv_start, time) && 236 ktime_before(transmit_end_time, curr_intv_end)) || 237 (ktime_after(curr_intv_start, time) && !validate_interval)) { 238 entry_found = entry; 239 *interval_start = curr_intv_start; 240 *interval_end = 
curr_intv_end; 241 break; 242 } else if (!entry_available && !validate_interval) { 243 /* Here, we are just trying to find out the 244 * first available interval in the next cycle. 245 */ 246 entry_available = true; 247 entry_found = entry; 248 *interval_start = ktime_add_ns(curr_intv_start, cycle); 249 *interval_end = ktime_add_ns(curr_intv_end, cycle); 250 } 251 } else if (ktime_before(txtime, earliest_txtime) && 252 !entry_available) { 253 earliest_txtime = txtime; 254 entry_found = entry; 255 n = div_s64(ktime_sub(txtime, curr_intv_start), cycle); 256 *interval_start = ktime_add(curr_intv_start, n * cycle); 257 *interval_end = ktime_add(curr_intv_end, n * cycle); 258 } 259 } 260 261 return entry_found; 262 } 263 264 static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) 265 { 266 struct taprio_sched *q = qdisc_priv(sch); 267 struct sched_gate_list *sched, *admin; 268 ktime_t interval_start, interval_end; 269 struct sched_entry *entry; 270 271 rcu_read_lock(); 272 sched = rcu_dereference(q->oper_sched); 273 admin = rcu_dereference(q->admin_sched); 274 275 entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp, 276 &interval_start, &interval_end, true); 277 rcu_read_unlock(); 278 279 return entry; 280 } 281 282 static bool taprio_flags_valid(u32 flags) 283 { 284 /* Make sure no other flag bits are set. */ 285 if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | 286 TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) 287 return false; 288 /* txtime-assist and full offload are mutually exclusive */ 289 if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && 290 (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) 291 return false; 292 return true; 293 } 294 295 /* This returns the tstamp value set by TCP in terms of the set clock. 
*/ 296 static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) 297 { 298 unsigned int offset = skb_network_offset(skb); 299 const struct ipv6hdr *ipv6h; 300 const struct iphdr *iph; 301 struct ipv6hdr _ipv6h; 302 303 ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); 304 if (!ipv6h) 305 return 0; 306 307 if (ipv6h->version == 4) { 308 iph = (struct iphdr *)ipv6h; 309 offset += iph->ihl * 4; 310 311 /* special-case 6in4 tunnelling, as that is a common way to get 312 * v6 connectivity in the home 313 */ 314 if (iph->protocol == IPPROTO_IPV6) { 315 ipv6h = skb_header_pointer(skb, offset, 316 sizeof(_ipv6h), &_ipv6h); 317 318 if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP) 319 return 0; 320 } else if (iph->protocol != IPPROTO_TCP) { 321 return 0; 322 } 323 } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) { 324 return 0; 325 } 326 327 return taprio_mono_to_any(q, skb->skb_mstamp_ns); 328 } 329 330 /* There are a few scenarios where we will have to modify the txtime from 331 * what is read from next_txtime in sched_entry. They are: 332 * 1. If txtime is in the past, 333 * a. The gate for the traffic class is currently open and packet can be 334 * transmitted before it closes, schedule the packet right away. 335 * b. If the gate corresponding to the traffic class is going to open later 336 * in the cycle, set the txtime of packet to the interval start. 337 * 2. If txtime is in the future, there are packets corresponding to the 338 * current traffic class waiting to be transmitted. So, the following 339 * possibilities exist: 340 * a. We can transmit the packet before the window containing the txtime 341 * closes. 342 * b. The window might close before the transmission can be completed 343 * successfully. So, schedule the packet in the next open window. 
344 */ 345 static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch) 346 { 347 ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp; 348 struct taprio_sched *q = qdisc_priv(sch); 349 struct sched_gate_list *sched, *admin; 350 ktime_t minimum_time, now, txtime; 351 int len, packet_transmit_time; 352 struct sched_entry *entry; 353 bool sched_changed; 354 355 now = taprio_get_time(q); 356 minimum_time = ktime_add_ns(now, q->txtime_delay); 357 358 tcp_tstamp = get_tcp_tstamp(q, skb); 359 minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp); 360 361 rcu_read_lock(); 362 admin = rcu_dereference(q->admin_sched); 363 sched = rcu_dereference(q->oper_sched); 364 if (admin && ktime_after(minimum_time, admin->base_time)) 365 switch_schedules(q, &admin, &sched); 366 367 /* Until the schedule starts, all the queues are open */ 368 if (!sched || ktime_before(minimum_time, sched->base_time)) { 369 txtime = minimum_time; 370 goto done; 371 } 372 373 len = qdisc_pkt_len(skb); 374 packet_transmit_time = length_to_duration(q, len); 375 376 do { 377 sched_changed = false; 378 379 entry = find_entry_to_transmit(skb, sch, sched, admin, 380 minimum_time, 381 &interval_start, &interval_end, 382 false); 383 if (!entry) { 384 txtime = 0; 385 goto done; 386 } 387 388 txtime = entry->next_txtime; 389 txtime = max_t(ktime_t, txtime, minimum_time); 390 txtime = max_t(ktime_t, txtime, interval_start); 391 392 if (admin && admin != sched && 393 ktime_after(txtime, admin->base_time)) { 394 sched = admin; 395 sched_changed = true; 396 continue; 397 } 398 399 transmit_end_time = ktime_add(txtime, packet_transmit_time); 400 minimum_time = transmit_end_time; 401 402 /* Update the txtime of current entry to the next time it's 403 * interval starts. 
404 */ 405 if (ktime_after(transmit_end_time, interval_end)) 406 entry->next_txtime = ktime_add(interval_start, sched->cycle_time); 407 } while (sched_changed || ktime_after(transmit_end_time, interval_end)); 408 409 entry->next_txtime = transmit_end_time; 410 411 done: 412 rcu_read_unlock(); 413 return txtime; 414 } 415 416 static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, 417 struct Qdisc *child, struct sk_buff **to_free) 418 { 419 struct taprio_sched *q = qdisc_priv(sch); 420 struct net_device *dev = qdisc_dev(sch); 421 int prio = skb->priority; 422 u8 tc; 423 424 /* sk_flags are only safe to use on full sockets. */ 425 if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { 426 if (!is_valid_interval(skb, sch)) 427 return qdisc_drop(skb, sch, to_free); 428 } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { 429 skb->tstamp = get_packet_txtime(skb, sch); 430 if (!skb->tstamp) 431 return qdisc_drop(skb, sch, to_free); 432 } 433 434 /* Devices with full offload are expected to honor this in hardware */ 435 tc = netdev_get_prio_tc_map(dev, prio); 436 if (skb->len > q->max_frm_len[tc]) 437 return qdisc_drop(skb, sch, to_free); 438 439 qdisc_qstats_backlog_inc(sch, skb); 440 sch->q.qlen++; 441 442 return qdisc_enqueue(skb, child, to_free); 443 } 444 445 /* Will not be called in the full offload case, since the TX queues are 446 * attached to the Qdisc created using qdisc_create_dflt() 447 */ 448 static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, 449 struct sk_buff **to_free) 450 { 451 struct taprio_sched *q = qdisc_priv(sch); 452 struct Qdisc *child; 453 int queue; 454 455 queue = skb_get_queue_mapping(skb); 456 457 child = q->qdiscs[queue]; 458 if (unlikely(!child)) 459 return qdisc_drop(skb, sch, to_free); 460 461 /* Large packets might not be transmitted when the transmission duration 462 * exceeds any configured interval. Therefore, segment the skb into 463 * smaller chunks. 
Drivers with full offload are expected to handle 464 * this in hardware. 465 */ 466 if (skb_is_gso(skb)) { 467 unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); 468 netdev_features_t features = netif_skb_features(skb); 469 struct sk_buff *segs, *nskb; 470 int ret; 471 472 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 473 if (IS_ERR_OR_NULL(segs)) 474 return qdisc_drop(skb, sch, to_free); 475 476 skb_list_walk_safe(segs, segs, nskb) { 477 skb_mark_not_on_list(segs); 478 qdisc_skb_cb(segs)->pkt_len = segs->len; 479 slen += segs->len; 480 481 ret = taprio_enqueue_one(segs, sch, child, to_free); 482 if (ret != NET_XMIT_SUCCESS) { 483 if (net_xmit_drop_count(ret)) 484 qdisc_qstats_drop(sch); 485 } else { 486 numsegs++; 487 } 488 } 489 490 if (numsegs > 1) 491 qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen); 492 consume_skb(skb); 493 494 return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; 495 } 496 497 return taprio_enqueue_one(skb, sch, child, to_free); 498 } 499 500 /* Will not be called in the full offload case, since the TX queues are 501 * attached to the Qdisc created using qdisc_create_dflt() 502 */ 503 static struct sk_buff *taprio_peek(struct Qdisc *sch) 504 { 505 struct taprio_sched *q = qdisc_priv(sch); 506 struct net_device *dev = qdisc_dev(sch); 507 struct sched_entry *entry; 508 struct sk_buff *skb; 509 u32 gate_mask; 510 int i; 511 512 rcu_read_lock(); 513 entry = rcu_dereference(q->current_entry); 514 gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 515 rcu_read_unlock(); 516 517 if (!gate_mask) 518 return NULL; 519 520 for (i = 0; i < dev->num_tx_queues; i++) { 521 struct Qdisc *child = q->qdiscs[i]; 522 int prio; 523 u8 tc; 524 525 if (unlikely(!child)) 526 continue; 527 528 skb = child->ops->peek(child); 529 if (!skb) 530 continue; 531 532 if (TXTIME_ASSIST_IS_ENABLED(q->flags)) 533 return skb; 534 535 prio = skb->priority; 536 tc = netdev_get_prio_tc_map(dev, prio); 537 538 if (!(gate_mask & BIT(tc))) 539 continue; 540 541 return skb; 542 } 543 544 return NULL; 545 } 546 547 static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) 548 { 549 atomic_set(&entry->budget, 550 div64_u64((u64)entry->interval * PSEC_PER_NSEC, 551 atomic64_read(&q->picos_per_byte))); 552 } 553 554 /* Will not be called in the full offload case, since the TX queues are 555 * attached to the Qdisc created using qdisc_create_dflt() 556 */ 557 static struct sk_buff *taprio_dequeue(struct Qdisc *sch) 558 { 559 struct taprio_sched *q = qdisc_priv(sch); 560 struct net_device *dev = qdisc_dev(sch); 561 struct sk_buff *skb = NULL; 562 struct sched_entry *entry; 563 u32 gate_mask; 564 int i; 565 566 rcu_read_lock(); 567 entry = rcu_dereference(q->current_entry); 568 /* if there's no entry, it means that the schedule didn't 569 * start yet, so force all gates to be open, this is in 570 * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 571 * "AdminGateStates" 572 */ 573 gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 574 575 if (!gate_mask) 576 goto done; 577 578 for (i = 0; i < dev->num_tx_queues; i++) { 579 struct Qdisc *child = q->qdiscs[i]; 580 ktime_t guard; 581 int prio; 582 int len; 583 u8 tc; 584 585 if (unlikely(!child)) 586 continue; 587 588 if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { 589 skb = child->ops->dequeue(child); 590 if (!skb) 591 continue; 592 goto skb_found; 593 } 594 595 skb = child->ops->peek(child); 596 if (!skb) 597 continue; 598 599 prio = skb->priority; 600 tc = netdev_get_prio_tc_map(dev, prio); 601 602 if (!(gate_mask & BIT(tc))) { 603 skb = NULL; 604 continue; 605 } 606 607 len = qdisc_pkt_len(skb); 608 guard = ktime_add_ns(taprio_get_time(q), 609 length_to_duration(q, len)); 610 611 /* In the case that there's no gate entry, there's no 612 * guard band ... 613 */ 614 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 615 ktime_after(guard, entry->close_time)) { 616 skb = NULL; 617 continue; 618 } 619 620 /* ... and no budget. */ 621 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 622 atomic_sub_return(len, &entry->budget) < 0) { 623 skb = NULL; 624 continue; 625 } 626 627 skb = child->ops->dequeue(child); 628 if (unlikely(!skb)) 629 goto done; 630 631 skb_found: 632 qdisc_bstats_update(sch, skb); 633 qdisc_qstats_backlog_dec(sch, skb); 634 sch->q.qlen--; 635 636 goto done; 637 } 638 639 done: 640 rcu_read_unlock(); 641 642 return skb; 643 } 644 645 static bool should_restart_cycle(const struct sched_gate_list *oper, 646 const struct sched_entry *entry) 647 { 648 if (list_is_last(&entry->list, &oper->entries)) 649 return true; 650 651 if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0) 652 return true; 653 654 return false; 655 } 656 657 static bool should_change_schedules(const struct sched_gate_list *admin, 658 const struct sched_gate_list *oper, 659 ktime_t close_time) 660 { 661 ktime_t next_base_time, extension_time; 662 663 if (!admin) 664 return false; 665 666 next_base_time = sched_base_time(admin); 
667 668 /* This is the simple case, the close_time would fall after 669 * the next schedule base_time. 670 */ 671 if (ktime_compare(next_base_time, close_time) <= 0) 672 return true; 673 674 /* This is the cycle_time_extension case, if the close_time 675 * plus the amount that can be extended would fall after the 676 * next schedule base_time, we can extend the current schedule 677 * for that amount. 678 */ 679 extension_time = ktime_add_ns(close_time, oper->cycle_time_extension); 680 681 /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about 682 * how precisely the extension should be made. So after 683 * conformance testing, this logic may change. 684 */ 685 if (ktime_compare(next_base_time, extension_time) <= 0) 686 return true; 687 688 return false; 689 } 690 691 static enum hrtimer_restart advance_sched(struct hrtimer *timer) 692 { 693 struct taprio_sched *q = container_of(timer, struct taprio_sched, 694 advance_timer); 695 struct sched_gate_list *oper, *admin; 696 struct sched_entry *entry, *next; 697 struct Qdisc *sch = q->root; 698 ktime_t close_time; 699 700 spin_lock(&q->current_entry_lock); 701 entry = rcu_dereference_protected(q->current_entry, 702 lockdep_is_held(&q->current_entry_lock)); 703 oper = rcu_dereference_protected(q->oper_sched, 704 lockdep_is_held(&q->current_entry_lock)); 705 admin = rcu_dereference_protected(q->admin_sched, 706 lockdep_is_held(&q->current_entry_lock)); 707 708 if (!oper) 709 switch_schedules(q, &admin, &oper); 710 711 /* This can happen in two cases: 1. this is the very first run 712 * of this function (i.e. we weren't running any schedule 713 * previously); 2. The previous schedule just ended. The first 714 * entry of all schedules are pre-calculated during the 715 * schedule initialization. 
716 */ 717 if (unlikely(!entry || entry->close_time == oper->base_time)) { 718 next = list_first_entry(&oper->entries, struct sched_entry, 719 list); 720 close_time = next->close_time; 721 goto first_run; 722 } 723 724 if (should_restart_cycle(oper, entry)) { 725 next = list_first_entry(&oper->entries, struct sched_entry, 726 list); 727 oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time, 728 oper->cycle_time); 729 } else { 730 next = list_next_entry(entry, list); 731 } 732 733 close_time = ktime_add_ns(entry->close_time, next->interval); 734 close_time = min_t(ktime_t, close_time, oper->cycle_close_time); 735 736 if (should_change_schedules(admin, oper, close_time)) { 737 /* Set things so the next time this runs, the new 738 * schedule runs. 739 */ 740 close_time = sched_base_time(admin); 741 switch_schedules(q, &admin, &oper); 742 } 743 744 next->close_time = close_time; 745 taprio_set_budget(q, next); 746 747 first_run: 748 rcu_assign_pointer(q->current_entry, next); 749 spin_unlock(&q->current_entry_lock); 750 751 hrtimer_set_expires(&q->advance_timer, close_time); 752 753 rcu_read_lock(); 754 __netif_schedule(sch); 755 rcu_read_unlock(); 756 757 return HRTIMER_RESTART; 758 } 759 760 static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { 761 [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, 762 [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, 763 [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, 764 [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, 765 }; 766 767 static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { 768 [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, 769 [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, 770 }; 771 772 static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { 773 [TCA_TAPRIO_ATTR_PRIOMAP] = { 774 .len = sizeof(struct tc_mqprio_qopt) 775 }, 776 [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, 777 
[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, 778 [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, 779 [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, 780 [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, 781 [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, 782 [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, 783 [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, 784 [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, 785 }; 786 787 static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, 788 struct sched_entry *entry, 789 struct netlink_ext_ack *extack) 790 { 791 int min_duration = length_to_duration(q, ETH_ZLEN); 792 u32 interval = 0; 793 794 if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) 795 entry->command = nla_get_u8( 796 tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); 797 798 if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) 799 entry->gate_mask = nla_get_u32( 800 tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); 801 802 if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) 803 interval = nla_get_u32( 804 tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); 805 806 /* The interval should allow at least the minimum ethernet 807 * frame to go out. 
808 */ 809 if (interval < min_duration) { 810 NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); 811 return -EINVAL; 812 } 813 814 entry->interval = interval; 815 816 return 0; 817 } 818 819 static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n, 820 struct sched_entry *entry, int index, 821 struct netlink_ext_ack *extack) 822 { 823 struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; 824 int err; 825 826 err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, 827 entry_policy, NULL); 828 if (err < 0) { 829 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 830 return -EINVAL; 831 } 832 833 entry->index = index; 834 835 return fill_sched_entry(q, tb, entry, extack); 836 } 837 838 static int parse_sched_list(struct taprio_sched *q, struct nlattr *list, 839 struct sched_gate_list *sched, 840 struct netlink_ext_ack *extack) 841 { 842 struct nlattr *n; 843 int err, rem; 844 int i = 0; 845 846 if (!list) 847 return -EINVAL; 848 849 nla_for_each_nested(n, list, rem) { 850 struct sched_entry *entry; 851 852 if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { 853 NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); 854 continue; 855 } 856 857 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 858 if (!entry) { 859 NL_SET_ERR_MSG(extack, "Not enough memory for entry"); 860 return -ENOMEM; 861 } 862 863 err = parse_sched_entry(q, n, entry, i, extack); 864 if (err < 0) { 865 kfree(entry); 866 return err; 867 } 868 869 list_add_tail(&entry->list, &sched->entries); 870 i++; 871 } 872 873 sched->num_entries = i; 874 875 return i; 876 } 877 878 static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, 879 struct sched_gate_list *new, 880 struct netlink_ext_ack *extack) 881 { 882 int err = 0; 883 884 if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) { 885 NL_SET_ERR_MSG(extack, "Adding a single entry is not supported"); 886 return -ENOTSUPP; 887 } 888 889 if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) 890 new->base_time = 
nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); 891 892 if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]) 893 new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]); 894 895 if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]) 896 new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); 897 898 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) 899 err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], 900 new, extack); 901 if (err < 0) 902 return err; 903 904 if (!new->cycle_time) { 905 struct sched_entry *entry; 906 ktime_t cycle = 0; 907 908 list_for_each_entry(entry, &new->entries, list) 909 cycle = ktime_add_ns(cycle, entry->interval); 910 911 if (!cycle) { 912 NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0"); 913 return -EINVAL; 914 } 915 916 new->cycle_time = cycle; 917 } 918 919 return 0; 920 } 921 922 static int taprio_parse_mqprio_opt(struct net_device *dev, 923 struct tc_mqprio_qopt *qopt, 924 struct netlink_ext_ack *extack, 925 u32 taprio_flags) 926 { 927 int i, j; 928 929 if (!qopt && !dev->num_tc) { 930 NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); 931 return -EINVAL; 932 } 933 934 /* If num_tc is already set, it means that the user already 935 * configured the mqprio part 936 */ 937 if (dev->num_tc) 938 return 0; 939 940 /* Verify num_tc is not out of max range */ 941 if (qopt->num_tc > TC_MAX_QUEUE) { 942 NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); 943 return -EINVAL; 944 } 945 946 /* taprio imposes that traffic classes map 1:n to tx queues */ 947 if (qopt->num_tc > dev->num_tx_queues) { 948 NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); 949 return -EINVAL; 950 } 951 952 /* Verify priority mapping uses valid tcs */ 953 for (i = 0; i <= TC_BITMASK; i++) { 954 if (qopt->prio_tc_map[i] >= qopt->num_tc) { 955 NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); 956 return -EINVAL; 957 } 
958 } 959 960 for (i = 0; i < qopt->num_tc; i++) { 961 unsigned int last = qopt->offset[i] + qopt->count[i]; 962 963 /* Verify the queue count is in tx range being equal to the 964 * real_num_tx_queues indicates the last queue is in use. 965 */ 966 if (qopt->offset[i] >= dev->num_tx_queues || 967 !qopt->count[i] || 968 last > dev->real_num_tx_queues) { 969 NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); 970 return -EINVAL; 971 } 972 973 if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) 974 continue; 975 976 /* Verify that the offset and counts do not overlap */ 977 for (j = i + 1; j < qopt->num_tc; j++) { 978 if (last > qopt->offset[j]) { 979 NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); 980 return -EINVAL; 981 } 982 } 983 } 984 985 return 0; 986 } 987 988 static int taprio_get_start_time(struct Qdisc *sch, 989 struct sched_gate_list *sched, 990 ktime_t *start) 991 { 992 struct taprio_sched *q = qdisc_priv(sch); 993 ktime_t now, base, cycle; 994 s64 n; 995 996 base = sched_base_time(sched); 997 now = taprio_get_time(q); 998 999 if (ktime_after(base, now)) { 1000 *start = base; 1001 return 0; 1002 } 1003 1004 cycle = sched->cycle_time; 1005 1006 /* The qdisc is expected to have at least one sched_entry. Moreover, 1007 * any entry must have 'interval' > 0. Thus if the cycle time is zero, 1008 * something went really wrong. In that case, we should warn about this 1009 * inconsistent state and return error. 1010 */ 1011 if (WARN_ON(!cycle)) 1012 return -EFAULT; 1013 1014 /* Schedule the start time for the beginning of the next 1015 * cycle. 
1016 */ 1017 n = div64_s64(ktime_sub_ns(now, base), cycle); 1018 *start = ktime_add_ns(base, (n + 1) * cycle); 1019 return 0; 1020 } 1021 1022 static void setup_first_close_time(struct taprio_sched *q, 1023 struct sched_gate_list *sched, ktime_t base) 1024 { 1025 struct sched_entry *first; 1026 ktime_t cycle; 1027 1028 first = list_first_entry(&sched->entries, 1029 struct sched_entry, list); 1030 1031 cycle = sched->cycle_time; 1032 1033 /* FIXME: find a better place to do this */ 1034 sched->cycle_close_time = ktime_add_ns(base, cycle); 1035 1036 first->close_time = ktime_add_ns(base, first->interval); 1037 taprio_set_budget(q, first); 1038 rcu_assign_pointer(q->current_entry, NULL); 1039 } 1040 1041 static void taprio_start_sched(struct Qdisc *sch, 1042 ktime_t start, struct sched_gate_list *new) 1043 { 1044 struct taprio_sched *q = qdisc_priv(sch); 1045 ktime_t expires; 1046 1047 if (FULL_OFFLOAD_IS_ENABLED(q->flags)) 1048 return; 1049 1050 expires = hrtimer_get_expires(&q->advance_timer); 1051 if (expires == 0) 1052 expires = KTIME_MAX; 1053 1054 /* If the new schedule starts before the next expiration, we 1055 * reprogram it to the earliest one, so we change the admin 1056 * schedule to the operational one at the right time. 
1057 */ 1058 start = min_t(ktime_t, start, expires); 1059 1060 hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); 1061 } 1062 1063 static void taprio_set_picos_per_byte(struct net_device *dev, 1064 struct taprio_sched *q) 1065 { 1066 struct ethtool_link_ksettings ecmd; 1067 int speed = SPEED_10; 1068 int picos_per_byte; 1069 int err; 1070 1071 err = __ethtool_get_link_ksettings(dev, &ecmd); 1072 if (err < 0) 1073 goto skip; 1074 1075 if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN) 1076 speed = ecmd.base.speed; 1077 1078 skip: 1079 picos_per_byte = (USEC_PER_SEC * 8) / speed; 1080 1081 atomic64_set(&q->picos_per_byte, picos_per_byte); 1082 netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", 1083 dev->name, (long long)atomic64_read(&q->picos_per_byte), 1084 ecmd.base.speed); 1085 } 1086 1087 static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, 1088 void *ptr) 1089 { 1090 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1091 struct taprio_sched *q; 1092 1093 ASSERT_RTNL(); 1094 1095 if (event != NETDEV_UP && event != NETDEV_CHANGE) 1096 return NOTIFY_DONE; 1097 1098 list_for_each_entry(q, &taprio_list, taprio_list) { 1099 if (dev != qdisc_dev(q->root)) 1100 continue; 1101 1102 taprio_set_picos_per_byte(dev, q); 1103 break; 1104 } 1105 1106 return NOTIFY_DONE; 1107 } 1108 1109 static void setup_txtime(struct taprio_sched *q, 1110 struct sched_gate_list *sched, ktime_t base) 1111 { 1112 struct sched_entry *entry; 1113 u32 interval = 0; 1114 1115 list_for_each_entry(entry, &sched->entries, list) { 1116 entry->next_txtime = ktime_add_ns(base, interval); 1117 interval += entry->interval; 1118 } 1119 } 1120 1121 static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) 1122 { 1123 struct __tc_taprio_qopt_offload *__offload; 1124 1125 __offload = kzalloc(struct_size(__offload, offload.entries, num_entries), 1126 GFP_KERNEL); 1127 if (!__offload) 1128 return NULL; 1129 1130 
refcount_set(&__offload->users, 1); 1131 1132 return &__offload->offload; 1133 } 1134 1135 struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload 1136 *offload) 1137 { 1138 struct __tc_taprio_qopt_offload *__offload; 1139 1140 __offload = container_of(offload, struct __tc_taprio_qopt_offload, 1141 offload); 1142 1143 refcount_inc(&__offload->users); 1144 1145 return offload; 1146 } 1147 EXPORT_SYMBOL_GPL(taprio_offload_get); 1148 1149 void taprio_offload_free(struct tc_taprio_qopt_offload *offload) 1150 { 1151 struct __tc_taprio_qopt_offload *__offload; 1152 1153 __offload = container_of(offload, struct __tc_taprio_qopt_offload, 1154 offload); 1155 1156 if (!refcount_dec_and_test(&__offload->users)) 1157 return; 1158 1159 kfree(__offload); 1160 } 1161 EXPORT_SYMBOL_GPL(taprio_offload_free); 1162 1163 /* The function will only serve to keep the pointers to the "oper" and "admin" 1164 * schedules valid in relation to their base times, so when calling dump() the 1165 * users looks at the right schedules. 1166 * When using full offload, the admin configuration is promoted to oper at the 1167 * base_time in the PHC time domain. But because the system time is not 1168 * necessarily in sync with that, we can't just trigger a hrtimer to call 1169 * switch_schedules at the right hardware time. 1170 * At the moment we call this by hand right away from taprio, but in the future 1171 * it will be useful to create a mechanism for drivers to notify taprio of the 1172 * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). 1173 * This is left as TODO. 
1174 */ 1175 static void taprio_offload_config_changed(struct taprio_sched *q) 1176 { 1177 struct sched_gate_list *oper, *admin; 1178 1179 oper = rtnl_dereference(q->oper_sched); 1180 admin = rtnl_dereference(q->admin_sched); 1181 1182 switch_schedules(q, &admin, &oper); 1183 } 1184 1185 static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask) 1186 { 1187 u32 i, queue_mask = 0; 1188 1189 for (i = 0; i < dev->num_tc; i++) { 1190 u32 offset, count; 1191 1192 if (!(tc_mask & BIT(i))) 1193 continue; 1194 1195 offset = dev->tc_to_txq[i].offset; 1196 count = dev->tc_to_txq[i].count; 1197 1198 queue_mask |= GENMASK(offset + count - 1, offset); 1199 } 1200 1201 return queue_mask; 1202 } 1203 1204 static void taprio_sched_to_offload(struct net_device *dev, 1205 struct sched_gate_list *sched, 1206 struct tc_taprio_qopt_offload *offload) 1207 { 1208 struct sched_entry *entry; 1209 int i = 0; 1210 1211 offload->base_time = sched->base_time; 1212 offload->cycle_time = sched->cycle_time; 1213 offload->cycle_time_extension = sched->cycle_time_extension; 1214 1215 list_for_each_entry(entry, &sched->entries, list) { 1216 struct tc_taprio_sched_entry *e = &offload->entries[i]; 1217 1218 e->command = entry->command; 1219 e->interval = entry->interval; 1220 e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask); 1221 1222 i++; 1223 } 1224 1225 offload->num_entries = i; 1226 } 1227 1228 static int taprio_enable_offload(struct net_device *dev, 1229 struct taprio_sched *q, 1230 struct sched_gate_list *sched, 1231 struct netlink_ext_ack *extack) 1232 { 1233 const struct net_device_ops *ops = dev->netdev_ops; 1234 struct tc_taprio_qopt_offload *offload; 1235 struct tc_taprio_caps caps; 1236 int tc, err = 0; 1237 1238 if (!ops->ndo_setup_tc) { 1239 NL_SET_ERR_MSG(extack, 1240 "Device does not support taprio offload"); 1241 return -EOPNOTSUPP; 1242 } 1243 1244 qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO, 1245 &caps, sizeof(caps)); 1246 1247 if 
(!caps.supports_queue_max_sdu) { 1248 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 1249 if (q->max_sdu[tc]) { 1250 NL_SET_ERR_MSG_MOD(extack, 1251 "Device does not handle queueMaxSDU"); 1252 return -EOPNOTSUPP; 1253 } 1254 } 1255 } 1256 1257 offload = taprio_offload_alloc(sched->num_entries); 1258 if (!offload) { 1259 NL_SET_ERR_MSG(extack, 1260 "Not enough memory for enabling offload mode"); 1261 return -ENOMEM; 1262 } 1263 offload->enable = 1; 1264 taprio_sched_to_offload(dev, sched, offload); 1265 1266 for (tc = 0; tc < TC_MAX_QUEUE; tc++) 1267 offload->max_sdu[tc] = q->max_sdu[tc]; 1268 1269 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); 1270 if (err < 0) { 1271 NL_SET_ERR_MSG(extack, 1272 "Device failed to setup taprio offload"); 1273 goto done; 1274 } 1275 1276 q->offloaded = true; 1277 1278 done: 1279 taprio_offload_free(offload); 1280 1281 return err; 1282 } 1283 1284 static int taprio_disable_offload(struct net_device *dev, 1285 struct taprio_sched *q, 1286 struct netlink_ext_ack *extack) 1287 { 1288 const struct net_device_ops *ops = dev->netdev_ops; 1289 struct tc_taprio_qopt_offload *offload; 1290 int err; 1291 1292 if (!q->offloaded) 1293 return 0; 1294 1295 offload = taprio_offload_alloc(0); 1296 if (!offload) { 1297 NL_SET_ERR_MSG(extack, 1298 "Not enough memory to disable offload mode"); 1299 return -ENOMEM; 1300 } 1301 offload->enable = 0; 1302 1303 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); 1304 if (err < 0) { 1305 NL_SET_ERR_MSG(extack, 1306 "Device failed to disable offload"); 1307 goto out; 1308 } 1309 1310 q->offloaded = false; 1311 1312 out: 1313 taprio_offload_free(offload); 1314 1315 return err; 1316 } 1317 1318 /* If full offload is enabled, the only possible clockid is the net device's 1319 * PHC. For that reason, specifying a clockid through netlink is incorrect. 
1320 * For txtime-assist, it is implicitly assumed that the device's PHC is kept 1321 * in sync with the specified clockid via a user space daemon such as phc2sys. 1322 * For both software taprio and txtime-assist, the clockid is used for the 1323 * hrtimer that advances the schedule and hence mandatory. 1324 */ 1325 static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, 1326 struct netlink_ext_ack *extack) 1327 { 1328 struct taprio_sched *q = qdisc_priv(sch); 1329 struct net_device *dev = qdisc_dev(sch); 1330 int err = -EINVAL; 1331 1332 if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { 1333 const struct ethtool_ops *ops = dev->ethtool_ops; 1334 struct ethtool_ts_info info = { 1335 .cmd = ETHTOOL_GET_TS_INFO, 1336 .phc_index = -1, 1337 }; 1338 1339 if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { 1340 NL_SET_ERR_MSG(extack, 1341 "The 'clockid' cannot be specified for full offload"); 1342 goto out; 1343 } 1344 1345 if (ops && ops->get_ts_info) 1346 err = ops->get_ts_info(dev, &info); 1347 1348 if (err || info.phc_index < 0) { 1349 NL_SET_ERR_MSG(extack, 1350 "Device does not have a PTP clock"); 1351 err = -ENOTSUPP; 1352 goto out; 1353 } 1354 } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { 1355 int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); 1356 enum tk_offsets tk_offset; 1357 1358 /* We only support static clockids and we don't allow 1359 * for it to be modified after the first init. 
1360 */ 1361 if (clockid < 0 || 1362 (q->clockid != -1 && q->clockid != clockid)) { 1363 NL_SET_ERR_MSG(extack, 1364 "Changing the 'clockid' of a running schedule is not supported"); 1365 err = -ENOTSUPP; 1366 goto out; 1367 } 1368 1369 switch (clockid) { 1370 case CLOCK_REALTIME: 1371 tk_offset = TK_OFFS_REAL; 1372 break; 1373 case CLOCK_MONOTONIC: 1374 tk_offset = TK_OFFS_MAX; 1375 break; 1376 case CLOCK_BOOTTIME: 1377 tk_offset = TK_OFFS_BOOT; 1378 break; 1379 case CLOCK_TAI: 1380 tk_offset = TK_OFFS_TAI; 1381 break; 1382 default: 1383 NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); 1384 err = -EINVAL; 1385 goto out; 1386 } 1387 /* This pairs with READ_ONCE() in taprio_mono_to_any */ 1388 WRITE_ONCE(q->tk_offset, tk_offset); 1389 1390 q->clockid = clockid; 1391 } else { 1392 NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); 1393 goto out; 1394 } 1395 1396 /* Everything went ok, return success. */ 1397 err = 0; 1398 1399 out: 1400 return err; 1401 } 1402 1403 static int taprio_parse_tc_entry(struct Qdisc *sch, 1404 struct nlattr *opt, 1405 u32 max_sdu[TC_QOPT_MAX_QUEUE], 1406 unsigned long *seen_tcs, 1407 struct netlink_ext_ack *extack) 1408 { 1409 struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { }; 1410 struct net_device *dev = qdisc_dev(sch); 1411 u32 val = 0; 1412 int err, tc; 1413 1414 err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt, 1415 taprio_tc_policy, extack); 1416 if (err < 0) 1417 return err; 1418 1419 if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) { 1420 NL_SET_ERR_MSG_MOD(extack, "TC entry index missing"); 1421 return -EINVAL; 1422 } 1423 1424 tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); 1425 if (tc >= TC_QOPT_MAX_QUEUE) { 1426 NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); 1427 return -ERANGE; 1428 } 1429 1430 if (*seen_tcs & BIT(tc)) { 1431 NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry"); 1432 return -EINVAL; 1433 } 1434 1435 *seen_tcs |= BIT(tc); 1436 1437 if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) 1438 val = 
nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]); 1439 1440 if (val > dev->max_mtu) { 1441 NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU"); 1442 return -ERANGE; 1443 } 1444 1445 max_sdu[tc] = val; 1446 1447 return 0; 1448 } 1449 1450 static int taprio_parse_tc_entries(struct Qdisc *sch, 1451 struct nlattr *opt, 1452 struct netlink_ext_ack *extack) 1453 { 1454 struct taprio_sched *q = qdisc_priv(sch); 1455 struct net_device *dev = qdisc_dev(sch); 1456 u32 max_sdu[TC_QOPT_MAX_QUEUE]; 1457 unsigned long seen_tcs = 0; 1458 struct nlattr *n; 1459 int tc, rem; 1460 int err = 0; 1461 1462 for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) 1463 max_sdu[tc] = q->max_sdu[tc]; 1464 1465 nla_for_each_nested(n, opt, rem) { 1466 if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) 1467 continue; 1468 1469 err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack); 1470 if (err) 1471 goto out; 1472 } 1473 1474 for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { 1475 q->max_sdu[tc] = max_sdu[tc]; 1476 if (max_sdu[tc]) 1477 q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len; 1478 else 1479 q->max_frm_len[tc] = U32_MAX; /* never oversized */ 1480 } 1481 1482 out: 1483 return err; 1484 } 1485 1486 static int taprio_mqprio_cmp(const struct net_device *dev, 1487 const struct tc_mqprio_qopt *mqprio) 1488 { 1489 int i; 1490 1491 if (!mqprio || mqprio->num_tc != dev->num_tc) 1492 return -1; 1493 1494 for (i = 0; i < mqprio->num_tc; i++) 1495 if (dev->tc_to_txq[i].count != mqprio->count[i] || 1496 dev->tc_to_txq[i].offset != mqprio->offset[i]) 1497 return -1; 1498 1499 for (i = 0; i <= TC_BITMASK; i++) 1500 if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i]) 1501 return -1; 1502 1503 return 0; 1504 } 1505 1506 /* The semantics of the 'flags' argument in relation to 'change()' 1507 * requests, are interpreted following two rules (which are applied in 1508 * this order): (1) an omitted 'flags' argument is interpreted as 1509 * zero; (2) the 'flags' of a "running" taprio instance cannot be 1510 
* changed.
 */
static int taprio_new_flags(const struct nlattr *attr, u32 old,
			    struct netlink_ext_ack *extack)
{
	u32 new = 0;

	if (attr)
		new = nla_get_u32(attr);

	/* TAPRIO_FLAGS_INVALID marks an instance whose flags were never set */
	if (old != TAPRIO_FLAGS_INVALID && old != new) {
		NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
		return -EOPNOTSUPP;
	}

	if (!taprio_flags_valid(new)) {
		NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
		return -EINVAL;
	}

	/* The accepted flags are returned through the int return value;
	 * callers treat a negative value as an errno.
	 */
	return new;
}

/* Apply a netlink configuration in @opt to the qdisc.
 *
 * Parses flags, the mqprio priority map, per-TC entries, the schedule and
 * the clockid, then enables or disables offload and installs the new
 * schedule. Returns 0 on success or a negative errno, with details in
 * @extack.
 */
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct sched_gate_list *oper, *admin, *new_admin;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	unsigned long flags;
	ktime_t start;
	int i, err;

	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
					  taprio_policy, extack);
	if (err < 0)
		return err;

	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
			       q->flags, extack);
	if (err < 0)
		return err;

	/* Non-negative return value of taprio_new_flags() is the flags word.
	 * NOTE(review): q->flags is committed here, before the remaining
	 * validation steps below can still fail.
	 */
	q->flags = err;

	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
	if (err < 0)
		return err;

	err = taprio_parse_tc_entries(sch, opt, extack);
	if (err)
		return err;

	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
	if (!new_admin) {
		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&new_admin->entries);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	/* no changes - no new mqprio settings */
	if (!taprio_mqprio_cmp(dev, mqprio))
		mqprio = NULL;

	/* A different traffic mapping is only accepted while no schedule
	 * (operational or admin) is installed.
	 */
	if (mqprio && (oper || admin)) {
		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
		err = -ENOTSUPP;
		goto free_sched;
	}

	err = parse_taprio_schedule(q, tb, new_admin, extack);
	if (err < 0)
		goto free_sched;

	if (new_admin->num_entries == 0) {
		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
		err = -EINVAL;
		goto free_sched;
	}

	err = taprio_parse_clockid(sch, tb, extack);
	if (err < 0)
		goto free_sched;

	taprio_set_picos_per_byte(dev, q);

	/* Program the new traffic class layout into the net device */
	if (mqprio) {
		err = netdev_set_num_tc(dev, mqprio->num_tc);
		if (err)
			goto free_sched;
		for (i = 0; i < mqprio->num_tc; i++)
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);

		/* Always use supplied priority mappings */
		for (i = 0; i <= TC_BITMASK; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
		err = taprio_enable_offload(dev, q, new_admin, extack);
	else
		err = taprio_disable_offload(dev, q, extack);
	if (err)
		goto free_sched;

	/* Protects against enqueue()/dequeue() */
	spin_lock_bh(qdisc_lock(sch));

	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
			err = -EINVAL;
			goto unlock;
		}

		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
	}

	/* Pure software mode: (re)initialize the advance timer on the
	 * configured clockid, but only while it is not active.
	 */
	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    !hrtimer_active(&q->advance_timer)) {
		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
		q->advance_timer.function = advance_sched;
	}

	err = taprio_get_start_time(sch, new_admin, &start);
	if (err < 0) {
		NL_SET_ERR_MSG(extack,
			       "Internal error: failed get start time");
		goto unlock;
	}

	setup_txtime(q, new_admin, start);

	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
		/* With no operational schedule yet, the new one becomes
		 * operational directly instead of being queued as admin.
		 */
		if (!oper) {
			rcu_assign_pointer(q->oper_sched, new_admin);
			err = 0;
			new_admin = NULL;
			goto unlock;
		}

		rcu_assign_pointer(q->admin_sched, new_admin);
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);
	} else {
		setup_first_close_time(q, new_admin, start);

		/* Protects against advance_sched() */
		spin_lock_irqsave(&q->current_entry_lock, flags);

		taprio_start_sched(sch, start, new_admin);

		rcu_assign_pointer(q->admin_sched, new_admin);
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);

		spin_unlock_irqrestore(&q->current_entry_lock, flags);

		if (FULL_OFFLOAD_IS_ENABLED(q->flags))
			taprio_offload_config_changed(q);
	}

	/* Ownership of new_admin has moved to @q; keep the exit path from
	 * freeing it.
	 */
	new_admin = NULL;
	err = 0;

unlock:
	spin_unlock_bh(qdisc_lock(sch));

free_sched:
	/* Only reached with a non-NULL new_admin when it was not installed */
	if (new_admin)
		call_rcu(&new_admin->rcu, taprio_free_sched_cb);

	return err;
}

/* Qdisc reset: cancel the advance timer, synchronize with the qdisc and
 * reset every child qdisc that is present.
 */
static void taprio_reset(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i;

	hrtimer_cancel(&q->advance_timer);
	qdisc_synchronize(sch);

	/* q->qdiscs is NULL once taprio_attach() released it (full offload) */
	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues; i++)
			if (q->qdiscs[i])
				qdisc_reset(q->qdiscs[i]);
	}
}

/* Qdisc teardown: unlink from taprio_list, stop the timer, disable offload,
 * release the child qdiscs and the device TC configuration, and free both
 * schedules after an RCU grace period.
 */
static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	unsigned int i;

	list_del(&q->taprio_list);

	/* Note that taprio_reset() might not be called if an error
	 * happens in qdisc_create(), after taprio_init() has been called.
	 */
	hrtimer_cancel(&q->advance_timer);
	qdisc_synchronize(sch);

	/* Best effort: the return value is not checked and no extack is given */
	taprio_disable_offload(dev, q, NULL);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_reset_tc(dev);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	if (oper)
		call_rcu(&oper->rcu, taprio_free_sched_cb);

	if (admin)
		call_rcu(&admin->rcu, taprio_free_sched_cb);
}

/* Qdisc init: set up the private state, create one default pfifo child per
 * TX queue and apply the initial configuration through taprio_change().
 *
 * Returns -EOPNOTSUPP when not attached as root or when the device is not
 * multiqueue, -ENOMEM on allocation failure, -EINVAL when @opt is missing,
 * otherwise the result of taprio_change().
 */
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i;

	spin_lock_init(&q->current_entry_lock);

	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;
	q->flags = TAPRIO_FLAGS_INVALID;

	/* Linked before the checks below; taprio_destroy() unlinks it */
	list_add(&q->taprio_list, &taprio_list);

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
		return -EOPNOTSUPP;
	}

	if (!netif_is_multiqueue(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
		return -EOPNOTSUPP;
	}

	/* pre-allocate qdisc, attachment can't fail */
	q->qdiscs = kcalloc(dev->num_tx_queues,
			    sizeof(q->qdiscs[0]),
			    GFP_KERNEL);

	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		/* Child handle: same major as the root, minor = queue + 1 */
		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	return taprio_change(sch, opt, extack);
}

/* Qdisc attach: graft a qdisc onto every TX queue of the device.
 *
 * In full offload mode each child qdisc is grafted onto its own queue;
 * otherwise the taprio qdisc itself is grafted onto every queue, taking one
 * extra reference per queue. Whatever was previously attached is released.
 */
static void taprio_attach(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Attach underlying qdisc */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct Qdisc *qdisc = q->qdiscs[ntx];
		struct Qdisc *old;

		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
		} else {
			old = dev_graft_qdisc(qdisc->dev_queue, sch);
			qdisc_refcount_inc(sch);
		}
		if (old)
			qdisc_put(old);
	}

	/* access to the child qdiscs is not needed in offload mode */
	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
		kfree(q->qdiscs);
		q->qdiscs = NULL;
	}
}

static
struct netdev_queue *taprio_queue_get(struct Qdisc *sch, 1844 unsigned long cl) 1845 { 1846 struct net_device *dev = qdisc_dev(sch); 1847 unsigned long ntx = cl - 1; 1848 1849 if (ntx >= dev->num_tx_queues) 1850 return NULL; 1851 1852 return netdev_get_tx_queue(dev, ntx); 1853 } 1854 1855 static int taprio_graft(struct Qdisc *sch, unsigned long cl, 1856 struct Qdisc *new, struct Qdisc **old, 1857 struct netlink_ext_ack *extack) 1858 { 1859 struct taprio_sched *q = qdisc_priv(sch); 1860 struct net_device *dev = qdisc_dev(sch); 1861 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 1862 1863 if (!dev_queue) 1864 return -EINVAL; 1865 1866 if (dev->flags & IFF_UP) 1867 dev_deactivate(dev); 1868 1869 if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { 1870 *old = dev_graft_qdisc(dev_queue, new); 1871 } else { 1872 *old = q->qdiscs[cl - 1]; 1873 q->qdiscs[cl - 1] = new; 1874 } 1875 1876 if (new) 1877 new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 1878 1879 if (dev->flags & IFF_UP) 1880 dev_activate(dev); 1881 1882 return 0; 1883 } 1884 1885 static int dump_entry(struct sk_buff *msg, 1886 const struct sched_entry *entry) 1887 { 1888 struct nlattr *item; 1889 1890 item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY); 1891 if (!item) 1892 return -ENOSPC; 1893 1894 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index)) 1895 goto nla_put_failure; 1896 1897 if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command)) 1898 goto nla_put_failure; 1899 1900 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, 1901 entry->gate_mask)) 1902 goto nla_put_failure; 1903 1904 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, 1905 entry->interval)) 1906 goto nla_put_failure; 1907 1908 return nla_nest_end(msg, item); 1909 1910 nla_put_failure: 1911 nla_nest_cancel(msg, item); 1912 return -1; 1913 } 1914 1915 static int dump_schedule(struct sk_buff *msg, 1916 const struct sched_gate_list *root) 1917 { 1918 struct nlattr *entry_list; 1919 struct sched_entry *entry; 
1920 1921 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, 1922 root->base_time, TCA_TAPRIO_PAD)) 1923 return -1; 1924 1925 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, 1926 root->cycle_time, TCA_TAPRIO_PAD)) 1927 return -1; 1928 1929 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, 1930 root->cycle_time_extension, TCA_TAPRIO_PAD)) 1931 return -1; 1932 1933 entry_list = nla_nest_start_noflag(msg, 1934 TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); 1935 if (!entry_list) 1936 goto error_nest; 1937 1938 list_for_each_entry(entry, &root->entries, list) { 1939 if (dump_entry(msg, entry) < 0) 1940 goto error_nest; 1941 } 1942 1943 nla_nest_end(msg, entry_list); 1944 return 0; 1945 1946 error_nest: 1947 nla_nest_cancel(msg, entry_list); 1948 return -1; 1949 } 1950 1951 static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb) 1952 { 1953 struct nlattr *n; 1954 int tc; 1955 1956 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 1957 n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY); 1958 if (!n) 1959 return -EMSGSIZE; 1960 1961 if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc)) 1962 goto nla_put_failure; 1963 1964 if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU, 1965 q->max_sdu[tc])) 1966 goto nla_put_failure; 1967 1968 nla_nest_end(skb, n); 1969 } 1970 1971 return 0; 1972 1973 nla_put_failure: 1974 nla_nest_cancel(skb, n); 1975 return -EMSGSIZE; 1976 } 1977 1978 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) 1979 { 1980 struct taprio_sched *q = qdisc_priv(sch); 1981 struct net_device *dev = qdisc_dev(sch); 1982 struct sched_gate_list *oper, *admin; 1983 struct tc_mqprio_qopt opt = { 0 }; 1984 struct nlattr *nest, *sched_nest; 1985 unsigned int i; 1986 1987 oper = rtnl_dereference(q->oper_sched); 1988 admin = rtnl_dereference(q->admin_sched); 1989 1990 opt.num_tc = netdev_get_num_tc(dev); 1991 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); 1992 1993 for (i = 0; i < netdev_get_num_tc(dev); i++) { 1994 
opt.count[i] = dev->tc_to_txq[i].count; 1995 opt.offset[i] = dev->tc_to_txq[i].offset; 1996 } 1997 1998 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 1999 if (!nest) 2000 goto start_error; 2001 2002 if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) 2003 goto options_error; 2004 2005 if (!FULL_OFFLOAD_IS_ENABLED(q->flags) && 2006 nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) 2007 goto options_error; 2008 2009 if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags)) 2010 goto options_error; 2011 2012 if (q->txtime_delay && 2013 nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) 2014 goto options_error; 2015 2016 if (taprio_dump_tc_entries(q, skb)) 2017 goto options_error; 2018 2019 if (oper && dump_schedule(skb, oper)) 2020 goto options_error; 2021 2022 if (!admin) 2023 goto done; 2024 2025 sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); 2026 if (!sched_nest) 2027 goto options_error; 2028 2029 if (dump_schedule(skb, admin)) 2030 goto admin_error; 2031 2032 nla_nest_end(skb, sched_nest); 2033 2034 done: 2035 return nla_nest_end(skb, nest); 2036 2037 admin_error: 2038 nla_nest_cancel(skb, sched_nest); 2039 2040 options_error: 2041 nla_nest_cancel(skb, nest); 2042 2043 start_error: 2044 return -ENOSPC; 2045 } 2046 2047 static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) 2048 { 2049 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 2050 2051 if (!dev_queue) 2052 return NULL; 2053 2054 return dev_queue->qdisc_sleeping; 2055 } 2056 2057 static unsigned long taprio_find(struct Qdisc *sch, u32 classid) 2058 { 2059 unsigned int ntx = TC_H_MIN(classid); 2060 2061 if (!taprio_queue_get(sch, ntx)) 2062 return 0; 2063 return ntx; 2064 } 2065 2066 static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, 2067 struct sk_buff *skb, struct tcmsg *tcm) 2068 { 2069 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 2070 2071 tcm->tcm_parent = TC_H_ROOT; 2072 
tcm->tcm_handle |= TC_H_MIN(cl); 2073 tcm->tcm_info = dev_queue->qdisc_sleeping->handle; 2074 2075 return 0; 2076 } 2077 2078 static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, 2079 struct gnet_dump *d) 2080 __releases(d->lock) 2081 __acquires(d->lock) 2082 { 2083 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 2084 2085 sch = dev_queue->qdisc_sleeping; 2086 if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || 2087 qdisc_qstats_copy(d, sch) < 0) 2088 return -1; 2089 return 0; 2090 } 2091 2092 static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 2093 { 2094 struct net_device *dev = qdisc_dev(sch); 2095 unsigned long ntx; 2096 2097 if (arg->stop) 2098 return; 2099 2100 arg->count = arg->skip; 2101 for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { 2102 if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) 2103 break; 2104 } 2105 } 2106 2107 static struct netdev_queue *taprio_select_queue(struct Qdisc *sch, 2108 struct tcmsg *tcm) 2109 { 2110 return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); 2111 } 2112 2113 static const struct Qdisc_class_ops taprio_class_ops = { 2114 .graft = taprio_graft, 2115 .leaf = taprio_leaf, 2116 .find = taprio_find, 2117 .walk = taprio_walk, 2118 .dump = taprio_dump_class, 2119 .dump_stats = taprio_dump_class_stats, 2120 .select_queue = taprio_select_queue, 2121 }; 2122 2123 static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { 2124 .cl_ops = &taprio_class_ops, 2125 .id = "taprio", 2126 .priv_size = sizeof(struct taprio_sched), 2127 .init = taprio_init, 2128 .change = taprio_change, 2129 .destroy = taprio_destroy, 2130 .reset = taprio_reset, 2131 .attach = taprio_attach, 2132 .peek = taprio_peek, 2133 .dequeue = taprio_dequeue, 2134 .enqueue = taprio_enqueue, 2135 .dump = taprio_dump, 2136 .owner = THIS_MODULE, 2137 }; 2138 2139 static struct notifier_block taprio_device_notifier = { 2140 .notifier_call = taprio_dev_notifier, 2141 }; 2142 2143 static int __init 
taprio_module_init(void) 2144 { 2145 int err = register_netdevice_notifier(&taprio_device_notifier); 2146 2147 if (err) 2148 return err; 2149 2150 return register_qdisc(&taprio_qdisc_ops); 2151 } 2152 2153 static void __exit taprio_module_exit(void) 2154 { 2155 unregister_qdisc(&taprio_qdisc_ops); 2156 unregister_netdevice_notifier(&taprio_device_notifier); 2157 } 2158 2159 module_init(taprio_module_init); 2160 module_exit(taprio_module_exit); 2161 MODULE_LICENSE("GPL"); 2162