1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * net/sched/sch_api.c Packet scheduler API. 4 * 5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 6 * 7 * Fixes: 8 * 9 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired. 10 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 11 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support 12 */ 13 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/string.h> 18 #include <linux/errno.h> 19 #include <linux/skbuff.h> 20 #include <linux/init.h> 21 #include <linux/proc_fs.h> 22 #include <linux/seq_file.h> 23 #include <linux/kmod.h> 24 #include <linux/list.h> 25 #include <linux/hrtimer.h> 26 #include <linux/slab.h> 27 #include <linux/hashtable.h> 28 29 #include <net/net_namespace.h> 30 #include <net/sock.h> 31 #include <net/netlink.h> 32 #include <net/pkt_sched.h> 33 #include <net/pkt_cls.h> 34 #include <net/tc_wrapper.h> 35 36 #include <trace/events/qdisc.h> 37 38 /* 39 40 Short review. 41 ------------- 42 43 This file consists of two interrelated parts: 44 45 1. queueing disciplines manager frontend. 46 2. traffic classes manager frontend. 47 48 Generally, queueing discipline ("qdisc") is a black box, 49 which is able to enqueue packets and to dequeue them (when 50 device is ready to send something) in order and at times 51 determined by algorithm hidden in it. 52 53 qdisc's are divided to two categories: 54 - "queues", which have no internal structure visible from outside. 55 - "schedulers", which split all the packets to "traffic classes", 56 using "packet classifiers" (look at cls_api.c) 57 58 In turn, classes may have child qdiscs (as rule, queues) 59 attached to them etc. etc. etc. 60 61 The goal of the routines in this file is to translate 62 information supplied by user in the form of handles 63 to more intelligible for kernel form, to make some sanity 64 checks and part of work, which is common to all qdiscs 65 and to provide rtnetlink notifications. 66 67 All real intelligent work is done inside qdisc modules. 68 69 70 71 Every discipline has two major routines: enqueue and dequeue. 72 73 ---dequeue 74 75 dequeue usually returns a skb to send. It is allowed to return NULL, 76 but it does not mean that queue is empty, it just means that 77 discipline does not want to send anything this time. 78 Queue is really empty if q->q.qlen == 0. 79 For complicated disciplines with multiple queues q->q is not 80 real packet queue, but however q->q.qlen must be valid. 81 82 ---enqueue 83 84 enqueue returns 0, if packet was enqueued successfully. 85 If packet (this one or another one) was dropped, it returns 86 not zero error code. 87 NET_XMIT_DROP - this packet dropped 88 Expected action: do not backoff, but wait until queue will clear. 89 NET_XMIT_CN - probably this packet enqueued, but another one dropped. 90 Expected action: backoff or ignore 91 92 Auxiliary routines: 93 94 ---peek 95 96 like dequeue but without removing a packet from the queue 97 98 ---reset 99 100 returns qdisc to initial state: purge all buffers, clear all 101 timers, counters (except for statistics) etc. 102 103 ---init 104 105 initializes newly created qdisc. 106 107 ---destroy 108 109 destroys resources allocated by init and during lifetime of qdisc. 110 111 ---change 112 113 changes qdisc parameters. 114 */ 115 116 /* Protects list of registered TC modules. It is pure SMP lock. */ 117 static DEFINE_RWLOCK(qdisc_mod_lock); 118 119 120 /************************************************ 121 * Queueing disciplines manipulation. * 122 ************************************************/ 123 124 125 /* The list of all installed queueing disciplines. */ 126 127 static struct Qdisc_ops *qdisc_base; 128 129 /* Register/unregister queueing discipline */ 130 131 int register_qdisc(struct Qdisc_ops *qops) 132 { 133 struct Qdisc_ops *q, **qp; 134 int rc = -EEXIST; 135 136 write_lock(&qdisc_mod_lock); 137 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 138 if (!strcmp(qops->id, q->id)) 139 goto out; 140 141 if (qops->enqueue == NULL) 142 qops->enqueue = noop_qdisc_ops.enqueue; 143 if (qops->peek == NULL) { 144 if (qops->dequeue == NULL) 145 qops->peek = noop_qdisc_ops.peek; 146 else 147 goto out_einval; 148 } 149 if (qops->dequeue == NULL) 150 qops->dequeue = noop_qdisc_ops.dequeue; 151 152 if (qops->cl_ops) { 153 const struct Qdisc_class_ops *cops = qops->cl_ops; 154 155 if (!(cops->find && cops->walk && cops->leaf)) 156 goto out_einval; 157 158 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) 159 goto out_einval; 160 } 161 162 qops->next = NULL; 163 *qp = qops; 164 rc = 0; 165 out: 166 write_unlock(&qdisc_mod_lock); 167 return rc; 168 169 out_einval: 170 rc = -EINVAL; 171 goto out; 172 } 173 EXPORT_SYMBOL(register_qdisc); 174 175 void unregister_qdisc(struct Qdisc_ops *qops) 176 { 177 struct Qdisc_ops *q, **qp; 178 int err = -ENOENT; 179 180 write_lock(&qdisc_mod_lock); 181 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 182 if (q == qops) 183 break; 184 if (q) { 185 *qp = q->next; 186 q->next = NULL; 187 err = 0; 188 } 189 write_unlock(&qdisc_mod_lock); 190 191 WARN(err, "unregister qdisc(%s) failed\n", qops->id); 192 } 193 EXPORT_SYMBOL(unregister_qdisc); 194 195 /* Get default qdisc if not otherwise specified */ 196 void qdisc_get_default(char *name, size_t len) 197 { 198 read_lock(&qdisc_mod_lock); 199 strscpy(name, default_qdisc_ops->id, len); 200 read_unlock(&qdisc_mod_lock); 201 } 202 203 static struct Qdisc_ops *qdisc_lookup_default(const char *name) 204 { 205 struct Qdisc_ops *q = NULL; 206 207 for (q = qdisc_base; q; q = q->next) { 208 if (!strcmp(name, q->id)) { 209 if (!try_module_get(q->owner)) 210 q = NULL; 211 break; 212 } 213 } 214 215 return q; 216 } 217 218 /* Set new default qdisc to use */ 219 int qdisc_set_default(const char *name) 220 { 221 const struct Qdisc_ops *ops; 222 223 if (!capable(CAP_NET_ADMIN)) 224 return -EPERM; 225 226 write_lock(&qdisc_mod_lock); 227 ops = qdisc_lookup_default(name); 228 if (!ops) { 229 /* Not found, drop lock and try to load module */ 230 write_unlock(&qdisc_mod_lock); 231 request_module("sch_%s", name); 232 write_lock(&qdisc_mod_lock); 233 234 ops = qdisc_lookup_default(name); 235 } 236 237 if (ops) { 238 /* Set new default */ 239 module_put(default_qdisc_ops->owner); 240 default_qdisc_ops = ops; 241 } 242 write_unlock(&qdisc_mod_lock); 243 244 return ops ? 0 : -ENOENT; 245 } 246 247 #ifdef CONFIG_NET_SCH_DEFAULT 248 /* Set default value from kernel config */ 249 static int __init sch_default_qdisc(void) 250 { 251 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); 252 } 253 late_initcall(sch_default_qdisc); 254 #endif 255 256 /* We know handle. Find qdisc among all qdisc's attached to device 257 * (root qdisc, all its children, children of children etc.) 258 * Note: caller either uses rtnl or rcu_read_lock() 259 */ 260 261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) 262 { 263 struct Qdisc *q; 264 265 if (!qdisc_dev(root)) 266 return (root->handle == handle ? root : NULL); 267 268 if (!(root->flags & TCQ_F_BUILTIN) && 269 root->handle == handle) 270 return root; 271 272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle, 273 lockdep_rtnl_is_held()) { 274 if (q->handle == handle) 275 return q; 276 } 277 return NULL; 278 } 279 280 void qdisc_hash_add(struct Qdisc *q, bool invisible) 281 { 282 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 283 ASSERT_RTNL(); 284 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); 285 if (invisible) 286 q->flags |= TCQ_F_INVISIBLE; 287 } 288 } 289 EXPORT_SYMBOL(qdisc_hash_add); 290 291 void qdisc_hash_del(struct Qdisc *q) 292 { 293 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 294 ASSERT_RTNL(); 295 hash_del_rcu(&q->hash); 296 } 297 } 298 EXPORT_SYMBOL(qdisc_hash_del); 299 300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) 301 { 302 struct Qdisc *q; 303 304 if (!handle) 305 return NULL; 306 q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); 307 if (q) 308 goto out; 309 310 if (dev_ingress_queue(dev)) 311 q = qdisc_match_from_root( 312 dev_ingress_queue(dev)->qdisc_sleeping, 313 handle); 314 out: 315 return q; 316 } 317 318 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) 319 { 320 struct netdev_queue *nq; 321 struct Qdisc *q; 322 323 if (!handle) 324 return NULL; 325 q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); 326 if (q) 327 goto out; 328 329 nq = dev_ingress_queue_rcu(dev); 330 if (nq) 331 q = qdisc_match_from_root(nq->qdisc_sleeping, handle); 332 out: 333 return q; 334 } 335 336 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) 337 { 338 unsigned long cl; 339 const struct Qdisc_class_ops *cops = p->ops->cl_ops; 340 341 if (cops == NULL) 342 return NULL; 343 cl = cops->find(p, classid); 344 345 if (cl == 0) 346 return NULL; 347 return cops->leaf(p, cl); 348 } 349 350 /* Find queueing discipline by name */ 351 352 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) 353 { 354 struct Qdisc_ops *q = NULL; 355 356 if (kind) { 357 read_lock(&qdisc_mod_lock); 358 for (q = qdisc_base; q; q = q->next) { 359 if (nla_strcmp(kind, q->id) == 0) { 360 if (!try_module_get(q->owner)) 361 q = NULL; 362 break; 363 } 364 } 365 read_unlock(&qdisc_mod_lock); 366 } 367 return q; 368 } 369 370 /* The linklayer setting were not transferred from iproute2, in older 371 * versions, and the rate tables lookup systems have been dropped in 372 * the kernel. To keep backward compatible with older iproute2 tc 373 * utils, we detect the linklayer setting by detecting if the rate 374 * table were modified. 375 * 376 * For linklayer ATM table entries, the rate table will be aligned to 377 * 48 bytes, thus some table entries will contain the same value. The 378 * mpu (min packet unit) is also encoded into the old rate table, thus 379 * starting from the mpu, we find low and high table entries for 380 * mapping this cell. If these entries contain the same value, when 381 * the rate tables have been modified for linklayer ATM. 382 * 383 * This is done by rounding mpu to the nearest 48 bytes cell/entry, 384 * and then roundup to the next cell, calc the table entry one below, 385 * and compare. 386 */ 387 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) 388 { 389 int low = roundup(r->mpu, 48); 390 int high = roundup(low+1, 48); 391 int cell_low = low >> r->cell_log; 392 int cell_high = (high >> r->cell_log) - 1; 393 394 /* rtab is too inaccurate at rates > 100Mbit/s */ 395 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { 396 pr_debug("TC linklayer: Giving up ATM detection\n"); 397 return TC_LINKLAYER_ETHERNET; 398 } 399 400 if ((cell_high > cell_low) && (cell_high < 256) 401 && (rtab[cell_low] == rtab[cell_high])) { 402 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", 403 cell_low, cell_high, rtab[cell_high]); 404 return TC_LINKLAYER_ATM; 405 } 406 return TC_LINKLAYER_ETHERNET; 407 } 408 409 static struct qdisc_rate_table *qdisc_rtab_list; 410 411 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 412 struct nlattr *tab, 413 struct netlink_ext_ack *extack) 414 { 415 struct qdisc_rate_table *rtab; 416 417 if (tab == NULL || r->rate == 0 || 418 r->cell_log == 0 || r->cell_log >= 32 || 419 nla_len(tab) != TC_RTAB_SIZE) { 420 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); 421 return NULL; 422 } 423 424 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { 425 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && 426 !memcmp(&rtab->data, nla_data(tab), 1024)) { 427 rtab->refcnt++; 428 return rtab; 429 } 430 } 431 432 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 433 if (rtab) { 434 rtab->rate = *r; 435 rtab->refcnt = 1; 436 memcpy(rtab->data, nla_data(tab), 1024); 437 if (r->linklayer == TC_LINKLAYER_UNAWARE) 438 r->linklayer = __detect_linklayer(r, rtab->data); 439 rtab->next = qdisc_rtab_list; 440 qdisc_rtab_list = rtab; 441 } else { 442 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); 443 } 444 return rtab; 445 } 446 EXPORT_SYMBOL(qdisc_get_rtab); 447 448 void qdisc_put_rtab(struct qdisc_rate_table *tab) 449 { 450 struct qdisc_rate_table *rtab, **rtabp; 451 452 if (!tab || --tab->refcnt) 453 return; 454 455 for (rtabp = &qdisc_rtab_list; 456 (rtab = *rtabp) != NULL; 457 rtabp = &rtab->next) { 458 if (rtab == tab) { 459 *rtabp = rtab->next; 460 kfree(rtab); 461 return; 462 } 463 } 464 } 465 EXPORT_SYMBOL(qdisc_put_rtab); 466 467 static LIST_HEAD(qdisc_stab_list); 468 469 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { 470 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, 471 [TCA_STAB_DATA] = { .type = NLA_BINARY }, 472 }; 473 474 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, 475 struct netlink_ext_ack *extack) 476 { 477 struct nlattr *tb[TCA_STAB_MAX + 1]; 478 struct qdisc_size_table *stab; 479 struct tc_sizespec *s; 480 unsigned int tsize = 0; 481 u16 *tab = NULL; 482 int err; 483 484 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, 485 extack); 486 if (err < 0) 487 return ERR_PTR(err); 488 if (!tb[TCA_STAB_BASE]) { 489 NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); 490 return ERR_PTR(-EINVAL); 491 } 492 493 s = nla_data(tb[TCA_STAB_BASE]); 494 495 if (s->tsize > 0) { 496 if (!tb[TCA_STAB_DATA]) { 497 NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); 498 return ERR_PTR(-EINVAL); 499 } 500 tab = nla_data(tb[TCA_STAB_DATA]); 501 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 502 } 503 504 if (tsize != s->tsize || (!tab && tsize > 0)) { 505 NL_SET_ERR_MSG(extack, "Invalid size of size table"); 506 return ERR_PTR(-EINVAL); 507 } 508 509 list_for_each_entry(stab, &qdisc_stab_list, list) { 510 if (memcmp(&stab->szopts, s, sizeof(*s))) 511 continue; 512 if (tsize > 0 && 513 memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) 514 continue; 515 stab->refcnt++; 516 return stab; 517 } 518 519 if (s->size_log > STAB_SIZE_LOG_MAX || 520 s->cell_log > STAB_SIZE_LOG_MAX) { 521 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); 522 return ERR_PTR(-EINVAL); 523 } 524 525 stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); 526 if (!stab) 527 return ERR_PTR(-ENOMEM); 528 529 stab->refcnt = 1; 530 stab->szopts = *s; 531 if (tsize > 0) 532 memcpy(stab->data, tab, flex_array_size(stab, data, tsize)); 533 534 list_add_tail(&stab->list, &qdisc_stab_list); 535 536 return stab; 537 } 538 539 void qdisc_put_stab(struct qdisc_size_table *tab) 540 { 541 if (!tab) 542 return; 543 544 if (--tab->refcnt == 0) { 545 list_del(&tab->list); 546 kfree_rcu(tab, rcu); 547 } 548 } 549 EXPORT_SYMBOL(qdisc_put_stab); 550 551 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) 552 { 553 struct nlattr *nest; 554 555 nest = nla_nest_start_noflag(skb, TCA_STAB); 556 if (nest == NULL) 557 goto nla_put_failure; 558 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) 559 goto nla_put_failure; 560 nla_nest_end(skb, nest); 561 562 return skb->len; 563 564 nla_put_failure: 565 return -1; 566 } 567 568 void __qdisc_calculate_pkt_len(struct sk_buff *skb, 569 const struct qdisc_size_table *stab) 570 { 571 int pkt_len, slot; 572 573 pkt_len = skb->len + stab->szopts.overhead; 574 if (unlikely(!stab->szopts.tsize)) 575 goto out; 576 577 slot = pkt_len + stab->szopts.cell_align; 578 if (unlikely(slot < 0)) 579 slot = 0; 580 581 slot >>= stab->szopts.cell_log; 582 if (likely(slot < stab->szopts.tsize)) 583 pkt_len = stab->data[slot]; 584 else 585 pkt_len = stab->data[stab->szopts.tsize - 1] * 586 (slot / stab->szopts.tsize) + 587 stab->data[slot % stab->szopts.tsize]; 588 589 pkt_len <<= stab->szopts.size_log; 590 out: 591 if (unlikely(pkt_len < 1)) 592 pkt_len = 1; 593 qdisc_skb_cb(skb)->pkt_len = pkt_len; 594 } 595 EXPORT_SYMBOL(__qdisc_calculate_pkt_len); 596 597 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) 598 { 599 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 600 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", 601 txt, qdisc->ops->id, qdisc->handle >> 16); 602 qdisc->flags |= TCQ_F_WARN_NONWC; 603 } 604 } 605 EXPORT_SYMBOL(qdisc_warn_nonwc); 606 607 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 608 { 609 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 610 timer); 611 612 rcu_read_lock(); 613 __netif_schedule(qdisc_root(wd->qdisc)); 614 rcu_read_unlock(); 615 616 return HRTIMER_NORESTART; 617 } 618 619 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, 620 clockid_t clockid) 621 { 622 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); 623 wd->timer.function = qdisc_watchdog; 624 wd->qdisc = qdisc; 625 } 626 EXPORT_SYMBOL(qdisc_watchdog_init_clockid); 627 628 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) 629 { 630 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); 631 } 632 EXPORT_SYMBOL(qdisc_watchdog_init); 633 634 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, 635 u64 delta_ns) 636 { 637 if (test_bit(__QDISC_STATE_DEACTIVATED, 638 &qdisc_root_sleeping(wd->qdisc)->state)) 639 return; 640 641 if (hrtimer_is_queued(&wd->timer)) { 642 /* If timer is already set in [expires, expires + delta_ns], 643 * do not reprogram it. 644 */ 645 if (wd->last_expires - expires <= delta_ns) 646 return; 647 } 648 649 wd->last_expires = expires; 650 hrtimer_start_range_ns(&wd->timer, 651 ns_to_ktime(expires), 652 delta_ns, 653 HRTIMER_MODE_ABS_PINNED); 654 } 655 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns); 656 657 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 658 { 659 hrtimer_cancel(&wd->timer); 660 } 661 EXPORT_SYMBOL(qdisc_watchdog_cancel); 662 663 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) 664 { 665 struct hlist_head *h; 666 unsigned int i; 667 668 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); 669 670 if (h != NULL) { 671 for (i = 0; i < n; i++) 672 INIT_HLIST_HEAD(&h[i]); 673 } 674 return h; 675 } 676 677 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) 678 { 679 struct Qdisc_class_common *cl; 680 struct hlist_node *next; 681 struct hlist_head *nhash, *ohash; 682 unsigned int nsize, nmask, osize; 683 unsigned int i, h; 684 685 /* Rehash when load factor exceeds 0.75 */ 686 if (clhash->hashelems * 4 <= clhash->hashsize * 3) 687 return; 688 nsize = clhash->hashsize * 2; 689 nmask = nsize - 1; 690 nhash = qdisc_class_hash_alloc(nsize); 691 if (nhash == NULL) 692 return; 693 694 ohash = clhash->hash; 695 osize = clhash->hashsize; 696 697 sch_tree_lock(sch); 698 for (i = 0; i < osize; i++) { 699 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { 700 h = qdisc_class_hash(cl->classid, nmask); 701 hlist_add_head(&cl->hnode, &nhash[h]); 702 } 703 } 704 clhash->hash = nhash; 705 clhash->hashsize = nsize; 706 clhash->hashmask = nmask; 707 sch_tree_unlock(sch); 708 709 kvfree(ohash); 710 } 711 EXPORT_SYMBOL(qdisc_class_hash_grow); 712 713 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) 714 { 715 unsigned int size = 4; 716 717 clhash->hash = qdisc_class_hash_alloc(size); 718 if (!clhash->hash) 719 return -ENOMEM; 720 clhash->hashsize = size; 721 clhash->hashmask = size - 1; 722 clhash->hashelems = 0; 723 return 0; 724 } 725 EXPORT_SYMBOL(qdisc_class_hash_init); 726 727 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) 728 { 729 kvfree(clhash->hash); 730 } 731 EXPORT_SYMBOL(qdisc_class_hash_destroy); 732 733 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, 734 struct Qdisc_class_common *cl) 735 { 736 unsigned int h; 737 738 INIT_HLIST_NODE(&cl->hnode); 739 h = qdisc_class_hash(cl->classid, clhash->hashmask); 740 hlist_add_head(&cl->hnode, &clhash->hash[h]); 741 clhash->hashelems++; 742 } 743 EXPORT_SYMBOL(qdisc_class_hash_insert); 744 745 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, 746 struct Qdisc_class_common *cl) 747 { 748 hlist_del(&cl->hnode); 749 clhash->hashelems--; 750 } 751 EXPORT_SYMBOL(qdisc_class_hash_remove); 752 753 /* Allocate an unique handle from space managed by kernel 754 * Possible range is [8000-FFFF]:0000 (0x8000 values) 755 */ 756 static u32 qdisc_alloc_handle(struct net_device *dev) 757 { 758 int i = 0x8000; 759 static u32 autohandle = TC_H_MAKE(0x80000000U, 0); 760 761 do { 762 autohandle += TC_H_MAKE(0x10000U, 0); 763 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) 764 autohandle = TC_H_MAKE(0x80000000U, 0); 765 if (!qdisc_lookup(dev, autohandle)) 766 return autohandle; 767 cond_resched(); 768 } while (--i > 0); 769 770 return 0; 771 } 772 773 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) 774 { 775 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; 776 const struct Qdisc_class_ops *cops; 777 unsigned long cl; 778 u32 parentid; 779 bool notify; 780 int drops; 781 782 if (n == 0 && len == 0) 783 return; 784 drops = max_t(int, n, 0); 785 rcu_read_lock(); 786 while ((parentid = sch->parent)) { 787 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) 788 break; 789 790 if (sch->flags & TCQ_F_NOPARENT) 791 break; 792 /* Notify parent qdisc only if child qdisc becomes empty. 793 * 794 * If child was empty even before update then backlog 795 * counter is screwed and we skip notification because 796 * parent class is already passive. 797 * 798 * If the original child was offloaded then it is allowed 799 * to be seem as empty, so the parent is notified anyway. 800 */ 801 notify = !sch->q.qlen && !WARN_ON_ONCE(!n && 802 !qdisc_is_offloaded); 803 /* TODO: perform the search on a per txq basis */ 804 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); 805 if (sch == NULL) { 806 WARN_ON_ONCE(parentid != TC_H_ROOT); 807 break; 808 } 809 cops = sch->ops->cl_ops; 810 if (notify && cops->qlen_notify) { 811 cl = cops->find(sch, parentid); 812 cops->qlen_notify(sch, cl); 813 } 814 sch->q.qlen -= n; 815 sch->qstats.backlog -= len; 816 __qdisc_qstats_drop(sch, drops); 817 } 818 rcu_read_unlock(); 819 } 820 EXPORT_SYMBOL(qdisc_tree_reduce_backlog); 821 822 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, 823 void *type_data) 824 { 825 struct net_device *dev = qdisc_dev(sch); 826 int err; 827 828 sch->flags &= ~TCQ_F_OFFLOADED; 829 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 830 return 0; 831 832 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 833 if (err == -EOPNOTSUPP) 834 return 0; 835 836 if (!err) 837 sch->flags |= TCQ_F_OFFLOADED; 838 839 return err; 840 } 841 EXPORT_SYMBOL(qdisc_offload_dump_helper); 842 843 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, 844 struct Qdisc *new, struct Qdisc *old, 845 enum tc_setup_type type, void *type_data, 846 struct netlink_ext_ack *extack) 847 { 848 bool any_qdisc_is_offloaded; 849 int err; 850 851 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 852 return; 853 854 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 855 856 /* Don't report error if the graft is part of destroy operation. */ 857 if (!err || !new || new == &noop_qdisc) 858 return; 859 860 /* Don't report error if the parent, the old child and the new 861 * one are not offloaded. 862 */ 863 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; 864 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; 865 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; 866 867 if (any_qdisc_is_offloaded) 868 NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); 869 } 870 EXPORT_SYMBOL(qdisc_offload_graft_helper); 871 872 void qdisc_offload_query_caps(struct net_device *dev, 873 enum tc_setup_type type, 874 void *caps, size_t caps_len) 875 { 876 const struct net_device_ops *ops = dev->netdev_ops; 877 struct tc_query_caps_base base = { 878 .type = type, 879 .caps = caps, 880 }; 881 882 memset(caps, 0, caps_len); 883 884 if (ops->ndo_setup_tc) 885 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); 886 } 887 EXPORT_SYMBOL(qdisc_offload_query_caps); 888 889 static void qdisc_offload_graft_root(struct net_device *dev, 890 struct Qdisc *new, struct Qdisc *old, 891 struct netlink_ext_ack *extack) 892 { 893 struct tc_root_qopt_offload graft_offload = { 894 .command = TC_ROOT_GRAFT, 895 .handle = new ? new->handle : 0, 896 .ingress = (new && new->flags & TCQ_F_INGRESS) || 897 (old && old->flags & TCQ_F_INGRESS), 898 }; 899 900 qdisc_offload_graft_helper(dev, NULL, new, old, 901 TC_SETUP_ROOT_QDISC, &graft_offload, extack); 902 } 903 904 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 905 u32 portid, u32 seq, u16 flags, int event) 906 { 907 struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; 908 struct gnet_stats_queue __percpu *cpu_qstats = NULL; 909 struct tcmsg *tcm; 910 struct nlmsghdr *nlh; 911 unsigned char *b = skb_tail_pointer(skb); 912 struct gnet_dump d; 913 struct qdisc_size_table *stab; 914 u32 block_index; 915 __u32 qlen; 916 917 cond_resched(); 918 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 919 if (!nlh) 920 goto out_nlmsg_trim; 921 tcm = nlmsg_data(nlh); 922 tcm->tcm_family = AF_UNSPEC; 923 tcm->tcm__pad1 = 0; 924 tcm->tcm__pad2 = 0; 925 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 926 tcm->tcm_parent = clid; 927 tcm->tcm_handle = q->handle; 928 tcm->tcm_info = refcount_read(&q->refcnt); 929 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 930 goto nla_put_failure; 931 if (q->ops->ingress_block_get) { 932 block_index = q->ops->ingress_block_get(q); 933 if (block_index && 934 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) 935 goto nla_put_failure; 936 } 937 if (q->ops->egress_block_get) { 938 block_index = q->ops->egress_block_get(q); 939 if (block_index && 940 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) 941 goto nla_put_failure; 942 } 943 if (q->ops->dump && q->ops->dump(q, skb) < 0) 944 goto nla_put_failure; 945 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) 946 goto nla_put_failure; 947 qlen = qdisc_qlen_sum(q); 948 949 stab = rtnl_dereference(q->stab); 950 if (stab && qdisc_dump_stab(skb, stab) < 0) 951 goto nla_put_failure; 952 953 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 954 NULL, &d, TCA_PAD) < 0) 955 goto nla_put_failure; 956 957 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 958 goto nla_put_failure; 959 960 if (qdisc_is_percpu_stats(q)) { 961 cpu_bstats = q->cpu_bstats; 962 cpu_qstats = q->cpu_qstats; 963 } 964 965 if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || 966 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || 967 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) 968 goto nla_put_failure; 969 970 if (gnet_stats_finish_copy(&d) < 0) 971 goto nla_put_failure; 972 973 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 974 return skb->len; 975 976 out_nlmsg_trim: 977 nla_put_failure: 978 nlmsg_trim(skb, b); 979 return -1; 980 } 981 982 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) 983 { 984 if (q->flags & TCQ_F_BUILTIN) 985 return true; 986 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) 987 return true; 988 989 return false; 990 } 991 992 static int qdisc_notify(struct net *net, struct sk_buff *oskb, 993 struct nlmsghdr *n, u32 clid, 994 struct Qdisc *old, struct Qdisc *new) 995 { 996 struct sk_buff *skb; 997 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 998 999 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1000 if (!skb) 1001 return -ENOBUFS; 1002 1003 if (old && !tc_qdisc_dump_ignore(old, false)) { 1004 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 1005 0, RTM_DELQDISC) < 0) 1006 goto err_out; 1007 } 1008 if (new && !tc_qdisc_dump_ignore(new, false)) { 1009 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, 1010 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1011 goto err_out; 1012 } 1013 1014 if (skb->len) 1015 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1016 n->nlmsg_flags & NLM_F_ECHO); 1017 1018 err_out: 1019 kfree_skb(skb); 1020 return -EINVAL; 1021 } 1022 1023 static void notify_and_destroy(struct net *net, struct sk_buff *skb, 1024 struct nlmsghdr *n, u32 clid, 1025 struct Qdisc *old, struct Qdisc *new) 1026 { 1027 if (new || old) 1028 qdisc_notify(net, skb, n, clid, old, new); 1029 1030 if (old) 1031 qdisc_put(old); 1032 } 1033 1034 static void qdisc_clear_nolock(struct Qdisc *sch) 1035 { 1036 sch->flags &= ~TCQ_F_NOLOCK; 1037 if (!(sch->flags & TCQ_F_CPUSTATS)) 1038 return; 1039 1040 free_percpu(sch->cpu_bstats); 1041 free_percpu(sch->cpu_qstats); 1042 sch->cpu_bstats = NULL; 1043 sch->cpu_qstats = NULL; 1044 sch->flags &= ~TCQ_F_CPUSTATS; 1045 } 1046 1047 /* Graft qdisc "new" to class "classid" of qdisc "parent" or 1048 * to device "dev". 1049 * 1050 * When appropriate send a netlink notification using 'skb' 1051 * and "n". 1052 * 1053 * On success, destroy old qdisc. 1054 */ 1055 1056 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, 1057 struct sk_buff *skb, struct nlmsghdr *n, u32 classid, 1058 struct Qdisc *new, struct Qdisc *old, 1059 struct netlink_ext_ack *extack) 1060 { 1061 struct Qdisc *q = old; 1062 struct net *net = dev_net(dev); 1063 1064 if (parent == NULL) { 1065 unsigned int i, num_q, ingress; 1066 1067 ingress = 0; 1068 num_q = dev->num_tx_queues; 1069 if ((q && q->flags & TCQ_F_INGRESS) || 1070 (new && new->flags & TCQ_F_INGRESS)) { 1071 num_q = 1; 1072 ingress = 1; 1073 if (!dev_ingress_queue(dev)) { 1074 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); 1075 return -ENOENT; 1076 } 1077 } 1078 1079 if (dev->flags & IFF_UP) 1080 dev_deactivate(dev); 1081 1082 qdisc_offload_graft_root(dev, new, old, extack); 1083 1084 if (new && new->ops->attach && !ingress) 1085 goto skip; 1086 1087 for (i = 0; i < num_q; i++) { 1088 struct netdev_queue *dev_queue = dev_ingress_queue(dev); 1089 1090 if (!ingress) 1091 dev_queue = netdev_get_tx_queue(dev, i); 1092 1093 old = dev_graft_qdisc(dev_queue, new); 1094 if (new && i > 0) 1095 qdisc_refcount_inc(new); 1096 1097 if (!ingress) 1098 qdisc_put(old); 1099 } 1100 1101 skip: 1102 if (!ingress) { 1103 old = rtnl_dereference(dev->qdisc); 1104 if (new && !new->ops->attach) 1105 qdisc_refcount_inc(new); 1106 rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); 1107 1108 notify_and_destroy(net, skb, n, classid, old, new); 1109 1110 if (new && new->ops->attach) 1111 new->ops->attach(new); 1112 } else { 1113 notify_and_destroy(net, skb, n, classid, old, new); 1114 } 1115 1116 if (dev->flags & IFF_UP) 1117 dev_activate(dev); 1118 } else { 1119 const struct Qdisc_class_ops *cops = parent->ops->cl_ops; 1120 unsigned long cl; 1121 int err; 1122 1123 /* Only support running class lockless if parent is lockless */ 1124 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK)) 1125 qdisc_clear_nolock(new); 1126 1127 if (!cops || !cops->graft) 1128 return -EOPNOTSUPP; 1129 1130 cl = cops->find(parent, classid); 1131 if (!cl) { 1132 NL_SET_ERR_MSG(extack, "Specified class not found"); 1133 return -ENOENT; 1134 } 1135 1136 err = cops->graft(parent, cl, new, &old, extack); 1137 if (err) 1138 return err; 1139 notify_and_destroy(net, skb, n, classid, old, new); 1140 } 1141 return 0; 1142 } 1143 1144 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, 1145 struct netlink_ext_ack *extack) 1146 { 1147 u32 block_index; 1148 1149 if (tca[TCA_INGRESS_BLOCK]) { 1150 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); 1151 1152 if (!block_index) { 1153 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); 1154 return -EINVAL; 1155 } 1156 if (!sch->ops->ingress_block_set) { 1157 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); 1158 return -EOPNOTSUPP; 1159 } 1160 sch->ops->ingress_block_set(sch, block_index); 1161 } 1162 if (tca[TCA_EGRESS_BLOCK]) { 1163 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); 1164 1165 if (!block_index) { 1166 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); 1167 return -EINVAL; 1168 } 1169 if (!sch->ops->egress_block_set) { 1170 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); 1171 return -EOPNOTSUPP; 1172 } 1173 sch->ops->egress_block_set(sch, block_index); 1174 } 1175 return 0; 1176 } 1177 1178 /* 1179 Allocate and initialize new qdisc. 1180 1181 Parameters are passed via opt. 1182 */ 1183 1184 static struct Qdisc *qdisc_create(struct net_device *dev, 1185 struct netdev_queue *dev_queue, 1186 u32 parent, u32 handle, 1187 struct nlattr **tca, int *errp, 1188 struct netlink_ext_ack *extack) 1189 { 1190 int err; 1191 struct nlattr *kind = tca[TCA_KIND]; 1192 struct Qdisc *sch; 1193 struct Qdisc_ops *ops; 1194 struct qdisc_size_table *stab; 1195 1196 ops = qdisc_lookup_ops(kind); 1197 #ifdef CONFIG_MODULES 1198 if (ops == NULL && kind != NULL) { 1199 char name[IFNAMSIZ]; 1200 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { 1201 /* We dropped the RTNL semaphore in order to 1202 * perform the module load. So, even if we 1203 * succeeded in loading the module we have to 1204 * tell the caller to replay the request. We 1205 * indicate this using -EAGAIN. 1206 * We replay the request because the device may 1207 * go away in the mean time. 1208 */ 1209 rtnl_unlock(); 1210 request_module("sch_%s", name); 1211 rtnl_lock(); 1212 ops = qdisc_lookup_ops(kind); 1213 if (ops != NULL) { 1214 /* We will try again qdisc_lookup_ops, 1215 * so don't keep a reference. 1216 */ 1217 module_put(ops->owner); 1218 err = -EAGAIN; 1219 goto err_out; 1220 } 1221 } 1222 } 1223 #endif 1224 1225 err = -ENOENT; 1226 if (!ops) { 1227 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); 1228 goto err_out; 1229 } 1230 1231 sch = qdisc_alloc(dev_queue, ops, extack); 1232 if (IS_ERR(sch)) { 1233 err = PTR_ERR(sch); 1234 goto err_out2; 1235 } 1236 1237 sch->parent = parent; 1238 1239 if (handle == TC_H_INGRESS) { 1240 sch->flags |= TCQ_F_INGRESS; 1241 handle = TC_H_MAKE(TC_H_INGRESS, 0); 1242 } else { 1243 if (handle == 0) { 1244 handle = qdisc_alloc_handle(dev); 1245 if (handle == 0) { 1246 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); 1247 err = -ENOSPC; 1248 goto err_out3; 1249 } 1250 } 1251 if (!netif_is_multiqueue(dev)) 1252 sch->flags |= TCQ_F_ONETXQUEUE; 1253 } 1254 1255 sch->handle = handle; 1256 1257 /* This exist to keep backward compatible with a userspace 1258 * loophole, what allowed userspace to get IFF_NO_QUEUE 1259 * facility on older kernels by setting tx_queue_len=0 (prior 1260 * to qdisc init), and then forgot to reinit tx_queue_len 1261 * before again attaching a qdisc. 1262 */ 1263 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { 1264 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; 1265 netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); 1266 } 1267 1268 err = qdisc_block_indexes_set(sch, tca, extack); 1269 if (err) 1270 goto err_out3; 1271 1272 if (ops->init) { 1273 err = ops->init(sch, tca[TCA_OPTIONS], extack); 1274 if (err != 0) 1275 goto err_out5; 1276 } 1277 1278 if (tca[TCA_STAB]) { 1279 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1280 if (IS_ERR(stab)) { 1281 err = PTR_ERR(stab); 1282 goto err_out4; 1283 } 1284 rcu_assign_pointer(sch->stab, stab); 1285 } 1286 if (tca[TCA_RATE]) { 1287 err = -EOPNOTSUPP; 1288 if (sch->flags & TCQ_F_MQROOT) { 1289 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); 1290 goto err_out4; 1291 } 1292 1293 err = gen_new_estimator(&sch->bstats, 1294 sch->cpu_bstats, 1295 &sch->rate_est, 1296 NULL, 1297 true, 1298 tca[TCA_RATE]); 1299 if (err) { 1300 NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); 1301 goto err_out4; 1302 } 1303 } 1304 1305 qdisc_hash_add(sch, false); 1306 trace_qdisc_create(ops, dev, parent); 1307 1308 return sch; 1309 1310 err_out5: 1311 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ 1312 if (ops->destroy) 1313 ops->destroy(sch); 1314 err_out3: 1315 netdev_put(dev, &sch->dev_tracker); 1316 qdisc_free(sch); 1317 err_out2: 1318 module_put(ops->owner); 1319 err_out: 1320 *errp = err; 1321 return NULL; 1322 1323 err_out4: 1324 /* 1325 * Any broken qdiscs that would require a ops->reset() here? 1326 * The qdisc was never in action so it shouldn't be necessary. 1327 */ 1328 qdisc_put_stab(rtnl_dereference(sch->stab)); 1329 if (ops->destroy) 1330 ops->destroy(sch); 1331 goto err_out3; 1332 } 1333 1334 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, 1335 struct netlink_ext_ack *extack) 1336 { 1337 struct qdisc_size_table *ostab, *stab = NULL; 1338 int err = 0; 1339 1340 if (tca[TCA_OPTIONS]) { 1341 if (!sch->ops->change) { 1342 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); 1343 return -EINVAL; 1344 } 1345 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 1346 NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); 1347 return -EOPNOTSUPP; 1348 } 1349 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); 1350 if (err) 1351 return err; 1352 } 1353 1354 if (tca[TCA_STAB]) { 1355 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1356 if (IS_ERR(stab)) 1357 return PTR_ERR(stab); 1358 } 1359 1360 ostab = rtnl_dereference(sch->stab); 1361 rcu_assign_pointer(sch->stab, stab); 1362 qdisc_put_stab(ostab); 1363 1364 if (tca[TCA_RATE]) { 1365 /* NB: ignores errors from replace_estimator 1366 because change can't be undone. */ 1367 if (sch->flags & TCQ_F_MQROOT) 1368 goto out; 1369 gen_replace_estimator(&sch->bstats, 1370 sch->cpu_bstats, 1371 &sch->rate_est, 1372 NULL, 1373 true, 1374 tca[TCA_RATE]); 1375 } 1376 out: 1377 return 0; 1378 } 1379 1380 struct check_loop_arg { 1381 struct qdisc_walker w; 1382 struct Qdisc *p; 1383 int depth; 1384 }; 1385 1386 static int check_loop_fn(struct Qdisc *q, unsigned long cl, 1387 struct qdisc_walker *w); 1388 1389 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) 1390 { 1391 struct check_loop_arg arg; 1392 1393 if (q->ops->cl_ops == NULL) 1394 return 0; 1395 1396 arg.w.stop = arg.w.skip = arg.w.count = 0; 1397 arg.w.fn = check_loop_fn; 1398 arg.depth = depth; 1399 arg.p = p; 1400 q->ops->cl_ops->walk(q, &arg.w); 1401 return arg.w.stop ? -ELOOP : 0; 1402 } 1403 1404 static int 1405 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) 1406 { 1407 struct Qdisc *leaf; 1408 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1409 struct check_loop_arg *arg = (struct check_loop_arg *)w; 1410 1411 leaf = cops->leaf(q, cl); 1412 if (leaf) { 1413 if (leaf == arg->p || arg->depth > 7) 1414 return -ELOOP; 1415 return check_loop(leaf, arg->p, arg->depth + 1); 1416 } 1417 return 0; 1418 } 1419 1420 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { 1421 [TCA_KIND] = { .type = NLA_STRING }, 1422 [TCA_RATE] = { .type = NLA_BINARY, 1423 .len = sizeof(struct tc_estimator) }, 1424 [TCA_STAB] = { .type = NLA_NESTED }, 1425 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, 1426 [TCA_CHAIN] = { .type = NLA_U32 }, 1427 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, 1428 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, 1429 }; 1430 1431 /* 1432 * Delete/get qdisc. 1433 */ 1434 1435 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1436 struct netlink_ext_ack *extack) 1437 { 1438 struct net *net = sock_net(skb->sk); 1439 struct tcmsg *tcm = nlmsg_data(n); 1440 struct nlattr *tca[TCA_MAX + 1]; 1441 struct net_device *dev; 1442 u32 clid; 1443 struct Qdisc *q = NULL; 1444 struct Qdisc *p = NULL; 1445 int err; 1446 1447 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 1448 rtm_tca_policy, extack); 1449 if (err < 0) 1450 return err; 1451 1452 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1453 if (!dev) 1454 return -ENODEV; 1455 1456 clid = tcm->tcm_parent; 1457 if (clid) { 1458 if (clid != TC_H_ROOT) { 1459 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 1460 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1461 if (!p) { 1462 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); 1463 return -ENOENT; 1464 } 1465 q = qdisc_leaf(p, clid); 1466 } else if (dev_ingress_queue(dev)) { 1467 q = dev_ingress_queue(dev)->qdisc_sleeping; 1468 } 1469 } else { 1470 q = rtnl_dereference(dev->qdisc); 1471 } 1472 if (!q) { 1473 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); 1474 return -ENOENT; 1475 } 1476 1477 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { 1478 NL_SET_ERR_MSG(extack, "Invalid handle"); 1479 return -EINVAL; 1480 } 1481 } else { 1482 q = qdisc_lookup(dev, tcm->tcm_handle); 1483 if (!q) { 1484 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); 1485 return -ENOENT; 1486 } 1487 } 1488 1489 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1490 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1491 return -EINVAL; 1492 } 1493 1494 if (n->nlmsg_type == RTM_DELQDISC) { 1495 if (!clid) { 1496 NL_SET_ERR_MSG(extack, "Classid cannot be zero"); 1497 return -EINVAL; 1498 } 1499 if (q->handle == 0) { 1500 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); 1501 return -ENOENT; 1502 } 1503 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); 1504 if (err != 0) 1505 return err; 1506 } else { 1507 qdisc_notify(net, skb, n, clid, NULL, q); 1508 } 1509 return 0; 1510 } 1511 1512 /* 1513 * Create/change qdisc. 1514 */ 1515 1516 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1517 struct netlink_ext_ack *extack) 1518 { 1519 struct net *net = sock_net(skb->sk); 1520 struct tcmsg *tcm; 1521 struct nlattr *tca[TCA_MAX + 1]; 1522 struct net_device *dev; 1523 u32 clid; 1524 struct Qdisc *q, *p; 1525 int err; 1526 1527 replay: 1528 /* Reinit, just in case something touches this. */ 1529 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 1530 rtm_tca_policy, extack); 1531 if (err < 0) 1532 return err; 1533 1534 tcm = nlmsg_data(n); 1535 clid = tcm->tcm_parent; 1536 q = p = NULL; 1537 1538 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1539 if (!dev) 1540 return -ENODEV; 1541 1542 1543 if (clid) { 1544 if (clid != TC_H_ROOT) { 1545 if (clid != TC_H_INGRESS) { 1546 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1547 if (!p) { 1548 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); 1549 return -ENOENT; 1550 } 1551 q = qdisc_leaf(p, clid); 1552 } else if (dev_ingress_queue_create(dev)) { 1553 q = dev_ingress_queue(dev)->qdisc_sleeping; 1554 } 1555 } else { 1556 q = rtnl_dereference(dev->qdisc); 1557 } 1558 1559 /* It may be default qdisc, ignore it */ 1560 if (q && q->handle == 0) 1561 q = NULL; 1562 1563 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1564 if (tcm->tcm_handle) { 1565 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { 1566 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); 1567 return -EEXIST; 1568 } 1569 if (TC_H_MIN(tcm->tcm_handle)) { 1570 NL_SET_ERR_MSG(extack, "Invalid minor handle"); 1571 return -EINVAL; 1572 } 1573 q = qdisc_lookup(dev, tcm->tcm_handle); 1574 if (!q) 1575 goto create_n_graft; 1576 if (n->nlmsg_flags & NLM_F_EXCL) { 1577 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); 1578 return -EEXIST; 1579 } 1580 if (tca[TCA_KIND] && 1581 nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1582 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1583 return -EINVAL; 1584 } 1585 if (q == p || 1586 (p && check_loop(q, p, 0))) { 1587 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); 1588 return -ELOOP; 1589 } 1590 qdisc_refcount_inc(q); 1591 goto graft; 1592 } else { 1593 if (!q) 1594 goto create_n_graft; 1595 1596 /* This magic test requires explanation. 1597 * 1598 * We know, that some child q is already 1599 * attached to this parent and have choice: 1600 * either to change it or to create/graft new one. 1601 * 1602 * 1. We are allowed to create/graft only 1603 * if CREATE and REPLACE flags are set. 1604 * 1605 * 2. If EXCL is set, requestor wanted to say, 1606 * that qdisc tcm_handle is not expected 1607 * to exist, so that we choose create/graft too. 1608 * 1609 * 3. The last case is when no flags are set. 1610 * Alas, it is sort of hole in API, we 1611 * cannot decide what to do unambiguously. 1612 * For now we select create/graft, if 1613 * user gave KIND, which does not match existing. 1614 */ 1615 if ((n->nlmsg_flags & NLM_F_CREATE) && 1616 (n->nlmsg_flags & NLM_F_REPLACE) && 1617 ((n->nlmsg_flags & NLM_F_EXCL) || 1618 (tca[TCA_KIND] && 1619 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1620 goto create_n_graft; 1621 } 1622 } 1623 } else { 1624 if (!tcm->tcm_handle) { 1625 NL_SET_ERR_MSG(extack, "Handle cannot be zero"); 1626 return -EINVAL; 1627 } 1628 q = qdisc_lookup(dev, tcm->tcm_handle); 1629 } 1630 1631 /* Change qdisc parameters */ 1632 if (!q) { 1633 NL_SET_ERR_MSG(extack, "Specified qdisc not found"); 1634 return -ENOENT; 1635 } 1636 if (n->nlmsg_flags & NLM_F_EXCL) { 1637 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); 1638 return -EEXIST; 1639 } 1640 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1641 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1642 return -EINVAL; 1643 } 1644 err = qdisc_change(q, tca, extack); 1645 if (err == 0) 1646 qdisc_notify(net, skb, n, clid, NULL, q); 1647 return err; 1648 1649 create_n_graft: 1650 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 1651 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); 1652 return -ENOENT; 1653 } 1654 if (clid == TC_H_INGRESS) { 1655 if (dev_ingress_queue(dev)) { 1656 q = qdisc_create(dev, dev_ingress_queue(dev), 1657 tcm->tcm_parent, tcm->tcm_parent, 1658 tca, &err, extack); 1659 } else { 1660 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); 1661 err = -ENOENT; 1662 } 1663 } else { 1664 struct netdev_queue *dev_queue; 1665 1666 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1667 dev_queue = p->ops->cl_ops->select_queue(p, tcm); 1668 else if (p) 1669 dev_queue = p->dev_queue; 1670 else 1671 dev_queue = netdev_get_tx_queue(dev, 0); 1672 1673 q = qdisc_create(dev, dev_queue, 1674 tcm->tcm_parent, tcm->tcm_handle, 1675 tca, &err, extack); 1676 } 1677 if (q == NULL) { 1678 if (err == -EAGAIN) 1679 goto replay; 1680 return err; 1681 } 1682 1683 graft: 1684 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); 1685 if (err) { 1686 if (q) 1687 qdisc_put(q); 1688 return err; 1689 } 1690 1691 return 0; 1692 } 1693 1694 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, 1695 struct netlink_callback *cb, 1696 int *q_idx_p, int s_q_idx, bool recur, 1697 bool dump_invisible) 1698 { 1699 int ret = 0, q_idx = *q_idx_p; 1700 struct Qdisc *q; 1701 int b; 1702 1703 if (!root) 1704 return 0; 1705 1706 q = root; 1707 if (q_idx < s_q_idx) { 1708 q_idx++; 1709 } else { 1710 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1711 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1712 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1713 RTM_NEWQDISC) <= 0) 1714 goto done; 1715 q_idx++; 1716 } 1717 1718 /* If dumping singletons, there is no qdisc_dev(root) and the singleton 1719 * itself has already been dumped. 1720 * 1721 * If we've already dumped the top-level (ingress) qdisc above and the global 1722 * qdisc hashtable, we don't want to hit it again 1723 */ 1724 if (!qdisc_dev(root) || !recur) 1725 goto out; 1726 1727 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 1728 if (q_idx < s_q_idx) { 1729 q_idx++; 1730 continue; 1731 } 1732 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1733 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1734 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1735 RTM_NEWQDISC) <= 0) 1736 goto done; 1737 q_idx++; 1738 } 1739 1740 out: 1741 *q_idx_p = q_idx; 1742 return ret; 1743 done: 1744 ret = -1; 1745 goto out; 1746 } 1747 1748 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 1749 { 1750 struct net *net = sock_net(skb->sk); 1751 int idx, q_idx; 1752 int s_idx, s_q_idx; 1753 struct net_device *dev; 1754 const struct nlmsghdr *nlh = cb->nlh; 1755 struct nlattr *tca[TCA_MAX + 1]; 1756 int err; 1757 1758 s_idx = cb->args[0]; 1759 s_q_idx = q_idx = cb->args[1]; 1760 1761 idx = 0; 1762 ASSERT_RTNL(); 1763 1764 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1765 rtm_tca_policy, cb->extack); 1766 if (err < 0) 1767 return err; 1768 1769 for_each_netdev(net, dev) { 1770 struct netdev_queue *dev_queue; 1771 1772 if (idx < s_idx) 1773 goto cont; 1774 if (idx > s_idx) 1775 s_q_idx = 0; 1776 q_idx = 0; 1777 1778 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), 1779 skb, cb, &q_idx, s_q_idx, 1780 true, tca[TCA_DUMP_INVISIBLE]) < 0) 1781 goto done; 1782 1783 dev_queue = dev_ingress_queue(dev); 1784 if (dev_queue && 1785 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, 1786 &q_idx, s_q_idx, false, 1787 tca[TCA_DUMP_INVISIBLE]) < 0) 1788 goto done; 1789 1790 cont: 1791 idx++; 1792 } 1793 1794 done: 1795 cb->args[0] = idx; 1796 cb->args[1] = q_idx; 1797 1798 return skb->len; 1799 } 1800 1801 1802 1803 /************************************************ 1804 * Traffic classes manipulation. * 1805 ************************************************/ 1806 1807 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 1808 unsigned long cl, 1809 u32 portid, u32 seq, u16 flags, int event) 1810 { 1811 struct tcmsg *tcm; 1812 struct nlmsghdr *nlh; 1813 unsigned char *b = skb_tail_pointer(skb); 1814 struct gnet_dump d; 1815 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1816 1817 cond_resched(); 1818 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 1819 if (!nlh) 1820 goto out_nlmsg_trim; 1821 tcm = nlmsg_data(nlh); 1822 tcm->tcm_family = AF_UNSPEC; 1823 tcm->tcm__pad1 = 0; 1824 tcm->tcm__pad2 = 0; 1825 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 1826 tcm->tcm_parent = q->handle; 1827 tcm->tcm_handle = q->handle; 1828 tcm->tcm_info = 0; 1829 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 1830 goto nla_put_failure; 1831 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) 1832 goto nla_put_failure; 1833 1834 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1835 NULL, &d, TCA_PAD) < 0) 1836 goto nla_put_failure; 1837 1838 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1839 goto nla_put_failure; 1840 1841 if (gnet_stats_finish_copy(&d) < 0) 1842 goto nla_put_failure; 1843 1844 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1845 return skb->len; 1846 1847 out_nlmsg_trim: 1848 nla_put_failure: 1849 nlmsg_trim(skb, b); 1850 return -1; 1851 } 1852 1853 static int tclass_notify(struct net *net, struct sk_buff *oskb, 1854 struct nlmsghdr *n, struct Qdisc *q, 1855 unsigned long cl, int event) 1856 { 1857 struct sk_buff *skb; 1858 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1859 1860 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1861 if (!skb) 1862 return -ENOBUFS; 1863 1864 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { 1865 kfree_skb(skb); 1866 return -EINVAL; 1867 } 1868 1869 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1870 n->nlmsg_flags & NLM_F_ECHO); 1871 } 1872 1873 static int tclass_del_notify(struct net *net, 1874 const struct Qdisc_class_ops *cops, 1875 struct sk_buff *oskb, struct nlmsghdr *n, 1876 struct Qdisc *q, unsigned long cl, 1877 struct netlink_ext_ack *extack) 1878 { 1879 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1880 struct sk_buff *skb; 1881 int err = 0; 1882 1883 if (!cops->delete) 1884 return -EOPNOTSUPP; 1885 1886 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1887 if (!skb) 1888 return -ENOBUFS; 1889 1890 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, 1891 RTM_DELTCLASS) < 0) { 1892 kfree_skb(skb); 1893 return -EINVAL; 1894 } 1895 1896 err = cops->delete(q, cl, extack); 1897 if (err) { 1898 kfree_skb(skb); 1899 return err; 1900 } 1901 1902 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1903 n->nlmsg_flags & NLM_F_ECHO); 1904 return err; 1905 } 1906 1907 #ifdef CONFIG_NET_CLS 1908 1909 struct tcf_bind_args { 1910 struct tcf_walker w; 1911 unsigned long base; 1912 unsigned long cl; 1913 u32 classid; 1914 }; 1915 1916 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 1917 { 1918 struct tcf_bind_args *a = (void *)arg; 1919 1920 if (n && tp->ops->bind_class) { 1921 struct Qdisc *q = tcf_block_q(tp->chain->block); 1922 1923 sch_tree_lock(q); 1924 tp->ops->bind_class(n, a->classid, a->cl, q, a->base); 1925 sch_tree_unlock(q); 1926 } 1927 return 0; 1928 } 1929 1930 struct tc_bind_class_args { 1931 struct qdisc_walker w; 1932 unsigned long new_cl; 1933 u32 portid; 1934 u32 clid; 1935 }; 1936 1937 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl, 1938 struct qdisc_walker *w) 1939 { 1940 struct tc_bind_class_args *a = (struct tc_bind_class_args *)w; 1941 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1942 struct tcf_block *block; 1943 struct tcf_chain *chain; 1944 1945 block = cops->tcf_block(q, cl, NULL); 1946 if (!block) 1947 return 0; 1948 for (chain = tcf_get_next_chain(block, NULL); 1949 chain; 1950 chain = tcf_get_next_chain(block, chain)) { 1951 struct tcf_proto *tp; 1952 1953 for (tp = tcf_get_next_proto(chain, NULL); 1954 tp; tp = tcf_get_next_proto(chain, tp)) { 1955 struct tcf_bind_args arg = {}; 1956 1957 arg.w.fn = tcf_node_bind; 1958 arg.classid = a->clid; 1959 arg.base = cl; 1960 arg.cl = a->new_cl; 1961 tp->ops->walk(tp, &arg.w, true); 1962 } 1963 } 1964 1965 return 0; 1966 } 1967 1968 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1969 unsigned long new_cl) 1970 { 1971 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1972 struct tc_bind_class_args args = {}; 1973 1974 if (!cops->tcf_block) 1975 return; 1976 args.portid = portid; 1977 args.clid = clid; 1978 args.new_cl = new_cl; 1979 args.w.fn = tc_bind_class_walker; 1980 q->ops->cl_ops->walk(q, &args.w); 1981 } 1982 1983 #else 1984 1985 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1986 unsigned long new_cl) 1987 { 1988 } 1989 1990 #endif 1991 1992 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, 1993 struct netlink_ext_ack *extack) 1994 { 1995 struct net *net = sock_net(skb->sk); 1996 struct tcmsg *tcm = nlmsg_data(n); 1997 struct nlattr *tca[TCA_MAX + 1]; 1998 struct net_device *dev; 1999 struct Qdisc *q = NULL; 2000 const struct Qdisc_class_ops *cops; 2001 unsigned long cl = 0; 2002 unsigned long new_cl; 2003 u32 portid; 2004 u32 clid; 2005 u32 qid; 2006 int err; 2007 2008 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 2009 rtm_tca_policy, extack); 2010 if (err < 0) 2011 return err; 2012 2013 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 2014 if (!dev) 2015 return -ENODEV; 2016 2017 /* 2018 parent == TC_H_UNSPEC - unspecified parent. 2019 parent == TC_H_ROOT - class is root, which has no parent. 2020 parent == X:0 - parent is root class. 2021 parent == X:Y - parent is a node in hierarchy. 2022 parent == 0:Y - parent is X:Y, where X:0 is qdisc. 2023 2024 handle == 0:0 - generate handle from kernel pool. 2025 handle == 0:Y - class is X:Y, where X:0 is qdisc. 2026 handle == X:Y - clear. 2027 handle == X:0 - root class. 2028 */ 2029 2030 /* Step 1. Determine qdisc handle X:0 */ 2031 2032 portid = tcm->tcm_parent; 2033 clid = tcm->tcm_handle; 2034 qid = TC_H_MAJ(clid); 2035 2036 if (portid != TC_H_ROOT) { 2037 u32 qid1 = TC_H_MAJ(portid); 2038 2039 if (qid && qid1) { 2040 /* If both majors are known, they must be identical. */ 2041 if (qid != qid1) 2042 return -EINVAL; 2043 } else if (qid1) { 2044 qid = qid1; 2045 } else if (qid == 0) 2046 qid = rtnl_dereference(dev->qdisc)->handle; 2047 2048 /* Now qid is genuine qdisc handle consistent 2049 * both with parent and child. 2050 * 2051 * TC_H_MAJ(portid) still may be unspecified, complete it now. 2052 */ 2053 if (portid) 2054 portid = TC_H_MAKE(qid, portid); 2055 } else { 2056 if (qid == 0) 2057 qid = rtnl_dereference(dev->qdisc)->handle; 2058 } 2059 2060 /* OK. Locate qdisc */ 2061 q = qdisc_lookup(dev, qid); 2062 if (!q) 2063 return -ENOENT; 2064 2065 /* An check that it supports classes */ 2066 cops = q->ops->cl_ops; 2067 if (cops == NULL) 2068 return -EINVAL; 2069 2070 /* Now try to get class */ 2071 if (clid == 0) { 2072 if (portid == TC_H_ROOT) 2073 clid = qid; 2074 } else 2075 clid = TC_H_MAKE(qid, clid); 2076 2077 if (clid) 2078 cl = cops->find(q, clid); 2079 2080 if (cl == 0) { 2081 err = -ENOENT; 2082 if (n->nlmsg_type != RTM_NEWTCLASS || 2083 !(n->nlmsg_flags & NLM_F_CREATE)) 2084 goto out; 2085 } else { 2086 switch (n->nlmsg_type) { 2087 case RTM_NEWTCLASS: 2088 err = -EEXIST; 2089 if (n->nlmsg_flags & NLM_F_EXCL) 2090 goto out; 2091 break; 2092 case RTM_DELTCLASS: 2093 err = tclass_del_notify(net, cops, skb, n, q, cl, extack); 2094 /* Unbind the class with flilters with 0 */ 2095 tc_bind_tclass(q, portid, clid, 0); 2096 goto out; 2097 case RTM_GETTCLASS: 2098 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); 2099 goto out; 2100 default: 2101 err = -EINVAL; 2102 goto out; 2103 } 2104 } 2105 2106 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 2107 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); 2108 return -EOPNOTSUPP; 2109 } 2110 2111 new_cl = cl; 2112 err = -EOPNOTSUPP; 2113 if (cops->change) 2114 err = cops->change(q, clid, portid, tca, &new_cl, extack); 2115 if (err == 0) { 2116 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); 2117 /* We just create a new class, need to do reverse binding. */ 2118 if (cl != new_cl) 2119 tc_bind_tclass(q, portid, clid, new_cl); 2120 } 2121 out: 2122 return err; 2123 } 2124 2125 struct qdisc_dump_args { 2126 struct qdisc_walker w; 2127 struct sk_buff *skb; 2128 struct netlink_callback *cb; 2129 }; 2130 2131 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, 2132 struct qdisc_walker *arg) 2133 { 2134 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; 2135 2136 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, 2137 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 2138 RTM_NEWTCLASS); 2139 } 2140 2141 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, 2142 struct tcmsg *tcm, struct netlink_callback *cb, 2143 int *t_p, int s_t) 2144 { 2145 struct qdisc_dump_args arg; 2146 2147 if (tc_qdisc_dump_ignore(q, false) || 2148 *t_p < s_t || !q->ops->cl_ops || 2149 (tcm->tcm_parent && 2150 TC_H_MAJ(tcm->tcm_parent) != q->handle)) { 2151 (*t_p)++; 2152 return 0; 2153 } 2154 if (*t_p > s_t) 2155 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 2156 arg.w.fn = qdisc_class_dump; 2157 arg.skb = skb; 2158 arg.cb = cb; 2159 arg.w.stop = 0; 2160 arg.w.skip = cb->args[1]; 2161 arg.w.count = 0; 2162 q->ops->cl_ops->walk(q, &arg.w); 2163 cb->args[1] = arg.w.count; 2164 if (arg.w.stop) 2165 return -1; 2166 (*t_p)++; 2167 return 0; 2168 } 2169 2170 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, 2171 struct tcmsg *tcm, struct netlink_callback *cb, 2172 int *t_p, int s_t, bool recur) 2173 { 2174 struct Qdisc *q; 2175 int b; 2176 2177 if (!root) 2178 return 0; 2179 2180 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) 2181 return -1; 2182 2183 if (!qdisc_dev(root) || !recur) 2184 return 0; 2185 2186 if (tcm->tcm_parent) { 2187 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); 2188 if (q && q != root && 2189 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2190 return -1; 2191 return 0; 2192 } 2193 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 2194 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2195 return -1; 2196 } 2197 2198 return 0; 2199 } 2200 2201 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 2202 { 2203 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2204 struct net *net = sock_net(skb->sk); 2205 struct netdev_queue *dev_queue; 2206 struct net_device *dev; 2207 int t, s_t; 2208 2209 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 2210 return 0; 2211 dev = dev_get_by_index(net, tcm->tcm_ifindex); 2212 if (!dev) 2213 return 0; 2214 2215 s_t = cb->args[0]; 2216 t = 0; 2217 2218 if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc), 2219 skb, tcm, cb, &t, s_t, true) < 0) 2220 goto done; 2221 2222 dev_queue = dev_ingress_queue(dev); 2223 if (dev_queue && 2224 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, 2225 &t, s_t, false) < 0) 2226 goto done; 2227 2228 done: 2229 cb->args[0] = t; 2230 2231 dev_put(dev); 2232 return skb->len; 2233 } 2234 2235 #ifdef CONFIG_PROC_FS 2236 static int psched_show(struct seq_file *seq, void *v) 2237 { 2238 seq_printf(seq, "%08x %08x %08x %08x\n", 2239 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 2240 1000000, 2241 (u32)NSEC_PER_SEC / hrtimer_resolution); 2242 2243 return 0; 2244 } 2245 2246 static int __net_init psched_net_init(struct net *net) 2247 { 2248 struct proc_dir_entry *e; 2249 2250 e = proc_create_single("psched", 0, net->proc_net, psched_show); 2251 if (e == NULL) 2252 return -ENOMEM; 2253 2254 return 0; 2255 } 2256 2257 static void __net_exit psched_net_exit(struct net *net) 2258 { 2259 remove_proc_entry("psched", net->proc_net); 2260 } 2261 #else 2262 static int __net_init psched_net_init(struct net *net) 2263 { 2264 return 0; 2265 } 2266 2267 static void __net_exit psched_net_exit(struct net *net) 2268 { 2269 } 2270 #endif 2271 2272 static struct pernet_operations psched_net_ops = { 2273 .init = psched_net_init, 2274 .exit = psched_net_exit, 2275 }; 2276 2277 DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); 2278 2279 static int __init pktsched_init(void) 2280 { 2281 int err; 2282 2283 err = register_pernet_subsys(&psched_net_ops); 2284 if (err) { 2285 pr_err("pktsched_init: " 2286 "cannot initialize per netns operations\n"); 2287 return err; 2288 } 2289 2290 register_qdisc(&pfifo_fast_ops); 2291 register_qdisc(&pfifo_qdisc_ops); 2292 register_qdisc(&bfifo_qdisc_ops); 2293 register_qdisc(&pfifo_head_drop_qdisc_ops); 2294 register_qdisc(&mq_qdisc_ops); 2295 register_qdisc(&noqueue_qdisc_ops); 2296 2297 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); 2298 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); 2299 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, 2300 0); 2301 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); 2302 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); 2303 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, 2304 0); 2305 2306 tc_wrapper_init(); 2307 2308 return 0; 2309 } 2310 2311 subsys_initcall(pktsched_init); 2312