1 /* 2 * net/sched/sch_api.c Packet scheduler API. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * Fixes: 12 * 13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired. 14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support 16 */ 17 18 #include <linux/module.h> 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/string.h> 22 #include <linux/errno.h> 23 #include <linux/skbuff.h> 24 #include <linux/init.h> 25 #include <linux/proc_fs.h> 26 #include <linux/seq_file.h> 27 #include <linux/kmod.h> 28 #include <linux/list.h> 29 #include <linux/hrtimer.h> 30 #include <linux/slab.h> 31 #include <linux/hashtable.h> 32 33 #include <net/net_namespace.h> 34 #include <net/sock.h> 35 #include <net/netlink.h> 36 #include <net/pkt_sched.h> 37 #include <net/pkt_cls.h> 38 39 /* 40 41 Short review. 42 ------------- 43 44 This file consists of two interrelated parts: 45 46 1. queueing disciplines manager frontend. 47 2. traffic classes manager frontend. 48 49 Generally, queueing discipline ("qdisc") is a black box, 50 which is able to enqueue packets and to dequeue them (when 51 device is ready to send something) in order and at times 52 determined by algorithm hidden in it. 53 54 qdisc's are divided to two categories: 55 - "queues", which have no internal structure visible from outside. 56 - "schedulers", which split all the packets to "traffic classes", 57 using "packet classifiers" (look at cls_api.c) 58 59 In turn, classes may have child qdiscs (as rule, queues) 60 attached to them etc. etc. etc. 61 62 The goal of the routines in this file is to translate 63 information supplied by user in the form of handles 64 to more intelligible for kernel form, to make some sanity 65 checks and part of work, which is common to all qdiscs 66 and to provide rtnetlink notifications. 67 68 All real intelligent work is done inside qdisc modules. 69 70 71 72 Every discipline has two major routines: enqueue and dequeue. 73 74 ---dequeue 75 76 dequeue usually returns a skb to send. It is allowed to return NULL, 77 but it does not mean that queue is empty, it just means that 78 discipline does not want to send anything this time. 79 Queue is really empty if q->q.qlen == 0. 80 For complicated disciplines with multiple queues q->q is not 81 real packet queue, but however q->q.qlen must be valid. 82 83 ---enqueue 84 85 enqueue returns 0, if packet was enqueued successfully. 86 If packet (this one or another one) was dropped, it returns 87 not zero error code. 88 NET_XMIT_DROP - this packet dropped 89 Expected action: do not backoff, but wait until queue will clear. 90 NET_XMIT_CN - probably this packet enqueued, but another one dropped. 91 Expected action: backoff or ignore 92 93 Auxiliary routines: 94 95 ---peek 96 97 like dequeue but without removing a packet from the queue 98 99 ---reset 100 101 returns qdisc to initial state: purge all buffers, clear all 102 timers, counters (except for statistics) etc. 103 104 ---init 105 106 initializes newly created qdisc. 107 108 ---destroy 109 110 destroys resources allocated by init and during lifetime of qdisc. 111 112 ---change 113 114 changes qdisc parameters. 115 */ 116 117 /* Protects list of registered TC modules. It is pure SMP lock. */ 118 static DEFINE_RWLOCK(qdisc_mod_lock); 119 120 121 /************************************************ 122 * Queueing disciplines manipulation. * 123 ************************************************/ 124 125 126 /* The list of all installed queueing disciplines. */ 127 128 static struct Qdisc_ops *qdisc_base; 129 130 /* Register/unregister queueing discipline */ 131 132 int register_qdisc(struct Qdisc_ops *qops) 133 { 134 struct Qdisc_ops *q, **qp; 135 int rc = -EEXIST; 136 137 write_lock(&qdisc_mod_lock); 138 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 139 if (!strcmp(qops->id, q->id)) 140 goto out; 141 142 if (qops->enqueue == NULL) 143 qops->enqueue = noop_qdisc_ops.enqueue; 144 if (qops->peek == NULL) { 145 if (qops->dequeue == NULL) 146 qops->peek = noop_qdisc_ops.peek; 147 else 148 goto out_einval; 149 } 150 if (qops->dequeue == NULL) 151 qops->dequeue = noop_qdisc_ops.dequeue; 152 153 if (qops->cl_ops) { 154 const struct Qdisc_class_ops *cops = qops->cl_ops; 155 156 if (!(cops->find && cops->walk && cops->leaf)) 157 goto out_einval; 158 159 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) 160 goto out_einval; 161 } 162 163 qops->next = NULL; 164 *qp = qops; 165 rc = 0; 166 out: 167 write_unlock(&qdisc_mod_lock); 168 return rc; 169 170 out_einval: 171 rc = -EINVAL; 172 goto out; 173 } 174 EXPORT_SYMBOL(register_qdisc); 175 176 int unregister_qdisc(struct Qdisc_ops *qops) 177 { 178 struct Qdisc_ops *q, **qp; 179 int err = -ENOENT; 180 181 write_lock(&qdisc_mod_lock); 182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 183 if (q == qops) 184 break; 185 if (q) { 186 *qp = q->next; 187 q->next = NULL; 188 err = 0; 189 } 190 write_unlock(&qdisc_mod_lock); 191 return err; 192 } 193 EXPORT_SYMBOL(unregister_qdisc); 194 195 /* Get default qdisc if not otherwise specified */ 196 void qdisc_get_default(char *name, size_t len) 197 { 198 read_lock(&qdisc_mod_lock); 199 strlcpy(name, default_qdisc_ops->id, len); 200 read_unlock(&qdisc_mod_lock); 201 } 202 203 static struct Qdisc_ops *qdisc_lookup_default(const char *name) 204 { 205 struct Qdisc_ops *q = NULL; 206 207 for (q = qdisc_base; q; q = q->next) { 208 if (!strcmp(name, q->id)) { 209 if (!try_module_get(q->owner)) 210 q = NULL; 211 break; 212 } 213 } 214 215 return q; 216 } 217 218 /* Set new default qdisc to use */ 219 int qdisc_set_default(const char *name) 220 { 221 const struct Qdisc_ops *ops; 222 223 if (!capable(CAP_NET_ADMIN)) 224 return -EPERM; 225 226 write_lock(&qdisc_mod_lock); 227 ops = qdisc_lookup_default(name); 228 if (!ops) { 229 /* Not found, drop lock and try to load module */ 230 write_unlock(&qdisc_mod_lock); 231 request_module("sch_%s", name); 232 write_lock(&qdisc_mod_lock); 233 234 ops = qdisc_lookup_default(name); 235 } 236 237 if (ops) { 238 /* Set new default */ 239 module_put(default_qdisc_ops->owner); 240 default_qdisc_ops = ops; 241 } 242 write_unlock(&qdisc_mod_lock); 243 244 return ops ? 0 : -ENOENT; 245 } 246 247 #ifdef CONFIG_NET_SCH_DEFAULT 248 /* Set default value from kernel config */ 249 static int __init sch_default_qdisc(void) 250 { 251 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); 252 } 253 late_initcall(sch_default_qdisc); 254 #endif 255 256 /* We know handle. Find qdisc among all qdisc's attached to device 257 * (root qdisc, all its children, children of children etc.) 258 * Note: caller either uses rtnl or rcu_read_lock() 259 */ 260 261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) 262 { 263 struct Qdisc *q; 264 265 if (!qdisc_dev(root)) 266 return (root->handle == handle ? root : NULL); 267 268 if (!(root->flags & TCQ_F_BUILTIN) && 269 root->handle == handle) 270 return root; 271 272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { 273 if (q->handle == handle) 274 return q; 275 } 276 return NULL; 277 } 278 279 void qdisc_hash_add(struct Qdisc *q, bool invisible) 280 { 281 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 282 ASSERT_RTNL(); 283 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); 284 if (invisible) 285 q->flags |= TCQ_F_INVISIBLE; 286 } 287 } 288 EXPORT_SYMBOL(qdisc_hash_add); 289 290 void qdisc_hash_del(struct Qdisc *q) 291 { 292 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 293 ASSERT_RTNL(); 294 hash_del_rcu(&q->hash); 295 } 296 } 297 EXPORT_SYMBOL(qdisc_hash_del); 298 299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) 300 { 301 struct Qdisc *q; 302 303 if (!handle) 304 return NULL; 305 q = qdisc_match_from_root(dev->qdisc, handle); 306 if (q) 307 goto out; 308 309 if (dev_ingress_queue(dev)) 310 q = qdisc_match_from_root( 311 dev_ingress_queue(dev)->qdisc_sleeping, 312 handle); 313 out: 314 return q; 315 } 316 317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) 318 { 319 struct netdev_queue *nq; 320 struct Qdisc *q; 321 322 if (!handle) 323 return NULL; 324 q = qdisc_match_from_root(dev->qdisc, handle); 325 if (q) 326 goto out; 327 328 nq = dev_ingress_queue_rcu(dev); 329 if (nq) 330 q = qdisc_match_from_root(nq->qdisc_sleeping, handle); 331 out: 332 return q; 333 } 334 335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) 336 { 337 unsigned long cl; 338 const struct Qdisc_class_ops *cops = p->ops->cl_ops; 339 340 if (cops == NULL) 341 return NULL; 342 cl = cops->find(p, classid); 343 344 if (cl == 0) 345 return NULL; 346 return cops->leaf(p, cl); 347 } 348 349 /* Find queueing discipline by name */ 350 351 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) 352 { 353 struct Qdisc_ops *q = NULL; 354 355 if (kind) { 356 read_lock(&qdisc_mod_lock); 357 for (q = qdisc_base; q; q = q->next) { 358 if (nla_strcmp(kind, q->id) == 0) { 359 if (!try_module_get(q->owner)) 360 q = NULL; 361 break; 362 } 363 } 364 read_unlock(&qdisc_mod_lock); 365 } 366 return q; 367 } 368 369 /* The linklayer setting were not transferred from iproute2, in older 370 * versions, and the rate tables lookup systems have been dropped in 371 * the kernel. To keep backward compatible with older iproute2 tc 372 * utils, we detect the linklayer setting by detecting if the rate 373 * table were modified. 374 * 375 * For linklayer ATM table entries, the rate table will be aligned to 376 * 48 bytes, thus some table entries will contain the same value. The 377 * mpu (min packet unit) is also encoded into the old rate table, thus 378 * starting from the mpu, we find low and high table entries for 379 * mapping this cell. If these entries contain the same value, when 380 * the rate tables have been modified for linklayer ATM. 381 * 382 * This is done by rounding mpu to the nearest 48 bytes cell/entry, 383 * and then roundup to the next cell, calc the table entry one below, 384 * and compare. 385 */ 386 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) 387 { 388 int low = roundup(r->mpu, 48); 389 int high = roundup(low+1, 48); 390 int cell_low = low >> r->cell_log; 391 int cell_high = (high >> r->cell_log) - 1; 392 393 /* rtab is too inaccurate at rates > 100Mbit/s */ 394 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { 395 pr_debug("TC linklayer: Giving up ATM detection\n"); 396 return TC_LINKLAYER_ETHERNET; 397 } 398 399 if ((cell_high > cell_low) && (cell_high < 256) 400 && (rtab[cell_low] == rtab[cell_high])) { 401 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", 402 cell_low, cell_high, rtab[cell_high]); 403 return TC_LINKLAYER_ATM; 404 } 405 return TC_LINKLAYER_ETHERNET; 406 } 407 408 static struct qdisc_rate_table *qdisc_rtab_list; 409 410 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 411 struct nlattr *tab, 412 struct netlink_ext_ack *extack) 413 { 414 struct qdisc_rate_table *rtab; 415 416 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || 417 nla_len(tab) != TC_RTAB_SIZE) { 418 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); 419 return NULL; 420 } 421 422 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { 423 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && 424 !memcmp(&rtab->data, nla_data(tab), 1024)) { 425 rtab->refcnt++; 426 return rtab; 427 } 428 } 429 430 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 431 if (rtab) { 432 rtab->rate = *r; 433 rtab->refcnt = 1; 434 memcpy(rtab->data, nla_data(tab), 1024); 435 if (r->linklayer == TC_LINKLAYER_UNAWARE) 436 r->linklayer = __detect_linklayer(r, rtab->data); 437 rtab->next = qdisc_rtab_list; 438 qdisc_rtab_list = rtab; 439 } else { 440 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); 441 } 442 return rtab; 443 } 444 EXPORT_SYMBOL(qdisc_get_rtab); 445 446 void qdisc_put_rtab(struct qdisc_rate_table *tab) 447 { 448 struct qdisc_rate_table *rtab, **rtabp; 449 450 if (!tab || --tab->refcnt) 451 return; 452 453 for (rtabp = &qdisc_rtab_list; 454 (rtab = *rtabp) != NULL; 455 rtabp = &rtab->next) { 456 if (rtab == tab) { 457 *rtabp = rtab->next; 458 kfree(rtab); 459 return; 460 } 461 } 462 } 463 EXPORT_SYMBOL(qdisc_put_rtab); 464 465 static LIST_HEAD(qdisc_stab_list); 466 467 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { 468 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, 469 [TCA_STAB_DATA] = { .type = NLA_BINARY }, 470 }; 471 472 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, 473 struct netlink_ext_ack *extack) 474 { 475 struct nlattr *tb[TCA_STAB_MAX + 1]; 476 struct qdisc_size_table *stab; 477 struct tc_sizespec *s; 478 unsigned int tsize = 0; 479 u16 *tab = NULL; 480 int err; 481 482 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, 483 extack); 484 if (err < 0) 485 return ERR_PTR(err); 486 if (!tb[TCA_STAB_BASE]) { 487 NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); 488 return ERR_PTR(-EINVAL); 489 } 490 491 s = nla_data(tb[TCA_STAB_BASE]); 492 493 if (s->tsize > 0) { 494 if (!tb[TCA_STAB_DATA]) { 495 NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); 496 return ERR_PTR(-EINVAL); 497 } 498 tab = nla_data(tb[TCA_STAB_DATA]); 499 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 500 } 501 502 if (tsize != s->tsize || (!tab && tsize > 0)) { 503 NL_SET_ERR_MSG(extack, "Invalid size of size table"); 504 return ERR_PTR(-EINVAL); 505 } 506 507 list_for_each_entry(stab, &qdisc_stab_list, list) { 508 if (memcmp(&stab->szopts, s, sizeof(*s))) 509 continue; 510 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) 511 continue; 512 stab->refcnt++; 513 return stab; 514 } 515 516 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); 517 if (!stab) 518 return ERR_PTR(-ENOMEM); 519 520 stab->refcnt = 1; 521 stab->szopts = *s; 522 if (tsize > 0) 523 memcpy(stab->data, tab, tsize * sizeof(u16)); 524 525 list_add_tail(&stab->list, &qdisc_stab_list); 526 527 return stab; 528 } 529 530 void qdisc_put_stab(struct qdisc_size_table *tab) 531 { 532 if (!tab) 533 return; 534 535 if (--tab->refcnt == 0) { 536 list_del(&tab->list); 537 kfree_rcu(tab, rcu); 538 } 539 } 540 EXPORT_SYMBOL(qdisc_put_stab); 541 542 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) 543 { 544 struct nlattr *nest; 545 546 nest = nla_nest_start_noflag(skb, TCA_STAB); 547 if (nest == NULL) 548 goto nla_put_failure; 549 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) 550 goto nla_put_failure; 551 nla_nest_end(skb, nest); 552 553 return skb->len; 554 555 nla_put_failure: 556 return -1; 557 } 558 559 void __qdisc_calculate_pkt_len(struct sk_buff *skb, 560 const struct qdisc_size_table *stab) 561 { 562 int pkt_len, slot; 563 564 pkt_len = skb->len + stab->szopts.overhead; 565 if (unlikely(!stab->szopts.tsize)) 566 goto out; 567 568 slot = pkt_len + stab->szopts.cell_align; 569 if (unlikely(slot < 0)) 570 slot = 0; 571 572 slot >>= stab->szopts.cell_log; 573 if (likely(slot < stab->szopts.tsize)) 574 pkt_len = stab->data[slot]; 575 else 576 pkt_len = stab->data[stab->szopts.tsize - 1] * 577 (slot / stab->szopts.tsize) + 578 stab->data[slot % stab->szopts.tsize]; 579 580 pkt_len <<= stab->szopts.size_log; 581 out: 582 if (unlikely(pkt_len < 1)) 583 pkt_len = 1; 584 qdisc_skb_cb(skb)->pkt_len = pkt_len; 585 } 586 EXPORT_SYMBOL(__qdisc_calculate_pkt_len); 587 588 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) 589 { 590 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 591 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", 592 txt, qdisc->ops->id, qdisc->handle >> 16); 593 qdisc->flags |= TCQ_F_WARN_NONWC; 594 } 595 } 596 EXPORT_SYMBOL(qdisc_warn_nonwc); 597 598 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 599 { 600 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 601 timer); 602 603 rcu_read_lock(); 604 __netif_schedule(qdisc_root(wd->qdisc)); 605 rcu_read_unlock(); 606 607 return HRTIMER_NORESTART; 608 } 609 610 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, 611 clockid_t clockid) 612 { 613 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); 614 wd->timer.function = qdisc_watchdog; 615 wd->qdisc = qdisc; 616 } 617 EXPORT_SYMBOL(qdisc_watchdog_init_clockid); 618 619 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) 620 { 621 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); 622 } 623 EXPORT_SYMBOL(qdisc_watchdog_init); 624 625 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) 626 { 627 if (test_bit(__QDISC_STATE_DEACTIVATED, 628 &qdisc_root_sleeping(wd->qdisc)->state)) 629 return; 630 631 if (wd->last_expires == expires) 632 return; 633 634 wd->last_expires = expires; 635 hrtimer_start(&wd->timer, 636 ns_to_ktime(expires), 637 HRTIMER_MODE_ABS_PINNED); 638 } 639 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); 640 641 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 642 { 643 hrtimer_cancel(&wd->timer); 644 } 645 EXPORT_SYMBOL(qdisc_watchdog_cancel); 646 647 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) 648 { 649 struct hlist_head *h; 650 unsigned int i; 651 652 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); 653 654 if (h != NULL) { 655 for (i = 0; i < n; i++) 656 INIT_HLIST_HEAD(&h[i]); 657 } 658 return h; 659 } 660 661 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) 662 { 663 struct Qdisc_class_common *cl; 664 struct hlist_node *next; 665 struct hlist_head *nhash, *ohash; 666 unsigned int nsize, nmask, osize; 667 unsigned int i, h; 668 669 /* Rehash when load factor exceeds 0.75 */ 670 if (clhash->hashelems * 4 <= clhash->hashsize * 3) 671 return; 672 nsize = clhash->hashsize * 2; 673 nmask = nsize - 1; 674 nhash = qdisc_class_hash_alloc(nsize); 675 if (nhash == NULL) 676 return; 677 678 ohash = clhash->hash; 679 osize = clhash->hashsize; 680 681 sch_tree_lock(sch); 682 for (i = 0; i < osize; i++) { 683 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { 684 h = qdisc_class_hash(cl->classid, nmask); 685 hlist_add_head(&cl->hnode, &nhash[h]); 686 } 687 } 688 clhash->hash = nhash; 689 clhash->hashsize = nsize; 690 clhash->hashmask = nmask; 691 sch_tree_unlock(sch); 692 693 kvfree(ohash); 694 } 695 EXPORT_SYMBOL(qdisc_class_hash_grow); 696 697 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) 698 { 699 unsigned int size = 4; 700 701 clhash->hash = qdisc_class_hash_alloc(size); 702 if (!clhash->hash) 703 return -ENOMEM; 704 clhash->hashsize = size; 705 clhash->hashmask = size - 1; 706 clhash->hashelems = 0; 707 return 0; 708 } 709 EXPORT_SYMBOL(qdisc_class_hash_init); 710 711 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) 712 { 713 kvfree(clhash->hash); 714 } 715 EXPORT_SYMBOL(qdisc_class_hash_destroy); 716 717 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, 718 struct Qdisc_class_common *cl) 719 { 720 unsigned int h; 721 722 INIT_HLIST_NODE(&cl->hnode); 723 h = qdisc_class_hash(cl->classid, clhash->hashmask); 724 hlist_add_head(&cl->hnode, &clhash->hash[h]); 725 clhash->hashelems++; 726 } 727 EXPORT_SYMBOL(qdisc_class_hash_insert); 728 729 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, 730 struct Qdisc_class_common *cl) 731 { 732 hlist_del(&cl->hnode); 733 clhash->hashelems--; 734 } 735 EXPORT_SYMBOL(qdisc_class_hash_remove); 736 737 /* Allocate an unique handle from space managed by kernel 738 * Possible range is [8000-FFFF]:0000 (0x8000 values) 739 */ 740 static u32 qdisc_alloc_handle(struct net_device *dev) 741 { 742 int i = 0x8000; 743 static u32 autohandle = TC_H_MAKE(0x80000000U, 0); 744 745 do { 746 autohandle += TC_H_MAKE(0x10000U, 0); 747 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) 748 autohandle = TC_H_MAKE(0x80000000U, 0); 749 if (!qdisc_lookup(dev, autohandle)) 750 return autohandle; 751 cond_resched(); 752 } while (--i > 0); 753 754 return 0; 755 } 756 757 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) 758 { 759 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; 760 const struct Qdisc_class_ops *cops; 761 unsigned long cl; 762 u32 parentid; 763 bool notify; 764 int drops; 765 766 if (n == 0 && len == 0) 767 return; 768 drops = max_t(int, n, 0); 769 rcu_read_lock(); 770 while ((parentid = sch->parent)) { 771 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) 772 break; 773 774 if (sch->flags & TCQ_F_NOPARENT) 775 break; 776 /* Notify parent qdisc only if child qdisc becomes empty. 777 * 778 * If child was empty even before update then backlog 779 * counter is screwed and we skip notification because 780 * parent class is already passive. 781 * 782 * If the original child was offloaded then it is allowed 783 * to be seem as empty, so the parent is notified anyway. 784 */ 785 notify = !sch->q.qlen && !WARN_ON_ONCE(!n && 786 !qdisc_is_offloaded); 787 /* TODO: perform the search on a per txq basis */ 788 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); 789 if (sch == NULL) { 790 WARN_ON_ONCE(parentid != TC_H_ROOT); 791 break; 792 } 793 cops = sch->ops->cl_ops; 794 if (notify && cops->qlen_notify) { 795 cl = cops->find(sch, parentid); 796 cops->qlen_notify(sch, cl); 797 } 798 sch->q.qlen -= n; 799 sch->qstats.backlog -= len; 800 __qdisc_qstats_drop(sch, drops); 801 } 802 rcu_read_unlock(); 803 } 804 EXPORT_SYMBOL(qdisc_tree_reduce_backlog); 805 806 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, 807 void *type_data) 808 { 809 struct net_device *dev = qdisc_dev(sch); 810 int err; 811 812 sch->flags &= ~TCQ_F_OFFLOADED; 813 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 814 return 0; 815 816 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 817 if (err == -EOPNOTSUPP) 818 return 0; 819 820 if (!err) 821 sch->flags |= TCQ_F_OFFLOADED; 822 823 return err; 824 } 825 EXPORT_SYMBOL(qdisc_offload_dump_helper); 826 827 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, 828 struct Qdisc *new, struct Qdisc *old, 829 enum tc_setup_type type, void *type_data, 830 struct netlink_ext_ack *extack) 831 { 832 bool any_qdisc_is_offloaded; 833 int err; 834 835 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 836 return; 837 838 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 839 840 /* Don't report error if the graft is part of destroy operation. */ 841 if (!err || !new || new == &noop_qdisc) 842 return; 843 844 /* Don't report error if the parent, the old child and the new 845 * one are not offloaded. 846 */ 847 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; 848 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; 849 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; 850 851 if (any_qdisc_is_offloaded) 852 NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); 853 } 854 EXPORT_SYMBOL(qdisc_offload_graft_helper); 855 856 static void qdisc_offload_graft_root(struct net_device *dev, 857 struct Qdisc *new, struct Qdisc *old, 858 struct netlink_ext_ack *extack) 859 { 860 struct tc_root_qopt_offload graft_offload = { 861 .command = TC_ROOT_GRAFT, 862 .handle = new ? new->handle : 0, 863 .ingress = (new && new->flags & TCQ_F_INGRESS) || 864 (old && old->flags & TCQ_F_INGRESS), 865 }; 866 867 qdisc_offload_graft_helper(dev, NULL, new, old, 868 TC_SETUP_ROOT_QDISC, &graft_offload, extack); 869 } 870 871 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 872 u32 portid, u32 seq, u16 flags, int event) 873 { 874 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; 875 struct gnet_stats_queue __percpu *cpu_qstats = NULL; 876 struct tcmsg *tcm; 877 struct nlmsghdr *nlh; 878 unsigned char *b = skb_tail_pointer(skb); 879 struct gnet_dump d; 880 struct qdisc_size_table *stab; 881 u32 block_index; 882 __u32 qlen; 883 884 cond_resched(); 885 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 886 if (!nlh) 887 goto out_nlmsg_trim; 888 tcm = nlmsg_data(nlh); 889 tcm->tcm_family = AF_UNSPEC; 890 tcm->tcm__pad1 = 0; 891 tcm->tcm__pad2 = 0; 892 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 893 tcm->tcm_parent = clid; 894 tcm->tcm_handle = q->handle; 895 tcm->tcm_info = refcount_read(&q->refcnt); 896 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 897 goto nla_put_failure; 898 if (q->ops->ingress_block_get) { 899 block_index = q->ops->ingress_block_get(q); 900 if (block_index && 901 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) 902 goto nla_put_failure; 903 } 904 if (q->ops->egress_block_get) { 905 block_index = q->ops->egress_block_get(q); 906 if (block_index && 907 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) 908 goto nla_put_failure; 909 } 910 if (q->ops->dump && q->ops->dump(q, skb) < 0) 911 goto nla_put_failure; 912 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) 913 goto nla_put_failure; 914 qlen = qdisc_qlen_sum(q); 915 916 stab = rtnl_dereference(q->stab); 917 if (stab && qdisc_dump_stab(skb, stab) < 0) 918 goto nla_put_failure; 919 920 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 921 NULL, &d, TCA_PAD) < 0) 922 goto nla_put_failure; 923 924 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 925 goto nla_put_failure; 926 927 if (qdisc_is_percpu_stats(q)) { 928 cpu_bstats = q->cpu_bstats; 929 cpu_qstats = q->cpu_qstats; 930 } 931 932 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), 933 &d, cpu_bstats, &q->bstats) < 0 || 934 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || 935 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) 936 goto nla_put_failure; 937 938 if (gnet_stats_finish_copy(&d) < 0) 939 goto nla_put_failure; 940 941 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 942 return skb->len; 943 944 out_nlmsg_trim: 945 nla_put_failure: 946 nlmsg_trim(skb, b); 947 return -1; 948 } 949 950 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) 951 { 952 if (q->flags & TCQ_F_BUILTIN) 953 return true; 954 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) 955 return true; 956 957 return false; 958 } 959 960 static int qdisc_notify(struct net *net, struct sk_buff *oskb, 961 struct nlmsghdr *n, u32 clid, 962 struct Qdisc *old, struct Qdisc *new) 963 { 964 struct sk_buff *skb; 965 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 966 967 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 968 if (!skb) 969 return -ENOBUFS; 970 971 if (old && !tc_qdisc_dump_ignore(old, false)) { 972 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 973 0, RTM_DELQDISC) < 0) 974 goto err_out; 975 } 976 if (new && !tc_qdisc_dump_ignore(new, false)) { 977 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, 978 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 979 goto err_out; 980 } 981 982 if (skb->len) 983 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 984 n->nlmsg_flags & NLM_F_ECHO); 985 986 err_out: 987 kfree_skb(skb); 988 return -EINVAL; 989 } 990 991 static void notify_and_destroy(struct net *net, struct sk_buff *skb, 992 struct nlmsghdr *n, u32 clid, 993 struct Qdisc *old, struct Qdisc *new) 994 { 995 if (new || old) 996 qdisc_notify(net, skb, n, clid, old, new); 997 998 if (old) 999 qdisc_put(old); 1000 } 1001 1002 static void qdisc_clear_nolock(struct Qdisc *sch) 1003 { 1004 sch->flags &= ~TCQ_F_NOLOCK; 1005 if (!(sch->flags & TCQ_F_CPUSTATS)) 1006 return; 1007 1008 free_percpu(sch->cpu_bstats); 1009 free_percpu(sch->cpu_qstats); 1010 sch->cpu_bstats = NULL; 1011 sch->cpu_qstats = NULL; 1012 sch->flags &= ~TCQ_F_CPUSTATS; 1013 } 1014 1015 /* Graft qdisc "new" to class "classid" of qdisc "parent" or 1016 * to device "dev". 1017 * 1018 * When appropriate send a netlink notification using 'skb' 1019 * and "n". 1020 * 1021 * On success, destroy old qdisc. 1022 */ 1023 1024 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, 1025 struct sk_buff *skb, struct nlmsghdr *n, u32 classid, 1026 struct Qdisc *new, struct Qdisc *old, 1027 struct netlink_ext_ack *extack) 1028 { 1029 struct Qdisc *q = old; 1030 struct net *net = dev_net(dev); 1031 1032 if (parent == NULL) { 1033 unsigned int i, num_q, ingress; 1034 1035 ingress = 0; 1036 num_q = dev->num_tx_queues; 1037 if ((q && q->flags & TCQ_F_INGRESS) || 1038 (new && new->flags & TCQ_F_INGRESS)) { 1039 num_q = 1; 1040 ingress = 1; 1041 if (!dev_ingress_queue(dev)) { 1042 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); 1043 return -ENOENT; 1044 } 1045 } 1046 1047 if (dev->flags & IFF_UP) 1048 dev_deactivate(dev); 1049 1050 qdisc_offload_graft_root(dev, new, old, extack); 1051 1052 if (new && new->ops->attach) 1053 goto skip; 1054 1055 for (i = 0; i < num_q; i++) { 1056 struct netdev_queue *dev_queue = dev_ingress_queue(dev); 1057 1058 if (!ingress) 1059 dev_queue = netdev_get_tx_queue(dev, i); 1060 1061 old = dev_graft_qdisc(dev_queue, new); 1062 if (new && i > 0) 1063 qdisc_refcount_inc(new); 1064 1065 if (!ingress) 1066 qdisc_put(old); 1067 } 1068 1069 skip: 1070 if (!ingress) { 1071 notify_and_destroy(net, skb, n, classid, 1072 dev->qdisc, new); 1073 if (new && !new->ops->attach) 1074 qdisc_refcount_inc(new); 1075 dev->qdisc = new ? : &noop_qdisc; 1076 1077 if (new && new->ops->attach) 1078 new->ops->attach(new); 1079 } else { 1080 notify_and_destroy(net, skb, n, classid, old, new); 1081 } 1082 1083 if (dev->flags & IFF_UP) 1084 dev_activate(dev); 1085 } else { 1086 const struct Qdisc_class_ops *cops = parent->ops->cl_ops; 1087 unsigned long cl; 1088 int err; 1089 1090 /* Only support running class lockless if parent is lockless */ 1091 if (new && (new->flags & TCQ_F_NOLOCK) && 1092 parent && !(parent->flags & TCQ_F_NOLOCK)) 1093 qdisc_clear_nolock(new); 1094 1095 if (!cops || !cops->graft) 1096 return -EOPNOTSUPP; 1097 1098 cl = cops->find(parent, classid); 1099 if (!cl) { 1100 NL_SET_ERR_MSG(extack, "Specified class not found"); 1101 return -ENOENT; 1102 } 1103 1104 err = cops->graft(parent, cl, new, &old, extack); 1105 if (err) 1106 return err; 1107 notify_and_destroy(net, skb, n, classid, old, new); 1108 } 1109 return 0; 1110 } 1111 1112 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, 1113 struct netlink_ext_ack *extack) 1114 { 1115 u32 block_index; 1116 1117 if (tca[TCA_INGRESS_BLOCK]) { 1118 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); 1119 1120 if (!block_index) { 1121 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); 1122 return -EINVAL; 1123 } 1124 if (!sch->ops->ingress_block_set) { 1125 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); 1126 return -EOPNOTSUPP; 1127 } 1128 sch->ops->ingress_block_set(sch, block_index); 1129 } 1130 if (tca[TCA_EGRESS_BLOCK]) { 1131 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); 1132 1133 if (!block_index) { 1134 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); 1135 return -EINVAL; 1136 } 1137 if (!sch->ops->egress_block_set) { 1138 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); 1139 return -EOPNOTSUPP; 1140 } 1141 sch->ops->egress_block_set(sch, block_index); 1142 } 1143 return 0; 1144 } 1145 1146 /* 1147 Allocate and initialize new qdisc. 1148 1149 Parameters are passed via opt. 1150 */ 1151 1152 static struct Qdisc *qdisc_create(struct net_device *dev, 1153 struct netdev_queue *dev_queue, 1154 struct Qdisc *p, u32 parent, u32 handle, 1155 struct nlattr **tca, int *errp, 1156 struct netlink_ext_ack *extack) 1157 { 1158 int err; 1159 struct nlattr *kind = tca[TCA_KIND]; 1160 struct Qdisc *sch; 1161 struct Qdisc_ops *ops; 1162 struct qdisc_size_table *stab; 1163 1164 ops = qdisc_lookup_ops(kind); 1165 #ifdef CONFIG_MODULES 1166 if (ops == NULL && kind != NULL) { 1167 char name[IFNAMSIZ]; 1168 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 1169 /* We dropped the RTNL semaphore in order to 1170 * perform the module load. So, even if we 1171 * succeeded in loading the module we have to 1172 * tell the caller to replay the request. We 1173 * indicate this using -EAGAIN. 1174 * We replay the request because the device may 1175 * go away in the mean time. 1176 */ 1177 rtnl_unlock(); 1178 request_module("sch_%s", name); 1179 rtnl_lock(); 1180 ops = qdisc_lookup_ops(kind); 1181 if (ops != NULL) { 1182 /* We will try again qdisc_lookup_ops, 1183 * so don't keep a reference. 1184 */ 1185 module_put(ops->owner); 1186 err = -EAGAIN; 1187 goto err_out; 1188 } 1189 } 1190 } 1191 #endif 1192 1193 err = -ENOENT; 1194 if (!ops) { 1195 NL_SET_ERR_MSG(extack, "Specified qdisc not found"); 1196 goto err_out; 1197 } 1198 1199 sch = qdisc_alloc(dev_queue, ops, extack); 1200 if (IS_ERR(sch)) { 1201 err = PTR_ERR(sch); 1202 goto err_out2; 1203 } 1204 1205 sch->parent = parent; 1206 1207 if (handle == TC_H_INGRESS) { 1208 sch->flags |= TCQ_F_INGRESS; 1209 handle = TC_H_MAKE(TC_H_INGRESS, 0); 1210 } else { 1211 if (handle == 0) { 1212 handle = qdisc_alloc_handle(dev); 1213 if (handle == 0) { 1214 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); 1215 err = -ENOSPC; 1216 goto err_out3; 1217 } 1218 } 1219 if (!netif_is_multiqueue(dev)) 1220 sch->flags |= TCQ_F_ONETXQUEUE; 1221 } 1222 1223 sch->handle = handle; 1224 1225 /* This exist to keep backward compatible with a userspace 1226 * loophole, what allowed userspace to get IFF_NO_QUEUE 1227 * facility on older kernels by setting tx_queue_len=0 (prior 1228 * to qdisc init), and then forgot to reinit tx_queue_len 1229 * before again attaching a qdisc. 1230 */ 1231 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { 1232 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; 1233 netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); 1234 } 1235 1236 err = qdisc_block_indexes_set(sch, tca, extack); 1237 if (err) 1238 goto err_out3; 1239 1240 if (ops->init) { 1241 err = ops->init(sch, tca[TCA_OPTIONS], extack); 1242 if (err != 0) 1243 goto err_out5; 1244 } 1245 1246 if (tca[TCA_STAB]) { 1247 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1248 if (IS_ERR(stab)) { 1249 err = PTR_ERR(stab); 1250 goto err_out4; 1251 } 1252 rcu_assign_pointer(sch->stab, stab); 1253 } 1254 if (tca[TCA_RATE]) { 1255 seqcount_t *running; 1256 1257 err = -EOPNOTSUPP; 1258 if (sch->flags & TCQ_F_MQROOT) { 1259 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); 1260 goto err_out4; 1261 } 1262 1263 if (sch->parent != TC_H_ROOT && 1264 !(sch->flags & TCQ_F_INGRESS) && 1265 (!p || !(p->flags & TCQ_F_MQROOT))) 1266 running = qdisc_root_sleeping_running(sch); 1267 else 1268 running = &sch->running; 1269 1270 err = gen_new_estimator(&sch->bstats, 1271 sch->cpu_bstats, 1272 &sch->rate_est, 1273 NULL, 1274 running, 1275 tca[TCA_RATE]); 1276 if (err) { 1277 NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); 1278 goto err_out4; 1279 } 1280 } 1281 1282 qdisc_hash_add(sch, false); 1283 1284 return sch; 1285 1286 err_out5: 1287 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ 1288 if (ops->destroy) 1289 ops->destroy(sch); 1290 err_out3: 1291 dev_put(dev); 1292 qdisc_free(sch); 1293 err_out2: 1294 module_put(ops->owner); 1295 err_out: 1296 *errp = err; 1297 return NULL; 1298 1299 err_out4: 1300 /* 1301 * Any broken qdiscs that would require a ops->reset() here? 1302 * The qdisc was never in action so it shouldn't be necessary. 1303 */ 1304 qdisc_put_stab(rtnl_dereference(sch->stab)); 1305 if (ops->destroy) 1306 ops->destroy(sch); 1307 goto err_out3; 1308 } 1309 1310 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, 1311 struct netlink_ext_ack *extack) 1312 { 1313 struct qdisc_size_table *ostab, *stab = NULL; 1314 int err = 0; 1315 1316 if (tca[TCA_OPTIONS]) { 1317 if (!sch->ops->change) { 1318 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); 1319 return -EINVAL; 1320 } 1321 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 1322 NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); 1323 return -EOPNOTSUPP; 1324 } 1325 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); 1326 if (err) 1327 return err; 1328 } 1329 1330 if (tca[TCA_STAB]) { 1331 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1332 if (IS_ERR(stab)) 1333 return PTR_ERR(stab); 1334 } 1335 1336 ostab = rtnl_dereference(sch->stab); 1337 rcu_assign_pointer(sch->stab, stab); 1338 qdisc_put_stab(ostab); 1339 1340 if (tca[TCA_RATE]) { 1341 /* NB: ignores errors from replace_estimator 1342 because change can't be undone. */ 1343 if (sch->flags & TCQ_F_MQROOT) 1344 goto out; 1345 gen_replace_estimator(&sch->bstats, 1346 sch->cpu_bstats, 1347 &sch->rate_est, 1348 NULL, 1349 qdisc_root_sleeping_running(sch), 1350 tca[TCA_RATE]); 1351 } 1352 out: 1353 return 0; 1354 } 1355 1356 struct check_loop_arg { 1357 struct qdisc_walker w; 1358 struct Qdisc *p; 1359 int depth; 1360 }; 1361 1362 static int check_loop_fn(struct Qdisc *q, unsigned long cl, 1363 struct qdisc_walker *w); 1364 1365 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) 1366 { 1367 struct check_loop_arg arg; 1368 1369 if (q->ops->cl_ops == NULL) 1370 return 0; 1371 1372 arg.w.stop = arg.w.skip = arg.w.count = 0; 1373 arg.w.fn = check_loop_fn; 1374 arg.depth = depth; 1375 arg.p = p; 1376 q->ops->cl_ops->walk(q, &arg.w); 1377 return arg.w.stop ? -ELOOP : 0; 1378 } 1379 1380 static int 1381 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) 1382 { 1383 struct Qdisc *leaf; 1384 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1385 struct check_loop_arg *arg = (struct check_loop_arg *)w; 1386 1387 leaf = cops->leaf(q, cl); 1388 if (leaf) { 1389 if (leaf == arg->p || arg->depth > 7) 1390 return -ELOOP; 1391 return check_loop(leaf, arg->p, arg->depth + 1); 1392 } 1393 return 0; 1394 } 1395 1396 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { 1397 [TCA_KIND] = { .type = NLA_STRING }, 1398 [TCA_RATE] = { .type = NLA_BINARY, 1399 .len = sizeof(struct tc_estimator) }, 1400 [TCA_STAB] = { .type = NLA_NESTED }, 1401 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, 1402 [TCA_CHAIN] = { .type = NLA_U32 }, 1403 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, 1404 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, 1405 }; 1406 1407 /* 1408 * Delete/get qdisc. 1409 */ 1410 1411 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1412 struct netlink_ext_ack *extack) 1413 { 1414 struct net *net = sock_net(skb->sk); 1415 struct tcmsg *tcm = nlmsg_data(n); 1416 struct nlattr *tca[TCA_MAX + 1]; 1417 struct net_device *dev; 1418 u32 clid; 1419 struct Qdisc *q = NULL; 1420 struct Qdisc *p = NULL; 1421 int err; 1422 1423 if ((n->nlmsg_type != RTM_GETQDISC) && 1424 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1425 return -EPERM; 1426 1427 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 1428 rtm_tca_policy, extack); 1429 if (err < 0) 1430 return err; 1431 1432 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1433 if (!dev) 1434 return -ENODEV; 1435 1436 clid = tcm->tcm_parent; 1437 if (clid) { 1438 if (clid != TC_H_ROOT) { 1439 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 1440 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1441 if (!p) { 1442 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); 1443 return -ENOENT; 1444 } 1445 q = qdisc_leaf(p, clid); 1446 } else if (dev_ingress_queue(dev)) { 1447 q = dev_ingress_queue(dev)->qdisc_sleeping; 1448 } 1449 } else { 1450 q = dev->qdisc; 1451 } 1452 if (!q) { 1453 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); 1454 return -ENOENT; 1455 } 1456 1457 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { 1458 NL_SET_ERR_MSG(extack, "Invalid handle"); 1459 return -EINVAL; 1460 } 1461 } else { 1462 q = qdisc_lookup(dev, tcm->tcm_handle); 1463 if (!q) { 1464 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); 1465 return -ENOENT; 1466 } 1467 } 1468 1469 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1470 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1471 return -EINVAL; 1472 } 1473 1474 if (n->nlmsg_type == RTM_DELQDISC) { 1475 if (!clid) { 1476 NL_SET_ERR_MSG(extack, "Classid cannot be zero"); 1477 return -EINVAL; 1478 } 1479 if (q->handle == 0) { 1480 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); 1481 return -ENOENT; 1482 } 1483 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); 1484 if (err != 0) 1485 return err; 1486 } else { 1487 qdisc_notify(net, skb, n, clid, NULL, q); 1488 } 1489 return 0; 1490 } 1491 1492 /* 1493 * Create/change qdisc. 1494 */ 1495 1496 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1497 struct netlink_ext_ack *extack) 1498 { 1499 struct net *net = sock_net(skb->sk); 1500 struct tcmsg *tcm; 1501 struct nlattr *tca[TCA_MAX + 1]; 1502 struct net_device *dev; 1503 u32 clid; 1504 struct Qdisc *q, *p; 1505 int err; 1506 1507 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1508 return -EPERM; 1509 1510 replay: 1511 /* Reinit, just in case something touches this. */ 1512 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 1513 rtm_tca_policy, extack); 1514 if (err < 0) 1515 return err; 1516 1517 tcm = nlmsg_data(n); 1518 clid = tcm->tcm_parent; 1519 q = p = NULL; 1520 1521 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1522 if (!dev) 1523 return -ENODEV; 1524 1525 1526 if (clid) { 1527 if (clid != TC_H_ROOT) { 1528 if (clid != TC_H_INGRESS) { 1529 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1530 if (!p) { 1531 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); 1532 return -ENOENT; 1533 } 1534 q = qdisc_leaf(p, clid); 1535 } else if (dev_ingress_queue_create(dev)) { 1536 q = dev_ingress_queue(dev)->qdisc_sleeping; 1537 } 1538 } else { 1539 q = dev->qdisc; 1540 } 1541 1542 /* It may be default qdisc, ignore it */ 1543 if (q && q->handle == 0) 1544 q = NULL; 1545 1546 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1547 if (tcm->tcm_handle) { 1548 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { 1549 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); 1550 return -EEXIST; 1551 } 1552 if (TC_H_MIN(tcm->tcm_handle)) { 1553 NL_SET_ERR_MSG(extack, "Invalid minor handle"); 1554 return -EINVAL; 1555 } 1556 q = qdisc_lookup(dev, tcm->tcm_handle); 1557 if (!q) 1558 goto create_n_graft; 1559 if (n->nlmsg_flags & NLM_F_EXCL) { 1560 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); 1561 return -EEXIST; 1562 } 1563 if (tca[TCA_KIND] && 1564 nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1565 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1566 return -EINVAL; 1567 } 1568 if (q == p || 1569 (p && check_loop(q, p, 0))) { 1570 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); 1571 return -ELOOP; 1572 } 1573 qdisc_refcount_inc(q); 1574 goto graft; 1575 } else { 1576 if (!q) 1577 goto create_n_graft; 1578 1579 /* This magic test requires explanation. 1580 * 1581 * We know, that some child q is already 1582 * attached to this parent and have choice: 1583 * either to change it or to create/graft new one. 1584 * 1585 * 1. We are allowed to create/graft only 1586 * if CREATE and REPLACE flags are set. 1587 * 1588 * 2. If EXCL is set, requestor wanted to say, 1589 * that qdisc tcm_handle is not expected 1590 * to exist, so that we choose create/graft too. 1591 * 1592 * 3. The last case is when no flags are set. 1593 * Alas, it is sort of hole in API, we 1594 * cannot decide what to do unambiguously. 1595 * For now we select create/graft, if 1596 * user gave KIND, which does not match existing. 1597 */ 1598 if ((n->nlmsg_flags & NLM_F_CREATE) && 1599 (n->nlmsg_flags & NLM_F_REPLACE) && 1600 ((n->nlmsg_flags & NLM_F_EXCL) || 1601 (tca[TCA_KIND] && 1602 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1603 goto create_n_graft; 1604 } 1605 } 1606 } else { 1607 if (!tcm->tcm_handle) { 1608 NL_SET_ERR_MSG(extack, "Handle cannot be zero"); 1609 return -EINVAL; 1610 } 1611 q = qdisc_lookup(dev, tcm->tcm_handle); 1612 } 1613 1614 /* Change qdisc parameters */ 1615 if (!q) { 1616 NL_SET_ERR_MSG(extack, "Specified qdisc not found"); 1617 return -ENOENT; 1618 } 1619 if (n->nlmsg_flags & NLM_F_EXCL) { 1620 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); 1621 return -EEXIST; 1622 } 1623 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1624 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1625 return -EINVAL; 1626 } 1627 err = qdisc_change(q, tca, extack); 1628 if (err == 0) 1629 qdisc_notify(net, skb, n, clid, NULL, q); 1630 return err; 1631 1632 create_n_graft: 1633 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 1634 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); 1635 return -ENOENT; 1636 } 1637 if (clid == TC_H_INGRESS) { 1638 if (dev_ingress_queue(dev)) { 1639 q = qdisc_create(dev, dev_ingress_queue(dev), p, 1640 tcm->tcm_parent, tcm->tcm_parent, 1641 tca, &err, extack); 1642 } else { 1643 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); 1644 err = -ENOENT; 1645 } 1646 } else { 1647 struct netdev_queue *dev_queue; 1648 1649 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1650 dev_queue = p->ops->cl_ops->select_queue(p, tcm); 1651 else if (p) 1652 dev_queue = p->dev_queue; 1653 else 1654 dev_queue = netdev_get_tx_queue(dev, 0); 1655 1656 q = qdisc_create(dev, dev_queue, p, 1657 tcm->tcm_parent, tcm->tcm_handle, 1658 tca, &err, extack); 1659 } 1660 if (q == NULL) { 1661 if (err == -EAGAIN) 1662 goto replay; 1663 return err; 1664 } 1665 1666 graft: 1667 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); 1668 if (err) { 1669 if (q) 1670 qdisc_put(q); 1671 return err; 1672 } 1673 1674 return 0; 1675 } 1676 1677 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, 1678 struct netlink_callback *cb, 1679 int *q_idx_p, int s_q_idx, bool recur, 1680 bool dump_invisible) 1681 { 1682 int ret = 0, q_idx = *q_idx_p; 1683 struct Qdisc *q; 1684 int b; 1685 1686 if (!root) 1687 return 0; 1688 1689 q = root; 1690 if (q_idx < s_q_idx) { 1691 q_idx++; 1692 } else { 1693 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1694 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1695 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1696 RTM_NEWQDISC) <= 0) 1697 goto done; 1698 q_idx++; 1699 } 1700 1701 /* If dumping singletons, there is no qdisc_dev(root) and the singleton 1702 * itself has already been dumped. 1703 * 1704 * If we've already dumped the top-level (ingress) qdisc above and the global 1705 * qdisc hashtable, we don't want to hit it again 1706 */ 1707 if (!qdisc_dev(root) || !recur) 1708 goto out; 1709 1710 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 1711 if (q_idx < s_q_idx) { 1712 q_idx++; 1713 continue; 1714 } 1715 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1716 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1717 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1718 RTM_NEWQDISC) <= 0) 1719 goto done; 1720 q_idx++; 1721 } 1722 1723 out: 1724 *q_idx_p = q_idx; 1725 return ret; 1726 done: 1727 ret = -1; 1728 goto out; 1729 } 1730 1731 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 1732 { 1733 struct net *net = sock_net(skb->sk); 1734 int idx, q_idx; 1735 int s_idx, s_q_idx; 1736 struct net_device *dev; 1737 const struct nlmsghdr *nlh = cb->nlh; 1738 struct nlattr *tca[TCA_MAX + 1]; 1739 int err; 1740 1741 s_idx = cb->args[0]; 1742 s_q_idx = q_idx = cb->args[1]; 1743 1744 idx = 0; 1745 ASSERT_RTNL(); 1746 1747 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1748 rtm_tca_policy, cb->extack); 1749 if (err < 0) 1750 return err; 1751 1752 for_each_netdev(net, dev) { 1753 struct netdev_queue *dev_queue; 1754 1755 if (idx < s_idx) 1756 goto cont; 1757 if (idx > s_idx) 1758 s_q_idx = 0; 1759 q_idx = 0; 1760 1761 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, 1762 true, tca[TCA_DUMP_INVISIBLE]) < 0) 1763 goto done; 1764 1765 dev_queue = dev_ingress_queue(dev); 1766 if (dev_queue && 1767 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, 1768 &q_idx, s_q_idx, false, 1769 tca[TCA_DUMP_INVISIBLE]) < 0) 1770 goto done; 1771 1772 cont: 1773 idx++; 1774 } 1775 1776 done: 1777 cb->args[0] = idx; 1778 cb->args[1] = q_idx; 1779 1780 return skb->len; 1781 } 1782 1783 1784 1785 /************************************************ 1786 * Traffic classes manipulation. * 1787 ************************************************/ 1788 1789 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 1790 unsigned long cl, 1791 u32 portid, u32 seq, u16 flags, int event) 1792 { 1793 struct tcmsg *tcm; 1794 struct nlmsghdr *nlh; 1795 unsigned char *b = skb_tail_pointer(skb); 1796 struct gnet_dump d; 1797 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1798 1799 cond_resched(); 1800 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 1801 if (!nlh) 1802 goto out_nlmsg_trim; 1803 tcm = nlmsg_data(nlh); 1804 tcm->tcm_family = AF_UNSPEC; 1805 tcm->tcm__pad1 = 0; 1806 tcm->tcm__pad2 = 0; 1807 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 1808 tcm->tcm_parent = q->handle; 1809 tcm->tcm_handle = q->handle; 1810 tcm->tcm_info = 0; 1811 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 1812 goto nla_put_failure; 1813 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) 1814 goto nla_put_failure; 1815 1816 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1817 NULL, &d, TCA_PAD) < 0) 1818 goto nla_put_failure; 1819 1820 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1821 goto nla_put_failure; 1822 1823 if (gnet_stats_finish_copy(&d) < 0) 1824 goto nla_put_failure; 1825 1826 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1827 return skb->len; 1828 1829 out_nlmsg_trim: 1830 nla_put_failure: 1831 nlmsg_trim(skb, b); 1832 return -1; 1833 } 1834 1835 static int tclass_notify(struct net *net, struct sk_buff *oskb, 1836 struct nlmsghdr *n, struct Qdisc *q, 1837 unsigned long cl, int event) 1838 { 1839 struct sk_buff *skb; 1840 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1841 int err = 0; 1842 1843 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1844 if (!skb) 1845 return -ENOBUFS; 1846 1847 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { 1848 kfree_skb(skb); 1849 return -EINVAL; 1850 } 1851 1852 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1853 n->nlmsg_flags & NLM_F_ECHO); 1854 if (err > 0) 1855 err = 0; 1856 return err; 1857 } 1858 1859 static int tclass_del_notify(struct net *net, 1860 const struct Qdisc_class_ops *cops, 1861 struct sk_buff *oskb, struct nlmsghdr *n, 1862 struct Qdisc *q, unsigned long cl) 1863 { 1864 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1865 struct sk_buff *skb; 1866 int err = 0; 1867 1868 if (!cops->delete) 1869 return -EOPNOTSUPP; 1870 1871 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1872 if (!skb) 1873 return -ENOBUFS; 1874 1875 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, 1876 RTM_DELTCLASS) < 0) { 1877 kfree_skb(skb); 1878 return -EINVAL; 1879 } 1880 1881 err = cops->delete(q, cl); 1882 if (err) { 1883 kfree_skb(skb); 1884 return err; 1885 } 1886 1887 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1888 n->nlmsg_flags & NLM_F_ECHO); 1889 if (err > 0) 1890 err = 0; 1891 return err; 1892 } 1893 1894 #ifdef CONFIG_NET_CLS 1895 1896 struct tcf_bind_args { 1897 struct tcf_walker w; 1898 u32 classid; 1899 unsigned long cl; 1900 }; 1901 1902 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 1903 { 1904 struct tcf_bind_args *a = (void *)arg; 1905 1906 if (tp->ops->bind_class) { 1907 struct Qdisc *q = tcf_block_q(tp->chain->block); 1908 1909 sch_tree_lock(q); 1910 tp->ops->bind_class(n, a->classid, a->cl); 1911 sch_tree_unlock(q); 1912 } 1913 return 0; 1914 } 1915 1916 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1917 unsigned long new_cl) 1918 { 1919 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1920 struct tcf_block *block; 1921 struct tcf_chain *chain; 1922 unsigned long cl; 1923 1924 cl = cops->find(q, portid); 1925 if (!cl) 1926 return; 1927 block = cops->tcf_block(q, cl, NULL); 1928 if (!block) 1929 return; 1930 for (chain = tcf_get_next_chain(block, NULL); 1931 chain; 1932 chain = tcf_get_next_chain(block, chain)) { 1933 struct tcf_proto *tp; 1934 1935 for (tp = tcf_get_next_proto(chain, NULL, true); 1936 tp; tp = tcf_get_next_proto(chain, tp, true)) { 1937 struct tcf_bind_args arg = {}; 1938 1939 arg.w.fn = tcf_node_bind; 1940 arg.classid = clid; 1941 arg.cl = new_cl; 1942 tp->ops->walk(tp, &arg.w, true); 1943 } 1944 } 1945 } 1946 1947 #else 1948 1949 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1950 unsigned long new_cl) 1951 { 1952 } 1953 1954 #endif 1955 1956 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, 1957 struct netlink_ext_ack *extack) 1958 { 1959 struct net *net = sock_net(skb->sk); 1960 struct tcmsg *tcm = nlmsg_data(n); 1961 struct nlattr *tca[TCA_MAX + 1]; 1962 struct net_device *dev; 1963 struct Qdisc *q = NULL; 1964 const struct Qdisc_class_ops *cops; 1965 unsigned long cl = 0; 1966 unsigned long new_cl; 1967 u32 portid; 1968 u32 clid; 1969 u32 qid; 1970 int err; 1971 1972 if ((n->nlmsg_type != RTM_GETTCLASS) && 1973 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1974 return -EPERM; 1975 1976 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, 1977 rtm_tca_policy, extack); 1978 if (err < 0) 1979 return err; 1980 1981 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1982 if (!dev) 1983 return -ENODEV; 1984 1985 /* 1986 parent == TC_H_UNSPEC - unspecified parent. 1987 parent == TC_H_ROOT - class is root, which has no parent. 1988 parent == X:0 - parent is root class. 1989 parent == X:Y - parent is a node in hierarchy. 1990 parent == 0:Y - parent is X:Y, where X:0 is qdisc. 1991 1992 handle == 0:0 - generate handle from kernel pool. 1993 handle == 0:Y - class is X:Y, where X:0 is qdisc. 1994 handle == X:Y - clear. 1995 handle == X:0 - root class. 1996 */ 1997 1998 /* Step 1. Determine qdisc handle X:0 */ 1999 2000 portid = tcm->tcm_parent; 2001 clid = tcm->tcm_handle; 2002 qid = TC_H_MAJ(clid); 2003 2004 if (portid != TC_H_ROOT) { 2005 u32 qid1 = TC_H_MAJ(portid); 2006 2007 if (qid && qid1) { 2008 /* If both majors are known, they must be identical. */ 2009 if (qid != qid1) 2010 return -EINVAL; 2011 } else if (qid1) { 2012 qid = qid1; 2013 } else if (qid == 0) 2014 qid = dev->qdisc->handle; 2015 2016 /* Now qid is genuine qdisc handle consistent 2017 * both with parent and child. 2018 * 2019 * TC_H_MAJ(portid) still may be unspecified, complete it now. 2020 */ 2021 if (portid) 2022 portid = TC_H_MAKE(qid, portid); 2023 } else { 2024 if (qid == 0) 2025 qid = dev->qdisc->handle; 2026 } 2027 2028 /* OK. Locate qdisc */ 2029 q = qdisc_lookup(dev, qid); 2030 if (!q) 2031 return -ENOENT; 2032 2033 /* An check that it supports classes */ 2034 cops = q->ops->cl_ops; 2035 if (cops == NULL) 2036 return -EINVAL; 2037 2038 /* Now try to get class */ 2039 if (clid == 0) { 2040 if (portid == TC_H_ROOT) 2041 clid = qid; 2042 } else 2043 clid = TC_H_MAKE(qid, clid); 2044 2045 if (clid) 2046 cl = cops->find(q, clid); 2047 2048 if (cl == 0) { 2049 err = -ENOENT; 2050 if (n->nlmsg_type != RTM_NEWTCLASS || 2051 !(n->nlmsg_flags & NLM_F_CREATE)) 2052 goto out; 2053 } else { 2054 switch (n->nlmsg_type) { 2055 case RTM_NEWTCLASS: 2056 err = -EEXIST; 2057 if (n->nlmsg_flags & NLM_F_EXCL) 2058 goto out; 2059 break; 2060 case RTM_DELTCLASS: 2061 err = tclass_del_notify(net, cops, skb, n, q, cl); 2062 /* Unbind the class with flilters with 0 */ 2063 tc_bind_tclass(q, portid, clid, 0); 2064 goto out; 2065 case RTM_GETTCLASS: 2066 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); 2067 goto out; 2068 default: 2069 err = -EINVAL; 2070 goto out; 2071 } 2072 } 2073 2074 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 2075 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); 2076 return -EOPNOTSUPP; 2077 } 2078 2079 new_cl = cl; 2080 err = -EOPNOTSUPP; 2081 if (cops->change) 2082 err = cops->change(q, clid, portid, tca, &new_cl, extack); 2083 if (err == 0) { 2084 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); 2085 /* We just create a new class, need to do reverse binding. */ 2086 if (cl != new_cl) 2087 tc_bind_tclass(q, portid, clid, new_cl); 2088 } 2089 out: 2090 return err; 2091 } 2092 2093 struct qdisc_dump_args { 2094 struct qdisc_walker w; 2095 struct sk_buff *skb; 2096 struct netlink_callback *cb; 2097 }; 2098 2099 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, 2100 struct qdisc_walker *arg) 2101 { 2102 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; 2103 2104 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, 2105 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 2106 RTM_NEWTCLASS); 2107 } 2108 2109 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, 2110 struct tcmsg *tcm, struct netlink_callback *cb, 2111 int *t_p, int s_t) 2112 { 2113 struct qdisc_dump_args arg; 2114 2115 if (tc_qdisc_dump_ignore(q, false) || 2116 *t_p < s_t || !q->ops->cl_ops || 2117 (tcm->tcm_parent && 2118 TC_H_MAJ(tcm->tcm_parent) != q->handle)) { 2119 (*t_p)++; 2120 return 0; 2121 } 2122 if (*t_p > s_t) 2123 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 2124 arg.w.fn = qdisc_class_dump; 2125 arg.skb = skb; 2126 arg.cb = cb; 2127 arg.w.stop = 0; 2128 arg.w.skip = cb->args[1]; 2129 arg.w.count = 0; 2130 q->ops->cl_ops->walk(q, &arg.w); 2131 cb->args[1] = arg.w.count; 2132 if (arg.w.stop) 2133 return -1; 2134 (*t_p)++; 2135 return 0; 2136 } 2137 2138 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, 2139 struct tcmsg *tcm, struct netlink_callback *cb, 2140 int *t_p, int s_t) 2141 { 2142 struct Qdisc *q; 2143 int b; 2144 2145 if (!root) 2146 return 0; 2147 2148 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) 2149 return -1; 2150 2151 if (!qdisc_dev(root)) 2152 return 0; 2153 2154 if (tcm->tcm_parent) { 2155 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); 2156 if (q && q != root && 2157 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2158 return -1; 2159 return 0; 2160 } 2161 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 2162 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2163 return -1; 2164 } 2165 2166 return 0; 2167 } 2168 2169 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 2170 { 2171 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2172 struct net *net = sock_net(skb->sk); 2173 struct netdev_queue *dev_queue; 2174 struct net_device *dev; 2175 int t, s_t; 2176 2177 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 2178 return 0; 2179 dev = dev_get_by_index(net, tcm->tcm_ifindex); 2180 if (!dev) 2181 return 0; 2182 2183 s_t = cb->args[0]; 2184 t = 0; 2185 2186 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 2187 goto done; 2188 2189 dev_queue = dev_ingress_queue(dev); 2190 if (dev_queue && 2191 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, 2192 &t, s_t) < 0) 2193 goto done; 2194 2195 done: 2196 cb->args[0] = t; 2197 2198 dev_put(dev); 2199 return skb->len; 2200 } 2201 2202 #ifdef CONFIG_PROC_FS 2203 static int psched_show(struct seq_file *seq, void *v) 2204 { 2205 seq_printf(seq, "%08x %08x %08x %08x\n", 2206 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 2207 1000000, 2208 (u32)NSEC_PER_SEC / hrtimer_resolution); 2209 2210 return 0; 2211 } 2212 2213 static int __net_init psched_net_init(struct net *net) 2214 { 2215 struct proc_dir_entry *e; 2216 2217 e = proc_create_single("psched", 0, net->proc_net, psched_show); 2218 if (e == NULL) 2219 return -ENOMEM; 2220 2221 return 0; 2222 } 2223 2224 static void __net_exit psched_net_exit(struct net *net) 2225 { 2226 remove_proc_entry("psched", net->proc_net); 2227 } 2228 #else 2229 static int __net_init psched_net_init(struct net *net) 2230 { 2231 return 0; 2232 } 2233 2234 static void __net_exit psched_net_exit(struct net *net) 2235 { 2236 } 2237 #endif 2238 2239 static struct pernet_operations psched_net_ops = { 2240 .init = psched_net_init, 2241 .exit = psched_net_exit, 2242 }; 2243 2244 static int __init pktsched_init(void) 2245 { 2246 int err; 2247 2248 err = register_pernet_subsys(&psched_net_ops); 2249 if (err) { 2250 pr_err("pktsched_init: " 2251 "cannot initialize per netns operations\n"); 2252 return err; 2253 } 2254 2255 register_qdisc(&pfifo_fast_ops); 2256 register_qdisc(&pfifo_qdisc_ops); 2257 register_qdisc(&bfifo_qdisc_ops); 2258 register_qdisc(&pfifo_head_drop_qdisc_ops); 2259 register_qdisc(&mq_qdisc_ops); 2260 register_qdisc(&noqueue_qdisc_ops); 2261 2262 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); 2263 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); 2264 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, 2265 0); 2266 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); 2267 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); 2268 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, 2269 0); 2270 2271 return 0; 2272 } 2273 2274 subsys_initcall(pktsched_init); 2275