/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (look at cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but it does not mean that the queue is empty, it just means that
   the discipline does not want to send anything this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0, if the packet was enqueued successfully.
   If the packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
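
   As an illustration only (the "example_fifo" names are hypothetical
   and not part of this file), a minimal classless discipline wires
   these routines into a struct Qdisc_ops roughly like this and hands
   it to register_qdisc() from its module init:

	static struct Qdisc_ops example_fifo_ops __read_mostly = {
		.id		= "example_fifo",
		.priv_size	= 0,
		.enqueue	= example_enqueue,
		.dequeue	= qdisc_dequeue_head,
		.peek		= qdisc_peek_head,
		.init		= example_init,
		.reset		= qdisc_reset_queue,
		.owner		= THIS_MODULE,
	};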
 */

/* Protects list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
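
/* Example module boilerplate (an illustrative sketch only; the
 * "example_fifo" names are hypothetical) for a discipline built around
 * register_qdisc()/unregister_qdisc():
 *
 *	static int __init example_fifo_module_init(void)
 *	{
 *		return register_qdisc(&example_fifo_ops);
 *	}
 *
 *	static void __exit example_fifo_module_exit(void)
 *	{
 *		unregister_qdisc(&example_fifo_ops);
 *	}
 *
 *	module_init(example_fifo_module_init);
 *	module_exit(example_fifo_module_exit);
 */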

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by detecting if the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48 byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
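 *
 * Worked example (hypothetical numbers): with mpu = 0 and cell_log = 3,
 * low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48, giving
 * cell_low = 0 >> 3 = 0 and cell_high = (48 >> 3) - 1 = 5. If
 * rtab[0] == rtab[5], the table was built for 48 byte ATM cells.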
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
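
/* Layout note (a summary of the format consumed above, for orientation
 * only): TC_RTAB_SIZE is 1024 bytes, i.e. 256 u32 entries, where entry i
 * holds the transmission time for a packet whose size falls into cell i,
 * so consumers index it as rtab->data[pkt_len >> r->cell_log].
 */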

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
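
/* Typical watchdog life cycle (an illustrative sketch; a shaper-style
 * qdisc keeping a struct qdisc_watchdog in its private data would do
 * roughly this):
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		from ->init()
 *	qdisc_watchdog_schedule_ns(&q->watchdog, t);	from ->dequeue(),
 *				when the next packet may only be sent
 *				at future time t
 *	qdisc_watchdog_cancel(&q->watchdog);		from ->reset() and
 *							->destroy()
 */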

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from the space managed by the kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
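
/* Caller pattern sketch (illustrative): a qdisc that drops n packets
 * totalling len bytes outside of its own enqueue path propagates the
 * change upwards with qdisc_tree_reduce_backlog(sch, n, len), below,
 * so that the qlen/backlog counters of all ancestors stay consistent.
 */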
776 * 777 * If child was empty even before update then backlog 778 * counter is screwed and we skip notification because 779 * parent class is already passive. 780 * 781 * If the original child was offloaded then it is allowed 782 * to be seem as empty, so the parent is notified anyway. 783 */ 784 notify = !sch->q.qlen && !WARN_ON_ONCE(!n && 785 !qdisc_is_offloaded); 786 /* TODO: perform the search on a per txq basis */ 787 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); 788 if (sch == NULL) { 789 WARN_ON_ONCE(parentid != TC_H_ROOT); 790 break; 791 } 792 cops = sch->ops->cl_ops; 793 if (notify && cops->qlen_notify) { 794 cl = cops->find(sch, parentid); 795 cops->qlen_notify(sch, cl); 796 } 797 sch->q.qlen -= n; 798 sch->qstats.backlog -= len; 799 __qdisc_qstats_drop(sch, drops); 800 } 801 rcu_read_unlock(); 802 } 803 EXPORT_SYMBOL(qdisc_tree_reduce_backlog); 804 805 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, 806 void *type_data) 807 { 808 struct net_device *dev = qdisc_dev(sch); 809 int err; 810 811 sch->flags &= ~TCQ_F_OFFLOADED; 812 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 813 return 0; 814 815 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 816 if (err == -EOPNOTSUPP) 817 return 0; 818 819 if (!err) 820 sch->flags |= TCQ_F_OFFLOADED; 821 822 return err; 823 } 824 EXPORT_SYMBOL(qdisc_offload_dump_helper); 825 826 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, 827 struct Qdisc *new, struct Qdisc *old, 828 enum tc_setup_type type, void *type_data, 829 struct netlink_ext_ack *extack) 830 { 831 bool any_qdisc_is_offloaded; 832 int err; 833 834 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 835 return; 836 837 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 838 839 /* Don't report error if the graft is part of destroy operation. */ 840 if (!err || !new || new == &noop_qdisc) 841 return; 842 843 /* Don't report error if the parent, the old child and the new 844 * one are not offloaded. 845 */ 846 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; 847 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; 848 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; 849 850 if (any_qdisc_is_offloaded) 851 NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); 852 } 853 EXPORT_SYMBOL(qdisc_offload_graft_helper); 854 855 static void qdisc_offload_graft_root(struct net_device *dev, 856 struct Qdisc *new, struct Qdisc *old, 857 struct netlink_ext_ack *extack) 858 { 859 struct tc_root_qopt_offload graft_offload = { 860 .command = TC_ROOT_GRAFT, 861 .handle = new ? 
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}
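
/* For orientation (an illustrative example; any qdisc kind works the
 * same way): a request such as "tc qdisc replace dev eth0 root handle
 * 1: sfq" ends up in qdisc_graft() below, attaching the freshly
 * created sfq instance at the device root and destroying whatever was
 * there before.
 */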

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops() again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get IFF_NO_QUEUE
	 * behaviour on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require an ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
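 *
 * (E.g. "tc qdisc del dev eth0 root" arrives here as RTM_DELQDISC,
 * while a non-dump RTM_GETQDISC retrieves a single qdisc; this mapping
 * reflects conventional iproute2 behaviour and is noted here only for
 * orientation.)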
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and we have a
				 * choice: either to change it or to
				 * create/graft a new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor wanted
				 * to say that qdisc tcm_handle is not
				 * expected to exist, so that we choose
				 * create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is a sort of hole in the API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if the
				 * user gave a KIND which does not match the
				 * existing one.
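				 *
				 * For reference (the flag combinations
				 * iproute2 has traditionally sent):
				 * "tc qdisc add" uses CREATE|EXCL,
				 * "tc qdisc replace" uses CREATE|REPLACE
				 * and "tc qdisc change" uses neither, so
				 * only add/replace take the create_n_graft
				 * path below.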
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
1702 * 1703 * If we've already dumped the top-level (ingress) qdisc above and the global 1704 * qdisc hashtable, we don't want to hit it again 1705 */ 1706 if (!qdisc_dev(root) || !recur) 1707 goto out; 1708 1709 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 1710 if (q_idx < s_q_idx) { 1711 q_idx++; 1712 continue; 1713 } 1714 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1715 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1716 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1717 RTM_NEWQDISC) <= 0) 1718 goto done; 1719 q_idx++; 1720 } 1721 1722 out: 1723 *q_idx_p = q_idx; 1724 return ret; 1725 done: 1726 ret = -1; 1727 goto out; 1728 } 1729 1730 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 1731 { 1732 struct net *net = sock_net(skb->sk); 1733 int idx, q_idx; 1734 int s_idx, s_q_idx; 1735 struct net_device *dev; 1736 const struct nlmsghdr *nlh = cb->nlh; 1737 struct nlattr *tca[TCA_MAX + 1]; 1738 int err; 1739 1740 s_idx = cb->args[0]; 1741 s_q_idx = q_idx = cb->args[1]; 1742 1743 idx = 0; 1744 ASSERT_RTNL(); 1745 1746 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1747 rtm_tca_policy, cb->extack); 1748 if (err < 0) 1749 return err; 1750 1751 for_each_netdev(net, dev) { 1752 struct netdev_queue *dev_queue; 1753 1754 if (idx < s_idx) 1755 goto cont; 1756 if (idx > s_idx) 1757 s_q_idx = 0; 1758 q_idx = 0; 1759 1760 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, 1761 true, tca[TCA_DUMP_INVISIBLE]) < 0) 1762 goto done; 1763 1764 dev_queue = dev_ingress_queue(dev); 1765 if (dev_queue && 1766 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, 1767 &q_idx, s_q_idx, false, 1768 tca[TCA_DUMP_INVISIBLE]) < 0) 1769 goto done; 1770 1771 cont: 1772 idx++; 1773 } 1774 1775 done: 1776 cb->args[0] = idx; 1777 cb->args[1] = q_idx; 1778 1779 return skb->len; 1780 } 1781 1782 1783 1784 /************************************************ 1785 * Traffic classes manipulation. 
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	u32 classid;
	unsigned long cl;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl);
		sch_tree_unlock(q);
	}
	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
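
	/* Worked example (hypothetical handles): "tc class add dev eth0
	 * parent 1: classid 1:10 ..." arrives with tcm_parent = 1:0 and
	 * tcm_handle = 1:10, so below qid resolves to 1:0 and clid to
	 * 1:10.
	 */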
	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from filters with 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; we need to do the reverse
		 * binding.
		 */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
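
/* Example /proc/net/psched output on a system with 1 ns hrtimer
 * resolution (illustrative values, all in hex):
 *
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. NSEC_PER_USEC = 1000, PSCHED_TICKS2NS(1) = 64, the fixed
 * 1000000, and NSEC_PER_SEC / hrtimer_resolution = 10^9.
 */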

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);