// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *		- Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>

/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

#define SKB_XOFF_MAGIC ((struct sk_buff *)1UL)

static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
	const struct netdev_queue *txq = q->dev_queue;
	spinlock_t *lock = NULL;
	struct sk_buff *skb;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	skb = skb_peek(&q->skb_bad_txq);
	if (skb) {
		/* check the reason for requeuing without the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->skb_bad_txq);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = SKB_XOFF_MAGIC;
		}
	}

	if (lock)
		spin_unlock(lock);

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
{
	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);

	if (unlikely(skb))
		skb = __skb_dequeue_bad_txq(q);

	return skb;
}

static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
					     struct sk_buff *skb)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	__skb_queue_tail(&q->skb_bad_txq, skb);

	if (qdisc_is_percpu_stats(q)) {
		qdisc_qstats_cpu_backlog_inc(q, skb);
		qdisc_qstats_cpu_qlen_inc(q);
	} else {
		qdisc_qstats_backlog_inc(q, skb);
		q->q.qlen++;
	}

	if (lock)
		spin_unlock(lock);
}

static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	while (skb) {
		struct sk_buff *next = skb->next;

		__skb_queue_tail(&q->gso_skb, skb);

		/* it's still part of the queue */
		if (qdisc_is_percpu_stats(q)) {
			qdisc_qstats_cpu_requeues_inc(q);
			qdisc_qstats_cpu_backlog_inc(q, skb);
			qdisc_qstats_cpu_qlen_inc(q);
		} else {
			q->qstats.requeues++;
			qdisc_qstats_backlog_inc(q, skb);
			q->q.qlen++;
		}

		skb = next;
	}
	if (lock)
		spin_unlock(lock);
	__netif_schedule(q);
}

static void try_bulk_dequeue_skb(struct Qdisc *q,
				 struct sk_buff *skb,
				 const struct netdev_queue *txq,
				 int *packets)
{
	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;

	while (bytelimit > 0) {
		struct sk_buff *nskb = q->dequeue(q);

		if (!nskb)
			break;

		bytelimit -= nskb->len; /* covers GSO len */
		skb->next = nskb;
		skb = nskb;
		(*packets)++; /* GSO counts as one pkt */
	}
	skb_mark_not_on_list(skb);
}

/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
				      struct sk_buff *skb,
				      int *packets)
{
	int mapping = skb_get_queue_mapping(skb);
	struct sk_buff *nskb;
	int cnt = 0;

	do {
		nskb = q->dequeue(q);
		if (!nskb)
			break;
		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
			qdisc_enqueue_skb_bad_txq(q, nskb);
			break;
		}
		skb->next = nskb;
		skb = nskb;
	} while (++cnt < 8);
	(*packets) += cnt;
	skb_mark_not_on_list(skb);
}

/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be a SKB list.
 */
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
				   int *packets)
{
	const struct netdev_queue *txq = q->dev_queue;
	struct sk_buff *skb = NULL;

	*packets = 1;
	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
		spinlock_t *lock = NULL;

		if (q->flags & TCQ_F_NOLOCK) {
			lock = qdisc_lock(q);
			spin_lock(lock);
		}

		skb = skb_peek(&q->gso_skb);

		/* skb may be null if another cpu pulls gso_skb off in between
		 * the empty check and the lock.
		 */
		if (!skb) {
			if (lock)
				spin_unlock(lock);
			goto validate;
		}

		/* skbs in gso_skb were already validated */
		*validate = false;
		if (xfrm_offload(skb))
			*validate = true;
		/* check the reason for requeuing without the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->gso_skb);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
		}
		if (lock)
			spin_unlock(lock);
		goto trace;
	}
validate:
	*validate = true;

	if ((q->flags & TCQ_F_ONETXQUEUE) &&
	    netif_xmit_frozen_or_stopped(txq))
		return skb;

	skb = qdisc_dequeue_skb_bad_txq(q);
	if (unlikely(skb)) {
		if (skb == SKB_XOFF_MAGIC)
			return NULL;
		goto bulk;
	}
	skb = q->dequeue(q);
	if (skb) {
bulk:
		if (qdisc_may_bulk(q))
			try_bulk_dequeue_skb(q, skb, txq, packets);
		else
			try_bulk_dequeue_skb_slow(q, skb, packets);
	}
trace:
	trace_qdisc_dequeue(q, txq, *packets, skb);
	return skb;
}

/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Owning the running seqcount guarantees that
 * only one CPU can execute this function.
 *
 * Returns to the caller:
 *	false - hardware queue frozen, back off
 *	true  - feel free to send more pkts
 */
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		     struct net_device *dev, struct netdev_queue *txq,
		     spinlock_t *root_lock, bool validate)
{
	int ret = NETDEV_TX_BUSY;
	bool again = false;

	/* And release qdisc */
	if (root_lock)
		spin_unlock(root_lock);

	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
	if (validate)
		skb = validate_xmit_skb_list(skb, dev, &again);

#ifdef CONFIG_XFRM_OFFLOAD
	if (unlikely(again)) {
		if (root_lock)
			spin_lock(root_lock);

		dev_requeue_skb(skb, q);
		return false;
	}
#endif

	if (likely(skb)) {
		HARD_TX_LOCK(dev, txq, smp_processor_id());
		if (!netif_xmit_frozen_or_stopped(txq))
			skb = dev_hard_start_xmit(skb, dev, txq, &ret);

		HARD_TX_UNLOCK(dev, txq);
	} else {
		if (root_lock)
			spin_lock(root_lock);
		return true;
	}

	if (root_lock)
		spin_lock(root_lock);

	if (!dev_xmit_complete(ret)) {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY))
			net_warn_ratelimited("BUG %s code %d qlen %d\n",
					     dev->name, ret, q->q.qlen);

		dev_requeue_skb(skb, q);
		return false;
	}

	return true;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * The running seqcount guarantees that only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to the device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *	false - queue is empty or throttled.
 *	true  - queue is not empty.
 *
 */
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
	spinlock_t *root_lock = NULL;
	struct netdev_queue *txq;
	struct net_device *dev;
	struct sk_buff *skb;
	bool validate;

	/* Dequeue packet */
	skb = dequeue_skb(q, &validate, packets);
	if (unlikely(!skb))
		return false;

	if (!(q->flags & TCQ_F_NOLOCK))
		root_lock = qdisc_lock(q);

	dev = qdisc_dev(q);
	txq = skb_get_tx_queue(dev, skb);

	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}

void __qdisc_run(struct Qdisc *q)
{
	int quota = dev_tx_weight;
	int packets;

	while (qdisc_restart(q, &packets)) {
		quota -= packets;
		if (quota <= 0) {
			__netif_schedule(q);
			break;
		}
	}
}

unsigned long dev_trans_start(struct net_device *dev)
{
	unsigned long val, res;
	unsigned int i;

	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);
	else if (netif_is_macvlan(dev))
		dev = macvlan_dev_real_dev(dev);
	res = netdev_get_tx_queue(dev, 0)->trans_start;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
		if (val && time_after(val, res))
			res = val;
	}

	return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(struct timer_list *t)
{
	struct net_device *dev = from_timer(dev, t, watchdog_timer);

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				trans_start = txq->trans_start;
				if (netif_xmit_stopped(txq) &&
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {
					some_queue_timedout = 1;
					txq->trans_timeout++;
					break;
				}
			}

			if (some_queue_timedout) {
				trace_net_dev_xmit_timeout(dev, i);
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
					  dev->name, netdev_drivername(dev), i);
				dev->netdev_ops->ndo_tx_timeout(dev, i);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}
EXPORT_SYMBOL_GPL(__netdev_watchdog_up);

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 * netif_carrier_on - set carrier
 * @dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_up_count);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 * netif_carrier_off - clear carrier
 * @dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_down_count);
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id = "noop",
	.priv_size = 0,
	.enqueue = noop_enqueue,
	.dequeue = noop_dequeue,
	.peek = noop_dequeue,
	.owner = THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
	.qdisc_sleeping = &noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue = noop_enqueue,
	.dequeue = noop_dequeue,
	.flags = TCQ_F_BUILTIN,
	.ops = &noop_qdisc_ops,
	.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue = &noop_netdev_queue,
	.running = SEQCNT_ZERO(noop_qdisc.running),
	.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
	.gso_skb = {
		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
		.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
		.qlen = 0,
		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
	},
	.skb_bad_txq = {
		.next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
		.prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
		.qlen = 0,
		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
	},
};
EXPORT_SYMBOL(noop_qdisc);

static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	/* register_qdisc() assigns a default of noop_enqueue if unset,
	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here.
	 */
	qdisc->enqueue = NULL;
	return 0;
}

struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id = "noqueue",
	.priv_size = 0,
	.init = noqueue_init,
	.enqueue = noop_enqueue,
	.dequeue = noop_dequeue,
	.peek = noop_dequeue,
	.owner = THIS_MODULE,
};

static const u8 prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
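
   Illustrative example of the prio2band map above (not part of the
   original comment): skbs with TC_PRIO_BESTEFFORT (0) land in band 1,
   TC_PRIO_BULK (2) in band 2, and TC_PRIO_INTERACTIVE (6) or
   TC_PRIO_CONTROL (7) in band 0, which is always dequeued first.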
 */

#define PFIFO_FAST_BANDS 3

/*
 * Private data for a pfifo_fast scheduler containing:
 *	- rings for priority bands
 */
struct pfifo_fast_priv {
	struct skb_array q[PFIFO_FAST_BANDS];
};

static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
					  int band)
{
	return &priv->q[band];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			      struct sk_buff **to_free)
{
	int band = prio2band[skb->priority & TC_PRIO_MAX];
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct skb_array *q = band2list(priv, band);
	unsigned int pkt_len = qdisc_pkt_len(skb);
	int err;

	err = skb_array_produce(q, skb);

	if (unlikely(err)) {
		if (qdisc_is_percpu_stats(qdisc))
			return qdisc_drop_cpu(skb, qdisc, to_free);
		else
			return qdisc_drop(skb, qdisc, to_free);
	}

	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		if (__skb_array_empty(q))
			continue;

		skb = __skb_array_consume(q);
	}
	if (likely(skb)) {
		qdisc_update_stats_at_dequeue(qdisc, skb);
	} else {
		WRITE_ONCE(qdisc->empty, true);
	}

	return skb;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		skb = __skb_array_peek(q);
	}

	return skb;
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int i, band;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
		struct skb_array *q = band2list(priv, band);
		struct sk_buff *skb;

		/* A NULL ring is possible if the destroy path runs due to a
		 * failed skb_array_init() in pfifo_fast_init().
		 */
		if (!q->ring.queue)
			continue;

		while ((skb = __skb_array_consume(q)) != NULL)
			kfree_skb(skb);
	}

	if (qdisc_is_percpu_stats(qdisc)) {
		for_each_possible_cpu(i) {
			struct gnet_stats_queue *q;

			q = per_cpu_ptr(qdisc->cpu_qstats, i);
			q->backlog = 0;
			q->qlen = 0;
		}
	}
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int prio;

	/* guard against zero length rings */
	if (!qlen)
		return -EINVAL;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);
		int err;

		err = skb_array_init(q, qlen, GFP_KERNEL);
		if (err)
			return -ENOMEM;
	}

	/* Can bypass the queue discipline */
	qdisc->flags |= TCQ_F_CAN_BYPASS;
	return 0;
}

static void pfifo_fast_destroy(struct Qdisc *sch)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		/* A NULL ring is possible if the destroy path runs due to a
		 * failed skb_array_init() in pfifo_fast_init().
		 */
		if (!q->ring.queue)
			continue;
		/* Destroy the ring, but no need to kfree_skb() because a call
		 * to pfifo_fast_reset() has already done that work.
		 */
		ptr_ring_cleanup(&q->ring, NULL);
	}
}

static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
					  unsigned int new_len)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	struct skb_array *bands[PFIFO_FAST_BANDS];
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		bands[prio] = q;
	}

	return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
					 GFP_KERNEL);
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id = "pfifo_fast",
	.priv_size = sizeof(struct pfifo_fast_priv),
	.enqueue = pfifo_fast_enqueue,
	.dequeue = pfifo_fast_dequeue,
	.peek = pfifo_fast_peek,
	.init = pfifo_fast_init,
	.destroy = pfifo_fast_destroy,
	.reset = pfifo_fast_reset,
	.dump = pfifo_fast_dump,
	.change_tx_queue_len = pfifo_fast_change_tx_queue_len,
	.owner = THIS_MODULE,
	.static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);

static struct lock_class_key qdisc_tx_busylock;
static struct lock_class_key qdisc_running_key;

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack)
{
	struct Qdisc *sch;
	unsigned int size = sizeof(*sch) + ops->priv_size;
	int err = -ENOBUFS;
	struct net_device *dev;

	if (!dev_queue) {
		NL_SET_ERR_MSG(extack, "No device queue given");
		err = -EINVAL;
		goto errout;
	}

	dev = dev_queue->dev;
	sch = kzalloc_node(size, GFP_KERNEL,
			   netdev_queue_numa_node_read(dev_queue));

	if (!sch)
		goto errout;
	__skb_queue_head_init(&sch->gso_skb);
	__skb_queue_head_init(&sch->skb_bad_txq);
	qdisc_skb_head_init(&sch->q);
	spin_lock_init(&sch->q.lock);

	if (ops->static_flags & TCQ_F_CPUSTATS) {
		sch->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			goto errout1;

		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!sch->cpu_qstats) {
			free_percpu(sch->cpu_bstats);
			goto errout1;
		}
	}

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	/* seqlock has the same scope as busylock, for NOLOCK qdisc */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	seqcount_init(&sch->running);
	lockdep_set_class(&sch->running,
			  dev->qdisc_running_key ?: &qdisc_running_key);

	sch->ops = ops;
	sch->flags = ops->static_flags;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	sch->empty = true;
	dev_hold(dev);
	refcount_set(&sch->refcnt, 1);

	return sch;
errout1:
	kfree(sch);
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops,
				unsigned int parentid,
				struct netlink_ext_ack *extack)
{
	struct Qdisc *sch;

	if (!try_module_get(ops->owner)) {
		NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
		return NULL;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		module_put(ops->owner);
		return NULL;
	}
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL, extack) == 0) {
		trace_qdisc_create(ops, dev_queue->dev, parentid);
		return sch;
	}

	qdisc_put(sch);
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	trace_qdisc_reset(qdisc);

	if (ops->reset)
		ops->reset(qdisc);

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc->q.qlen = 0;
	qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);

void qdisc_free(struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc)) {
		free_percpu(qdisc->cpu_bstats);
		free_percpu(qdisc->cpu_qstats);
	}

	kfree(qdisc);
}

static void qdisc_free_cb(struct rcu_head *head)
{
	struct Qdisc *q = container_of(head, struct Qdisc, rcu);

	qdisc_free(q);
}

static void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

#ifdef CONFIG_NET_SCHED
	qdisc_hash_del(qdisc);

	qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
	gen_kill_estimator(&qdisc->rate_est);

	qdisc_reset(qdisc);

	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	trace_qdisc_destroy(qdisc);

	call_rcu(&qdisc->rcu, qdisc_free_cb);
}

void qdisc_put(struct Qdisc *qdisc)
{
	if (!qdisc)
		return;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_test(&qdisc->refcnt))
		return;

	qdisc_destroy(qdisc);
}
EXPORT_SYMBOL(qdisc_put);

/* Version of qdisc_put() that is called with the rtnl mutex unlocked.
 * Intended as an optimization: this function only takes the rtnl lock
 * if the qdisc reference counter reaches zero.
 */

void qdisc_put_unlocked(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
		return;

	qdisc_destroy(qdisc);
	rtnl_unlock();
}
EXPORT_SYMBOL(qdisc_put_unlocked);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
EXPORT_SYMBOL(dev_graft_qdisc);

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;
	const struct Qdisc_ops *ops = default_qdisc_ops;

	if (dev->priv_flags & IFF_NO_QUEUE)
		ops = &noqueue_qdisc_ops;
	else if (dev->type == ARPHRD_CAN)
		ops = &pfifo_fast_ops;

	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
	if (!qdisc)
		return;

	if (!netif_is_multiqueue(dev))
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) ||
	    dev->priv_flags & IFF_NO_QUEUE) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		qdisc_refcount_inc(dev->qdisc);
	} else {
		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
		if (qdisc) {
			dev->qdisc = qdisc;
			qdisc->ops->attach(qdisc);
		}
	}

	/* Detect default qdisc setup/init failure and fall back to "noqueue" */
	if (dev->qdisc == &noop_qdisc) {
		netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
			    default_qdisc_ops->id, noqueue_qdisc_ops.id);
		dev->priv_flags |= IFF_NO_QUEUE;
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		qdisc_refcount_inc(dev->qdisc);
		dev->priv_flags ^= IFF_NO_QUEUE;
	}

#ifdef CONFIG_NET_SCHED
	if (dev->qdisc != &noop_qdisc)
		qdisc_hash_add(dev->qdisc, false);
#endif
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	 * create a default one for devices which need queueing
	 * and noqueue_qdisc for virtual interfaces.
	 */

	if (dev->qdisc == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {
		netif_trans_update(dev);
		dev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(dev_activate);

static void qdisc_deactivate(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return;

	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = rtnl_dereference(dev_queue->qdisc);
	if (qdisc) {
		qdisc_deactivate(qdisc);
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
	}
}

static void dev_reset_queue(struct net_device *dev,
			    struct netdev_queue *dev_queue,
			    void *_unused)
{
	struct Qdisc *qdisc;
	bool nolock;

	qdisc = dev_queue->qdisc_sleeping;
	if (!qdisc)
		return;

	nolock = qdisc->flags & TCQ_F_NOLOCK;

	if (nolock)
		spin_lock_bh(&qdisc->seqlock);
	spin_lock_bh(qdisc_lock(qdisc));

	qdisc_reset(qdisc);

	spin_unlock_bh(qdisc_lock(qdisc));
	if (nolock)
		spin_unlock_bh(&qdisc->seqlock);
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;

		root_lock = qdisc_lock(q);
		spin_lock_bh(root_lock);

		val = (qdisc_is_running(q) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

/**
 * dev_deactivate_many - deactivate transmissions on several devices
 * @head: list of devices to deactivate
 *
 * This function returns only when all outstanding transmissions
 * have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
	struct net_device *dev;

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
					 &noop_qdisc);
		if (dev_ingress_queue(dev))
			dev_deactivate_queue(dev, dev_ingress_queue(dev),
					     &noop_qdisc);

		dev_watchdog_down(dev);
	}

	/* Wait for outstanding qdisc-less dev_queue_xmit calls or
	 * outstanding qdisc enqueuing calls.
	 * This is avoided if all devices are in dismantle phase:
	 * the caller will call synchronize_net() for us.
	 */
	synchronize_net();

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);

		if (dev_ingress_queue(dev))
			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
	}

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list) {
		while (some_qdisc_is_busy(dev)) {
			/* wait_event() would avoid this sleep-loop but would
			 * require expensive checks in the fast paths of packet
			 * processing which isn't worth it.
			 */
			schedule_timeout_uninterruptible(1);
		}
	}
}

void dev_deactivate(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);

static int qdisc_change_tx_queue_len(struct net_device *dev,
				     struct netdev_queue *dev_queue)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->change_tx_queue_len)
		return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
	return 0;
}

int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
	bool up = dev->flags & IFF_UP;
	unsigned int i;
	int ret = 0;

	if (up)
		dev_deactivate(dev);

	for (i = 0; i < dev->num_tx_queues; i++) {
		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);

		/* TODO: revert changes on a partial failure */
		if (ret)
			break;
	}

	if (up)
		dev_activate(dev);
	return ret;
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	rcu_assign_pointer(dev_queue->qdisc, qdisc);
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev),
					 &noop_qdisc);

	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_put(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		shutdown_scheduler_queue(dev, dev_ingress_queue(dev),
					 &noop_qdisc);
	qdisc_put(dev->qdisc);
	dev->qdisc = &noop_qdisc;

	WARN_ON(timer_pending(&dev->watchdog_timer));
}

/**
 * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
 * @rate: Rate to compute reciprocal division values of
 * @mult: Multiplier for reciprocal division
 * @shift: Shift for reciprocal division
 *
 * The multiplier and shift for reciprocal division by rate are stored
 * in mult and shift.
 *
 * The deal here is to replace a divide by a reciprocal one
 * in fast path (a reciprocal divide is a multiply and a shift).
 *
 * The normal formula would be:
 * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
 *
 * We compute mult/shift to use instead:
 * time_in_ns = (len * mult) >> shift;
 *
 * We try to get the highest possible mult value for accuracy,
 * but have to make sure no overflows will ever happen.
 *
 * reciprocal_value() is not used here because it doesn't handle 64-bit values.
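 *
 * Illustrative worked example (not part of the original comment): for a
 * rate of 125,000,000 bytes/s (1 Gbit/s), the exact formula gives
 * 1,000,000,000 * 1500 / 125,000,000 = 12,000 ns for a 1500-byte packet;
 * the precomputed (mult, shift) pair makes (1500 * mult) >> shift
 * approximate the same 12,000 ns without a division in the fast path.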
 */
static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
{
	u64 factor = NSEC_PER_SEC;

	*mult = 1;
	*shift = 0;

	if (rate <= 0)
		return;

	for (;;) {
		*mult = div64_u64(factor, rate);
		if (*mult & (1U << 31) || factor & (1ULL << 63))
			break;
		factor <<= 1;
		(*shift)++;
	}
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64)
{
	memset(r, 0, sizeof(*r));
	r->overhead = conf->overhead;
	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
	psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
}
EXPORT_SYMBOL(psched_ratecfg_precompute);

void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
{
	r->rate_pkts_ps = pktrate64;
	psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
}
EXPORT_SYMBOL(psched_ppscfg_precompute);

static void mini_qdisc_rcu_func(struct rcu_head *head)
{
}

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head)
{
	/* Protected with chain0->filter_chain_lock.
	 * Can't access chain directly because tp_head can be NULL.
	 */
	struct mini_Qdisc *miniq_old =
		rcu_dereference_protected(*miniqp->p_miniq, 1);
	struct mini_Qdisc *miniq;

	if (!tp_head) {
		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
		/* Wait for any in-flight RCU callback before it is freed. */
		rcu_barrier();
		return;
	}

	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
		&miniqp->miniq1 : &miniqp->miniq2;

	/* We need to make sure that readers won't see the miniq
	 * we are about to modify. So wait until the previous call_rcu callback
	 * is done.
	 */
	rcu_barrier();
	miniq->filter_list = tp_head;
	rcu_assign_pointer(*miniqp->p_miniq, miniq);

	if (miniq_old)
		/* This is the counterpart of the rcu barriers above. We need
		 * to block potential new users of miniq_old until all readers
		 * no longer see it.
		 */
		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);

void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
				struct tcf_block *block)
{
	miniqp->miniq1.block = block;
	miniqp->miniq2.block = block;
}
EXPORT_SYMBOL(mini_qdisc_pair_block_init);

void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq)
{
	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
	miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);