/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SCHED_GENERIC_H
#define __NET_SCHED_GENERIC_H

#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <linux/percpu.h>
#include <linux/dynamic_queue_limits.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>

struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;

struct qdisc_rate_table {
	struct tc_ratespec rate;
	u32 data[256];
	struct qdisc_rate_table *next;
	int refcnt;
};

enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
	__QDISC_STATE_DRAINING,
};

#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)

#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
				 QDISC_STATE_DRAINING)

struct qdisc_size_table {
	struct rcu_head rcu;
	struct list_head list;
	struct tc_sizespec szopts;
	int refcnt;
	u16 data[];
};

/* similar to sk_buff_head, but skb->prev pointer is undefined. */
struct qdisc_skb_head {
	struct sk_buff *head;
	struct sk_buff *tail;
	__u32 qlen;
	spinlock_t lock;
};

struct Qdisc {
	int (*enqueue)(struct sk_buff *skb,
		       struct Qdisc *sch,
		       struct sk_buff **to_free);
	struct sk_buff * (*dequeue)(struct Qdisc *sch);
	unsigned int flags;
#define TCQ_F_BUILTIN		1
#define TCQ_F_INGRESS		2
#define TCQ_F_CAN_BYPASS	4
#define TCQ_F_MQROOT		8
#define TCQ_F_ONETXQUEUE	0x10 /* dequeue_skb() can assume all skbs are for
				      * q->dev_queue : It can test
				      * netif_xmit_frozen_or_stopped() before
				      * dequeuing the next packet.
				      * This is true for MQ/MQPRIO slaves, or for
				      * a non-multiqueue device.
				      */
#define TCQ_F_WARN_NONWC	(1 << 16)
#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
				      * qdisc_tree_decrease_qlen() should stop.
				      */
#define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
	u32 limit;
	const struct Qdisc_ops *ops;
	struct qdisc_size_table __rcu *stab;
	struct hlist_node hash;
	u32 handle;
	u32 parent;

	struct netdev_queue *dev_queue;

	struct net_rate_estimator __rcu *rate_est;
	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	struct gnet_stats_queue __percpu *cpu_qstats;
	int pad;
	refcount_t refcnt;

	/*
	 * For performance's sake on SMP, we put highly modified fields at the end
	 */
	struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
	struct qdisc_skb_head q;
	struct gnet_stats_basic_packed bstats;
	seqcount_t running;
	struct gnet_stats_queue qstats;
	unsigned long state;
	struct Qdisc *next_sched;
	struct sk_buff_head skb_bad_txq;

	spinlock_t busylock ____cacheline_aligned_in_smp;
	spinlock_t seqlock;

	struct rcu_head rcu;

	/* private data */
	long privdata[] ____cacheline_aligned;
};

static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return;
	refcount_inc(&qdisc->refcnt);
}

/* Intended to be used by unlocked users, when concurrent qdisc release is
 * possible.
 */

static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return qdisc;
	if (refcount_inc_not_zero(&qdisc->refcnt))
		return qdisc;
	return NULL;
}

static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK)
		return spin_is_locked(&qdisc->seqlock);
	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}

static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
{
	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return q->flags & TCQ_F_CPUSTATS;
}

static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc))
		return nolock_qdisc_is_empty(qdisc);
	return !READ_ONCE(qdisc->q.qlen);
}

static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		if (spin_trylock(&qdisc->seqlock))
			return true;

		/* Paired with smp_mb__after_atomic() to make sure
		 * STATE_MISSED checking is synchronized with clearing
		 * in pfifo_fast_dequeue().
		 */
		smp_mb__before_atomic();

		/* If the MISSED flag is set, it means another thread has
		 * set the MISSED flag before the second spin_trylock(), so
		 * we can return false here to avoid multiple CPUs doing
		 * the set_bit() and second spin_trylock() concurrently.
		 */
		if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
			return false;

		/* Set the MISSED flag before the second spin_trylock();
		 * if the second spin_trylock() returns false, it means
		 * another CPU holding the lock will do the dequeuing for us,
		 * or it will see the MISSED flag set after releasing the
		 * lock and reschedule net_tx_action() to do the dequeuing.
		 */
		set_bit(__QDISC_STATE_MISSED, &qdisc->state);

		/* spin_trylock() only has load-acquire semantics, so use
		 * smp_mb__after_atomic() to ensure STATE_MISSED is set
		 * before doing the second spin_trylock().
		 */
		smp_mb__after_atomic();

		/* Retry in case the other CPU may not see the new flag
		 * after it releases the lock at the end of qdisc_run_end().
		 */
		return spin_trylock(&qdisc->seqlock);
	} else if (qdisc_is_running(qdisc)) {
		return false;
	}
	/* Variant of write_seqcount_begin() telling lockdep a trylock
	 * was attempted.
	 */
	raw_write_seqcount_begin(&qdisc->running);
	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
	return true;
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state)))
			__netif_schedule(qdisc);
	} else {
		write_seqcount_end(&qdisc->running);
	}
}
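
/* Usage sketch (illustrative only, not part of this header): the transmit
 * path brackets dequeue work with qdisc_run_begin()/qdisc_run_end() so that
 * only one CPU runs a given qdisc at a time; this is roughly what
 * qdisc_run() in <net/pkt_sched.h> does ("example_run" is a hypothetical
 * name, __qdisc_run() is the real worker declared there):
 *
 *	static void example_run(struct Qdisc *q)
 *	{
 *		if (qdisc_run_begin(q)) {
 *			__qdisc_run(q);		// dequeue/transmit loop
 *			qdisc_run_end(q);
 *		}
 *	}
 */
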
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
{
	return qdisc->flags & TCQ_F_ONETXQUEUE;
}

static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
#ifdef CONFIG_BQL
	/* Non-BQL migrated drivers will return 0, too. */
	return dql_avail(&txq->dql);
#else
	return 0;
#endif
}

struct Qdisc_class_ops {
	unsigned int flags;
	/* Child qdisc manipulation */
	struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *);
	int (*graft)(struct Qdisc *, unsigned long cl,
		     struct Qdisc *, struct Qdisc **,
		     struct netlink_ext_ack *extack);
	struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);
	void (*qlen_notify)(struct Qdisc *, unsigned long);

	/* Class manipulation routines */
	unsigned long (*find)(struct Qdisc *, u32 classid);
	int (*change)(struct Qdisc *, u32, u32,
		      struct nlattr **, unsigned long *,
		      struct netlink_ext_ack *);
	int (*delete)(struct Qdisc *, unsigned long,
		      struct netlink_ext_ack *);
	void (*walk)(struct Qdisc *, struct qdisc_walker *arg);

	/* Filter manipulation */
	struct tcf_block * (*tcf_block)(struct Qdisc *sch,
					unsigned long arg,
					struct netlink_ext_ack *extack);
	unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
				  u32 classid);
	void (*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int (*dump)(struct Qdisc *, unsigned long,
		    struct sk_buff *skb, struct tcmsg *);
	int (*dump_stats)(struct Qdisc *, unsigned long,
			  struct gnet_dump *);
};

/* Qdisc_class_ops flag values */

/* Implements API that doesn't require rtnl lock */
enum qdisc_class_ops_flags {
	QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
};

struct Qdisc_ops {
	struct Qdisc_ops *next;
	const struct Qdisc_class_ops *cl_ops;
	char id[IFNAMSIZ];
	int priv_size;
	unsigned int static_flags;

	int (*enqueue)(struct sk_buff *skb,
		       struct Qdisc *sch,
		       struct sk_buff **to_free);
	struct sk_buff * (*dequeue)(struct Qdisc *);
	struct sk_buff * (*peek)(struct Qdisc *);

	int (*init)(struct Qdisc *sch, struct nlattr *arg,
		    struct netlink_ext_ack *extack);
	void (*reset)(struct Qdisc *);
	void (*destroy)(struct Qdisc *);
	int (*change)(struct Qdisc *sch,
		      struct nlattr *arg,
		      struct netlink_ext_ack *extack);
	void (*attach)(struct Qdisc *sch);
	int (*change_tx_queue_len)(struct Qdisc *, unsigned int);

	int (*dump)(struct Qdisc *, struct sk_buff *);
	int (*dump_stats)(struct Qdisc *, struct gnet_dump *);

	void (*ingress_block_set)(struct Qdisc *sch,
				  u32 block_index);
	void (*egress_block_set)(struct Qdisc *sch,
				 u32 block_index);
	u32 (*ingress_block_get)(struct Qdisc *sch);
	u32 (*egress_block_get)(struct Qdisc *sch);

	struct module *owner;
};
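
/* Registration sketch (illustrative only, not part of this header): a qdisc
 * module typically fills in a static struct Qdisc_ops and registers it from
 * its module init with register_qdisc() from <net/pkt_sched.h>. The
 * "example_*" names below are hypothetical stand-ins:
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.priv_size	= sizeof(struct example_sched_data),
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= example_init,
 *		.reset		= qdisc_reset_queue,
 *		.change		= example_change,
 *		.dump		= example_dump,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	// in module init:
 *	return register_qdisc(&example_qdisc_ops);
 */
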
struct tcf_result {
	union {
		struct {
			unsigned long class;
			u32 classid;
		};
		const struct tcf_proto *goto_tp;

		/* used in the skb_tc_reinsert function */
		struct {
			bool ingress;
			struct gnet_stats_queue *qstats;
		};
	};
};

struct tcf_chain;

struct tcf_proto_ops {
	struct list_head head;
	char kind[IFNAMSIZ];

	int (*classify)(struct sk_buff *,
			const struct tcf_proto *,
			struct tcf_result *);
	int (*init)(struct tcf_proto *);
	void (*destroy)(struct tcf_proto *tp, bool rtnl_held,
			struct netlink_ext_ack *extack);

	void * (*get)(struct tcf_proto *, u32 handle);
	void (*put)(struct tcf_proto *tp, void *f);
	int (*change)(struct net *net, struct sk_buff *,
		      struct tcf_proto *, unsigned long,
		      u32 handle, struct nlattr **,
		      void **, bool, bool,
		      struct netlink_ext_ack *);
	int (*delete)(struct tcf_proto *tp, void *arg,
		      bool *last, bool rtnl_held,
		      struct netlink_ext_ack *);
	bool (*delete_empty)(struct tcf_proto *tp);
	void (*walk)(struct tcf_proto *tp,
		     struct tcf_walker *arg, bool rtnl_held);
	int (*reoffload)(struct tcf_proto *tp, bool add,
			 flow_setup_cb_t *cb, void *cb_priv,
			 struct netlink_ext_ack *extack);
	void (*hw_add)(struct tcf_proto *tp,
		       void *type_data);
	void (*hw_del)(struct tcf_proto *tp,
		       void *type_data);
	void (*bind_class)(void *, u32, unsigned long,
			   void *, unsigned long);
	void * (*tmplt_create)(struct net *net,
			       struct tcf_chain *chain,
			       struct nlattr **tca,
			       struct netlink_ext_ack *extack);
	void (*tmplt_destroy)(void *tmplt_priv);

	/* rtnetlink specific */
	int (*dump)(struct net *, struct tcf_proto *, void *,
		    struct sk_buff *skb, struct tcmsg *,
		    bool);
	int (*terse_dump)(struct net *net,
			  struct tcf_proto *tp, void *fh,
			  struct sk_buff *skb,
			  struct tcmsg *t, bool rtnl_held);
	int (*tmplt_dump)(struct sk_buff *skb,
			  struct net *net,
			  void *tmplt_priv);

	struct module *owner;
	int flags;
};

/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
 * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
 * conditions can occur when filters are inserted/deleted simultaneously.
 */
enum tcf_proto_ops_flags {
	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};

struct tcf_proto {
	/* Fast access part */
	struct tcf_proto __rcu *next;
	void __rcu *root;

	/* called under RCU BH lock */
	int (*classify)(struct sk_buff *,
			const struct tcf_proto *,
			struct tcf_result *);
	__be16 protocol;

	/* All the rest */
	u32 prio;
	void *data;
	const struct tcf_proto_ops *ops;
	struct tcf_chain *chain;
	/* Lock protects tcf_proto shared state and can be used by unlocked
	 * classifiers to protect their private data.
	 */
	spinlock_t lock;
	bool deleting;
	refcount_t refcnt;
	struct rcu_head rcu;
	struct hlist_node destroy_ht_node;
};
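
/* Classification sketch (illustrative only, not part of this header): a
 * classifier's ->classify() callback fills in struct tcf_result and returns
 * a TC_ACT_* verdict from <linux/pkt_cls.h>, or a negative value if the skb
 * did not match so the next tcf_proto in the chain is tried. The
 * "example_*" names are hypothetical:
 *
 *	static int example_classify(struct sk_buff *skb,
 *				    const struct tcf_proto *tp,
 *				    struct tcf_result *res)
 *	{
 *		struct example_filter *f = example_lookup(tp, skb);
 *
 *		if (!f)
 *			return -1;		// no match, try the next tp
 *		res->classid = f->classid;	// consumed by the qdisc's ->find()
 *		return TC_ACT_OK;
 *	}
 */
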
struct qdisc_skb_cb {
	struct {
		unsigned int pkt_len;
		u16 slave_dev_queue_mapping;
		u16 tc_classid;
	};
#define QDISC_CB_PRIV_LEN 20
	unsigned char data[QDISC_CB_PRIV_LEN];
	u16 mru;
	bool post_ct;
};

typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);

struct tcf_chain {
	/* Protects filter_chain. */
	struct mutex filter_chain_lock;
	struct tcf_proto __rcu *filter_chain;
	struct list_head list;
	struct tcf_block *block;
	u32 index; /* chain index */
	unsigned int refcnt;
	unsigned int action_refcnt;
	bool explicitly_created;
	bool flushing;
	const struct tcf_proto_ops *tmplt_ops;
	void *tmplt_priv;
	struct rcu_head rcu;
};

struct tcf_block {
	/* Lock protects tcf_block and lifetime-management data of chains
	 * attached to the block (refcnt, action_refcnt, explicitly_created).
	 */
	struct mutex lock;
	struct list_head chain_list;
	u32 index; /* block index for shared blocks */
	u32 classid; /* which class this block belongs to */
	refcount_t refcnt;
	struct net *net;
	struct Qdisc *q;
	struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
	struct flow_block flow_block;
	struct list_head owner_list;
	bool keep_dst;
	atomic_t offloadcnt; /* Number of offloaded filters */
	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
	struct {
		struct tcf_chain *chain;
		struct list_head filter_chain_list;
	} chain0;
	struct rcu_head rcu;
	DECLARE_HASHTABLE(proto_destroy_ht, 7);
	struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};

static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
	return lockdep_is_held(&chain->filter_chain_lock);
}

static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
{
	return lockdep_is_held(&tp->lock);
}

#define tcf_chain_dereference(p, chain) \
	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))

#define tcf_proto_dereference(p, tp) \
	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))

static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
	struct qdisc_skb_cb *qcb;

	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
	BUILD_BUG_ON(sizeof(qcb->data) < sz);
}

static inline int qdisc_qlen_cpu(const struct Qdisc *q)
{
	return this_cpu_ptr(q->cpu_qstats)->qlen;
}

static inline int qdisc_qlen(const struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
	__u32 qlen = q->qstats.qlen;
	int i;

	if (qdisc_is_percpu_stats(q)) {
		for_each_possible_cpu(i)
			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
	} else {
		qlen += q->q.qlen;
	}

	return qlen;
}

static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
	return (struct qdisc_skb_cb *)skb->cb;
}

static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
{
	return &qdisc->q.lock;
}

static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
{
	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);

	return q;
}

static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
{
	return rcu_dereference_bh(qdisc->dev_queue->qdisc);
}

static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->qdisc_sleeping;
}

/* The qdisc root lock is a mechanism by which the top level
 * of a qdisc tree can be locked from any qdisc node in the
 * forest. This allows changing the configuration of some
 * aspect of the qdisc tree while blocking out asynchronous
 * qdisc access in the packet processing paths.
 *
 * It is only legal to do this when the root will not change
 * on us. Otherwise we'll potentially lock the wrong qdisc
 * root. This is enforced by holding the RTNL semaphore, which
 * all users of this lock accessor must do.
 */
static inline spinlock_t *qdisc_root_lock(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root(qdisc);

	ASSERT_RTNL();
	return qdisc_lock(root);
}

static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return qdisc_lock(root);
}

static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return &root->running;
}

static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->dev;
}

static inline void sch_tree_lock(struct Qdisc *q)
{
	if (q->flags & TCQ_F_MQROOT)
		spin_lock_bh(qdisc_lock(q));
	else
		spin_lock_bh(qdisc_root_sleeping_lock(q));
}

static inline void sch_tree_unlock(struct Qdisc *q)
{
	if (q->flags & TCQ_F_MQROOT)
		spin_unlock_bh(qdisc_lock(q));
	else
		spin_unlock_bh(qdisc_root_sleeping_lock(q));
}

extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
	return ntx < dev->real_num_tx_queues ?
			default_qdisc_ops : &pfifo_fast_ops;
}

struct Qdisc_class_common {
	u32 classid;
	struct hlist_node hnode;
};

struct Qdisc_class_hash {
	struct hlist_head *hash;
	unsigned int hashsize;
	unsigned int hashmask;
	unsigned int hashelems;
};

static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
{
	id ^= id >> 8;
	id ^= id >> 4;
	return id & mask;
}

static inline struct Qdisc_class_common *
qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
{
	struct Qdisc_class_common *cl;
	unsigned int h;

	if (!id)
		return NULL;

	h = qdisc_class_hash(id, hash->hashmask);
	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
		if (cl->classid == id)
			return cl;
	}
	return NULL;
}

static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
{
	u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;

	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
}
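
/* Lookup sketch (illustrative only, not part of this header): classful
 * qdiscs typically embed a struct Qdisc_class_common in each class and keep
 * the classes in a struct Qdisc_class_hash, so that ->find() becomes a hash
 * lookup. The "example_*" types below are hypothetical; qdisc_priv() comes
 * from <net/pkt_sched.h>, and the hash itself is set up with
 * qdisc_class_hash_init() and grown with qdisc_class_hash_grow():
 *
 *	struct example_class {
 *		struct Qdisc_class_common common;
 *		// ... per-class state ...
 *	};
 *
 *	static struct example_class *example_find(struct Qdisc *sch, u32 classid)
 *	{
 *		struct example_sched_data *q = qdisc_priv(sch);
 *		struct Qdisc_class_common *clc;
 *
 *		clc = qdisc_class_find(&q->clhash, classid);
 *		return clc ? container_of(clc, struct example_class, common) : NULL;
 *	}
 */
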
int qdisc_class_hash_init(struct Qdisc_class_hash *);
void qdisc_class_hash_insert(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_remove(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);

int dev_qdisc_change_tx_queue_len(struct net_device *dev);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
#ifdef CONFIG_NET_SCHED
int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			      void *type_data);
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack);
#else
static inline int
qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			  void *type_data)
{
	q->flags &= ~TCQ_F_OFFLOADED;
	return 0;
}

static inline void
qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
			   struct Qdisc *new, struct Qdisc *old,
			   enum tc_setup_type type, void *type_data,
			   struct netlink_ext_ack *extack)
{
}
#endif
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack);
void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops, u32 parentid,
				struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);

static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
	return skb->tc_at_ingress;
#else
	return false;
#endif
}

static inline bool skb_skip_tc_classify(struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_skip_classify) {
		skb->tc_skip_classify = 0;
		return true;
	}
#endif
	return false;
}

/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
	struct Qdisc *qdisc;

	for (; i < dev->num_tx_queues; i++) {
		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
		if (qdisc) {
			spin_lock_bh(qdisc_lock(qdisc));
			qdisc_reset(qdisc);
			spin_unlock_bh(qdisc_lock(qdisc));
		}
	}
}

/* Are all TX queues of the device empty? */
static inline bool qdisc_all_tx_empty(const struct net_device *dev)
{
	unsigned int i;

	rcu_read_lock();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		const struct Qdisc *q = rcu_dereference(txq->qdisc);

		if (!qdisc_is_empty(q)) {
			rcu_read_unlock();
			return false;
		}
	}
	rcu_read_unlock();
	return true;
}

/* Are any of the TX qdiscs changing? */
static inline bool qdisc_tx_changing(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
			return true;
	}
	return false;
}

/* Is the device using the noop qdisc on all queues? */
static inline bool qdisc_tx_is_noop(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
			return false;
	}
	return true;
}

static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
{
	return qdisc_skb_cb(skb)->pkt_len;
}

/* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */
enum net_xmit_qdisc_t {
	__NET_XMIT_STOLEN = 0x00010000,
	__NET_XMIT_BYPASS = 0x00020000,
};

#ifdef CONFIG_NET_CLS_ACT
#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
#else
#define net_xmit_drop_count(e)	(1)
#endif

static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
					   const struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
	struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);

	if (stab)
		__qdisc_calculate_pkt_len(skb, stab);
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	qdisc_calculate_pkt_len(skb, sch);
	return sch->enqueue(skb, sch, to_free);
}

static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
				  __u64 bytes, __u32 packets)
{
	bstats->bytes += bytes;
	bstats->packets += packets;
}

static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
				 const struct sk_buff *skb)
{
	_bstats_update(bstats,
		       qdisc_pkt_len(skb),
		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
}
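
/* Enqueue sketch (illustrative only, not part of this header): a classful or
 * shaping qdisc hands packets to its child with qdisc_enqueue() and uses
 * net_xmit_drop_count() to decide whether a non-success return should be
 * accounted as a drop of its own. The "example_*" names and the q->child
 * pointer are hypothetical; qdisc_priv() is from <net/pkt_sched.h>:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		struct example_sched_data *q = qdisc_priv(sch);
 *		unsigned int len = qdisc_pkt_len(skb);	// before the skb may be freed
 *		int ret;
 *
 *		ret = qdisc_enqueue(skb, q->child, to_free);
 *		if (ret != NET_XMIT_SUCCESS) {
 *			if (net_xmit_drop_count(ret))
 *				sch->qstats.drops++;
 *			return ret;
 *		}
 *		sch->qstats.backlog += len;
 *		sch->q.qlen++;
 *		return NET_XMIT_SUCCESS;
 *	}
 */
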
static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
				      __u64 bytes, __u32 packets)
{
	u64_stats_update_begin(&bstats->syncp);
	_bstats_update(&bstats->bstats, bytes, packets);
	u64_stats_update_end(&bstats->syncp);
}

static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
				     const struct sk_buff *skb)
{
	u64_stats_update_begin(&bstats->syncp);
	bstats_update(&bstats->bstats, skb);
	u64_stats_update_end(&bstats->syncp);
}

static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
}

static inline void qdisc_bstats_update(struct Qdisc *sch,
				       const struct sk_buff *skb)
{
	bstats_update(&sch->bstats, skb);
}

static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog -= qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog += qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
	this_cpu_dec(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->requeues);
}

static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
	sch->qstats.drops += count;
}

static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
{
	qstats->drops++;
}

static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
{
	qstats->overlimits++;
}

static inline void qdisc_qstats_drop(struct Qdisc *sch)
{
	qstats_drop_inc(&sch->qstats);
}

static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->drops);
}

static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
{
	sch->qstats.overlimits++;
}

static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
{
	__u32 qlen = qdisc_qlen_sum(sch);

	return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
}

static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
					     __u32 *backlog)
{
	struct gnet_stats_queue qstats = { 0 };
	__u32 len = qdisc_qlen_sum(sch);

	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
	*qlen = qstats.qlen;
	*backlog = qstats.backlog;
}

static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}

static inline void qdisc_purge_queue(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_reset(sch);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}
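
/* Dequeue accounting sketch (illustrative only, not part of this header):
 * qdiscs with a private queue normally decrement the backlog and update the
 * byte/packet counters as each skb leaves the qdisc, mirroring what
 * qdisc_dequeue_head() below does. "example_dequeue" and "example_take_one"
 * are hypothetical:
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		struct sk_buff *skb = example_take_one(qdisc_priv(sch));
 *
 *		if (skb) {
 *			qdisc_qstats_backlog_dec(sch, skb);
 *			qdisc_bstats_update(sch, skb);
 *			sch->q.qlen--;
 *		}
 *		return skb;
 *	}
 */
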
static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
{
	qh->head = NULL;
	qh->tail = NULL;
	qh->qlen = 0;
}

static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	struct sk_buff *last = qh->tail;

	if (last) {
		skb->next = NULL;
		last->next = skb;
		qh->tail = skb;
	} else {
		qh->tail = skb;
		qh->head = skb;
	}
	qh->qlen++;
}

static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
{
	__qdisc_enqueue_tail(skb, &sch->q);
	qdisc_qstats_backlog_inc(sch, skb);
	return NET_XMIT_SUCCESS;
}

static inline void __qdisc_enqueue_head(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}

static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
{
	struct sk_buff *skb = qh->head;

	if (likely(skb != NULL)) {
		qh->head = skb->next;
		qh->qlen--;
		if (qh->head == NULL)
			qh->tail = NULL;
		skb->next = NULL;
	}

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

	if (likely(skb != NULL)) {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
	}

	return skb;
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
 * queue the skb for future freeing at end of __dev_xmit_skb()
 */
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
	skb->next = *to_free;
	*to_free = skb;
}

static inline void __qdisc_drop_all(struct sk_buff *skb,
				    struct sk_buff **to_free)
{
	if (skb->prev)
		skb->prev->next = *to_free;
	else
		skb->next = *to_free;
	*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
						   struct qdisc_skb_head *qh,
						   struct sk_buff **to_free)
{
	struct sk_buff *skb = __qdisc_dequeue_head(qh);

	if (likely(skb != NULL)) {
		unsigned int len = qdisc_pkt_len(skb);

		qdisc_qstats_backlog_dec(sch, skb);
		__qdisc_drop(skb, to_free);
		return len;
	}

	return 0;
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
	const struct qdisc_skb_head *qh = &sch->q;

	return qh->head;
}

/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
	if (!skb) {
		skb = sch->dequeue(sch);

		if (skb) {
			__skb_queue_head(&sch->gso_skb, skb);
			/* it's still part of the queue */
			qdisc_qstats_backlog_inc(sch, skb);
			sch->q.qlen++;
		}
	}

	return skb;
}

static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
						 struct sk_buff *skb)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_backlog_dec(sch, skb);
		qdisc_bstats_cpu_update(sch, skb);
		qdisc_qstats_cpu_qlen_dec(sch);
	} else {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
		sch->q.qlen--;
	}
}
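
/* FIFO sketch (illustrative only, not part of this header): the head/tail
 * helpers above are enough to build a pfifo-style qdisc; this is roughly
 * what net/sched/sch_fifo.c does ("example_fifo_enqueue" is hypothetical):
 *
 *	static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *					struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);
 *
 *		return qdisc_drop(skb, sch, to_free);	// qdisc_drop() is defined below
 *	}
 *
 * paired with ->dequeue = qdisc_dequeue_head and ->peek = qdisc_peek_head.
 */
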
static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
						 unsigned int pkt_len)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_qlen_inc(sch);
		this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
	} else {
		sch->qstats.backlog += pkt_len;
		sch->q.qlen++;
	}
}

/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	if (skb) {
		skb = __skb_dequeue(&sch->gso_skb);
		if (qdisc_is_percpu_stats(sch)) {
			qdisc_qstats_cpu_backlog_dec(sch, skb);
			qdisc_qstats_cpu_qlen_dec(sch);
		} else {
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
		}
	} else {
		skb = sch->dequeue(sch);
	}

	return skb;
}

static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
{
	/*
	 * We do not know the backlog in bytes of this list, it
	 * is up to the caller to correct it
	 */
	ASSERT_RTNL();
	if (qh->qlen) {
		rtnl_kfree_skbs(qh->head, qh->tail);

		qh->head = NULL;
		qh->tail = NULL;
		qh->qlen = 0;
	}
}

static inline void qdisc_reset_queue(struct Qdisc *sch)
{
	__qdisc_reset_queue(&sch->q);
	sch->qstats.backlog = 0;
}

static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	old = *pold;
	*pold = new;
	if (old != NULL)
		qdisc_purge_queue(old);
	sch_tree_unlock(sch);

	return old;
}

static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
	rtnl_kfree_skbs(skb, skb);
	qdisc_qstats_drop(sch);
}

static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_cpu_drop(sch);

	return NET_XMIT_DROP;
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}

static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop_all(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}
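
/* Shaper sketch (illustrative only, not part of this header): a
 * non-work-conserving qdisc pairs qdisc_peek_dequeued() with
 * qdisc_dequeue_peeked() so that a packet inspected too early stays parked
 * in the child's ->gso_skb, roughly as net/sched/sch_tbf.c does. The
 * "example_*" names and the readiness check are hypothetical:
 *
 *	static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
 *	{
 *		struct example_sched_data *q = qdisc_priv(sch);
 *		struct sk_buff *skb = qdisc_peek_dequeued(q->child);
 *
 *		if (!skb)
 *			return NULL;
 *		if (!example_tokens_available(q, qdisc_pkt_len(skb)))
 *			return NULL;		// not ready yet, leave it queued
 *
 *		skb = qdisc_dequeue_peeked(q->child);
 *		if (unlikely(!skb))
 *			return NULL;
 *		qdisc_qstats_backlog_dec(sch, skb);
 *		sch->q.qlen--;
 *		qdisc_bstats_update(sch, skb);
 *		return skb;
 *	}
 */
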
/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
 * long it will take to send a packet given its size.
 */
static inline u32 qdisc_l2t(struct qdisc_rate_table *rtab, unsigned int pktlen)
{
	int slot = pktlen + rtab->rate.cell_align + rtab->rate.overhead;

	if (slot < 0)
		slot = 0;
	slot >>= rtab->rate.cell_log;
	if (slot > 255)
		return rtab->data[255] * (slot >> 8) + rtab->data[slot & 0xFF];
	return rtab->data[slot];
}

struct psched_ratecfg {
	u64 rate_bytes_ps; /* bytes per second */
	u32 mult;
	u16 overhead;
	u8 linklayer;
	u8 shift;
};

static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
				unsigned int len)
{
	len += r->overhead;

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
		return ((u64)(DIV_ROUND_UP(len, 48) * 53) * r->mult) >> r->shift;

	return ((u64)len * r->mult) >> r->shift;
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64);

static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
					  const struct psched_ratecfg *r)
{
	memset(res, 0, sizeof(*res));

	/* legacy struct tc_ratespec has a 32bit @rate field
	 * Qdisc using 64bit rate should add new attributes
	 * in order to maintain compatibility.
	 */
	res->rate = min_t(u64, r->rate_bytes_ps, ~0U);

	res->overhead = r->overhead;
	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}

struct psched_pktrate {
	u64 rate_pkts_ps; /* packets per second */
	u32 mult;
	u8 shift;
};

static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
				  unsigned int pkt_num)
{
	return ((u64)pkt_num * r->mult) >> r->shift;
}

void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);

/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
 * The fast path only needs to access filter list and to update stats
 */
struct mini_Qdisc {
	struct tcf_proto *filter_list;
	struct tcf_block *block;
	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	struct gnet_stats_queue __percpu *cpu_qstats;
	struct rcu_head rcu;
};

static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
						const struct sk_buff *skb)
{
	bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
}

static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
{
	this_cpu_inc(miniq->cpu_qstats->drops);
}

struct mini_Qdisc_pair {
	struct mini_Qdisc miniq1;
	struct mini_Qdisc miniq2;
	struct mini_Qdisc __rcu **p_miniq;
};

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
				struct tcf_block *block);

int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));

#endif