1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * net/sched/cls_api.c Packet classifier API. 4 * 5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 6 * 7 * Changes: 8 * 9 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 10 */ 11 12 #include <linux/module.h> 13 #include <linux/types.h> 14 #include <linux/kernel.h> 15 #include <linux/string.h> 16 #include <linux/errno.h> 17 #include <linux/err.h> 18 #include <linux/skbuff.h> 19 #include <linux/init.h> 20 #include <linux/kmod.h> 21 #include <linux/slab.h> 22 #include <linux/idr.h> 23 #include <linux/jhash.h> 24 #include <linux/rculist.h> 25 #include <net/net_namespace.h> 26 #include <net/sock.h> 27 #include <net/netlink.h> 28 #include <net/pkt_sched.h> 29 #include <net/pkt_cls.h> 30 #include <net/tc_act/tc_pedit.h> 31 #include <net/tc_act/tc_mirred.h> 32 #include <net/tc_act/tc_vlan.h> 33 #include <net/tc_act/tc_tunnel_key.h> 34 #include <net/tc_act/tc_csum.h> 35 #include <net/tc_act/tc_gact.h> 36 #include <net/tc_act/tc_police.h> 37 #include <net/tc_act/tc_sample.h> 38 #include <net/tc_act/tc_skbedit.h> 39 #include <net/tc_act/tc_ct.h> 40 #include <net/tc_act/tc_mpls.h> 41 #include <net/tc_act/tc_gate.h> 42 #include <net/flow_offload.h> 43 44 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; 45 46 /* The list of all installed classifier types */ 47 static LIST_HEAD(tcf_proto_base); 48 49 /* Protects list of registered TC modules. It is pure SMP lock. 
*/ 50 static DEFINE_RWLOCK(cls_mod_lock); 51 52 #ifdef CONFIG_NET_CLS_ACT 53 DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); 54 EXPORT_SYMBOL(tc_skb_ext_tc); 55 56 void tc_skb_ext_tc_enable(void) 57 { 58 static_branch_inc(&tc_skb_ext_tc); 59 } 60 EXPORT_SYMBOL(tc_skb_ext_tc_enable); 61 62 void tc_skb_ext_tc_disable(void) 63 { 64 static_branch_dec(&tc_skb_ext_tc); 65 } 66 EXPORT_SYMBOL(tc_skb_ext_tc_disable); 67 #endif 68 69 static u32 destroy_obj_hashfn(const struct tcf_proto *tp) 70 { 71 return jhash_3words(tp->chain->index, tp->prio, 72 (__force __u32)tp->protocol, 0); 73 } 74 75 static void tcf_proto_signal_destroying(struct tcf_chain *chain, 76 struct tcf_proto *tp) 77 { 78 struct tcf_block *block = chain->block; 79 80 mutex_lock(&block->proto_destroy_lock); 81 hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, 82 destroy_obj_hashfn(tp)); 83 mutex_unlock(&block->proto_destroy_lock); 84 } 85 86 static bool tcf_proto_cmp(const struct tcf_proto *tp1, 87 const struct tcf_proto *tp2) 88 { 89 return tp1->chain->index == tp2->chain->index && 90 tp1->prio == tp2->prio && 91 tp1->protocol == tp2->protocol; 92 } 93 94 static bool tcf_proto_exists_destroying(struct tcf_chain *chain, 95 struct tcf_proto *tp) 96 { 97 u32 hash = destroy_obj_hashfn(tp); 98 struct tcf_proto *iter; 99 bool found = false; 100 101 rcu_read_lock(); 102 hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, 103 destroy_ht_node, hash) { 104 if (tcf_proto_cmp(tp, iter)) { 105 found = true; 106 break; 107 } 108 } 109 rcu_read_unlock(); 110 111 return found; 112 } 113 114 static void 115 tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) 116 { 117 struct tcf_block *block = chain->block; 118 119 mutex_lock(&block->proto_destroy_lock); 120 if (hash_hashed(&tp->destroy_ht_node)) 121 hash_del_rcu(&tp->destroy_ht_node); 122 mutex_unlock(&block->proto_destroy_lock); 123 } 124 125 /* Find classifier type by string name */ 126 127 static const struct tcf_proto_ops 
*__tcf_proto_lookup_ops(const char *kind) 128 { 129 const struct tcf_proto_ops *t, *res = NULL; 130 131 if (kind) { 132 read_lock(&cls_mod_lock); 133 list_for_each_entry(t, &tcf_proto_base, head) { 134 if (strcmp(kind, t->kind) == 0) { 135 if (try_module_get(t->owner)) 136 res = t; 137 break; 138 } 139 } 140 read_unlock(&cls_mod_lock); 141 } 142 return res; 143 } 144 145 static const struct tcf_proto_ops * 146 tcf_proto_lookup_ops(const char *kind, bool rtnl_held, 147 struct netlink_ext_ack *extack) 148 { 149 const struct tcf_proto_ops *ops; 150 151 ops = __tcf_proto_lookup_ops(kind); 152 if (ops) 153 return ops; 154 #ifdef CONFIG_MODULES 155 if (rtnl_held) 156 rtnl_unlock(); 157 request_module("cls_%s", kind); 158 if (rtnl_held) 159 rtnl_lock(); 160 ops = __tcf_proto_lookup_ops(kind); 161 /* We dropped the RTNL semaphore in order to perform 162 * the module load. So, even if we succeeded in loading 163 * the module we have to replay the request. We indicate 164 * this using -EAGAIN. 165 */ 166 if (ops) { 167 module_put(ops->owner); 168 return ERR_PTR(-EAGAIN); 169 } 170 #endif 171 NL_SET_ERR_MSG(extack, "TC classifier not found"); 172 return ERR_PTR(-ENOENT); 173 } 174 175 /* Register(unregister) new classifier type */ 176 177 int register_tcf_proto_ops(struct tcf_proto_ops *ops) 178 { 179 struct tcf_proto_ops *t; 180 int rc = -EEXIST; 181 182 write_lock(&cls_mod_lock); 183 list_for_each_entry(t, &tcf_proto_base, head) 184 if (!strcmp(ops->kind, t->kind)) 185 goto out; 186 187 list_add_tail(&ops->head, &tcf_proto_base); 188 rc = 0; 189 out: 190 write_unlock(&cls_mod_lock); 191 return rc; 192 } 193 EXPORT_SYMBOL(register_tcf_proto_ops); 194 195 static struct workqueue_struct *tc_filter_wq; 196 197 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 198 { 199 struct tcf_proto_ops *t; 200 int rc = -ENOENT; 201 202 /* Wait for outstanding call_rcu()s, if any, from a 203 * tcf_proto_ops's destroy() handler. 
204 */ 205 rcu_barrier(); 206 flush_workqueue(tc_filter_wq); 207 208 write_lock(&cls_mod_lock); 209 list_for_each_entry(t, &tcf_proto_base, head) { 210 if (t == ops) { 211 list_del(&t->head); 212 rc = 0; 213 break; 214 } 215 } 216 write_unlock(&cls_mod_lock); 217 return rc; 218 } 219 EXPORT_SYMBOL(unregister_tcf_proto_ops); 220 221 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func) 222 { 223 INIT_RCU_WORK(rwork, func); 224 return queue_rcu_work(tc_filter_wq, rwork); 225 } 226 EXPORT_SYMBOL(tcf_queue_work); 227 228 /* Select new prio value from the range, managed by kernel. */ 229 230 static inline u32 tcf_auto_prio(struct tcf_proto *tp) 231 { 232 u32 first = TC_H_MAKE(0xC0000000U, 0U); 233 234 if (tp) 235 first = tp->prio - 1; 236 237 return TC_H_MAJ(first); 238 } 239 240 static bool tcf_proto_check_kind(struct nlattr *kind, char *name) 241 { 242 if (kind) 243 return nla_strscpy(name, kind, IFNAMSIZ) < 0; 244 memset(name, 0, IFNAMSIZ); 245 return false; 246 } 247 248 static bool tcf_proto_is_unlocked(const char *kind) 249 { 250 const struct tcf_proto_ops *ops; 251 bool ret; 252 253 if (strlen(kind) == 0) 254 return false; 255 256 ops = tcf_proto_lookup_ops(kind, false, NULL); 257 /* On error return false to take rtnl lock. Proto lookup/create 258 * functions will perform lookup again and properly handle errors. 
259 */ 260 if (IS_ERR(ops)) 261 return false; 262 263 ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); 264 module_put(ops->owner); 265 return ret; 266 } 267 268 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, 269 u32 prio, struct tcf_chain *chain, 270 bool rtnl_held, 271 struct netlink_ext_ack *extack) 272 { 273 struct tcf_proto *tp; 274 int err; 275 276 tp = kzalloc(sizeof(*tp), GFP_KERNEL); 277 if (!tp) 278 return ERR_PTR(-ENOBUFS); 279 280 tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); 281 if (IS_ERR(tp->ops)) { 282 err = PTR_ERR(tp->ops); 283 goto errout; 284 } 285 tp->classify = tp->ops->classify; 286 tp->protocol = protocol; 287 tp->prio = prio; 288 tp->chain = chain; 289 spin_lock_init(&tp->lock); 290 refcount_set(&tp->refcnt, 1); 291 292 err = tp->ops->init(tp); 293 if (err) { 294 module_put(tp->ops->owner); 295 goto errout; 296 } 297 return tp; 298 299 errout: 300 kfree(tp); 301 return ERR_PTR(err); 302 } 303 304 static void tcf_proto_get(struct tcf_proto *tp) 305 { 306 refcount_inc(&tp->refcnt); 307 } 308 309 static void tcf_chain_put(struct tcf_chain *chain); 310 311 static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, 312 bool sig_destroy, struct netlink_ext_ack *extack) 313 { 314 tp->ops->destroy(tp, rtnl_held, extack); 315 if (sig_destroy) 316 tcf_proto_signal_destroyed(tp->chain, tp); 317 tcf_chain_put(tp->chain); 318 module_put(tp->ops->owner); 319 kfree_rcu(tp, rcu); 320 } 321 322 static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, 323 struct netlink_ext_ack *extack) 324 { 325 if (refcount_dec_and_test(&tp->refcnt)) 326 tcf_proto_destroy(tp, rtnl_held, true, extack); 327 } 328 329 static bool tcf_proto_check_delete(struct tcf_proto *tp) 330 { 331 if (tp->ops->delete_empty) 332 return tp->ops->delete_empty(tp); 333 334 tp->deleting = true; 335 return tp->deleting; 336 } 337 338 static void tcf_proto_mark_delete(struct tcf_proto *tp) 339 { 340 spin_lock(&tp->lock); 341 tp->deleting = 
true; 342 spin_unlock(&tp->lock); 343 } 344 345 static bool tcf_proto_is_deleting(struct tcf_proto *tp) 346 { 347 bool deleting; 348 349 spin_lock(&tp->lock); 350 deleting = tp->deleting; 351 spin_unlock(&tp->lock); 352 353 return deleting; 354 } 355 356 #define ASSERT_BLOCK_LOCKED(block) \ 357 lockdep_assert_held(&(block)->lock) 358 359 struct tcf_filter_chain_list_item { 360 struct list_head list; 361 tcf_chain_head_change_t *chain_head_change; 362 void *chain_head_change_priv; 363 }; 364 365 static struct tcf_chain *tcf_chain_create(struct tcf_block *block, 366 u32 chain_index) 367 { 368 struct tcf_chain *chain; 369 370 ASSERT_BLOCK_LOCKED(block); 371 372 chain = kzalloc(sizeof(*chain), GFP_KERNEL); 373 if (!chain) 374 return NULL; 375 list_add_tail_rcu(&chain->list, &block->chain_list); 376 mutex_init(&chain->filter_chain_lock); 377 chain->block = block; 378 chain->index = chain_index; 379 chain->refcnt = 1; 380 if (!chain->index) 381 block->chain0.chain = chain; 382 return chain; 383 } 384 385 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, 386 struct tcf_proto *tp_head) 387 { 388 if (item->chain_head_change) 389 item->chain_head_change(tp_head, item->chain_head_change_priv); 390 } 391 392 static void tcf_chain0_head_change(struct tcf_chain *chain, 393 struct tcf_proto *tp_head) 394 { 395 struct tcf_filter_chain_list_item *item; 396 struct tcf_block *block = chain->block; 397 398 if (chain->index) 399 return; 400 401 mutex_lock(&block->lock); 402 list_for_each_entry(item, &block->chain0.filter_chain_list, list) 403 tcf_chain_head_change_item(item, tp_head); 404 mutex_unlock(&block->lock); 405 } 406 407 /* Returns true if block can be safely freed. 
*/ 408 409 static bool tcf_chain_detach(struct tcf_chain *chain) 410 { 411 struct tcf_block *block = chain->block; 412 413 ASSERT_BLOCK_LOCKED(block); 414 415 list_del_rcu(&chain->list); 416 if (!chain->index) 417 block->chain0.chain = NULL; 418 419 if (list_empty(&block->chain_list) && 420 refcount_read(&block->refcnt) == 0) 421 return true; 422 423 return false; 424 } 425 426 static void tcf_block_destroy(struct tcf_block *block) 427 { 428 mutex_destroy(&block->lock); 429 mutex_destroy(&block->proto_destroy_lock); 430 kfree_rcu(block, rcu); 431 } 432 433 static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) 434 { 435 struct tcf_block *block = chain->block; 436 437 mutex_destroy(&chain->filter_chain_lock); 438 kfree_rcu(chain, rcu); 439 if (free_block) 440 tcf_block_destroy(block); 441 } 442 443 static void tcf_chain_hold(struct tcf_chain *chain) 444 { 445 ASSERT_BLOCK_LOCKED(chain->block); 446 447 ++chain->refcnt; 448 } 449 450 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) 451 { 452 ASSERT_BLOCK_LOCKED(chain->block); 453 454 /* In case all the references are action references, this 455 * chain should not be shown to the user. 
456 */ 457 return chain->refcnt == chain->action_refcnt; 458 } 459 460 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, 461 u32 chain_index) 462 { 463 struct tcf_chain *chain; 464 465 ASSERT_BLOCK_LOCKED(block); 466 467 list_for_each_entry(chain, &block->chain_list, list) { 468 if (chain->index == chain_index) 469 return chain; 470 } 471 return NULL; 472 } 473 474 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 475 static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, 476 u32 chain_index) 477 { 478 struct tcf_chain *chain; 479 480 list_for_each_entry_rcu(chain, &block->chain_list, list) { 481 if (chain->index == chain_index) 482 return chain; 483 } 484 return NULL; 485 } 486 #endif 487 488 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, 489 u32 seq, u16 flags, int event, bool unicast); 490 491 static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, 492 u32 chain_index, bool create, 493 bool by_act) 494 { 495 struct tcf_chain *chain = NULL; 496 bool is_first_reference; 497 498 mutex_lock(&block->lock); 499 chain = tcf_chain_lookup(block, chain_index); 500 if (chain) { 501 tcf_chain_hold(chain); 502 } else { 503 if (!create) 504 goto errout; 505 chain = tcf_chain_create(block, chain_index); 506 if (!chain) 507 goto errout; 508 } 509 510 if (by_act) 511 ++chain->action_refcnt; 512 is_first_reference = chain->refcnt - chain->action_refcnt == 1; 513 mutex_unlock(&block->lock); 514 515 /* Send notification only in case we got the first 516 * non-action reference. Until then, the chain acts only as 517 * a placeholder for actions pointing to it and user ought 518 * not know about them. 
519 */ 520 if (is_first_reference && !by_act) 521 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, 522 RTM_NEWCHAIN, false); 523 524 return chain; 525 526 errout: 527 mutex_unlock(&block->lock); 528 return chain; 529 } 530 531 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, 532 bool create) 533 { 534 return __tcf_chain_get(block, chain_index, create, false); 535 } 536 537 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) 538 { 539 return __tcf_chain_get(block, chain_index, true, true); 540 } 541 EXPORT_SYMBOL(tcf_chain_get_by_act); 542 543 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, 544 void *tmplt_priv); 545 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, 546 void *tmplt_priv, u32 chain_index, 547 struct tcf_block *block, struct sk_buff *oskb, 548 u32 seq, u16 flags, bool unicast); 549 550 static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, 551 bool explicitly_created) 552 { 553 struct tcf_block *block = chain->block; 554 const struct tcf_proto_ops *tmplt_ops; 555 bool free_block = false; 556 unsigned int refcnt; 557 void *tmplt_priv; 558 559 mutex_lock(&block->lock); 560 if (explicitly_created) { 561 if (!chain->explicitly_created) { 562 mutex_unlock(&block->lock); 563 return; 564 } 565 chain->explicitly_created = false; 566 } 567 568 if (by_act) 569 chain->action_refcnt--; 570 571 /* tc_chain_notify_delete can't be called while holding block lock. 572 * However, when block is unlocked chain can be changed concurrently, so 573 * save these to temporary variables. 574 */ 575 refcnt = --chain->refcnt; 576 tmplt_ops = chain->tmplt_ops; 577 tmplt_priv = chain->tmplt_priv; 578 579 /* The last dropped non-action reference will trigger notification. 
*/ 580 if (refcnt - chain->action_refcnt == 0 && !by_act) { 581 tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, 582 block, NULL, 0, 0, false); 583 /* Last reference to chain, no need to lock. */ 584 chain->flushing = false; 585 } 586 587 if (refcnt == 0) 588 free_block = tcf_chain_detach(chain); 589 mutex_unlock(&block->lock); 590 591 if (refcnt == 0) { 592 tc_chain_tmplt_del(tmplt_ops, tmplt_priv); 593 tcf_chain_destroy(chain, free_block); 594 } 595 } 596 597 static void tcf_chain_put(struct tcf_chain *chain) 598 { 599 __tcf_chain_put(chain, false, false); 600 } 601 602 void tcf_chain_put_by_act(struct tcf_chain *chain) 603 { 604 __tcf_chain_put(chain, true, false); 605 } 606 EXPORT_SYMBOL(tcf_chain_put_by_act); 607 608 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) 609 { 610 __tcf_chain_put(chain, false, true); 611 } 612 613 static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) 614 { 615 struct tcf_proto *tp, *tp_next; 616 617 mutex_lock(&chain->filter_chain_lock); 618 tp = tcf_chain_dereference(chain->filter_chain, chain); 619 while (tp) { 620 tp_next = rcu_dereference_protected(tp->next, 1); 621 tcf_proto_signal_destroying(chain, tp); 622 tp = tp_next; 623 } 624 tp = tcf_chain_dereference(chain->filter_chain, chain); 625 RCU_INIT_POINTER(chain->filter_chain, NULL); 626 tcf_chain0_head_change(chain, NULL); 627 chain->flushing = true; 628 mutex_unlock(&chain->filter_chain_lock); 629 630 while (tp) { 631 tp_next = rcu_dereference_protected(tp->next, 1); 632 tcf_proto_put(tp, rtnl_held, NULL); 633 tp = tp_next; 634 } 635 } 636 637 static int tcf_block_setup(struct tcf_block *block, 638 struct flow_block_offload *bo); 639 640 static void tcf_block_offload_init(struct flow_block_offload *bo, 641 struct net_device *dev, struct Qdisc *sch, 642 enum flow_block_command command, 643 enum flow_block_binder_type binder_type, 644 struct flow_block *flow_block, 645 bool shared, struct netlink_ext_ack *extack) 646 { 647 bo->net 
= dev_net(dev); 648 bo->command = command; 649 bo->binder_type = binder_type; 650 bo->block = flow_block; 651 bo->block_shared = shared; 652 bo->extack = extack; 653 bo->sch = sch; 654 bo->cb_list_head = &flow_block->cb_list; 655 INIT_LIST_HEAD(&bo->cb_list); 656 } 657 658 static void tcf_block_unbind(struct tcf_block *block, 659 struct flow_block_offload *bo); 660 661 static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) 662 { 663 struct tcf_block *block = block_cb->indr.data; 664 struct net_device *dev = block_cb->indr.dev; 665 struct Qdisc *sch = block_cb->indr.sch; 666 struct netlink_ext_ack extack = {}; 667 struct flow_block_offload bo = {}; 668 669 tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, 670 block_cb->indr.binder_type, 671 &block->flow_block, tcf_block_shared(block), 672 &extack); 673 rtnl_lock(); 674 down_write(&block->cb_lock); 675 list_del(&block_cb->driver_list); 676 list_move(&block_cb->list, &bo.cb_list); 677 tcf_block_unbind(block, &bo); 678 up_write(&block->cb_lock); 679 rtnl_unlock(); 680 } 681 682 static bool tcf_block_offload_in_use(struct tcf_block *block) 683 { 684 return atomic_read(&block->offloadcnt); 685 } 686 687 static int tcf_block_offload_cmd(struct tcf_block *block, 688 struct net_device *dev, struct Qdisc *sch, 689 struct tcf_block_ext_info *ei, 690 enum flow_block_command command, 691 struct netlink_ext_ack *extack) 692 { 693 struct flow_block_offload bo = {}; 694 695 tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type, 696 &block->flow_block, tcf_block_shared(block), 697 extack); 698 699 if (dev->netdev_ops->ndo_setup_tc) { 700 int err; 701 702 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); 703 if (err < 0) { 704 if (err != -EOPNOTSUPP) 705 NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); 706 return err; 707 } 708 709 return tcf_block_setup(block, &bo); 710 } 711 712 flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo, 713 tc_block_indr_cleanup); 714 
tcf_block_setup(block, &bo); 715 716 return -EOPNOTSUPP; 717 } 718 719 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, 720 struct tcf_block_ext_info *ei, 721 struct netlink_ext_ack *extack) 722 { 723 struct net_device *dev = q->dev_queue->dev; 724 int err; 725 726 down_write(&block->cb_lock); 727 728 /* If tc offload feature is disabled and the block we try to bind 729 * to already has some offloaded filters, forbid to bind. 730 */ 731 if (dev->netdev_ops->ndo_setup_tc && 732 !tc_can_offload(dev) && 733 tcf_block_offload_in_use(block)) { 734 NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); 735 err = -EOPNOTSUPP; 736 goto err_unlock; 737 } 738 739 err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack); 740 if (err == -EOPNOTSUPP) 741 goto no_offload_dev_inc; 742 if (err) 743 goto err_unlock; 744 745 up_write(&block->cb_lock); 746 return 0; 747 748 no_offload_dev_inc: 749 if (tcf_block_offload_in_use(block)) 750 goto err_unlock; 751 752 err = 0; 753 block->nooffloaddevcnt++; 754 err_unlock: 755 up_write(&block->cb_lock); 756 return err; 757 } 758 759 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, 760 struct tcf_block_ext_info *ei) 761 { 762 struct net_device *dev = q->dev_queue->dev; 763 int err; 764 765 down_write(&block->cb_lock); 766 err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL); 767 if (err == -EOPNOTSUPP) 768 goto no_offload_dev_dec; 769 up_write(&block->cb_lock); 770 return; 771 772 no_offload_dev_dec: 773 WARN_ON(block->nooffloaddevcnt-- == 0); 774 up_write(&block->cb_lock); 775 } 776 777 static int 778 tcf_chain0_head_change_cb_add(struct tcf_block *block, 779 struct tcf_block_ext_info *ei, 780 struct netlink_ext_ack *extack) 781 { 782 struct tcf_filter_chain_list_item *item; 783 struct tcf_chain *chain0; 784 785 item = kmalloc(sizeof(*item), GFP_KERNEL); 786 if (!item) { 787 NL_SET_ERR_MSG(extack, "Memory allocation for 
head change callback item failed"); 788 return -ENOMEM; 789 } 790 item->chain_head_change = ei->chain_head_change; 791 item->chain_head_change_priv = ei->chain_head_change_priv; 792 793 mutex_lock(&block->lock); 794 chain0 = block->chain0.chain; 795 if (chain0) 796 tcf_chain_hold(chain0); 797 else 798 list_add(&item->list, &block->chain0.filter_chain_list); 799 mutex_unlock(&block->lock); 800 801 if (chain0) { 802 struct tcf_proto *tp_head; 803 804 mutex_lock(&chain0->filter_chain_lock); 805 806 tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); 807 if (tp_head) 808 tcf_chain_head_change_item(item, tp_head); 809 810 mutex_lock(&block->lock); 811 list_add(&item->list, &block->chain0.filter_chain_list); 812 mutex_unlock(&block->lock); 813 814 mutex_unlock(&chain0->filter_chain_lock); 815 tcf_chain_put(chain0); 816 } 817 818 return 0; 819 } 820 821 static void 822 tcf_chain0_head_change_cb_del(struct tcf_block *block, 823 struct tcf_block_ext_info *ei) 824 { 825 struct tcf_filter_chain_list_item *item; 826 827 mutex_lock(&block->lock); 828 list_for_each_entry(item, &block->chain0.filter_chain_list, list) { 829 if ((!ei->chain_head_change && !ei->chain_head_change_priv) || 830 (item->chain_head_change == ei->chain_head_change && 831 item->chain_head_change_priv == ei->chain_head_change_priv)) { 832 if (block->chain0.chain) 833 tcf_chain_head_change_item(item, NULL); 834 list_del(&item->list); 835 mutex_unlock(&block->lock); 836 837 kfree(item); 838 return; 839 } 840 } 841 mutex_unlock(&block->lock); 842 WARN_ON(1); 843 } 844 845 struct tcf_net { 846 spinlock_t idr_lock; /* Protects idr */ 847 struct idr idr; 848 }; 849 850 static unsigned int tcf_net_id; 851 852 static int tcf_block_insert(struct tcf_block *block, struct net *net, 853 struct netlink_ext_ack *extack) 854 { 855 struct tcf_net *tn = net_generic(net, tcf_net_id); 856 int err; 857 858 idr_preload(GFP_KERNEL); 859 spin_lock(&tn->idr_lock); 860 err = idr_alloc_u32(&tn->idr, block, &block->index, 
block->index, 861 GFP_NOWAIT); 862 spin_unlock(&tn->idr_lock); 863 idr_preload_end(); 864 865 return err; 866 } 867 868 static void tcf_block_remove(struct tcf_block *block, struct net *net) 869 { 870 struct tcf_net *tn = net_generic(net, tcf_net_id); 871 872 spin_lock(&tn->idr_lock); 873 idr_remove(&tn->idr, block->index); 874 spin_unlock(&tn->idr_lock); 875 } 876 877 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, 878 u32 block_index, 879 struct netlink_ext_ack *extack) 880 { 881 struct tcf_block *block; 882 883 block = kzalloc(sizeof(*block), GFP_KERNEL); 884 if (!block) { 885 NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); 886 return ERR_PTR(-ENOMEM); 887 } 888 mutex_init(&block->lock); 889 mutex_init(&block->proto_destroy_lock); 890 init_rwsem(&block->cb_lock); 891 flow_block_init(&block->flow_block); 892 INIT_LIST_HEAD(&block->chain_list); 893 INIT_LIST_HEAD(&block->owner_list); 894 INIT_LIST_HEAD(&block->chain0.filter_chain_list); 895 896 refcount_set(&block->refcnt, 1); 897 block->net = net; 898 block->index = block_index; 899 900 /* Don't store q pointer for blocks which are shared */ 901 if (!tcf_block_shared(block)) 902 block->q = q; 903 return block; 904 } 905 906 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) 907 { 908 struct tcf_net *tn = net_generic(net, tcf_net_id); 909 910 return idr_find(&tn->idr, block_index); 911 } 912 913 static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) 914 { 915 struct tcf_block *block; 916 917 rcu_read_lock(); 918 block = tcf_block_lookup(net, block_index); 919 if (block && !refcount_inc_not_zero(&block->refcnt)) 920 block = NULL; 921 rcu_read_unlock(); 922 923 return block; 924 } 925 926 static struct tcf_chain * 927 __tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) 928 { 929 mutex_lock(&block->lock); 930 if (chain) 931 chain = list_is_last(&chain->list, &block->chain_list) ? 
932 NULL : list_next_entry(chain, list); 933 else 934 chain = list_first_entry_or_null(&block->chain_list, 935 struct tcf_chain, list); 936 937 /* skip all action-only chains */ 938 while (chain && tcf_chain_held_by_acts_only(chain)) 939 chain = list_is_last(&chain->list, &block->chain_list) ? 940 NULL : list_next_entry(chain, list); 941 942 if (chain) 943 tcf_chain_hold(chain); 944 mutex_unlock(&block->lock); 945 946 return chain; 947 } 948 949 /* Function to be used by all clients that want to iterate over all chains on 950 * block. It properly obtains block->lock and takes reference to chain before 951 * returning it. Users of this function must be tolerant to concurrent chain 952 * insertion/deletion or ensure that no concurrent chain modification is 953 * possible. Note that all netlink dump callbacks cannot guarantee to provide 954 * consistent dump because rtnl lock is released each time skb is filled with 955 * data and sent to user-space. 956 */ 957 958 struct tcf_chain * 959 tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) 960 { 961 struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); 962 963 if (chain) 964 tcf_chain_put(chain); 965 966 return chain_next; 967 } 968 EXPORT_SYMBOL(tcf_get_next_chain); 969 970 static struct tcf_proto * 971 __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) 972 { 973 u32 prio = 0; 974 975 ASSERT_RTNL(); 976 mutex_lock(&chain->filter_chain_lock); 977 978 if (!tp) { 979 tp = tcf_chain_dereference(chain->filter_chain, chain); 980 } else if (tcf_proto_is_deleting(tp)) { 981 /* 'deleting' flag is set and chain->filter_chain_lock was 982 * unlocked, which means next pointer could be invalid. Restart 983 * search. 
984 */ 985 prio = tp->prio + 1; 986 tp = tcf_chain_dereference(chain->filter_chain, chain); 987 988 for (; tp; tp = tcf_chain_dereference(tp->next, chain)) 989 if (!tp->deleting && tp->prio >= prio) 990 break; 991 } else { 992 tp = tcf_chain_dereference(tp->next, chain); 993 } 994 995 if (tp) 996 tcf_proto_get(tp); 997 998 mutex_unlock(&chain->filter_chain_lock); 999 1000 return tp; 1001 } 1002 1003 /* Function to be used by all clients that want to iterate over all tp's on 1004 * chain. Users of this function must be tolerant to concurrent tp 1005 * insertion/deletion or ensure that no concurrent chain modification is 1006 * possible. Note that all netlink dump callbacks cannot guarantee to provide 1007 * consistent dump because rtnl lock is released each time skb is filled with 1008 * data and sent to user-space. 1009 */ 1010 1011 struct tcf_proto * 1012 tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) 1013 { 1014 struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); 1015 1016 if (tp) 1017 tcf_proto_put(tp, true, NULL); 1018 1019 return tp_next; 1020 } 1021 EXPORT_SYMBOL(tcf_get_next_proto); 1022 1023 static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) 1024 { 1025 struct tcf_chain *chain; 1026 1027 /* Last reference to block. At this point chains cannot be added or 1028 * removed concurrently. 1029 */ 1030 for (chain = tcf_get_next_chain(block, NULL); 1031 chain; 1032 chain = tcf_get_next_chain(block, chain)) { 1033 tcf_chain_put_explicitly_created(chain); 1034 tcf_chain_flush(chain, rtnl_held); 1035 } 1036 } 1037 1038 /* Lookup Qdisc and increments its reference counter. 1039 * Set parent, if necessary. 
1040 */ 1041 1042 static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, 1043 u32 *parent, int ifindex, bool rtnl_held, 1044 struct netlink_ext_ack *extack) 1045 { 1046 const struct Qdisc_class_ops *cops; 1047 struct net_device *dev; 1048 int err = 0; 1049 1050 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) 1051 return 0; 1052 1053 rcu_read_lock(); 1054 1055 /* Find link */ 1056 dev = dev_get_by_index_rcu(net, ifindex); 1057 if (!dev) { 1058 rcu_read_unlock(); 1059 return -ENODEV; 1060 } 1061 1062 /* Find qdisc */ 1063 if (!*parent) { 1064 *q = rcu_dereference(dev->qdisc); 1065 *parent = (*q)->handle; 1066 } else { 1067 *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); 1068 if (!*q) { 1069 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); 1070 err = -EINVAL; 1071 goto errout_rcu; 1072 } 1073 } 1074 1075 *q = qdisc_refcount_inc_nz(*q); 1076 if (!*q) { 1077 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); 1078 err = -EINVAL; 1079 goto errout_rcu; 1080 } 1081 1082 /* Is it classful? */ 1083 cops = (*q)->ops->cl_ops; 1084 if (!cops) { 1085 NL_SET_ERR_MSG(extack, "Qdisc not classful"); 1086 err = -EINVAL; 1087 goto errout_qdisc; 1088 } 1089 1090 if (!cops->tcf_block) { 1091 NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); 1092 err = -EOPNOTSUPP; 1093 goto errout_qdisc; 1094 } 1095 1096 errout_rcu: 1097 /* At this point we know that qdisc is not noop_qdisc, 1098 * which means that qdisc holds a reference to net_device 1099 * and we hold a reference to qdisc, so it is safe to release 1100 * rcu read lock. 
1101 */ 1102 rcu_read_unlock(); 1103 return err; 1104 1105 errout_qdisc: 1106 rcu_read_unlock(); 1107 1108 if (rtnl_held) 1109 qdisc_put(*q); 1110 else 1111 qdisc_put_unlocked(*q); 1112 *q = NULL; 1113 1114 return err; 1115 } 1116 1117 static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, 1118 int ifindex, struct netlink_ext_ack *extack) 1119 { 1120 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) 1121 return 0; 1122 1123 /* Do we search for filter, attached to class? */ 1124 if (TC_H_MIN(parent)) { 1125 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1126 1127 *cl = cops->find(q, parent); 1128 if (*cl == 0) { 1129 NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); 1130 return -ENOENT; 1131 } 1132 } 1133 1134 return 0; 1135 } 1136 1137 static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, 1138 unsigned long cl, int ifindex, 1139 u32 block_index, 1140 struct netlink_ext_ack *extack) 1141 { 1142 struct tcf_block *block; 1143 1144 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { 1145 block = tcf_block_refcnt_get(net, block_index); 1146 if (!block) { 1147 NL_SET_ERR_MSG(extack, "Block of given index was not found"); 1148 return ERR_PTR(-EINVAL); 1149 } 1150 } else { 1151 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1152 1153 block = cops->tcf_block(q, cl, extack); 1154 if (!block) 1155 return ERR_PTR(-EINVAL); 1156 1157 if (tcf_block_shared(block)) { 1158 NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); 1159 return ERR_PTR(-EOPNOTSUPP); 1160 } 1161 1162 /* Always take reference to block in order to support execution 1163 * of rules update path of cls API without rtnl lock. Caller 1164 * must release block when it is finished using it. 'if' block 1165 * of this conditional obtain reference to block by calling 1166 * tcf_block_refcnt_get(). 
		 */
		refcount_inc(&block->refcnt);
	}

	return block;
}

/* Drop one reference to @block.
 *
 * If this was the last reference: a shared block is removed through
 * tcf_block_remove(), the offload is unbound when @q is given, and the block
 * is either destroyed directly (no chains left) or all of its chains are
 * flushed. If other references remain, only the offload unbind is performed
 * (again only when @q is given).
 */
static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei, bool rtnl_held)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
			tcf_block_destroy(block);
		else
			tcf_block_flush_all_chains(block, rtnl_held);
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

/* Drop one block reference without any qdisc/offload unbind (q and ei NULL). */
static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
	__tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 *
 * Must be called with RTNL held (ASSERT_RTNL). On failure the qdisc stored in
 * *q (if any) is put, *q is reset to NULL and an ERR_PTR() is returned.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	ASSERT_RTNL();

	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
	if (err)
		goto errout;

	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
	if (err)
		goto errout_qdisc;

	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout_qdisc;
	}

	return block;

errout_qdisc:
	if (*q)
		qdisc_put(*q);
errout:
	*q = NULL;
	return ERR_PTR(err);
}

/* Release what a block lookup handed out: put @block unless it is NULL or an
 * error pointer, then put @q (if any) with the variant matching @rtnl_held.
 */
static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
			      bool rtnl_held)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block, rtnl_held);

	if (q) {
		if (rtnl_held)
			qdisc_put(q);
		else
			qdisc_put_unlocked(q);
	}
}

/* One entry of block->owner_list: a qdisc bound to the block together with
 * the binder type it was bound with.
 */
struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum flow_block_binder_type binder_type;
};

/* Call netif_keep_dst() on the device of @q when the block has keep_dst set
 * and the binder is neither clsact ingress nor clsact egress.
 */
static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

/* Set keep_dst on @block and apply it to every owner currently on
 * block->owner_list.
 */
void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

/* Record (@q, @binder_type) as an owner of @block.
 * Returns 0 on success or -ENOMEM if the list item cannot be allocated.
 */
static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

/* Remove and free the first owner entry matching (@q, @binder_type).
 * Warns if no such entry exists.
 */
static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			/* Returning right away keeps the non-_safe iterator
			 * from touching the freed item.
			 */
			return;
		}
	}
	WARN_ON(1);
}

/* Get (or create) the block described by @ei for qdisc @q and bind @q to it.
 *
 * A non-zero ei->block_index requests a shared block, which is looked up
 * first and created/inserted only if not found. Afterwards the owner entry,
 * the chain 0 head change callback and the offload binding are set up in that
 * order; any failure unwinds the earlier steps and drops the block reference.
 *
 * Returns 0 and stores the block in *p_block on success, negative errno
 * otherwise.
 */
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block, true);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

/* Default chain head change callback: @priv is the location of the caller's
 * filter chain pointer, which is updated to @tp_head with RCU publication.
 */
static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

/* Convenience wrapper around tcf_block_get_ext() that installs
 * tcf_chain_head_change_dflt() with @p_filter_chain as its private data.
 * Warns if @p_filter_chain is NULL.
 */
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 *
 * Undo tcf_block_get_ext(): remove the chain 0 head change callback and the
 * owner entry, then drop the block reference (with offload unbind for @q).
 * A NULL @block is a no-op.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

/* Put @block using its own block->q and a zeroed ext info.
 * A NULL @block is a no-op.
 */
void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

/* Replay every filter of @block to driver callback @cb.
 *
 * Walks all chains and all protos of each chain, calling ops->reoffload()
 * with @add. When adding, a reoffload error or (if @offload_in_use) a
 * classifier without reoffload support aborts the walk: the references held
 * on the current proto and chain are dropped and the whole block is replayed
 * again with add == false to undo what was installed. Errors while removing
 * are ignored.
 *
 * Caller must hold block->cb_lock.
 */
static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp, *tp_prev;
	int err;

	lockdep_assert_held(&block->cb_lock);

	/* Each iterator step fetches the next element before putting the
	 * previous one, so the element being visited is always referenced.
	 */
	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = __tcf_get_next_proto(chain, NULL); tp;
		     tp_prev = tp,
			     tp = __tcf_get_next_proto(chain, tp),
			     tcf_proto_put(tp_prev, true, NULL)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_proto_put(tp, true, NULL);
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

/* Handle FLOW_BLOCK_BIND: replay existing filters to every callback on
 * bo->cb_list and, if all succeed, splice the callbacks onto the block.
 *
 * On failure, the callbacks that were already played back (the first i
 * entries) are rolled back, and every callback on bo->cb_list is freed.
 *
 * Caller must hold block->cb_lock.
 */
static int tcf_block_bind(struct tcf_block *block,
			  struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;
	int err, i = 0;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry(block_cb, &bo->cb_list, list) {
		err = tcf_block_playback_offloads(block, block_cb->cb,
						  block_cb->cb_priv, true,
						  tcf_block_offload_in_use(block),
						  bo->extack);
		if (err)
			goto err_unroll;
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt++;

		i++;
	}
	list_splice(&bo->cb_list, &block->flow_block.cb_list);

	return 0;

err_unroll:
	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		/* Only the first i callbacks completed playback. */
		if (i-- > 0) {
			list_del(&block_cb->list);
			tcf_block_playback_offloads(block, block_cb->cb,
						    block_cb->cb_priv, false,
						    tcf_block_offload_in_use(block),
						    NULL);
			if (!bo->unlocked_driver_cb)
				block->lockeddevcnt--;
		}
		flow_block_cb_free(block_cb);
	}

	return err;
}

/* Handle FLOW_BLOCK_UNBIND: for every callback on bo->cb_list, replay filter
 * removal, unlink and free the callback, and adjust lockeddevcnt.
 *
 * Caller must hold block->cb_lock.
 */
static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		tcf_block_playback_offloads(block, block_cb->cb,
					    block_cb->cb_priv, false,
					    tcf_block_offload_in_use(block),
					    NULL);
		list_del(&block_cb->list);
		flow_block_cb_free(block_cb);
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt--;
	}
}

/* Dispatch a flow block offload command to the bind/unbind handlers.
 * Unbind always reports 0; unknown commands warn once and return -EOPNOTSUPP.
 */
static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo)
{
	int err;

	switch (bo->command) {
	case FLOW_BLOCK_BIND:
		err = tcf_block_bind(block, bo);
		break;
	case FLOW_BLOCK_UNBIND:
		err = 0;
		tcf_block_unbind(block, bo);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Returns the first non-negative classifier verdict, or TC_ACT_UNSPEC when
 * no classifier on the list produced one. With CONFIG_NET_CLS_ACT,
 * TC_ACT_RECLASSIFY (unless @compat_mode) restarts from @orig_tp and a
 * goto-chain verdict restarts from res->goto_tp; *last_executed_chain is
 * updated in both cases. More than max_reclassify_loop restarts yields
 * TC_ACT_SHOT.
 */
static inline int __tcf_classify(struct sk_buff *skb,
				 const struct tcf_proto *tp,
				 const struct tcf_proto *orig_tp,
				 struct tcf_result *res,
				 bool compat_mode,
				 u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 16;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = skb_protocol(skb, false);
		int err;

		/* Skip classifiers bound to a different protocol unless they
		 * match all protocols.
		 */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			*last_executed_chain = first_tp->chain->index;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}

/* Classify @skb starting at @tp.
 *
 * Without CONFIG_NET_TC_SKB_EXT this is a plain call to __tcf_classify().
 * With it, and when @block is given, a TC_SKB_EXT extension carrying a
 * non-zero chain makes classification start at that chain instead (the
 * packet is shot if the chain is not found) and the extension is consumed.
 * After classification, if tc_skb_ext_tc_enabled() and the lookup missed
 * (TC_ACT_UNSPEC) on a non-zero chain, a new extension is attached recording
 * that chain plus the mru/post_ct/zone fields from the skb's tc control
 * block.
 */
int tcf_classify(struct sk_buff *skb,
		 const struct tcf_block *block,
		 const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
#else
	u32 last_executed_chain = tp ? tp->chain->index : 0;
	const struct tcf_proto *orig_tp = tp;
	struct tc_skb_ext *ext;
	int ret;

	if (block) {
		ext = skb_ext_find(skb, TC_SKB_EXT);

		if (ext && ext->chain) {
			struct tcf_chain *fchain;

			fchain = tcf_chain_lookup_rcu(block, ext->chain);
			if (!fchain)
				return TC_ACT_SHOT;

			/* Consume, so cloned/redirect skbs won't inherit ext */
			skb_ext_del(skb, TC_SKB_EXT);

			tp = rcu_dereference_bh(fchain->filter_chain);
			last_executed_chain = fchain->index;
		}
	}

	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
			     &last_executed_chain);

	if (tc_skb_ext_tc_enabled()) {
		/* If we missed on some chain */
		if (ret == TC_ACT_UNSPEC && last_executed_chain) {
			struct tc_skb_cb *cb = tc_skb_cb(skb);

			ext = tc_skb_ext_alloc(skb);
			if (WARN_ON_ONCE(!ext))
				return TC_ACT_SHOT;
			ext->chain = last_executed_chain;
			ext->mru = cb->mru;
			ext->post_ct = cb->post_ct;
			ext->post_ct_snat = cb->post_ct_snat;
			ext->post_ct_dnat = cb->post_ct_dnat;
			ext->zone = cb->zone;
		}
	}

	return ret;
#endif
}
EXPORT_SYMBOL(tcf_classify);

/* Position inside a chain's proto list: @pprev is the link that points (or
 * would point) at the proto of interest, @next is that proto's successor.
 */
struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

/* Return the proto currently referenced by chain_info->pprev. */
static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

/* Link @tp into @chain at the position described by @chain_info, taking a
 * reference on @tp. Notifies chain head change when inserting at the head.
 * Returns -EAGAIN if the chain is being flushed, 0 otherwise.
 */
static int tcf_chain_tp_insert(struct tcf_chain *chain,
			       struct tcf_chain_info *chain_info,
			       struct tcf_proto *tp)
{
	if (chain->flushing)
		return -EAGAIN;

	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	tcf_proto_get(tp);
	/* Publish only after tp->next is set up. */
	rcu_assign_pointer(*chain_info->pprev, tp);

	return 0;
}

static void tcf_chain_tp_remove(struct tcf_chain *chain, 1685 struct tcf_chain_info *chain_info, 1686 struct tcf_proto *tp) 1687 { 1688 struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); 1689 1690 tcf_proto_mark_delete(tp); 1691 if (tp == chain->filter_chain) 1692 tcf_chain0_head_change(chain, next); 1693 RCU_INIT_POINTER(*chain_info->pprev, next); 1694 } 1695 1696 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, 1697 struct tcf_chain_info *chain_info, 1698 u32 protocol, u32 prio, 1699 bool prio_allocate); 1700 1701 /* Try to insert new proto. 1702 * If proto with specified priority already exists, free new proto 1703 * and return existing one. 1704 */ 1705 1706 static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, 1707 struct tcf_proto *tp_new, 1708 u32 protocol, u32 prio, 1709 bool rtnl_held) 1710 { 1711 struct tcf_chain_info chain_info; 1712 struct tcf_proto *tp; 1713 int err = 0; 1714 1715 mutex_lock(&chain->filter_chain_lock); 1716 1717 if (tcf_proto_exists_destroying(chain, tp_new)) { 1718 mutex_unlock(&chain->filter_chain_lock); 1719 tcf_proto_destroy(tp_new, rtnl_held, false, NULL); 1720 return ERR_PTR(-EAGAIN); 1721 } 1722 1723 tp = tcf_chain_tp_find(chain, &chain_info, 1724 protocol, prio, false); 1725 if (!tp) 1726 err = tcf_chain_tp_insert(chain, &chain_info, tp_new); 1727 mutex_unlock(&chain->filter_chain_lock); 1728 1729 if (tp) { 1730 tcf_proto_destroy(tp_new, rtnl_held, false, NULL); 1731 tp_new = tp; 1732 } else if (err) { 1733 tcf_proto_destroy(tp_new, rtnl_held, false, NULL); 1734 tp_new = ERR_PTR(err); 1735 } 1736 1737 return tp_new; 1738 } 1739 1740 static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, 1741 struct tcf_proto *tp, bool rtnl_held, 1742 struct netlink_ext_ack *extack) 1743 { 1744 struct tcf_chain_info chain_info; 1745 struct tcf_proto *tp_iter; 1746 struct tcf_proto **pprev; 1747 struct tcf_proto *next; 1748 1749 mutex_lock(&chain->filter_chain_lock); 1750 
1751 /* Atomically find and remove tp from chain. */ 1752 for (pprev = &chain->filter_chain; 1753 (tp_iter = tcf_chain_dereference(*pprev, chain)); 1754 pprev = &tp_iter->next) { 1755 if (tp_iter == tp) { 1756 chain_info.pprev = pprev; 1757 chain_info.next = tp_iter->next; 1758 WARN_ON(tp_iter->deleting); 1759 break; 1760 } 1761 } 1762 /* Verify that tp still exists and no new filters were inserted 1763 * concurrently. 1764 * Mark tp for deletion if it is empty. 1765 */ 1766 if (!tp_iter || !tcf_proto_check_delete(tp)) { 1767 mutex_unlock(&chain->filter_chain_lock); 1768 return; 1769 } 1770 1771 tcf_proto_signal_destroying(chain, tp); 1772 next = tcf_chain_dereference(chain_info.next, chain); 1773 if (tp == chain->filter_chain) 1774 tcf_chain0_head_change(chain, next); 1775 RCU_INIT_POINTER(*chain_info.pprev, next); 1776 mutex_unlock(&chain->filter_chain_lock); 1777 1778 tcf_proto_put(tp, rtnl_held, extack); 1779 } 1780 1781 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, 1782 struct tcf_chain_info *chain_info, 1783 u32 protocol, u32 prio, 1784 bool prio_allocate) 1785 { 1786 struct tcf_proto **pprev; 1787 struct tcf_proto *tp; 1788 1789 /* Check the chain for existence of proto-tcf with this priority */ 1790 for (pprev = &chain->filter_chain; 1791 (tp = tcf_chain_dereference(*pprev, chain)); 1792 pprev = &tp->next) { 1793 if (tp->prio >= prio) { 1794 if (tp->prio == prio) { 1795 if (prio_allocate || 1796 (tp->protocol != protocol && protocol)) 1797 return ERR_PTR(-EINVAL); 1798 } else { 1799 tp = NULL; 1800 } 1801 break; 1802 } 1803 } 1804 chain_info->pprev = pprev; 1805 if (tp) { 1806 chain_info->next = tp->next; 1807 tcf_proto_get(tp); 1808 } else { 1809 chain_info->next = NULL; 1810 } 1811 return tp; 1812 } 1813 1814 static int tcf_fill_node(struct net *net, struct sk_buff *skb, 1815 struct tcf_proto *tp, struct tcf_block *block, 1816 struct Qdisc *q, u32 parent, void *fh, 1817 u32 portid, u32 seq, u16 flags, int event, 1818 bool terse_dump, 
bool rtnl_held) 1819 { 1820 struct tcmsg *tcm; 1821 struct nlmsghdr *nlh; 1822 unsigned char *b = skb_tail_pointer(skb); 1823 1824 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 1825 if (!nlh) 1826 goto out_nlmsg_trim; 1827 tcm = nlmsg_data(nlh); 1828 tcm->tcm_family = AF_UNSPEC; 1829 tcm->tcm__pad1 = 0; 1830 tcm->tcm__pad2 = 0; 1831 if (q) { 1832 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 1833 tcm->tcm_parent = parent; 1834 } else { 1835 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; 1836 tcm->tcm_block_index = block->index; 1837 } 1838 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 1839 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 1840 goto nla_put_failure; 1841 if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) 1842 goto nla_put_failure; 1843 if (!fh) { 1844 tcm->tcm_handle = 0; 1845 } else if (terse_dump) { 1846 if (tp->ops->terse_dump) { 1847 if (tp->ops->terse_dump(net, tp, fh, skb, tcm, 1848 rtnl_held) < 0) 1849 goto nla_put_failure; 1850 } else { 1851 goto cls_op_not_supp; 1852 } 1853 } else { 1854 if (tp->ops->dump && 1855 tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) 1856 goto nla_put_failure; 1857 } 1858 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1859 return skb->len; 1860 1861 out_nlmsg_trim: 1862 nla_put_failure: 1863 cls_op_not_supp: 1864 nlmsg_trim(skb, b); 1865 return -1; 1866 } 1867 1868 static int tfilter_notify(struct net *net, struct sk_buff *oskb, 1869 struct nlmsghdr *n, struct tcf_proto *tp, 1870 struct tcf_block *block, struct Qdisc *q, 1871 u32 parent, void *fh, int event, bool unicast, 1872 bool rtnl_held) 1873 { 1874 struct sk_buff *skb; 1875 u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; 1876 int err = 0; 1877 1878 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1879 if (!skb) 1880 return -ENOBUFS; 1881 1882 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, 1883 n->nlmsg_seq, n->nlmsg_flags, event, 1884 false, rtnl_held) <= 0) { 1885 kfree_skb(skb); 1886 return -EINVAL; 1887 } 1888 1889 if (unicast) 1890 err = rtnl_unicast(skb, net, portid); 1891 else 1892 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1893 n->nlmsg_flags & NLM_F_ECHO); 1894 return err; 1895 } 1896 1897 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, 1898 struct nlmsghdr *n, struct tcf_proto *tp, 1899 struct tcf_block *block, struct Qdisc *q, 1900 u32 parent, void *fh, bool unicast, bool *last, 1901 bool rtnl_held, struct netlink_ext_ack *extack) 1902 { 1903 struct sk_buff *skb; 1904 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1905 int err; 1906 1907 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1908 if (!skb) 1909 return -ENOBUFS; 1910 1911 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, 1912 n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, 1913 false, rtnl_held) <= 0) { 1914 NL_SET_ERR_MSG(extack, "Failed to build del event notification"); 1915 kfree_skb(skb); 1916 return -EINVAL; 1917 } 1918 1919 err = tp->ops->delete(tp, fh, last, rtnl_held, extack); 1920 if (err) { 1921 kfree_skb(skb); 1922 return err; 1923 } 1924 1925 if (unicast) 1926 err = rtnl_unicast(skb, net, portid); 1927 else 1928 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1929 n->nlmsg_flags & NLM_F_ECHO); 1930 if (err < 0) 1931 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); 1932 1933 return err; 1934 } 1935 1936 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, 1937 struct tcf_block *block, struct Qdisc *q, 1938 u32 parent, struct nlmsghdr *n, 1939 struct tcf_chain *chain, int event) 1940 { 1941 struct tcf_proto *tp; 1942 1943 for (tp = tcf_get_next_proto(chain, NULL); 1944 tp; tp = 
tcf_get_next_proto(chain, tp)) 1945 tfilter_notify(net, oskb, n, tp, block, 1946 q, parent, NULL, event, false, true); 1947 } 1948 1949 static void tfilter_put(struct tcf_proto *tp, void *fh) 1950 { 1951 if (tp->ops->put && fh) 1952 tp->ops->put(tp, fh); 1953 } 1954 1955 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, 1956 struct netlink_ext_ack *extack) 1957 { 1958 struct net *net = sock_net(skb->sk); 1959 struct nlattr *tca[TCA_MAX + 1]; 1960 char name[IFNAMSIZ]; 1961 struct tcmsg *t; 1962 u32 protocol; 1963 u32 prio; 1964 bool prio_allocate; 1965 u32 parent; 1966 u32 chain_index; 1967 struct Qdisc *q; 1968 struct tcf_chain_info chain_info; 1969 struct tcf_chain *chain; 1970 struct tcf_block *block; 1971 struct tcf_proto *tp; 1972 unsigned long cl; 1973 void *fh; 1974 int err; 1975 int tp_created; 1976 bool rtnl_held = false; 1977 u32 flags; 1978 1979 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1980 return -EPERM; 1981 1982 replay: 1983 tp_created = 0; 1984 1985 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 1986 rtm_tca_policy, extack); 1987 if (err < 0) 1988 return err; 1989 1990 t = nlmsg_data(n); 1991 protocol = TC_H_MIN(t->tcm_info); 1992 prio = TC_H_MAJ(t->tcm_info); 1993 prio_allocate = false; 1994 parent = t->tcm_parent; 1995 tp = NULL; 1996 cl = 0; 1997 block = NULL; 1998 q = NULL; 1999 chain = NULL; 2000 flags = 0; 2001 2002 if (prio == 0) { 2003 /* If no priority is provided by the user, 2004 * we allocate one. 2005 */ 2006 if (n->nlmsg_flags & NLM_F_CREATE) { 2007 prio = TC_H_MAKE(0x80000000U, 0U); 2008 prio_allocate = true; 2009 } else { 2010 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); 2011 return -ENOENT; 2012 } 2013 } 2014 2015 /* Find head of filter chain. 
*/ 2016 2017 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); 2018 if (err) 2019 return err; 2020 2021 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 2022 NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); 2023 err = -EINVAL; 2024 goto errout; 2025 } 2026 2027 /* Take rtnl mutex if rtnl_held was set to true on previous iteration, 2028 * block is shared (no qdisc found), qdisc is not unlocked, classifier 2029 * type is not specified, classifier is not unlocked. 2030 */ 2031 if (rtnl_held || 2032 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || 2033 !tcf_proto_is_unlocked(name)) { 2034 rtnl_held = true; 2035 rtnl_lock(); 2036 } 2037 2038 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); 2039 if (err) 2040 goto errout; 2041 2042 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, 2043 extack); 2044 if (IS_ERR(block)) { 2045 err = PTR_ERR(block); 2046 goto errout; 2047 } 2048 block->classid = parent; 2049 2050 chain_index = tca[TCA_CHAIN] ? 
nla_get_u32(tca[TCA_CHAIN]) : 0; 2051 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2052 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2053 err = -EINVAL; 2054 goto errout; 2055 } 2056 chain = tcf_chain_get(block, chain_index, true); 2057 if (!chain) { 2058 NL_SET_ERR_MSG(extack, "Cannot create specified filter chain"); 2059 err = -ENOMEM; 2060 goto errout; 2061 } 2062 2063 mutex_lock(&chain->filter_chain_lock); 2064 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 2065 prio, prio_allocate); 2066 if (IS_ERR(tp)) { 2067 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); 2068 err = PTR_ERR(tp); 2069 goto errout_locked; 2070 } 2071 2072 if (tp == NULL) { 2073 struct tcf_proto *tp_new = NULL; 2074 2075 if (chain->flushing) { 2076 err = -EAGAIN; 2077 goto errout_locked; 2078 } 2079 2080 /* Proto-tcf does not exist, create new one */ 2081 2082 if (tca[TCA_KIND] == NULL || !protocol) { 2083 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); 2084 err = -EINVAL; 2085 goto errout_locked; 2086 } 2087 2088 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 2089 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); 2090 err = -ENOENT; 2091 goto errout_locked; 2092 } 2093 2094 if (prio_allocate) 2095 prio = tcf_auto_prio(tcf_chain_tp_prev(chain, 2096 &chain_info)); 2097 2098 mutex_unlock(&chain->filter_chain_lock); 2099 tp_new = tcf_proto_create(name, protocol, prio, chain, 2100 rtnl_held, extack); 2101 if (IS_ERR(tp_new)) { 2102 err = PTR_ERR(tp_new); 2103 goto errout_tp; 2104 } 2105 2106 tp_created = 1; 2107 tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, 2108 rtnl_held); 2109 if (IS_ERR(tp)) { 2110 err = PTR_ERR(tp); 2111 goto errout_tp; 2112 } 2113 } else { 2114 mutex_unlock(&chain->filter_chain_lock); 2115 } 2116 2117 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { 2118 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); 2119 
err = -EINVAL; 2120 goto errout; 2121 } 2122 2123 fh = tp->ops->get(tp, t->tcm_handle); 2124 2125 if (!fh) { 2126 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 2127 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); 2128 err = -ENOENT; 2129 goto errout; 2130 } 2131 } else if (n->nlmsg_flags & NLM_F_EXCL) { 2132 tfilter_put(tp, fh); 2133 NL_SET_ERR_MSG(extack, "Filter already exists"); 2134 err = -EEXIST; 2135 goto errout; 2136 } 2137 2138 if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { 2139 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); 2140 err = -EINVAL; 2141 goto errout; 2142 } 2143 2144 if (!(n->nlmsg_flags & NLM_F_CREATE)) 2145 flags |= TCA_ACT_FLAGS_REPLACE; 2146 if (!rtnl_held) 2147 flags |= TCA_ACT_FLAGS_NO_RTNL; 2148 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, 2149 flags, extack); 2150 if (err == 0) { 2151 tfilter_notify(net, skb, n, tp, block, q, parent, fh, 2152 RTM_NEWTFILTER, false, rtnl_held); 2153 tfilter_put(tp, fh); 2154 /* q pointer is NULL for shared blocks */ 2155 if (q) 2156 q->flags &= ~TCQ_F_CAN_BYPASS; 2157 } 2158 2159 errout: 2160 if (err && tp_created) 2161 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); 2162 errout_tp: 2163 if (chain) { 2164 if (tp && !IS_ERR(tp)) 2165 tcf_proto_put(tp, rtnl_held, NULL); 2166 if (!tp_created) 2167 tcf_chain_put(chain); 2168 } 2169 tcf_block_release(q, block, rtnl_held); 2170 2171 if (rtnl_held) 2172 rtnl_unlock(); 2173 2174 if (err == -EAGAIN) { 2175 /* Take rtnl lock in case EAGAIN is caused by concurrent flush 2176 * of target chain. 2177 */ 2178 rtnl_held = true; 2179 /* Replay the request. 
*/ 2180 goto replay; 2181 } 2182 return err; 2183 2184 errout_locked: 2185 mutex_unlock(&chain->filter_chain_lock); 2186 goto errout; 2187 } 2188 2189 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, 2190 struct netlink_ext_ack *extack) 2191 { 2192 struct net *net = sock_net(skb->sk); 2193 struct nlattr *tca[TCA_MAX + 1]; 2194 char name[IFNAMSIZ]; 2195 struct tcmsg *t; 2196 u32 protocol; 2197 u32 prio; 2198 u32 parent; 2199 u32 chain_index; 2200 struct Qdisc *q = NULL; 2201 struct tcf_chain_info chain_info; 2202 struct tcf_chain *chain = NULL; 2203 struct tcf_block *block = NULL; 2204 struct tcf_proto *tp = NULL; 2205 unsigned long cl = 0; 2206 void *fh = NULL; 2207 int err; 2208 bool rtnl_held = false; 2209 2210 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 2211 return -EPERM; 2212 2213 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 2214 rtm_tca_policy, extack); 2215 if (err < 0) 2216 return err; 2217 2218 t = nlmsg_data(n); 2219 protocol = TC_H_MIN(t->tcm_info); 2220 prio = TC_H_MAJ(t->tcm_info); 2221 parent = t->tcm_parent; 2222 2223 if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) { 2224 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); 2225 return -ENOENT; 2226 } 2227 2228 /* Find head of filter chain. */ 2229 2230 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); 2231 if (err) 2232 return err; 2233 2234 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 2235 NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); 2236 err = -EINVAL; 2237 goto errout; 2238 } 2239 /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc 2240 * found), qdisc is not unlocked, classifier type is not specified, 2241 * classifier is not unlocked. 
2242 */ 2243 if (!prio || 2244 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || 2245 !tcf_proto_is_unlocked(name)) { 2246 rtnl_held = true; 2247 rtnl_lock(); 2248 } 2249 2250 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); 2251 if (err) 2252 goto errout; 2253 2254 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, 2255 extack); 2256 if (IS_ERR(block)) { 2257 err = PTR_ERR(block); 2258 goto errout; 2259 } 2260 2261 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 2262 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2263 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2264 err = -EINVAL; 2265 goto errout; 2266 } 2267 chain = tcf_chain_get(block, chain_index, false); 2268 if (!chain) { 2269 /* User requested flush on non-existent chain. Nothing to do, 2270 * so just return success. 2271 */ 2272 if (prio == 0) { 2273 err = 0; 2274 goto errout; 2275 } 2276 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); 2277 err = -ENOENT; 2278 goto errout; 2279 } 2280 2281 if (prio == 0) { 2282 tfilter_notify_chain(net, skb, block, q, parent, n, 2283 chain, RTM_DELTFILTER); 2284 tcf_chain_flush(chain, rtnl_held); 2285 err = 0; 2286 goto errout; 2287 } 2288 2289 mutex_lock(&chain->filter_chain_lock); 2290 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 2291 prio, false); 2292 if (!tp || IS_ERR(tp)) { 2293 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); 2294 err = tp ? 
/* Netlink "get filter" handler: look up one filter identified by parent,
 * chain index, priority, protocol and handle, and report it back through
 * tfilter_notify() as an RTM_NEWTFILTER message.
 *
 * @skb:    request skb; supplies the socket/netns and is passed on to
 *          tfilter_notify()
 * @n:      netlink header of the request (payload is a struct tcmsg)
 * @extack: receives a human readable reason on failure
 *
 * Returns 0 on success or a negative errno. A priority of zero is rejected
 * with -ENOENT, a missing chain with -EINVAL, a missing filter or handle with
 * -ENOENT.
 */
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	/* tcm_info packs the priority (major part) and protocol (minor part). */
	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* From here on every failure must leave through errout so that
	 * whatever was acquired so far is released.
	 */
	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}
	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
	 * unlocked, classifier type is not specified, classifier is not
	 * unlocked.
	 */
	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	/* Chain index defaults to 0 when TCA_CHAIN is absent. */
	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	/* NOTE(review): the 'false' argument presumably means "do not create
	 * the chain if it does not exist" - confirm against tcf_chain_get().
	 */
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	/* filter_chain_lock is only held across the tp lookup itself. */
	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	/* The lookup result can be NULL (not found) or an ERR_PTR. */
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true, rtnl_held);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

	/* Reached on both branches above, i.e. also when fh is NULL. */
	tfilter_put(tp, fh);
errout:
	/* tp can only be valid if the chain was found, hence the nesting. */
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;
}

/* Walker context handed to tp->ops->walk() during a filter dump.
 * tcf_node_dump() casts the struct tcf_walker pointer it receives back to
 * this type, so @w has to remain the first member.
 */
struct tcf_dump_args {
	struct tcf_walker w;		/* generic walker state (fn/stop/skip/count/cookie) */
	struct sk_buff *skb;		/* dump skb being filled */
	struct netlink_callback *cb;	/* dump callback; source of portid and seq */
	struct tcf_block *block;	/* block the dumped filters belong to */
	struct Qdisc *q;		/* qdisc passed to tcf_fill_node() */
	u32 parent;			/* parent passed to tcf_fill_node() */
	bool terse_dump;		/* forwarded to tcf_fill_node() as its terse flag */
};
/* Dump all classifier instances (tcf_proto) of one chain, and the filters
 * inside each of them, into @skb as part of a resumable netlink dump.
 *
 * @chain:       chain to iterate
 * @q, @parent:  passed through to tcf_fill_node()
 * @skb:         dump skb being filled
 * @cb:          dump callback; cb->args[1..] carry the resume state
 * @index_start: index of the first tp that still has to be dumped
 * @p_index:     running tp index, shared across chains by the caller
 * @terse:       stored in the walker context for the per-filter dump
 *
 * Resume state kept in @cb:
 *   args[1] == 0  the header message for the current tp is not emitted yet
 *   args[1] - 1   number of filters of the current tp to skip in the walker
 *   args[2]       walker cookie
 *
 * Returns true when the whole chain was processed, false when filling a
 * message failed or the walker asked to stop.
 */
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index, bool terse)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_proto *tp, *tp_prev;
	struct tcf_dump_args arg;

	/* The advance expression fetches the next tp before putting the
	 * previous one, and bumps the shared index. It also runs on every
	 * 'continue' below.
	 */
	for (tp = __tcf_get_next_proto(chain, NULL);
	     tp;
	     tp_prev = tp,
		     tp = __tcf_get_next_proto(chain, tp),
		     tcf_proto_put(tp_prev, true, NULL),
		     (*p_index)++) {
		/* Already dumped in an earlier pass. */
		if (*p_index < index_start)
			continue;
		/* Optional priority / protocol filters from the request. */
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		/* Moving past the tp we resumed at: its per-tp resume state
		 * (everything after args[0]) no longer applies.
		 */
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			/* Header message for the tp itself (NULL handle). */
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER, false, true) <= 0)
				goto errout;
			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		arg.terse_dump = terse;
		tp->ops->walk(tp, &arg.w, true);
		/* Save the walker position so that the next pass can resume. */
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			goto errout;
	}
	return true;

errout:
	/* Leaving the loop early skips the advance expression, so the
	 * current tp has to be put here.
	 */
	tcf_proto_put(tp, true, NULL);
	return false;
}
/* Netlink dump handler for filters: walk every chain of the block selected
 * by the request and dump its filters via tcf_chain_dump().
 *
 * The block is selected either directly by index (tcm_ifindex ==
 * TCM_IFINDEX_MAGIC_BLOCK) or through the device/qdisc/class named in the
 * request. cb->args[0] holds the index at which the next pass resumes.
 *
 * Returns skb->len, or the error when nothing at all was written to @skb.
 *
 * called with RTNL
 */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	bool terse_dump = false;
	long index_start;
	long index;
	u32 parent;
	int err;

	/* Request too short to carry a tcmsg: nothing to dump. */
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     tcf_tfilter_dump_policy, cb->extack);
	if (err)
		return err;

	if (tca[TCA_DUMP_FLAGS]) {
		struct nla_bitfield32 flags =
			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);

		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		/* Reference taken here is dropped after the chain loop. */
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		/* No parent given: use the device's qdisc. */
		parent = tcm->tcm_parent;
		if (!parent)
			q = rtnl_dereference(dev->qdisc);
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		/* A non-zero minor part names a class within the qdisc. */
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		parent = block->classid;
		/* For a shared block no qdisc is reported to the dump code. */
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	/* The advance expression fetches the next chain before putting the
	 * previous one.
	 */
	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index, terse_dump)) {
			/* 'break' skips the advance expression: put here. */
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}
2675 priv = tmplt_priv; 2676 2677 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 2678 if (!nlh) 2679 goto out_nlmsg_trim; 2680 tcm = nlmsg_data(nlh); 2681 tcm->tcm_family = AF_UNSPEC; 2682 tcm->tcm__pad1 = 0; 2683 tcm->tcm__pad2 = 0; 2684 tcm->tcm_handle = 0; 2685 if (block->q) { 2686 tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; 2687 tcm->tcm_parent = block->q->handle; 2688 } else { 2689 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; 2690 tcm->tcm_block_index = block->index; 2691 } 2692 2693 if (nla_put_u32(skb, TCA_CHAIN, chain_index)) 2694 goto nla_put_failure; 2695 2696 if (ops) { 2697 if (nla_put_string(skb, TCA_KIND, ops->kind)) 2698 goto nla_put_failure; 2699 if (ops->tmplt_dump(skb, net, priv) < 0) 2700 goto nla_put_failure; 2701 } 2702 2703 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 2704 return skb->len; 2705 2706 out_nlmsg_trim: 2707 nla_put_failure: 2708 nlmsg_trim(skb, b); 2709 return -EMSGSIZE; 2710 } 2711 2712 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, 2713 u32 seq, u16 flags, int event, bool unicast) 2714 { 2715 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 2716 struct tcf_block *block = chain->block; 2717 struct net *net = block->net; 2718 struct sk_buff *skb; 2719 int err = 0; 2720 2721 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2722 if (!skb) 2723 return -ENOBUFS; 2724 2725 if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, 2726 chain->index, net, skb, block, portid, 2727 seq, flags, event) <= 0) { 2728 kfree_skb(skb); 2729 return -EINVAL; 2730 } 2731 2732 if (unicast) 2733 err = rtnl_unicast(skb, net, portid); 2734 else 2735 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 2736 flags & NLM_F_ECHO); 2737 2738 return err; 2739 } 2740 2741 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, 2742 void *tmplt_priv, u32 chain_index, 2743 struct tcf_block *block, struct sk_buff *oskb, 2744 u32 seq, u16 flags, bool unicast) 2745 { 2746 u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; 2747 struct net *net = block->net; 2748 struct sk_buff *skb; 2749 2750 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2751 if (!skb) 2752 return -ENOBUFS; 2753 2754 if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, 2755 block, portid, seq, flags, RTM_DELCHAIN) <= 0) { 2756 kfree_skb(skb); 2757 return -EINVAL; 2758 } 2759 2760 if (unicast) 2761 return rtnl_unicast(skb, net, portid); 2762 2763 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); 2764 } 2765 2766 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, 2767 struct nlattr **tca, 2768 struct netlink_ext_ack *extack) 2769 { 2770 const struct tcf_proto_ops *ops; 2771 char name[IFNAMSIZ]; 2772 void *tmplt_priv; 2773 2774 /* If kind is not set, user did not specify template. */ 2775 if (!tca[TCA_KIND]) 2776 return 0; 2777 2778 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 2779 NL_SET_ERR_MSG(extack, "Specified TC chain template name too long"); 2780 return -EINVAL; 2781 } 2782 2783 ops = tcf_proto_lookup_ops(name, true, extack); 2784 if (IS_ERR(ops)) 2785 return PTR_ERR(ops); 2786 if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { 2787 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); 2788 return -EOPNOTSUPP; 2789 } 2790 2791 tmplt_priv = ops->tmplt_create(net, chain, tca, extack); 2792 if (IS_ERR(tmplt_priv)) { 2793 module_put(ops->owner); 2794 return PTR_ERR(tmplt_priv); 2795 } 2796 chain->tmplt_ops = ops; 2797 chain->tmplt_priv = tmplt_priv; 2798 return 0; 2799 } 2800 2801 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, 2802 void *tmplt_priv) 2803 { 2804 /* If template ops are set, no work to do for us. 
*/ 2805 if (!tmplt_ops) 2806 return; 2807 2808 tmplt_ops->tmplt_destroy(tmplt_priv); 2809 module_put(tmplt_ops->owner); 2810 } 2811 2812 /* Add/delete/get a chain */ 2813 2814 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, 2815 struct netlink_ext_ack *extack) 2816 { 2817 struct net *net = sock_net(skb->sk); 2818 struct nlattr *tca[TCA_MAX + 1]; 2819 struct tcmsg *t; 2820 u32 parent; 2821 u32 chain_index; 2822 struct Qdisc *q; 2823 struct tcf_chain *chain; 2824 struct tcf_block *block; 2825 unsigned long cl; 2826 int err; 2827 2828 if (n->nlmsg_type != RTM_GETCHAIN && 2829 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 2830 return -EPERM; 2831 2832 replay: 2833 q = NULL; 2834 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 2835 rtm_tca_policy, extack); 2836 if (err < 0) 2837 return err; 2838 2839 t = nlmsg_data(n); 2840 parent = t->tcm_parent; 2841 cl = 0; 2842 2843 block = tcf_block_find(net, &q, &parent, &cl, 2844 t->tcm_ifindex, t->tcm_block_index, extack); 2845 if (IS_ERR(block)) 2846 return PTR_ERR(block); 2847 2848 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 2849 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2850 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2851 err = -EINVAL; 2852 goto errout_block; 2853 } 2854 2855 mutex_lock(&block->lock); 2856 chain = tcf_chain_lookup(block, chain_index); 2857 if (n->nlmsg_type == RTM_NEWCHAIN) { 2858 if (chain) { 2859 if (tcf_chain_held_by_acts_only(chain)) { 2860 /* The chain exists only because there is 2861 * some action referencing it. 
2862 */ 2863 tcf_chain_hold(chain); 2864 } else { 2865 NL_SET_ERR_MSG(extack, "Filter chain already exists"); 2866 err = -EEXIST; 2867 goto errout_block_locked; 2868 } 2869 } else { 2870 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 2871 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); 2872 err = -ENOENT; 2873 goto errout_block_locked; 2874 } 2875 chain = tcf_chain_create(block, chain_index); 2876 if (!chain) { 2877 NL_SET_ERR_MSG(extack, "Failed to create filter chain"); 2878 err = -ENOMEM; 2879 goto errout_block_locked; 2880 } 2881 } 2882 } else { 2883 if (!chain || tcf_chain_held_by_acts_only(chain)) { 2884 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); 2885 err = -EINVAL; 2886 goto errout_block_locked; 2887 } 2888 tcf_chain_hold(chain); 2889 } 2890 2891 if (n->nlmsg_type == RTM_NEWCHAIN) { 2892 /* Modifying chain requires holding parent block lock. In case 2893 * the chain was successfully added, take a reference to the 2894 * chain. This ensures that an empty chain does not disappear at 2895 * the end of this function. 2896 */ 2897 tcf_chain_hold(chain); 2898 chain->explicitly_created = true; 2899 } 2900 mutex_unlock(&block->lock); 2901 2902 switch (n->nlmsg_type) { 2903 case RTM_NEWCHAIN: 2904 err = tc_chain_tmplt_add(chain, net, tca, extack); 2905 if (err) { 2906 tcf_chain_put_explicitly_created(chain); 2907 goto errout; 2908 } 2909 2910 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, 2911 RTM_NEWCHAIN, false); 2912 break; 2913 case RTM_DELCHAIN: 2914 tfilter_notify_chain(net, skb, block, q, parent, n, 2915 chain, RTM_DELTFILTER); 2916 /* Flush the chain first as the user requested chain removal. */ 2917 tcf_chain_flush(chain, true); 2918 /* In case the chain was successfully deleted, put a reference 2919 * to the chain previously taken during addition. 
/* Netlink dump handler for chains: emit one RTM_NEWCHAIN message for every
 * chain of the block selected by the request.
 *
 * The block is selected either directly by index (tcm_ifindex ==
 * TCM_IFINDEX_MAGIC_BLOCK) or through the device/qdisc/class named in the
 * request. cb->args[0] holds the index at which the next pass resumes.
 *
 * Returns skb->len, or the error when nothing at all was written to @skb.
 *
 * called with RTNL
 */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_chain *chain;
	long index_start;
	long index;
	int err;

	/* Request too short to carry a tcmsg: nothing to dump. */
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		/* Reference taken here is dropped after the chain loop. */
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		/* No parent given: use the device's qdisc. */
		if (!tcm->tcm_parent)
			q = rtnl_dereference(dev->qdisc);
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));

		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		/* A non-zero minor part names a class within the qdisc. */
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	/* The chain list is walked under block->lock. */
	mutex_lock(&block->lock);
	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		/* Already dumped in an earlier pass. */
		if (index < index_start) {
			index++;
			continue;
		}
		/* Such chains are skipped and do not consume an index. */
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		/* Stop here; index still points at the chain to retry. */
		if (err <= 0)
			break;
		index++;
	}
	mutex_unlock(&block->lock);

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

/* Release the actions attached to @exts.
 *
 * With CONFIG_NET_CLS_ACT the action array, if present, is handed to
 * tcf_action_destroy() with TCA_ACT_UNBIND and then freed, and the action
 * count is reset to zero. Without CONFIG_NET_CLS_ACT this is a no-op.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->actions) {
		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
		kfree(exts->actions);
	}
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
PTR_ERR(a_o); 3073 flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; 3074 act = tcf_action_init_1(net, tp, tb[exts->police], 3075 rate_tlv, a_o, init_res, flags, 3076 extack); 3077 module_put(a_o->owner); 3078 if (IS_ERR(act)) 3079 return PTR_ERR(act); 3080 3081 act->type = exts->type = TCA_OLD_COMPAT; 3082 exts->actions[0] = act; 3083 exts->nr_actions = 1; 3084 tcf_idr_insert_many(exts->actions); 3085 } else if (exts->action && tb[exts->action]) { 3086 int err; 3087 3088 flags |= TCA_ACT_FLAGS_BIND; 3089 err = tcf_action_init(net, tp, tb[exts->action], 3090 rate_tlv, exts->actions, init_res, 3091 &attr_size, flags, fl_flags, 3092 extack); 3093 if (err < 0) 3094 return err; 3095 exts->nr_actions = err; 3096 } 3097 } 3098 #else 3099 if ((exts->action && tb[exts->action]) || 3100 (exts->police && tb[exts->police])) { 3101 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); 3102 return -EOPNOTSUPP; 3103 } 3104 #endif 3105 3106 return 0; 3107 } 3108 EXPORT_SYMBOL(tcf_exts_validate_ex); 3109 3110 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, 3111 struct nlattr *rate_tlv, struct tcf_exts *exts, 3112 u32 flags, struct netlink_ext_ack *extack) 3113 { 3114 return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts, 3115 flags, 0, extack); 3116 } 3117 EXPORT_SYMBOL(tcf_exts_validate); 3118 3119 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) 3120 { 3121 #ifdef CONFIG_NET_CLS_ACT 3122 struct tcf_exts old = *dst; 3123 3124 *dst = *src; 3125 tcf_exts_destroy(&old); 3126 #endif 3127 } 3128 EXPORT_SYMBOL(tcf_exts_change); 3129 3130 #ifdef CONFIG_NET_CLS_ACT 3131 static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) 3132 { 3133 if (exts->nr_actions == 0) 3134 return NULL; 3135 else 3136 return exts->actions[0]; 3137 } 3138 #endif 3139 3140 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) 3141 { 3142 #ifdef CONFIG_NET_CLS_ACT 3143 struct nlattr *nest; 
/* Terse variant of the extension dump: nest the actions of @exts under the
 * exts->action attribute, calling tcf_action_dump() with its last argument
 * set to true.
 *
 * Returns 0 on success or when there is nothing to dump, -1 when the nest
 * could not be started or the action dump failed (the nest is cancelled in
 * that case). Always 0 without CONFIG_NET_CLS_ACT.
 */
int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		nest = nla_nest_start_noflag(skb, exts->action);
		if (nest &&
		    tcf_action_dump(skb, exts->actions, 0, 0, true) >= 0) {
			nla_nest_end(skb, nest);
			return 0;
		}

		nla_nest_cancel(skb, nest);
		return -1;
	}
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_terse_dump);

/* Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 when tcf_action_copy_stats() fails, 0 otherwise, including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled.
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *first = tcf_exts_first_act(exts);

	if (!first)
		return 0;
	if (tcf_action_copy_stats(skb, first, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
*block, u32 *flags) 3219 { 3220 if (*flags & TCA_CLS_FLAGS_IN_HW) 3221 return; 3222 *flags |= TCA_CLS_FLAGS_IN_HW; 3223 atomic_inc(&block->offloadcnt); 3224 } 3225 3226 static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) 3227 { 3228 if (!(*flags & TCA_CLS_FLAGS_IN_HW)) 3229 return; 3230 *flags &= ~TCA_CLS_FLAGS_IN_HW; 3231 atomic_dec(&block->offloadcnt); 3232 } 3233 3234 static void tc_cls_offload_cnt_update(struct tcf_block *block, 3235 struct tcf_proto *tp, u32 *cnt, 3236 u32 *flags, u32 diff, bool add) 3237 { 3238 lockdep_assert_held(&block->cb_lock); 3239 3240 spin_lock(&tp->lock); 3241 if (add) { 3242 if (!*cnt) 3243 tcf_block_offload_inc(block, flags); 3244 *cnt += diff; 3245 } else { 3246 *cnt -= diff; 3247 if (!*cnt) 3248 tcf_block_offload_dec(block, flags); 3249 } 3250 spin_unlock(&tp->lock); 3251 } 3252 3253 static void 3254 tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, 3255 u32 *cnt, u32 *flags) 3256 { 3257 lockdep_assert_held(&block->cb_lock); 3258 3259 spin_lock(&tp->lock); 3260 tcf_block_offload_dec(block, flags); 3261 *cnt = 0; 3262 spin_unlock(&tp->lock); 3263 } 3264 3265 static int 3266 __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, 3267 void *type_data, bool err_stop) 3268 { 3269 struct flow_block_cb *block_cb; 3270 int ok_count = 0; 3271 int err; 3272 3273 list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { 3274 err = block_cb->cb(type, type_data, block_cb->cb_priv); 3275 if (err) { 3276 if (err_stop) 3277 return err; 3278 } else { 3279 ok_count++; 3280 } 3281 } 3282 return ok_count; 3283 } 3284 3285 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, 3286 void *type_data, bool err_stop, bool rtnl_held) 3287 { 3288 bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; 3289 int ok_count; 3290 3291 retry: 3292 if (take_rtnl) 3293 rtnl_lock(); 3294 down_read(&block->cb_lock); 3295 /* Need to obtain rtnl lock if block is bound to devs that 
/* Non-destructive filter add. If filter that wasn't already in hardware is
 * successfully offloaded, increment block offloads counter. On failure,
 * previously offloaded filter is considered to be intact and offloads counter
 * is not decremented.
 *
 * @block:       block whose registered callbacks are invoked
 * @tp:          classifier instance the filter belongs to
 * @type:        setup type passed to the callbacks
 * @type_data:   type specific data passed to the callbacks and to hw_add()
 * @err_stop:    fail on the first callback error, and refuse up front when
 *               the block has devices counted in nooffloaddevcnt
 * @flags:       filter flags; TCA_CLS_FLAGS_IN_HW is updated here
 * @in_hw_count: filter's in-hardware counter, updated here
 * @rtnl_held:   caller already holds rtnl
 *
 * Returns 0 on success (regardless of how many callbacks accepted the
 * filter) or a negative errno.
 */

int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
		    enum tc_setup_type type, void *type_data, bool err_stop,
		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	/* Lockless first guess; re-validated under cb_lock below. */
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		/* Guess was wrong: drop cb_lock and start over, rtnl first. */
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ok_count = -EOPNOTSUPP;
		goto err_unlock;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ok_count < 0)
		goto err_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ok_count > 0)
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
					  ok_count, true);
err_unlock:
	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	/* A positive callback count is reported to the caller as plain 0. */
	return min(ok_count, 0);
}
EXPORT_SYMBOL(tc_setup_cb_add);
/* Destroy filter and decrement block offload counter, if filter was previously
 * offloaded.
 *
 * The callbacks are invoked first; afterwards the filter's in-hardware state
 * is reset and the classifier's hw_del() hook, if any, is called. Both steps
 * run even when a callback reported an error.
 *
 * Returns 0 on success or the negative callback error.
 */

int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	/* Lockless first guess; re-validated under cb_lock below. */
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		/* Guess was wrong: drop cb_lock and start over, rtnl first. */
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);

	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	/* A positive callback count is reported to the caller as plain 0. */
	return min(ok_count, 0);
}
EXPORT_SYMBOL(tc_setup_cb_destroy);

/* Replay one filter towards a single callback @cb and account the result.
 *
 * When the callback succeeds, the filter's in-hardware counter is moved by
 * one in the direction given by @add. A callback error is only propagated
 * when adding a filter for which tc_skip_sw() is true; otherwise it is
 * ignored and 0 is returned.
 *
 * NOTE(review): tc_cls_offload_cnt_update() asserts block->cb_lock, so the
 * caller is expected to hold it - confirm at the call sites.
 */
int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
			  bool add, flow_setup_cb_t *cb,
			  enum tc_setup_type type, void *type_data,
			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
{
	int err = cb(type, type_data, cb_priv);

	if (err) {
		if (add && tc_skip_sw(*flags))
			return err;
	} else {
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
					  add);
	}

	return 0;
}
EXPORT_SYMBOL(tc_setup_cb_reoffload);

/* Give @entry its own copy of the cookie attached to @act, if there is one.
 *
 * The action's cookie is read under rcu_read_lock(), which is why the copy
 * is allocated with GFP_ATOMIC. entry->cookie is left untouched when the
 * action has no cookie.
 *
 * Returns 0 on success, -ENOMEM when the copy could not be created.
 */
static int tcf_act_get_cookie(struct flow_action_entry *entry,
			      const struct tc_action *act)
{
	struct tc_cookie *cookie;
	int err = 0;

	rcu_read_lock();
	cookie = rcu_dereference(act->act_cookie);
	if (cookie) {
		entry->cookie = flow_action_cookie_create(cookie->data,
							  cookie->len,
							  GFP_ATOMIC);
		if (!entry->cookie)
			err = -ENOMEM;
	}
	rcu_read_unlock();
	return err;
}

/* Release the cookie held by @entry. */
static void tcf_act_put_cookie(struct flow_action_entry *entry)
{
	flow_action_cookie_destroy(entry->cookie);
}

/* Undo the setup of a flow_action: for every entry, release its cookie and
 * run its destructor, if one was installed, on destructor_priv.
 */
void tc_cleanup_offload_action(struct flow_action *flow_action)
{
	struct flow_action_entry *entry;
	int i;

	flow_action_for_each(i, entry, flow_action) {
		tcf_act_put_cookie(entry);
		if (entry->destructor)
			entry->destructor(entry->destructor_priv);
	}
}
EXPORT_SYMBOL(tc_cleanup_offload_action);
tc_setup_offload_act(struct tc_action *act, 3515 struct flow_action_entry *entry, 3516 u32 *index_inc) 3517 { 3518 #ifdef CONFIG_NET_CLS_ACT 3519 if (act->ops->offload_act_setup) 3520 return act->ops->offload_act_setup(act, entry, index_inc, true); 3521 else 3522 return -EOPNOTSUPP; 3523 #else 3524 return 0; 3525 #endif 3526 } 3527 3528 int tc_setup_action(struct flow_action *flow_action, 3529 struct tc_action *actions[]) 3530 { 3531 int i, j, index, err = 0; 3532 struct tc_action *act; 3533 3534 BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); 3535 BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); 3536 BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); 3537 3538 if (!actions) 3539 return 0; 3540 3541 j = 0; 3542 tcf_act_for_each_action(i, act, actions) { 3543 struct flow_action_entry *entry; 3544 3545 entry = &flow_action->entries[j]; 3546 spin_lock_bh(&act->tcfa_lock); 3547 err = tcf_act_get_cookie(entry, act); 3548 if (err) 3549 goto err_out_locked; 3550 3551 entry->hw_stats = tc_act_hw_stats(act->hw_stats); 3552 entry->hw_index = act->tcfa_index; 3553 index = 0; 3554 err = tc_setup_offload_act(act, entry, &index); 3555 if (!err) 3556 j += index; 3557 else 3558 goto err_out_locked; 3559 spin_unlock_bh(&act->tcfa_lock); 3560 } 3561 3562 err_out: 3563 if (err) 3564 tc_cleanup_offload_action(flow_action); 3565 3566 return err; 3567 err_out_locked: 3568 spin_unlock_bh(&act->tcfa_lock); 3569 goto err_out; 3570 } 3571 3572 int tc_setup_offload_action(struct flow_action *flow_action, 3573 const struct tcf_exts *exts) 3574 { 3575 #ifdef CONFIG_NET_CLS_ACT 3576 if (!exts) 3577 return 0; 3578 3579 return tc_setup_action(flow_action, exts->actions); 3580 #else 3581 return 0; 3582 #endif 3583 } 3584 EXPORT_SYMBOL(tc_setup_offload_action); 3585 3586 unsigned int tcf_exts_num_actions(struct tcf_exts *exts) 3587 { 3588 unsigned int num_acts = 0; 3589 struct tc_action *act; 3590 int i; 3591 3592 
tcf_exts_for_each_action(i, act, exts) { 3593 if (is_tcf_pedit(act)) 3594 num_acts += tcf_pedit_nkeys(act); 3595 else 3596 num_acts++; 3597 } 3598 return num_acts; 3599 } 3600 EXPORT_SYMBOL(tcf_exts_num_actions); 3601 3602 #ifdef CONFIG_NET_CLS_ACT 3603 static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr, 3604 u32 *p_block_index, 3605 struct netlink_ext_ack *extack) 3606 { 3607 *p_block_index = nla_get_u32(block_index_attr); 3608 if (!*p_block_index) { 3609 NL_SET_ERR_MSG(extack, "Block number may not be zero"); 3610 return -EINVAL; 3611 } 3612 3613 return 0; 3614 } 3615 3616 int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, 3617 enum flow_block_binder_type binder_type, 3618 struct nlattr *block_index_attr, 3619 struct netlink_ext_ack *extack) 3620 { 3621 u32 block_index; 3622 int err; 3623 3624 if (!block_index_attr) 3625 return 0; 3626 3627 err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); 3628 if (err) 3629 return err; 3630 3631 if (!block_index) 3632 return 0; 3633 3634 qe->info.binder_type = binder_type; 3635 qe->info.chain_head_change = tcf_chain_head_change_dflt; 3636 qe->info.chain_head_change_priv = &qe->filter_chain; 3637 qe->info.block_index = block_index; 3638 3639 return tcf_block_get_ext(&qe->block, sch, &qe->info, extack); 3640 } 3641 EXPORT_SYMBOL(tcf_qevent_init); 3642 3643 void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) 3644 { 3645 if (qe->info.block_index) 3646 tcf_block_put_ext(qe->block, sch, &qe->info); 3647 } 3648 EXPORT_SYMBOL(tcf_qevent_destroy); 3649 3650 int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, 3651 struct netlink_ext_ack *extack) 3652 { 3653 u32 block_index; 3654 int err; 3655 3656 if (!block_index_attr) 3657 return 0; 3658 3659 err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); 3660 if (err) 3661 return err; 3662 3663 /* Bounce newly-configured block or change in block. 
*/ 3664 if (block_index != qe->info.block_index) { 3665 NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); 3666 return -EINVAL; 3667 } 3668 3669 return 0; 3670 } 3671 EXPORT_SYMBOL(tcf_qevent_validate_change); 3672 3673 struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, 3674 struct sk_buff **to_free, int *ret) 3675 { 3676 struct tcf_result cl_res; 3677 struct tcf_proto *fl; 3678 3679 if (!qe->info.block_index) 3680 return skb; 3681 3682 fl = rcu_dereference_bh(qe->filter_chain); 3683 3684 switch (tcf_classify(skb, NULL, fl, &cl_res, false)) { 3685 case TC_ACT_SHOT: 3686 qdisc_qstats_drop(sch); 3687 __qdisc_drop(skb, to_free); 3688 *ret = __NET_XMIT_BYPASS; 3689 return NULL; 3690 case TC_ACT_STOLEN: 3691 case TC_ACT_QUEUED: 3692 case TC_ACT_TRAP: 3693 __qdisc_drop(skb, to_free); 3694 *ret = __NET_XMIT_STOLEN; 3695 return NULL; 3696 case TC_ACT_REDIRECT: 3697 skb_do_redirect(skb); 3698 *ret = __NET_XMIT_STOLEN; 3699 return NULL; 3700 } 3701 3702 return skb; 3703 } 3704 EXPORT_SYMBOL(tcf_qevent_handle); 3705 3706 int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) 3707 { 3708 if (!qe->info.block_index) 3709 return 0; 3710 return nla_put_u32(skb, attr_name, qe->info.block_index); 3711 } 3712 EXPORT_SYMBOL(tcf_qevent_dump); 3713 #endif 3714 3715 static __net_init int tcf_net_init(struct net *net) 3716 { 3717 struct tcf_net *tn = net_generic(net, tcf_net_id); 3718 3719 spin_lock_init(&tn->idr_lock); 3720 idr_init(&tn->idr); 3721 return 0; 3722 } 3723 3724 static void __net_exit tcf_net_exit(struct net *net) 3725 { 3726 struct tcf_net *tn = net_generic(net, tcf_net_id); 3727 3728 idr_destroy(&tn->idr); 3729 } 3730 3731 static struct pernet_operations tcf_net_ops = { 3732 .init = tcf_net_init, 3733 .exit = tcf_net_exit, 3734 .id = &tcf_net_id, 3735 .size = sizeof(struct tcf_net), 3736 }; 3737 3738 static int __init tc_filter_init(void) 3739 { 3740 int err; 3741 3742 tc_filter_wq = 
alloc_ordered_workqueue("tc_filter_workqueue", 0); 3743 if (!tc_filter_wq) 3744 return -ENOMEM; 3745 3746 err = register_pernet_subsys(&tcf_net_ops); 3747 if (err) 3748 goto err_register_pernet_subsys; 3749 3750 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 3751 RTNL_FLAG_DOIT_UNLOCKED); 3752 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 3753 RTNL_FLAG_DOIT_UNLOCKED); 3754 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, 3755 tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); 3756 rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); 3757 rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); 3758 rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, 3759 tc_dump_chain, 0); 3760 3761 return 0; 3762 3763 err_register_pernet_subsys: 3764 destroy_workqueue(tc_filter_wq); 3765 return err; 3766 } 3767 3768 subsys_initcall(tc_filter_init); 3769