1 /* 2 * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * The filters are packed to hash tables of key nodes 12 * with a set of 32bit key/mask pairs at every node. 13 * Nodes reference next level hash tables etc. 14 * 15 * This scheme is the best universal classifier I managed to 16 * invent; it is not super-fast, but it is not slow (provided you 17 * program it correctly), and general enough. And its relative 18 * speed grows as the number of rules becomes larger. 19 * 20 * It seems that it represents the best middle point between 21 * speed and manageability both by human and by machine. 22 * 23 * It is especially useful for link sharing combined with QoS; 24 * pure RSVP doesn't need such a general approach and can use 25 * much simpler (and faster) schemes, sort of cls_rsvp.c. 26 * 27 * JHS: We should remove the CONFIG_NET_CLS_IND from here 28 * eventually when the meta match extension is made available 29 * 30 * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> 31 */ 32 33 #include <linux/module.h> 34 #include <linux/slab.h> 35 #include <linux/types.h> 36 #include <linux/kernel.h> 37 #include <linux/string.h> 38 #include <linux/errno.h> 39 #include <linux/percpu.h> 40 #include <linux/rtnetlink.h> 41 #include <linux/skbuff.h> 42 #include <linux/bitmap.h> 43 #include <linux/netdevice.h> 44 #include <linux/hash.h> 45 #include <net/netlink.h> 46 #include <net/act_api.h> 47 #include <net/pkt_cls.h> 48 #include <linux/idr.h> 49 50 struct tc_u_knode { 51 struct tc_u_knode __rcu *next; 52 u32 handle; 53 struct tc_u_hnode __rcu *ht_up; 54 struct tcf_exts exts; 55 #ifdef CONFIG_NET_CLS_IND 56 int ifindex; 57 #endif 58 u8 fshift; 59 struct tcf_result res; 60 struct tc_u_hnode __rcu *ht_down; 61 #ifdef CONFIG_CLS_U32_PERF 62 struct tc_u32_pcnt __percpu *pf; 63 #endif 64 u32 flags; 65 #ifdef CONFIG_CLS_U32_MARK 66 u32 val; 67 u32 mask; 68 u32 __percpu *pcpu_success; 69 #endif 70 struct tcf_proto *tp; 71 union { 72 struct work_struct work; 73 struct rcu_head rcu; 74 }; 75 /* The 'sel' field MUST be the last field in structure to allow for 76 * tc_u32_keys allocated at end of structure. 77 */ 78 struct tc_u32_sel sel; 79 }; 80 81 struct tc_u_hnode { 82 struct tc_u_hnode __rcu *next; 83 u32 handle; 84 u32 prio; 85 struct tc_u_common *tp_c; 86 int refcnt; 87 unsigned int divisor; 88 struct idr handle_idr; 89 struct rcu_head rcu; 90 u32 flags; 91 /* The 'ht' field MUST be the last field in structure to allow for 92 * more entries allocated at end of structure. 93 */ 94 struct tc_u_knode __rcu *ht[1]; 95 }; 96 97 struct tc_u_common { 98 struct tc_u_hnode __rcu *hlist; 99 void *ptr; 100 int refcnt; 101 struct idr handle_idr; 102 struct hlist_node hnode; 103 struct rcu_head rcu; 104 }; 105 106 static inline unsigned int u32_hash_fold(__be32 key, 107 const struct tc_u32_sel *sel, 108 u8 fshift) 109 { 110 unsigned int h = ntohl(key & sel->hmask) >> fshift; 111 112 return h; 113 } 114 115 static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, 116 struct tcf_result *res) 117 { 118 struct { 119 struct tc_u_knode *knode; 120 unsigned int off; 121 } stack[TC_U32_MAXDEPTH]; 122 123 struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); 124 unsigned int off = skb_network_offset(skb); 125 struct tc_u_knode *n; 126 int sdepth = 0; 127 int off2 = 0; 128 int sel = 0; 129 #ifdef CONFIG_CLS_U32_PERF 130 int j; 131 #endif 132 int i, r; 133 134 next_ht: 135 n = rcu_dereference_bh(ht->ht[sel]); 136 137 next_knode: 138 if (n) { 139 struct tc_u32_key *key = n->sel.keys; 140 141 #ifdef CONFIG_CLS_U32_PERF 142 __this_cpu_inc(n->pf->rcnt); 143 j = 0; 144 #endif 145 146 if (tc_skip_sw(n->flags)) { 147 n = rcu_dereference_bh(n->next); 148 goto next_knode; 149 } 150 151 #ifdef CONFIG_CLS_U32_MARK 152 if ((skb->mark & n->mask) != n->val) { 153 n = rcu_dereference_bh(n->next); 154 goto next_knode; 155 } else { 156 __this_cpu_inc(*n->pcpu_success); 157 } 158 #endif 159 160 for (i = n->sel.nkeys; i > 0; i--, key++) { 161 int toff = off + key->off + (off2 & key->offmask); 162 __be32 *data, hdata; 163 164 if (skb_headroom(skb) + toff > INT_MAX) 165 goto out; 166 167 data = skb_header_pointer(skb, toff, 4, &hdata); 168 if (!data) 169 goto out; 170 if ((*data ^ key->val) & key->mask) { 171 n = rcu_dereference_bh(n->next); 172 goto next_knode; 173 } 174 #ifdef CONFIG_CLS_U32_PERF 175 __this_cpu_inc(n->pf->kcnts[j]); 176 j++; 177 #endif 178 } 179 180 ht = rcu_dereference_bh(n->ht_down); 181 if (!ht) { 182 check_terminal: 183 if (n->sel.flags & TC_U32_TERMINAL) { 184 185 *res = n->res; 186 #ifdef CONFIG_NET_CLS_IND 187 if (!tcf_match_indev(skb, n->ifindex)) { 188 n = rcu_dereference_bh(n->next); 189 goto next_knode; 190 } 191 #endif 192 #ifdef CONFIG_CLS_U32_PERF 193 __this_cpu_inc(n->pf->rhit); 194 #endif 195 r = tcf_exts_exec(skb, &n->exts, res); 196 if (r < 0) { 197 n = rcu_dereference_bh(n->next); 198 goto next_knode; 199 } 200 201 return r; 202 } 203 n = rcu_dereference_bh(n->next); 204 goto next_knode; 205 } 206 207 /* PUSH */ 208 if (sdepth >= TC_U32_MAXDEPTH) 209 goto deadloop; 210 stack[sdepth].knode = n; 211 stack[sdepth].off = off; 212 sdepth++; 213 214 ht = rcu_dereference_bh(n->ht_down); 215 sel = 0; 216 if (ht->divisor) { 217 __be32 *data, hdata; 218 219 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 220 &hdata); 221 if (!data) 222 goto out; 223 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 224 n->fshift); 225 } 226 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) 227 goto next_ht; 228 229 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { 230 off2 = n->sel.off + 3; 231 if (n->sel.flags & TC_U32_VAROFFSET) { 232 __be16 *data, hdata; 233 234 data = skb_header_pointer(skb, 235 off + n->sel.offoff, 236 2, &hdata); 237 if (!data) 238 goto out; 239 off2 += ntohs(n->sel.offmask & *data) >> 240 n->sel.offshift; 241 } 242 off2 &= ~3; 243 } 244 if (n->sel.flags & TC_U32_EAT) { 245 off += off2; 246 off2 = 0; 247 } 248 249 if (off < skb->len) 250 goto next_ht; 251 } 252 253 /* POP */ 254 if (sdepth--) { 255 n = stack[sdepth].knode; 256 ht = rcu_dereference_bh(n->ht_up); 257 off = stack[sdepth].off; 258 goto check_terminal; 259 } 260 out: 261 return -1; 262 263 deadloop: 264 net_warn_ratelimited("cls_u32: dead loop\n"); 265 return -1; 266 } 267 268 static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 269 { 270 struct tc_u_hnode *ht; 271 272 for (ht = rtnl_dereference(tp_c->hlist); 273 ht; 274 ht = rtnl_dereference(ht->next)) 275 if (ht->handle == handle) 276 break; 277 278 return ht; 279 } 280 281 static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 282 { 283 unsigned int sel; 284 struct tc_u_knode *n = NULL; 285 286 sel = TC_U32_HASH(handle); 287 if (sel > ht->divisor) 288 goto out; 289 290 for (n = rtnl_dereference(ht->ht[sel]); 291 n; 292 n = rtnl_dereference(n->next)) 293 if (n->handle == handle) 294 break; 295 out: 296 return n; 297 } 298 299 300 static void *u32_get(struct tcf_proto *tp, u32 handle) 301 { 302 struct tc_u_hnode *ht; 303 struct tc_u_common *tp_c = tp->data; 304 305 if (TC_U32_HTID(handle) == TC_U32_ROOT) 306 ht = rtnl_dereference(tp->root); 307 else 308 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); 309 310 if (!ht) 311 return NULL; 312 313 if (TC_U32_KEY(handle) == 0) 314 return ht; 315 316 return u32_lookup_key(ht, handle); 317 } 318 319 /* Protected by rtnl lock */ 320 static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) 321 { 322 int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL); 323 if (id < 0) 324 return 0; 325 return (id | 0x800U) << 20; 326 } 327 328 static struct hlist_head *tc_u_common_hash; 329 330 #define U32_HASH_SHIFT 10 331 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT) 332 333 static void *tc_u_common_ptr(const struct tcf_proto *tp) 334 { 335 struct tcf_block *block = tp->chain->block; 336 337 /* The block sharing is currently supported only 338 * for classless qdiscs. In that case we use block 339 * for tc_u_common identification. In case the 340 * block is not shared, block->q is a valid pointer 341 * and we can use that. That works for classful qdiscs. 342 */ 343 if (tcf_block_shared(block)) 344 return block; 345 else 346 return block->q; 347 } 348 349 static unsigned int tc_u_hash(const struct tcf_proto *tp) 350 { 351 return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT); 352 } 353 354 static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) 355 { 356 struct tc_u_common *tc; 357 unsigned int h; 358 359 h = tc_u_hash(tp); 360 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { 361 if (tc->ptr == tc_u_common_ptr(tp)) 362 return tc; 363 } 364 return NULL; 365 } 366 367 static int u32_init(struct tcf_proto *tp) 368 { 369 struct tc_u_hnode *root_ht; 370 struct tc_u_common *tp_c; 371 unsigned int h; 372 373 tp_c = tc_u_common_find(tp); 374 375 root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); 376 if (root_ht == NULL) 377 return -ENOBUFS; 378 379 root_ht->refcnt++; 380 root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; 381 root_ht->prio = tp->prio; 382 idr_init(&root_ht->handle_idr); 383 384 if (tp_c == NULL) { 385 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); 386 if (tp_c == NULL) { 387 kfree(root_ht); 388 return -ENOBUFS; 389 } 390 tp_c->ptr = tc_u_common_ptr(tp); 391 INIT_HLIST_NODE(&tp_c->hnode); 392 idr_init(&tp_c->handle_idr); 393 394 h = tc_u_hash(tp); 395 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); 396 } 397 398 tp_c->refcnt++; 399 RCU_INIT_POINTER(root_ht->next, tp_c->hlist); 400 rcu_assign_pointer(tp_c->hlist, root_ht); 401 root_ht->tp_c = tp_c; 402 403 rcu_assign_pointer(tp->root, root_ht); 404 tp->data = tp_c; 405 return 0; 406 } 407 408 static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, 409 bool free_pf) 410 { 411 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 412 413 tcf_exts_destroy(&n->exts); 414 tcf_exts_put_net(&n->exts); 415 if (ht && --ht->refcnt == 0) 416 kfree(ht); 417 #ifdef CONFIG_CLS_U32_PERF 418 if (free_pf) 419 free_percpu(n->pf); 420 #endif 421 #ifdef CONFIG_CLS_U32_MARK 422 if (free_pf) 423 free_percpu(n->pcpu_success); 424 #endif 425 kfree(n); 426 return 0; 427 } 428 429 /* u32_delete_key_rcu should be called when free'ing a copied 430 * version of a tc_u_knode obtained from u32_init_knode(). When 431 * copies are obtained from u32_init_knode() the statistics are 432 * shared between the old and new copies to allow readers to 433 * continue to update the statistics during the copy. To support 434 * this the u32_delete_key_rcu variant does not free the percpu 435 * statistics. 436 */ 437 static void u32_delete_key_work(struct work_struct *work) 438 { 439 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); 440 441 rtnl_lock(); 442 u32_destroy_key(key->tp, key, false); 443 rtnl_unlock(); 444 } 445 446 static void u32_delete_key_rcu(struct rcu_head *rcu) 447 { 448 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 449 450 INIT_WORK(&key->work, u32_delete_key_work); 451 tcf_queue_work(&key->work); 452 } 453 454 /* u32_delete_key_freepf_rcu is the rcu callback variant 455 * that free's the entire structure including the statistics 456 * percpu variables. Only use this if the key is not a copy 457 * returned by u32_init_knode(). See u32_delete_key_rcu() 458 * for the variant that should be used with keys return from 459 * u32_init_knode() 460 */ 461 static void u32_delete_key_freepf_work(struct work_struct *work) 462 { 463 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); 464 465 rtnl_lock(); 466 u32_destroy_key(key->tp, key, true); 467 rtnl_unlock(); 468 } 469 470 static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) 471 { 472 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 473 474 INIT_WORK(&key->work, u32_delete_key_freepf_work); 475 tcf_queue_work(&key->work); 476 } 477 478 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) 479 { 480 struct tc_u_knode __rcu **kp; 481 struct tc_u_knode *pkp; 482 struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); 483 484 if (ht) { 485 kp = &ht->ht[TC_U32_HASH(key->handle)]; 486 for (pkp = rtnl_dereference(*kp); pkp; 487 kp = &pkp->next, pkp = rtnl_dereference(*kp)) { 488 if (pkp == key) { 489 RCU_INIT_POINTER(*kp, key->next); 490 491 tcf_unbind_filter(tp, &key->res); 492 tcf_exts_get_net(&key->exts); 493 call_rcu(&key->rcu, u32_delete_key_freepf_rcu); 494 return 0; 495 } 496 } 497 } 498 WARN_ON(1); 499 return 0; 500 } 501 502 static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 503 struct netlink_ext_ack *extack) 504 { 505 struct tcf_block *block = tp->chain->block; 506 struct tc_cls_u32_offload cls_u32 = {}; 507 508 tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); 509 cls_u32.command = TC_CLSU32_DELETE_HNODE; 510 cls_u32.hnode.divisor = h->divisor; 511 cls_u32.hnode.handle = h->handle; 512 cls_u32.hnode.prio = h->prio; 513 514 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); 515 } 516 517 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 518 u32 flags, struct netlink_ext_ack *extack) 519 { 520 struct tcf_block *block = tp->chain->block; 521 struct tc_cls_u32_offload cls_u32 = {}; 522 bool skip_sw = tc_skip_sw(flags); 523 bool offloaded = false; 524 int err; 525 526 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); 527 cls_u32.command = TC_CLSU32_NEW_HNODE; 528 cls_u32.hnode.divisor = h->divisor; 529 cls_u32.hnode.handle = h->handle; 530 cls_u32.hnode.prio = h->prio; 531 532 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 533 if (err < 0) { 534 u32_clear_hw_hnode(tp, h, NULL); 535 return err; 536 } else if (err > 0) { 537 offloaded = true; 538 } 539 540 if (skip_sw && !offloaded) 541 return -EINVAL; 542 543 return 0; 544 } 545 546 static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 547 struct netlink_ext_ack *extack) 548 { 549 struct tcf_block *block = tp->chain->block; 550 struct tc_cls_u32_offload cls_u32 = {}; 551 552 tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); 553 cls_u32.command = TC_CLSU32_DELETE_KNODE; 554 cls_u32.knode.handle = n->handle; 555 556 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); 557 tcf_block_offload_dec(block, &n->flags); 558 } 559 560 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 561 u32 flags, struct netlink_ext_ack *extack) 562 { 563 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 564 struct tcf_block *block = tp->chain->block; 565 struct tc_cls_u32_offload cls_u32 = {}; 566 bool skip_sw = tc_skip_sw(flags); 567 int err; 568 569 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); 570 cls_u32.command = TC_CLSU32_REPLACE_KNODE; 571 cls_u32.knode.handle = n->handle; 572 cls_u32.knode.fshift = n->fshift; 573 #ifdef CONFIG_CLS_U32_MARK 574 cls_u32.knode.val = n->val; 575 cls_u32.knode.mask = n->mask; 576 #else 577 cls_u32.knode.val = 0; 578 cls_u32.knode.mask = 0; 579 #endif 580 cls_u32.knode.sel = &n->sel; 581 cls_u32.knode.exts = &n->exts; 582 if (n->ht_down) 583 cls_u32.knode.link_handle = ht->handle; 584 585 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 586 if (err < 0) { 587 u32_remove_hw_knode(tp, n, NULL); 588 return err; 589 } else if (err > 0) { 590 tcf_block_offload_inc(block, &n->flags); 591 } 592 593 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) 594 return -EINVAL; 595 596 return 0; 597 } 598 599 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, 600 struct netlink_ext_ack *extack) 601 { 602 struct tc_u_knode *n; 603 unsigned int h; 604 605 for (h = 0; h <= ht->divisor; h++) { 606 while ((n = rtnl_dereference(ht->ht[h])) != NULL) { 607 RCU_INIT_POINTER(ht->ht[h], 608 rtnl_dereference(n->next)); 609 tcf_unbind_filter(tp, &n->res); 610 u32_remove_hw_knode(tp, n, extack); 611 idr_remove(&ht->handle_idr, n->handle); 612 if (tcf_exts_get_net(&n->exts)) 613 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 614 else 615 u32_destroy_key(n->tp, n, true); 616 } 617 } 618 } 619 620 static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, 621 struct netlink_ext_ack *extack) 622 { 623 struct tc_u_common *tp_c = tp->data; 624 struct tc_u_hnode __rcu **hn; 625 struct tc_u_hnode *phn; 626 627 WARN_ON(ht->refcnt); 628 629 u32_clear_hnode(tp, ht, extack); 630 631 hn = &tp_c->hlist; 632 for (phn = rtnl_dereference(*hn); 633 phn; 634 hn = &phn->next, phn = rtnl_dereference(*hn)) { 635 if (phn == ht) { 636 u32_clear_hw_hnode(tp, ht, extack); 637 idr_destroy(&ht->handle_idr); 638 idr_remove(&tp_c->handle_idr, ht->handle); 639 RCU_INIT_POINTER(*hn, ht->next); 640 kfree_rcu(ht, rcu); 641 return 0; 642 } 643 } 644 645 return -ENOENT; 646 } 647 648 static bool ht_empty(struct tc_u_hnode *ht) 649 { 650 unsigned int h; 651 652 for (h = 0; h <= ht->divisor; h++) 653 if (rcu_access_pointer(ht->ht[h])) 654 return false; 655 656 return true; 657 } 658 659 static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) 660 { 661 struct tc_u_common *tp_c = tp->data; 662 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 663 664 WARN_ON(root_ht == NULL); 665 666 if (root_ht && --root_ht->refcnt == 0) 667 u32_destroy_hnode(tp, root_ht, extack); 668 669 if (--tp_c->refcnt == 0) { 670 struct tc_u_hnode *ht; 671 672 hlist_del(&tp_c->hnode); 673 674 while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { 675 u32_clear_hnode(tp, ht, extack); 676 RCU_INIT_POINTER(tp_c->hlist, ht->next); 677 678 /* u32_destroy_key() will later free ht for us, if it's 679 * still referenced by some knode 680 */ 681 if (--ht->refcnt == 0) 682 kfree_rcu(ht, rcu); 683 } 684 685 idr_destroy(&tp_c->handle_idr); 686 kfree(tp_c); 687 } 688 689 tp->data = NULL; 690 } 691 692 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, 693 struct netlink_ext_ack *extack) 694 { 695 struct tc_u_hnode *ht = arg; 696 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 697 struct tc_u_common *tp_c = tp->data; 698 int ret = 0; 699 700 if (ht == NULL) 701 goto out; 702 703 if (TC_U32_KEY(ht->handle)) { 704 u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); 705 ret = u32_delete_key(tp, (struct tc_u_knode *)ht); 706 goto out; 707 } 708 709 if (root_ht == ht) { 710 NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); 711 return -EINVAL; 712 } 713 714 if (ht->refcnt == 1) { 715 ht->refcnt--; 716 u32_destroy_hnode(tp, ht, extack); 717 } else { 718 NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); 719 return -EBUSY; 720 } 721 722 out: 723 *last = true; 724 if (root_ht) { 725 if (root_ht->refcnt > 1) { 726 *last = false; 727 goto ret; 728 } 729 if (root_ht->refcnt == 1) { 730 if (!ht_empty(root_ht)) { 731 *last = false; 732 goto ret; 733 } 734 } 735 } 736 737 if (tp_c->refcnt > 1) { 738 *last = false; 739 goto ret; 740 } 741 742 if (tp_c->refcnt == 1) { 743 struct tc_u_hnode *ht; 744 745 for (ht = rtnl_dereference(tp_c->hlist); 746 ht; 747 ht = rtnl_dereference(ht->next)) 748 if (!ht_empty(ht)) { 749 *last = false; 750 break; 751 } 752 } 753 754 ret: 755 return ret; 756 } 757 758 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) 759 { 760 u32 index = htid | 0x800; 761 u32 max = htid | 0xFFF; 762 763 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) { 764 index = htid + 1; 765 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, 766 GFP_KERNEL)) 767 index = max; 768 } 769 770 return index; 771 } 772 773 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 774 [TCA_U32_CLASSID] = { .type = NLA_U32 }, 775 [TCA_U32_HASH] = { .type = NLA_U32 }, 776 [TCA_U32_LINK] = { .type = NLA_U32 }, 777 [TCA_U32_DIVISOR] = { .type = NLA_U32 }, 778 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, 779 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, 780 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, 781 [TCA_U32_FLAGS] = { .type = NLA_U32 }, 782 }; 783 784 static int u32_set_parms(struct net *net, struct tcf_proto *tp, 785 unsigned long base, struct tc_u_hnode *ht, 786 struct tc_u_knode *n, struct nlattr **tb, 787 struct nlattr *est, bool ovr, 788 struct netlink_ext_ack *extack) 789 { 790 int err; 791 792 err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); 793 if (err < 0) 794 return err; 795 796 if (tb[TCA_U32_LINK]) { 797 u32 handle = nla_get_u32(tb[TCA_U32_LINK]); 798 struct tc_u_hnode *ht_down = NULL, *ht_old; 799 800 if (TC_U32_KEY(handle)) { 801 NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table"); 802 return -EINVAL; 803 } 804 805 if (handle) { 806 ht_down = u32_lookup_ht(ht->tp_c, handle); 807 808 if (!ht_down) { 809 NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); 810 return -EINVAL; 811 } 812 ht_down->refcnt++; 813 } 814 815 ht_old = rtnl_dereference(n->ht_down); 816 rcu_assign_pointer(n->ht_down, ht_down); 817 818 if (ht_old) 819 ht_old->refcnt--; 820 } 821 if (tb[TCA_U32_CLASSID]) { 822 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); 823 tcf_bind_filter(tp, &n->res, base); 824 } 825 826 #ifdef CONFIG_NET_CLS_IND 827 if (tb[TCA_U32_INDEV]) { 828 int ret; 829 ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); 830 if (ret < 0) 831 return -EINVAL; 832 n->ifindex = ret; 833 } 834 #endif 835 return 0; 836 } 837 838 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, 839 struct tc_u_knode *n) 840 { 841 struct tc_u_knode __rcu **ins; 842 struct tc_u_knode *pins; 843 struct tc_u_hnode *ht; 844 845 if (TC_U32_HTID(n->handle) == TC_U32_ROOT) 846 ht = rtnl_dereference(tp->root); 847 else 848 ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle)); 849 850 ins = &ht->ht[TC_U32_HASH(n->handle)]; 851 852 /* The node must always exist for it to be replaced if this is not the 853 * case then something went very wrong elsewhere. 854 */ 855 for (pins = rtnl_dereference(*ins); ; 856 ins = &pins->next, pins = rtnl_dereference(*ins)) 857 if (pins->handle == n->handle) 858 break; 859 860 idr_replace(&ht->handle_idr, n, n->handle); 861 RCU_INIT_POINTER(n->next, pins->next); 862 rcu_assign_pointer(*ins, n); 863 } 864 865 static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, 866 struct tc_u_knode *n) 867 { 868 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 869 struct tc_u32_sel *s = &n->sel; 870 struct tc_u_knode *new; 871 872 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), 873 GFP_KERNEL); 874 875 if (!new) 876 return NULL; 877 878 RCU_INIT_POINTER(new->next, n->next); 879 new->handle = n->handle; 880 RCU_INIT_POINTER(new->ht_up, n->ht_up); 881 882 #ifdef CONFIG_NET_CLS_IND 883 new->ifindex = n->ifindex; 884 #endif 885 new->fshift = n->fshift; 886 new->res = n->res; 887 new->flags = n->flags; 888 RCU_INIT_POINTER(new->ht_down, ht); 889 890 /* bump reference count as long as we hold pointer to structure */ 891 if (ht) 892 ht->refcnt++; 893 894 #ifdef CONFIG_CLS_U32_PERF 895 /* Statistics may be incremented by readers during update 896 * so we must keep them in tact. When the node is later destroyed 897 * a special destroy call must be made to not free the pf memory. 898 */ 899 new->pf = n->pf; 900 #endif 901 902 #ifdef CONFIG_CLS_U32_MARK 903 new->val = n->val; 904 new->mask = n->mask; 905 /* Similarly success statistics must be moved as pointers */ 906 new->pcpu_success = n->pcpu_success; 907 #endif 908 new->tp = tp; 909 memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); 910 911 if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { 912 kfree(new); 913 return NULL; 914 } 915 916 return new; 917 } 918 919 static int u32_change(struct net *net, struct sk_buff *in_skb, 920 struct tcf_proto *tp, unsigned long base, u32 handle, 921 struct nlattr **tca, void **arg, bool ovr, 922 struct netlink_ext_ack *extack) 923 { 924 struct tc_u_common *tp_c = tp->data; 925 struct tc_u_hnode *ht; 926 struct tc_u_knode *n; 927 struct tc_u32_sel *s; 928 struct nlattr *opt = tca[TCA_OPTIONS]; 929 struct nlattr *tb[TCA_U32_MAX + 1]; 930 u32 htid, flags = 0; 931 int err; 932 #ifdef CONFIG_CLS_U32_PERF 933 size_t size; 934 #endif 935 936 if (!opt) { 937 if (handle) { 938 NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options"); 939 return -EINVAL; 940 } else { 941 return 0; 942 } 943 } 944 945 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); 946 if (err < 0) 947 return err; 948 949 if (tb[TCA_U32_FLAGS]) { 950 flags = nla_get_u32(tb[TCA_U32_FLAGS]); 951 if (!tc_flags_valid(flags)) { 952 NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); 953 return -EINVAL; 954 } 955 } 956 957 n = *arg; 958 if (n) { 959 struct tc_u_knode *new; 960 961 if (TC_U32_KEY(n->handle) == 0) { 962 NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero"); 963 return -EINVAL; 964 } 965 966 if ((n->flags ^ flags) & 967 ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { 968 NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); 969 return -EINVAL; 970 } 971 972 new = u32_init_knode(tp, n); 973 if (!new) 974 return -ENOMEM; 975 976 err = u32_set_parms(net, tp, base, 977 rtnl_dereference(n->ht_up), new, tb, 978 tca[TCA_RATE], ovr, extack); 979 980 if (err) { 981 u32_destroy_key(tp, new, false); 982 return err; 983 } 984 985 err = u32_replace_hw_knode(tp, new, flags, extack); 986 if (err) { 987 u32_destroy_key(tp, new, false); 988 return err; 989 } 990 991 if (!tc_in_hw(new->flags)) 992 new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; 993 994 u32_replace_knode(tp, tp_c, new); 995 tcf_unbind_filter(tp, &n->res); 996 tcf_exts_get_net(&n->exts); 997 call_rcu(&n->rcu, u32_delete_key_rcu); 998 return 0; 999 } 1000 1001 if (tb[TCA_U32_DIVISOR]) { 1002 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 1003 1004 if (--divisor > 0x100) { 1005 NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); 1006 return -EINVAL; 1007 } 1008 if (TC_U32_KEY(handle)) { 1009 NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table"); 1010 return -EINVAL; 1011 } 1012 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); 1013 if (ht == NULL) 1014 return -ENOBUFS; 1015 if (handle == 0) { 1016 handle = gen_new_htid(tp->data, ht); 1017 if (handle == 0) { 1018 kfree(ht); 1019 return -ENOMEM; 1020 } 1021 } else { 1022 err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle, 1023 handle, GFP_KERNEL); 1024 if (err) { 1025 kfree(ht); 1026 return err; 1027 } 1028 } 1029 ht->tp_c = tp_c; 1030 ht->refcnt = 1; 1031 ht->divisor = divisor; 1032 ht->handle = handle; 1033 ht->prio = tp->prio; 1034 idr_init(&ht->handle_idr); 1035 ht->flags = flags; 1036 1037 err = u32_replace_hw_hnode(tp, ht, flags, extack); 1038 if (err) { 1039 idr_remove(&tp_c->handle_idr, handle); 1040 kfree(ht); 1041 return err; 1042 } 1043 1044 RCU_INIT_POINTER(ht->next, tp_c->hlist); 1045 rcu_assign_pointer(tp_c->hlist, ht); 1046 *arg = ht; 1047 1048 return 0; 1049 } 1050 1051 if (tb[TCA_U32_HASH]) { 1052 htid = nla_get_u32(tb[TCA_U32_HASH]); 1053 if (TC_U32_HTID(htid) == TC_U32_ROOT) { 1054 ht = rtnl_dereference(tp->root); 1055 htid = ht->handle; 1056 } else { 1057 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); 1058 if (!ht) { 1059 NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found"); 1060 return -EINVAL; 1061 } 1062 } 1063 } else { 1064 ht = rtnl_dereference(tp->root); 1065 htid = ht->handle; 1066 } 1067 1068 if (ht->divisor < TC_U32_HASH(htid)) { 1069 NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value"); 1070 return -EINVAL; 1071 } 1072 1073 if (handle) { 1074 if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { 1075 NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); 1076 return -EINVAL; 1077 } 1078 handle = htid | TC_U32_NODE(handle); 1079 err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, 1080 GFP_KERNEL); 1081 if (err) 1082 return err; 1083 } else 1084 handle = gen_new_kid(ht, htid); 1085 1086 if (tb[TCA_U32_SEL] == NULL) { 1087 NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); 1088 err = -EINVAL; 1089 goto erridr; 1090 } 1091 1092 s = nla_data(tb[TCA_U32_SEL]); 1093 1094 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 1095 if (n == NULL) { 1096 err = -ENOBUFS; 1097 goto erridr; 1098 } 1099 1100 #ifdef CONFIG_CLS_U32_PERF 1101 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); 1102 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); 1103 if (!n->pf) { 1104 err = -ENOBUFS; 1105 goto errfree; 1106 } 1107 #endif 1108 1109 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); 1110 RCU_INIT_POINTER(n->ht_up, ht); 1111 n->handle = handle; 1112 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; 1113 n->flags = flags; 1114 n->tp = tp; 1115 1116 err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); 1117 if (err < 0) 1118 goto errout; 1119 1120 #ifdef CONFIG_CLS_U32_MARK 1121 n->pcpu_success = alloc_percpu(u32); 1122 if (!n->pcpu_success) { 1123 err = -ENOMEM; 1124 goto errout; 1125 } 1126 1127 if (tb[TCA_U32_MARK]) { 1128 struct tc_u32_mark *mark; 1129 1130 mark = nla_data(tb[TCA_U32_MARK]); 1131 n->val = mark->val; 1132 n->mask = mark->mask; 1133 } 1134 #endif 1135 1136 err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, 1137 extack); 1138 if (err == 0) { 1139 struct tc_u_knode __rcu **ins; 1140 struct tc_u_knode *pins; 1141 1142 err = u32_replace_hw_knode(tp, n, flags, extack); 1143 if (err) 1144 goto errhw; 1145 1146 if (!tc_in_hw(n->flags)) 1147 n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; 1148 1149 ins = &ht->ht[TC_U32_HASH(handle)]; 1150 for (pins = rtnl_dereference(*ins); pins; 1151 ins = &pins->next, pins = rtnl_dereference(*ins)) 1152 if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) 1153 break; 1154 1155 RCU_INIT_POINTER(n->next, pins); 1156 rcu_assign_pointer(*ins, n); 1157 *arg = n; 1158 return 0; 1159 } 1160 1161 errhw: 1162 #ifdef CONFIG_CLS_U32_MARK 1163 free_percpu(n->pcpu_success); 1164 #endif 1165 1166 errout: 1167 tcf_exts_destroy(&n->exts); 1168 #ifdef CONFIG_CLS_U32_PERF 1169 errfree: 1170 free_percpu(n->pf); 1171 #endif 1172 kfree(n); 1173 erridr: 1174 idr_remove(&ht->handle_idr, handle); 1175 return err; 1176 } 1177 1178 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) 1179 { 1180 struct tc_u_common *tp_c = tp->data; 1181 struct tc_u_hnode *ht; 1182 struct tc_u_knode *n; 1183 unsigned int h; 1184 1185 if (arg->stop) 1186 return; 1187 1188 for (ht = rtnl_dereference(tp_c->hlist); 1189 ht; 1190 ht = rtnl_dereference(ht->next)) { 1191 if (ht->prio != tp->prio) 1192 continue; 1193 if (arg->count >= arg->skip) { 1194 if (arg->fn(tp, ht, arg) < 0) { 1195 arg->stop = 1; 1196 return; 1197 } 1198 } 1199 arg->count++; 1200 for (h = 0; h <= ht->divisor; h++) { 1201 for (n = rtnl_dereference(ht->ht[h]); 1202 n; 1203 n = rtnl_dereference(n->next)) { 1204 if (arg->count < arg->skip) { 1205 arg->count++; 1206 continue; 1207 } 1208 if (arg->fn(tp, n, arg) < 0) { 1209 arg->stop = 1; 1210 return; 1211 } 1212 arg->count++; 1213 } 1214 } 1215 } 1216 } 1217 1218 static void u32_bind_class(void *fh, u32 classid, unsigned long cl) 1219 { 1220 struct tc_u_knode *n = fh; 1221 1222 if (n && n->res.classid == classid) 1223 n->res.class = cl; 1224 } 1225 1226 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, 1227 struct sk_buff *skb, struct tcmsg *t) 1228 { 1229 struct tc_u_knode *n = fh; 1230 struct tc_u_hnode *ht_up, *ht_down; 1231 struct nlattr *nest; 1232 1233 if (n == NULL) 1234 return skb->len; 1235 1236 t->tcm_handle = n->handle; 1237 1238 nest = nla_nest_start(skb, TCA_OPTIONS); 1239 if (nest == NULL) 1240 goto nla_put_failure; 1241 1242 if (TC_U32_KEY(n->handle) == 0) { 1243 struct tc_u_hnode *ht = fh; 1244 u32 divisor = ht->divisor + 1; 1245 1246 if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) 1247 goto nla_put_failure; 1248 } else { 1249 #ifdef CONFIG_CLS_U32_PERF 1250 struct tc_u32_pcnt *gpf; 1251 int cpu; 1252 #endif 1253 1254 if (nla_put(skb, TCA_U32_SEL, 1255 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), 1256 &n->sel)) 1257 goto nla_put_failure; 1258 1259 ht_up = rtnl_dereference(n->ht_up); 1260 if (ht_up) { 1261 u32 htid = n->handle & 0xFFFFF000; 1262 if (nla_put_u32(skb, TCA_U32_HASH, htid)) 1263 goto nla_put_failure; 1264 } 1265 if (n->res.classid && 1266 nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid)) 1267 goto nla_put_failure; 1268 1269 ht_down = rtnl_dereference(n->ht_down); 1270 if (ht_down && 1271 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) 1272 goto nla_put_failure; 1273 1274 if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags)) 1275 goto nla_put_failure; 1276 1277 #ifdef CONFIG_CLS_U32_MARK 1278 if ((n->val || n->mask)) { 1279 struct tc_u32_mark mark = {.val = n->val, 1280 .mask = n->mask, 1281 .success = 0}; 1282 int cpum; 1283 1284 for_each_possible_cpu(cpum) { 1285 __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum); 1286 1287 mark.success += cnt; 1288 } 1289 1290 if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark)) 1291 goto nla_put_failure; 1292 } 1293 #endif 1294 1295 if (tcf_exts_dump(skb, &n->exts) < 0) 1296 goto nla_put_failure; 1297 1298 #ifdef CONFIG_NET_CLS_IND 1299 if (n->ifindex) { 1300 struct net_device *dev; 1301 dev = __dev_get_by_index(net, n->ifindex); 1302 if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name)) 1303 goto nla_put_failure; 1304 } 1305 #endif 1306 #ifdef CONFIG_CLS_U32_PERF 1307 gpf = kzalloc(sizeof(struct tc_u32_pcnt) + 1308 n->sel.nkeys * sizeof(u64), 1309 GFP_KERNEL); 1310 if (!gpf) 1311 goto nla_put_failure; 1312 1313 for_each_possible_cpu(cpu) { 1314 int i; 1315 struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu); 1316 1317 gpf->rcnt += pf->rcnt; 1318 gpf->rhit += pf->rhit; 1319 for (i = 0; i < n->sel.nkeys; i++) 1320 gpf->kcnts[i] += pf->kcnts[i]; 1321 } 1322 1323 if (nla_put_64bit(skb, TCA_U32_PCNT, 1324 sizeof(struct tc_u32_pcnt) + 1325 n->sel.nkeys * sizeof(u64), 1326 gpf, TCA_U32_PAD)) { 1327 kfree(gpf); 1328 goto nla_put_failure; 1329 } 1330 kfree(gpf); 1331 #endif 1332 } 1333 1334 nla_nest_end(skb, nest); 1335 1336 if (TC_U32_KEY(n->handle)) 1337 if (tcf_exts_dump_stats(skb, &n->exts) < 0) 1338 goto nla_put_failure; 1339 return skb->len; 1340 1341 nla_put_failure: 1342 nla_nest_cancel(skb, nest); 1343 return -1; 1344 } 1345 1346 static struct tcf_proto_ops cls_u32_ops __read_mostly = { 1347 .kind = "u32", 1348 .classify = u32_classify, 1349 .init = u32_init, 1350 .destroy = u32_destroy, 1351 .get = u32_get, 1352 .change = u32_change, 1353 .delete = u32_delete, 1354 .walk = u32_walk, 1355 .dump = u32_dump, 1356 .bind_class = u32_bind_class, 1357 .owner = THIS_MODULE, 1358 }; 1359 1360 static int __init init_u32(void) 1361 { 1362 int i, ret; 1363 1364 pr_info("u32 classifier\n"); 1365 #ifdef CONFIG_CLS_U32_PERF 1366 pr_info(" Performance counters on\n"); 1367 #endif 1368 #ifdef CONFIG_NET_CLS_IND 1369 pr_info(" input device check on\n"); 1370 #endif 1371 #ifdef CONFIG_NET_CLS_ACT 1372 pr_info(" Actions configured\n"); 1373 #endif 1374 tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE, 1375 sizeof(struct hlist_head), 1376 GFP_KERNEL); 1377 if (!tc_u_common_hash) 1378 return -ENOMEM; 1379 1380 for (i = 0; i < U32_HASH_SIZE; i++) 1381 INIT_HLIST_HEAD(&tc_u_common_hash[i]); 1382 1383 ret = register_tcf_proto_ops(&cls_u32_ops); 1384 if (ret) 1385 kvfree(tc_u_common_hash); 1386 return ret; 1387 } 1388 1389 static void __exit exit_u32(void) 1390 { 1391 unregister_tcf_proto_ops(&cls_u32_ops); 1392 kvfree(tc_u_common_hash); 1393 } 1394 1395 module_init(init_u32) 1396 module_exit(exit_u32) 1397 MODULE_LICENSE("GPL"); 1398