1 /* 2 * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * The filters are packed to hash tables of key nodes 12 * with a set of 32bit key/mask pairs at every node. 13 * Nodes reference next level hash tables etc. 14 * 15 * This scheme is the best universal classifier I managed to 16 * invent; it is not super-fast, but it is not slow (provided you 17 * program it correctly), and general enough. And its relative 18 * speed grows as the number of rules becomes larger. 19 * 20 * It seems that it represents the best middle point between 21 * speed and manageability both by human and by machine. 22 * 23 * It is especially useful for link sharing combined with QoS; 24 * pure RSVP doesn't need such a general approach and can use 25 * much simpler (and faster) schemes, sort of cls_rsvp.c. 26 * 27 * JHS: We should remove the CONFIG_NET_CLS_IND from here 28 * eventually when the meta match extension is made available 29 * 30 * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> 31 */ 32 33 #include <linux/module.h> 34 #include <linux/slab.h> 35 #include <linux/types.h> 36 #include <linux/kernel.h> 37 #include <linux/string.h> 38 #include <linux/errno.h> 39 #include <linux/percpu.h> 40 #include <linux/rtnetlink.h> 41 #include <linux/skbuff.h> 42 #include <linux/bitmap.h> 43 #include <linux/netdevice.h> 44 #include <linux/hash.h> 45 #include <net/netlink.h> 46 #include <net/act_api.h> 47 #include <net/pkt_cls.h> 48 #include <linux/idr.h> 49 50 struct tc_u_knode { 51 struct tc_u_knode __rcu *next; 52 u32 handle; 53 struct tc_u_hnode __rcu *ht_up; 54 struct tcf_exts exts; 55 #ifdef CONFIG_NET_CLS_IND 56 int ifindex; 57 #endif 58 u8 fshift; 59 struct tcf_result res; 60 struct tc_u_hnode __rcu *ht_down; 61 #ifdef CONFIG_CLS_U32_PERF 62 struct tc_u32_pcnt __percpu *pf; 63 #endif 64 u32 flags; 65 #ifdef CONFIG_CLS_U32_MARK 66 u32 val; 67 u32 mask; 68 u32 __percpu *pcpu_success; 69 #endif 70 struct tcf_proto *tp; 71 union { 72 struct work_struct work; 73 struct rcu_head rcu; 74 }; 75 /* The 'sel' field MUST be the last field in structure to allow for 76 * tc_u32_keys allocated at end of structure. 77 */ 78 struct tc_u32_sel sel; 79 }; 80 81 struct tc_u_hnode { 82 struct tc_u_hnode __rcu *next; 83 u32 handle; 84 u32 prio; 85 struct tc_u_common *tp_c; 86 int refcnt; 87 unsigned int divisor; 88 struct idr handle_idr; 89 struct rcu_head rcu; 90 u32 flags; 91 /* The 'ht' field MUST be the last field in structure to allow for 92 * more entries allocated at end of structure. 93 */ 94 struct tc_u_knode __rcu *ht[1]; 95 }; 96 97 struct tc_u_common { 98 struct tc_u_hnode __rcu *hlist; 99 void *ptr; 100 int refcnt; 101 struct idr handle_idr; 102 struct hlist_node hnode; 103 struct rcu_head rcu; 104 }; 105 106 static inline unsigned int u32_hash_fold(__be32 key, 107 const struct tc_u32_sel *sel, 108 u8 fshift) 109 { 110 unsigned int h = ntohl(key & sel->hmask) >> fshift; 111 112 return h; 113 } 114 115 static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, 116 struct tcf_result *res) 117 { 118 struct { 119 struct tc_u_knode *knode; 120 unsigned int off; 121 } stack[TC_U32_MAXDEPTH]; 122 123 struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); 124 unsigned int off = skb_network_offset(skb); 125 struct tc_u_knode *n; 126 int sdepth = 0; 127 int off2 = 0; 128 int sel = 0; 129 #ifdef CONFIG_CLS_U32_PERF 130 int j; 131 #endif 132 int i, r; 133 134 next_ht: 135 n = rcu_dereference_bh(ht->ht[sel]); 136 137 next_knode: 138 if (n) { 139 struct tc_u32_key *key = n->sel.keys; 140 141 #ifdef CONFIG_CLS_U32_PERF 142 __this_cpu_inc(n->pf->rcnt); 143 j = 0; 144 #endif 145 146 if (tc_skip_sw(n->flags)) { 147 n = rcu_dereference_bh(n->next); 148 goto next_knode; 149 } 150 151 #ifdef CONFIG_CLS_U32_MARK 152 if ((skb->mark & n->mask) != n->val) { 153 n = rcu_dereference_bh(n->next); 154 goto next_knode; 155 } else { 156 __this_cpu_inc(*n->pcpu_success); 157 } 158 #endif 159 160 for (i = n->sel.nkeys; i > 0; i--, key++) { 161 int toff = off + key->off + (off2 & key->offmask); 162 __be32 *data, hdata; 163 164 if (skb_headroom(skb) + toff > INT_MAX) 165 goto out; 166 167 data = skb_header_pointer(skb, toff, 4, &hdata); 168 if (!data) 169 goto out; 170 if ((*data ^ key->val) & key->mask) { 171 n = rcu_dereference_bh(n->next); 172 goto next_knode; 173 } 174 #ifdef CONFIG_CLS_U32_PERF 175 __this_cpu_inc(n->pf->kcnts[j]); 176 j++; 177 #endif 178 } 179 180 ht = rcu_dereference_bh(n->ht_down); 181 if (!ht) { 182 check_terminal: 183 if (n->sel.flags & TC_U32_TERMINAL) { 184 185 *res = n->res; 186 #ifdef CONFIG_NET_CLS_IND 187 if (!tcf_match_indev(skb, n->ifindex)) { 188 n = rcu_dereference_bh(n->next); 189 goto next_knode; 190 } 191 #endif 192 #ifdef CONFIG_CLS_U32_PERF 193 __this_cpu_inc(n->pf->rhit); 194 #endif 195 r = tcf_exts_exec(skb, &n->exts, res); 196 if (r < 0) { 197 n = rcu_dereference_bh(n->next); 198 goto next_knode; 199 } 200 201 return r; 202 } 203 n = rcu_dereference_bh(n->next); 204 goto next_knode; 205 } 206 207 /* PUSH */ 208 if (sdepth >= TC_U32_MAXDEPTH) 209 goto deadloop; 210 stack[sdepth].knode = n; 211 stack[sdepth].off = off; 212 sdepth++; 213 214 ht = rcu_dereference_bh(n->ht_down); 215 sel = 0; 216 if (ht->divisor) { 217 __be32 *data, hdata; 218 219 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 220 &hdata); 221 if (!data) 222 goto out; 223 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 224 n->fshift); 225 } 226 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) 227 goto next_ht; 228 229 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { 230 off2 = n->sel.off + 3; 231 if (n->sel.flags & TC_U32_VAROFFSET) { 232 __be16 *data, hdata; 233 234 data = skb_header_pointer(skb, 235 off + n->sel.offoff, 236 2, &hdata); 237 if (!data) 238 goto out; 239 off2 += ntohs(n->sel.offmask & *data) >> 240 n->sel.offshift; 241 } 242 off2 &= ~3; 243 } 244 if (n->sel.flags & TC_U32_EAT) { 245 off += off2; 246 off2 = 0; 247 } 248 249 if (off < skb->len) 250 goto next_ht; 251 } 252 253 /* POP */ 254 if (sdepth--) { 255 n = stack[sdepth].knode; 256 ht = rcu_dereference_bh(n->ht_up); 257 off = stack[sdepth].off; 258 goto check_terminal; 259 } 260 out: 261 return -1; 262 263 deadloop: 264 net_warn_ratelimited("cls_u32: dead loop\n"); 265 return -1; 266 } 267 268 static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 269 { 270 struct tc_u_hnode *ht; 271 272 for (ht = rtnl_dereference(tp_c->hlist); 273 ht; 274 ht = rtnl_dereference(ht->next)) 275 if (ht->handle == handle) 276 break; 277 278 return ht; 279 } 280 281 static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 282 { 283 unsigned int sel; 284 struct tc_u_knode *n = NULL; 285 286 sel = TC_U32_HASH(handle); 287 if (sel > ht->divisor) 288 goto out; 289 290 for (n = rtnl_dereference(ht->ht[sel]); 291 n; 292 n = rtnl_dereference(n->next)) 293 if (n->handle == handle) 294 break; 295 out: 296 return n; 297 } 298 299 300 static void *u32_get(struct tcf_proto *tp, u32 handle) 301 { 302 struct tc_u_hnode *ht; 303 struct tc_u_common *tp_c = tp->data; 304 305 if (TC_U32_HTID(handle) == TC_U32_ROOT) 306 ht = rtnl_dereference(tp->root); 307 else 308 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); 309 310 if (!ht) 311 return NULL; 312 313 if (TC_U32_KEY(handle) == 0) 314 return ht; 315 316 return u32_lookup_key(ht, handle); 317 } 318 319 /* Protected by rtnl lock */ 320 static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) 321 { 322 int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL); 323 if (id < 0) 324 return 0; 325 return (id | 0x800U) << 20; 326 } 327 328 static struct hlist_head *tc_u_common_hash; 329 330 #define U32_HASH_SHIFT 10 331 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT) 332 333 static void *tc_u_common_ptr(const struct tcf_proto *tp) 334 { 335 struct tcf_block *block = tp->chain->block; 336 337 /* The block sharing is currently supported only 338 * for classless qdiscs. In that case we use block 339 * for tc_u_common identification. In case the 340 * block is not shared, block->q is a valid pointer 341 * and we can use that. That works for classful qdiscs. 342 */ 343 if (tcf_block_shared(block)) 344 return block; 345 else 346 return block->q; 347 } 348 349 static unsigned int tc_u_hash(const struct tcf_proto *tp) 350 { 351 return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT); 352 } 353 354 static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) 355 { 356 struct tc_u_common *tc; 357 unsigned int h; 358 359 h = tc_u_hash(tp); 360 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { 361 if (tc->ptr == tc_u_common_ptr(tp)) 362 return tc; 363 } 364 return NULL; 365 } 366 367 static int u32_init(struct tcf_proto *tp) 368 { 369 struct tc_u_hnode *root_ht; 370 struct tc_u_common *tp_c; 371 unsigned int h; 372 373 tp_c = tc_u_common_find(tp); 374 375 root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); 376 if (root_ht == NULL) 377 return -ENOBUFS; 378 379 root_ht->refcnt++; 380 root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; 381 root_ht->prio = tp->prio; 382 idr_init(&root_ht->handle_idr); 383 384 if (tp_c == NULL) { 385 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); 386 if (tp_c == NULL) { 387 kfree(root_ht); 388 return -ENOBUFS; 389 } 390 tp_c->ptr = tc_u_common_ptr(tp); 391 INIT_HLIST_NODE(&tp_c->hnode); 392 idr_init(&tp_c->handle_idr); 393 394 h = tc_u_hash(tp); 395 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); 396 } 397 398 tp_c->refcnt++; 399 RCU_INIT_POINTER(root_ht->next, tp_c->hlist); 400 rcu_assign_pointer(tp_c->hlist, root_ht); 401 root_ht->tp_c = tp_c; 402 403 rcu_assign_pointer(tp->root, root_ht); 404 tp->data = tp_c; 405 return 0; 406 } 407 408 static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, 409 bool free_pf) 410 { 411 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 412 413 tcf_exts_destroy(&n->exts); 414 tcf_exts_put_net(&n->exts); 415 if (ht && --ht->refcnt == 0) 416 kfree(ht); 417 #ifdef CONFIG_CLS_U32_PERF 418 if (free_pf) 419 free_percpu(n->pf); 420 #endif 421 #ifdef CONFIG_CLS_U32_MARK 422 if (free_pf) 423 free_percpu(n->pcpu_success); 424 #endif 425 kfree(n); 426 return 0; 427 } 428 429 /* u32_delete_key_rcu should be called when free'ing a copied 430 * version of a tc_u_knode obtained from u32_init_knode(). When 431 * copies are obtained from u32_init_knode() the statistics are 432 * shared between the old and new copies to allow readers to 433 * continue to update the statistics during the copy. To support 434 * this the u32_delete_key_rcu variant does not free the percpu 435 * statistics. 436 */ 437 static void u32_delete_key_work(struct work_struct *work) 438 { 439 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); 440 441 rtnl_lock(); 442 u32_destroy_key(key->tp, key, false); 443 rtnl_unlock(); 444 } 445 446 static void u32_delete_key_rcu(struct rcu_head *rcu) 447 { 448 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 449 450 INIT_WORK(&key->work, u32_delete_key_work); 451 tcf_queue_work(&key->work); 452 } 453 454 /* u32_delete_key_freepf_rcu is the rcu callback variant 455 * that free's the entire structure including the statistics 456 * percpu variables. Only use this if the key is not a copy 457 * returned by u32_init_knode(). See u32_delete_key_rcu() 458 * for the variant that should be used with keys return from 459 * u32_init_knode() 460 */ 461 static void u32_delete_key_freepf_work(struct work_struct *work) 462 { 463 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); 464 465 rtnl_lock(); 466 u32_destroy_key(key->tp, key, true); 467 rtnl_unlock(); 468 } 469 470 static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) 471 { 472 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 473 474 INIT_WORK(&key->work, u32_delete_key_freepf_work); 475 tcf_queue_work(&key->work); 476 } 477 478 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) 479 { 480 struct tc_u_knode __rcu **kp; 481 struct tc_u_knode *pkp; 482 struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); 483 484 if (ht) { 485 kp = &ht->ht[TC_U32_HASH(key->handle)]; 486 for (pkp = rtnl_dereference(*kp); pkp; 487 kp = &pkp->next, pkp = rtnl_dereference(*kp)) { 488 if (pkp == key) { 489 RCU_INIT_POINTER(*kp, key->next); 490 491 tcf_unbind_filter(tp, &key->res); 492 idr_remove(&ht->handle_idr, key->handle); 493 tcf_exts_get_net(&key->exts); 494 call_rcu(&key->rcu, u32_delete_key_freepf_rcu); 495 return 0; 496 } 497 } 498 } 499 WARN_ON(1); 500 return 0; 501 } 502 503 static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 504 struct netlink_ext_ack *extack) 505 { 506 struct tcf_block *block = tp->chain->block; 507 struct tc_cls_u32_offload cls_u32 = {}; 508 509 tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); 510 cls_u32.command = TC_CLSU32_DELETE_HNODE; 511 cls_u32.hnode.divisor = h->divisor; 512 cls_u32.hnode.handle = h->handle; 513 cls_u32.hnode.prio = h->prio; 514 515 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); 516 } 517 518 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 519 u32 flags, struct netlink_ext_ack *extack) 520 { 521 struct tcf_block *block = tp->chain->block; 522 struct tc_cls_u32_offload cls_u32 = {}; 523 bool skip_sw = tc_skip_sw(flags); 524 bool offloaded = false; 525 int err; 526 527 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); 528 cls_u32.command = TC_CLSU32_NEW_HNODE; 529 cls_u32.hnode.divisor = h->divisor; 530 cls_u32.hnode.handle = h->handle; 531 cls_u32.hnode.prio = h->prio; 532 533 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 534 if (err < 0) { 535 u32_clear_hw_hnode(tp, h, NULL); 536 return err; 537 } else if (err > 0) { 538 offloaded = true; 539 } 540 541 if (skip_sw && !offloaded) 542 return -EINVAL; 543 544 return 0; 545 } 546 547 static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 548 struct netlink_ext_ack *extack) 549 { 550 struct tcf_block *block = tp->chain->block; 551 struct tc_cls_u32_offload cls_u32 = {}; 552 553 tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); 554 cls_u32.command = TC_CLSU32_DELETE_KNODE; 555 cls_u32.knode.handle = n->handle; 556 557 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); 558 tcf_block_offload_dec(block, &n->flags); 559 } 560 561 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 562 u32 flags, struct netlink_ext_ack *extack) 563 { 564 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 565 struct tcf_block *block = tp->chain->block; 566 struct tc_cls_u32_offload cls_u32 = {}; 567 bool skip_sw = tc_skip_sw(flags); 568 int err; 569 570 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); 571 cls_u32.command = TC_CLSU32_REPLACE_KNODE; 572 cls_u32.knode.handle = n->handle; 573 cls_u32.knode.fshift = n->fshift; 574 #ifdef CONFIG_CLS_U32_MARK 575 cls_u32.knode.val = n->val; 576 cls_u32.knode.mask = n->mask; 577 #else 578 cls_u32.knode.val = 0; 579 cls_u32.knode.mask = 0; 580 #endif 581 cls_u32.knode.sel = &n->sel; 582 cls_u32.knode.exts = &n->exts; 583 if (n->ht_down) 584 cls_u32.knode.link_handle = ht->handle; 585 586 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 587 if (err < 0) { 588 u32_remove_hw_knode(tp, n, NULL); 589 return err; 590 } else if (err > 0) { 591 tcf_block_offload_inc(block, &n->flags); 592 } 593 594 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) 595 return -EINVAL; 596 597 return 0; 598 } 599 600 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, 601 struct netlink_ext_ack *extack) 602 { 603 struct tc_u_knode *n; 604 unsigned int h; 605 606 for (h = 0; h <= ht->divisor; h++) { 607 while ((n = rtnl_dereference(ht->ht[h])) != NULL) { 608 RCU_INIT_POINTER(ht->ht[h], 609 rtnl_dereference(n->next)); 610 tcf_unbind_filter(tp, &n->res); 611 u32_remove_hw_knode(tp, n, extack); 612 idr_remove(&ht->handle_idr, n->handle); 613 if (tcf_exts_get_net(&n->exts)) 614 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 615 else 616 u32_destroy_key(n->tp, n, true); 617 } 618 } 619 } 620 621 static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, 622 struct netlink_ext_ack *extack) 623 { 624 struct tc_u_common *tp_c = tp->data; 625 struct tc_u_hnode __rcu **hn; 626 struct tc_u_hnode *phn; 627 628 WARN_ON(ht->refcnt); 629 630 u32_clear_hnode(tp, ht, extack); 631 632 hn = &tp_c->hlist; 633 for (phn = rtnl_dereference(*hn); 634 phn; 635 hn = &phn->next, phn = rtnl_dereference(*hn)) { 636 if (phn == ht) { 637 u32_clear_hw_hnode(tp, ht, extack); 638 idr_destroy(&ht->handle_idr); 639 idr_remove(&tp_c->handle_idr, ht->handle); 640 RCU_INIT_POINTER(*hn, ht->next); 641 kfree_rcu(ht, rcu); 642 return 0; 643 } 644 } 645 646 return -ENOENT; 647 } 648 649 static bool ht_empty(struct tc_u_hnode *ht) 650 { 651 unsigned int h; 652 653 for (h = 0; h <= ht->divisor; h++) 654 if (rcu_access_pointer(ht->ht[h])) 655 return false; 656 657 return true; 658 } 659 660 static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) 661 { 662 struct tc_u_common *tp_c = tp->data; 663 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 664 665 WARN_ON(root_ht == NULL); 666 667 if (root_ht && --root_ht->refcnt == 0) 668 u32_destroy_hnode(tp, root_ht, extack); 669 670 if (--tp_c->refcnt == 0) { 671 struct tc_u_hnode *ht; 672 673 hlist_del(&tp_c->hnode); 674 675 while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { 676 u32_clear_hnode(tp, ht, extack); 677 RCU_INIT_POINTER(tp_c->hlist, ht->next); 678 679 /* u32_destroy_key() will later free ht for us, if it's 680 * still referenced by some knode 681 */ 682 if (--ht->refcnt == 0) 683 kfree_rcu(ht, rcu); 684 } 685 686 idr_destroy(&tp_c->handle_idr); 687 kfree(tp_c); 688 } 689 690 tp->data = NULL; 691 } 692 693 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, 694 struct netlink_ext_ack *extack) 695 { 696 struct tc_u_hnode *ht = arg; 697 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 698 struct tc_u_common *tp_c = tp->data; 699 int ret = 0; 700 701 if (ht == NULL) 702 goto out; 703 704 if (TC_U32_KEY(ht->handle)) { 705 u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); 706 ret = u32_delete_key(tp, (struct tc_u_knode *)ht); 707 goto out; 708 } 709 710 if (root_ht == ht) { 711 NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); 712 return -EINVAL; 713 } 714 715 if (ht->refcnt == 1) { 716 ht->refcnt--; 717 u32_destroy_hnode(tp, ht, extack); 718 } else { 719 NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); 720 return -EBUSY; 721 } 722 723 out: 724 *last = true; 725 if (root_ht) { 726 if (root_ht->refcnt > 1) { 727 *last = false; 728 goto ret; 729 } 730 if (root_ht->refcnt == 1) { 731 if (!ht_empty(root_ht)) { 732 *last = false; 733 goto ret; 734 } 735 } 736 } 737 738 if (tp_c->refcnt > 1) { 739 *last = false; 740 goto ret; 741 } 742 743 if (tp_c->refcnt == 1) { 744 struct tc_u_hnode *ht; 745 746 for (ht = rtnl_dereference(tp_c->hlist); 747 ht; 748 ht = rtnl_dereference(ht->next)) 749 if (!ht_empty(ht)) { 750 *last = false; 751 break; 752 } 753 } 754 755 ret: 756 return ret; 757 } 758 759 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) 760 { 761 u32 index = htid | 0x800; 762 u32 max = htid | 0xFFF; 763 764 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) { 765 index = htid + 1; 766 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, 767 GFP_KERNEL)) 768 index = max; 769 } 770 771 return index; 772 } 773 774 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 775 [TCA_U32_CLASSID] = { .type = NLA_U32 }, 776 [TCA_U32_HASH] = { .type = NLA_U32 }, 777 [TCA_U32_LINK] = { .type = NLA_U32 }, 778 [TCA_U32_DIVISOR] = { .type = NLA_U32 }, 779 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, 780 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, 781 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, 782 [TCA_U32_FLAGS] = { .type = NLA_U32 }, 783 }; 784 785 static int u32_set_parms(struct net *net, struct tcf_proto *tp, 786 unsigned long base, struct tc_u_hnode *ht, 787 struct tc_u_knode *n, struct nlattr **tb, 788 struct nlattr *est, bool ovr, 789 struct netlink_ext_ack *extack) 790 { 791 int err; 792 793 err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); 794 if (err < 0) 795 return err; 796 797 if (tb[TCA_U32_LINK]) { 798 u32 handle = nla_get_u32(tb[TCA_U32_LINK]); 799 struct tc_u_hnode *ht_down = NULL, *ht_old; 800 801 if (TC_U32_KEY(handle)) { 802 NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table"); 803 return -EINVAL; 804 } 805 806 if (handle) { 807 ht_down = u32_lookup_ht(ht->tp_c, handle); 808 809 if (!ht_down) { 810 NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); 811 return -EINVAL; 812 } 813 ht_down->refcnt++; 814 } 815 816 ht_old = rtnl_dereference(n->ht_down); 817 rcu_assign_pointer(n->ht_down, ht_down); 818 819 if (ht_old) 820 ht_old->refcnt--; 821 } 822 if (tb[TCA_U32_CLASSID]) { 823 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); 824 tcf_bind_filter(tp, &n->res, base); 825 } 826 827 #ifdef CONFIG_NET_CLS_IND 828 if (tb[TCA_U32_INDEV]) { 829 int ret; 830 ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); 831 if (ret < 0) 832 return -EINVAL; 833 n->ifindex = ret; 834 } 835 #endif 836 return 0; 837 } 838 839 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, 840 struct tc_u_knode *n) 841 { 842 struct tc_u_knode __rcu **ins; 843 struct tc_u_knode *pins; 844 struct tc_u_hnode *ht; 845 846 if (TC_U32_HTID(n->handle) == TC_U32_ROOT) 847 ht = rtnl_dereference(tp->root); 848 else 849 ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle)); 850 851 ins = &ht->ht[TC_U32_HASH(n->handle)]; 852 853 /* The node must always exist for it to be replaced if this is not the 854 * case then something went very wrong elsewhere. 855 */ 856 for (pins = rtnl_dereference(*ins); ; 857 ins = &pins->next, pins = rtnl_dereference(*ins)) 858 if (pins->handle == n->handle) 859 break; 860 861 idr_replace(&ht->handle_idr, n, n->handle); 862 RCU_INIT_POINTER(n->next, pins->next); 863 rcu_assign_pointer(*ins, n); 864 } 865 866 static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, 867 struct tc_u_knode *n) 868 { 869 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); 870 struct tc_u32_sel *s = &n->sel; 871 struct tc_u_knode *new; 872 873 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), 874 GFP_KERNEL); 875 876 if (!new) 877 return NULL; 878 879 RCU_INIT_POINTER(new->next, n->next); 880 new->handle = n->handle; 881 RCU_INIT_POINTER(new->ht_up, n->ht_up); 882 883 #ifdef CONFIG_NET_CLS_IND 884 new->ifindex = n->ifindex; 885 #endif 886 new->fshift = n->fshift; 887 new->res = n->res; 888 new->flags = n->flags; 889 RCU_INIT_POINTER(new->ht_down, ht); 890 891 /* bump reference count as long as we hold pointer to structure */ 892 if (ht) 893 ht->refcnt++; 894 895 #ifdef CONFIG_CLS_U32_PERF 896 /* Statistics may be incremented by readers during update 897 * so we must keep them in tact. When the node is later destroyed 898 * a special destroy call must be made to not free the pf memory. 899 */ 900 new->pf = n->pf; 901 #endif 902 903 #ifdef CONFIG_CLS_U32_MARK 904 new->val = n->val; 905 new->mask = n->mask; 906 /* Similarly success statistics must be moved as pointers */ 907 new->pcpu_success = n->pcpu_success; 908 #endif 909 new->tp = tp; 910 memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); 911 912 if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { 913 kfree(new); 914 return NULL; 915 } 916 917 return new; 918 } 919 920 static int u32_change(struct net *net, struct sk_buff *in_skb, 921 struct tcf_proto *tp, unsigned long base, u32 handle, 922 struct nlattr **tca, void **arg, bool ovr, 923 struct netlink_ext_ack *extack) 924 { 925 struct tc_u_common *tp_c = tp->data; 926 struct tc_u_hnode *ht; 927 struct tc_u_knode *n; 928 struct tc_u32_sel *s; 929 struct nlattr *opt = tca[TCA_OPTIONS]; 930 struct nlattr *tb[TCA_U32_MAX + 1]; 931 u32 htid, flags = 0; 932 int err; 933 #ifdef CONFIG_CLS_U32_PERF 934 size_t size; 935 #endif 936 937 if (!opt) { 938 if (handle) { 939 NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options"); 940 return -EINVAL; 941 } else { 942 return 0; 943 } 944 } 945 946 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); 947 if (err < 0) 948 return err; 949 950 if (tb[TCA_U32_FLAGS]) { 951 flags = nla_get_u32(tb[TCA_U32_FLAGS]); 952 if (!tc_flags_valid(flags)) { 953 NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); 954 return -EINVAL; 955 } 956 } 957 958 n = *arg; 959 if (n) { 960 struct tc_u_knode *new; 961 962 if (TC_U32_KEY(n->handle) == 0) { 963 NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero"); 964 return -EINVAL; 965 } 966 967 if ((n->flags ^ flags) & 968 ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { 969 NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); 970 return -EINVAL; 971 } 972 973 new = u32_init_knode(tp, n); 974 if (!new) 975 return -ENOMEM; 976 977 err = u32_set_parms(net, tp, base, 978 rtnl_dereference(n->ht_up), new, tb, 979 tca[TCA_RATE], ovr, extack); 980 981 if (err) { 982 u32_destroy_key(tp, new, false); 983 return err; 984 } 985 986 err = u32_replace_hw_knode(tp, new, flags, extack); 987 if (err) { 988 u32_destroy_key(tp, new, false); 989 return err; 990 } 991 992 if (!tc_in_hw(new->flags)) 993 new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; 994 995 u32_replace_knode(tp, tp_c, new); 996 tcf_unbind_filter(tp, &n->res); 997 tcf_exts_get_net(&n->exts); 998 call_rcu(&n->rcu, u32_delete_key_rcu); 999 return 0; 1000 } 1001 1002 if (tb[TCA_U32_DIVISOR]) { 1003 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 1004 1005 if (--divisor > 0x100) { 1006 NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); 1007 return -EINVAL; 1008 } 1009 if (TC_U32_KEY(handle)) { 1010 NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table"); 1011 return -EINVAL; 1012 } 1013 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); 1014 if (ht == NULL) 1015 return -ENOBUFS; 1016 if (handle == 0) { 1017 handle = gen_new_htid(tp->data, ht); 1018 if (handle == 0) { 1019 kfree(ht); 1020 return -ENOMEM; 1021 } 1022 } else { 1023 err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle, 1024 handle, GFP_KERNEL); 1025 if (err) { 1026 kfree(ht); 1027 return err; 1028 } 1029 } 1030 ht->tp_c = tp_c; 1031 ht->refcnt = 1; 1032 ht->divisor = divisor; 1033 ht->handle = handle; 1034 ht->prio = tp->prio; 1035 idr_init(&ht->handle_idr); 1036 ht->flags = flags; 1037 1038 err = u32_replace_hw_hnode(tp, ht, flags, extack); 1039 if (err) { 1040 idr_remove(&tp_c->handle_idr, handle); 1041 kfree(ht); 1042 return err; 1043 } 1044 1045 RCU_INIT_POINTER(ht->next, tp_c->hlist); 1046 rcu_assign_pointer(tp_c->hlist, ht); 1047 *arg = ht; 1048 1049 return 0; 1050 } 1051 1052 if (tb[TCA_U32_HASH]) { 1053 htid = nla_get_u32(tb[TCA_U32_HASH]); 1054 if (TC_U32_HTID(htid) == TC_U32_ROOT) { 1055 ht = rtnl_dereference(tp->root); 1056 htid = ht->handle; 1057 } else { 1058 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); 1059 if (!ht) { 1060 NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found"); 1061 return -EINVAL; 1062 } 1063 } 1064 } else { 1065 ht = rtnl_dereference(tp->root); 1066 htid = ht->handle; 1067 } 1068 1069 if (ht->divisor < TC_U32_HASH(htid)) { 1070 NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value"); 1071 return -EINVAL; 1072 } 1073 1074 if (handle) { 1075 if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { 1076 NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); 1077 return -EINVAL; 1078 } 1079 handle = htid | TC_U32_NODE(handle); 1080 err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, 1081 GFP_KERNEL); 1082 if (err) 1083 return err; 1084 } else 1085 handle = gen_new_kid(ht, htid); 1086 1087 if (tb[TCA_U32_SEL] == NULL) { 1088 NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); 1089 err = -EINVAL; 1090 goto erridr; 1091 } 1092 1093 s = nla_data(tb[TCA_U32_SEL]); 1094 1095 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 1096 if (n == NULL) { 1097 err = -ENOBUFS; 1098 goto erridr; 1099 } 1100 1101 #ifdef CONFIG_CLS_U32_PERF 1102 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); 1103 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); 1104 if (!n->pf) { 1105 err = -ENOBUFS; 1106 goto errfree; 1107 } 1108 #endif 1109 1110 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); 1111 RCU_INIT_POINTER(n->ht_up, ht); 1112 n->handle = handle; 1113 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; 1114 n->flags = flags; 1115 n->tp = tp; 1116 1117 err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); 1118 if (err < 0) 1119 goto errout; 1120 1121 #ifdef CONFIG_CLS_U32_MARK 1122 n->pcpu_success = alloc_percpu(u32); 1123 if (!n->pcpu_success) { 1124 err = -ENOMEM; 1125 goto errout; 1126 } 1127 1128 if (tb[TCA_U32_MARK]) { 1129 struct tc_u32_mark *mark; 1130 1131 mark = nla_data(tb[TCA_U32_MARK]); 1132 n->val = mark->val; 1133 n->mask = mark->mask; 1134 } 1135 #endif 1136 1137 err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, 1138 extack); 1139 if (err == 0) { 1140 struct tc_u_knode __rcu **ins; 1141 struct tc_u_knode *pins; 1142 1143 err = u32_replace_hw_knode(tp, n, flags, extack); 1144 if (err) 1145 goto errhw; 1146 1147 if (!tc_in_hw(n->flags)) 1148 n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; 1149 1150 ins = &ht->ht[TC_U32_HASH(handle)]; 1151 for (pins = rtnl_dereference(*ins); pins; 1152 ins = &pins->next, pins = rtnl_dereference(*ins)) 1153 if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) 1154 break; 1155 1156 RCU_INIT_POINTER(n->next, pins); 1157 rcu_assign_pointer(*ins, n); 1158 *arg = n; 1159 return 0; 1160 } 1161 1162 errhw: 1163 #ifdef CONFIG_CLS_U32_MARK 1164 free_percpu(n->pcpu_success); 1165 #endif 1166 1167 errout: 1168 tcf_exts_destroy(&n->exts); 1169 #ifdef CONFIG_CLS_U32_PERF 1170 errfree: 1171 free_percpu(n->pf); 1172 #endif 1173 kfree(n); 1174 erridr: 1175 idr_remove(&ht->handle_idr, handle); 1176 return err; 1177 } 1178 1179 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) 1180 { 1181 struct tc_u_common *tp_c = tp->data; 1182 struct tc_u_hnode *ht; 1183 struct tc_u_knode *n; 1184 unsigned int h; 1185 1186 if (arg->stop) 1187 return; 1188 1189 for (ht = rtnl_dereference(tp_c->hlist); 1190 ht; 1191 ht = rtnl_dereference(ht->next)) { 1192 if (ht->prio != tp->prio) 1193 continue; 1194 if (arg->count >= arg->skip) { 1195 if (arg->fn(tp, ht, arg) < 0) { 1196 arg->stop = 1; 1197 return; 1198 } 1199 } 1200 arg->count++; 1201 for (h = 0; h <= ht->divisor; h++) { 1202 for (n = rtnl_dereference(ht->ht[h]); 1203 n; 1204 n = rtnl_dereference(n->next)) { 1205 if (arg->count < arg->skip) { 1206 arg->count++; 1207 continue; 1208 } 1209 if (arg->fn(tp, n, arg) < 0) { 1210 arg->stop = 1; 1211 return; 1212 } 1213 arg->count++; 1214 } 1215 } 1216 } 1217 } 1218 1219 static void u32_bind_class(void *fh, u32 classid, unsigned long cl) 1220 { 1221 struct tc_u_knode *n = fh; 1222 1223 if (n && n->res.classid == classid) 1224 n->res.class = cl; 1225 } 1226 1227 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, 1228 struct sk_buff *skb, struct tcmsg *t) 1229 { 1230 struct tc_u_knode *n = fh; 1231 struct tc_u_hnode *ht_up, *ht_down; 1232 struct nlattr *nest; 1233 1234 if (n == NULL) 1235 return skb->len; 1236 1237 t->tcm_handle = n->handle; 1238 1239 nest = nla_nest_start(skb, TCA_OPTIONS); 1240 if (nest == NULL) 1241 goto nla_put_failure; 1242 1243 if (TC_U32_KEY(n->handle) == 0) { 1244 struct tc_u_hnode *ht = fh; 1245 u32 divisor = ht->divisor + 1; 1246 1247 if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) 1248 goto nla_put_failure; 1249 } else { 1250 #ifdef CONFIG_CLS_U32_PERF 1251 struct tc_u32_pcnt *gpf; 1252 int cpu; 1253 #endif 1254 1255 if (nla_put(skb, TCA_U32_SEL, 1256 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), 1257 &n->sel)) 1258 goto nla_put_failure; 1259 1260 ht_up = rtnl_dereference(n->ht_up); 1261 if (ht_up) { 1262 u32 htid = n->handle & 0xFFFFF000; 1263 if (nla_put_u32(skb, TCA_U32_HASH, htid)) 1264 goto nla_put_failure; 1265 } 1266 if (n->res.classid && 1267 nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid)) 1268 goto nla_put_failure; 1269 1270 ht_down = rtnl_dereference(n->ht_down); 1271 if (ht_down && 1272 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) 1273 goto nla_put_failure; 1274 1275 if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags)) 1276 goto nla_put_failure; 1277 1278 #ifdef CONFIG_CLS_U32_MARK 1279 if ((n->val || n->mask)) { 1280 struct tc_u32_mark mark = {.val = n->val, 1281 .mask = n->mask, 1282 .success = 0}; 1283 int cpum; 1284 1285 for_each_possible_cpu(cpum) { 1286 __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum); 1287 1288 mark.success += cnt; 1289 } 1290 1291 if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark)) 1292 goto nla_put_failure; 1293 } 1294 #endif 1295 1296 if (tcf_exts_dump(skb, &n->exts) < 0) 1297 goto nla_put_failure; 1298 1299 #ifdef CONFIG_NET_CLS_IND 1300 if (n->ifindex) { 1301 struct net_device *dev; 1302 dev = __dev_get_by_index(net, n->ifindex); 1303 if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name)) 1304 goto nla_put_failure; 1305 } 1306 #endif 1307 #ifdef CONFIG_CLS_U32_PERF 1308 gpf = kzalloc(sizeof(struct tc_u32_pcnt) + 1309 n->sel.nkeys * sizeof(u64), 1310 GFP_KERNEL); 1311 if (!gpf) 1312 goto nla_put_failure; 1313 1314 for_each_possible_cpu(cpu) { 1315 int i; 1316 struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu); 1317 1318 gpf->rcnt += pf->rcnt; 1319 gpf->rhit += pf->rhit; 1320 for (i = 0; i < n->sel.nkeys; i++) 1321 gpf->kcnts[i] += pf->kcnts[i]; 1322 } 1323 1324 if (nla_put_64bit(skb, TCA_U32_PCNT, 1325 sizeof(struct tc_u32_pcnt) + 1326 n->sel.nkeys * sizeof(u64), 1327 gpf, TCA_U32_PAD)) { 1328 kfree(gpf); 1329 goto nla_put_failure; 1330 } 1331 kfree(gpf); 1332 #endif 1333 } 1334 1335 nla_nest_end(skb, nest); 1336 1337 if (TC_U32_KEY(n->handle)) 1338 if (tcf_exts_dump_stats(skb, &n->exts) < 0) 1339 goto nla_put_failure; 1340 return skb->len; 1341 1342 nla_put_failure: 1343 nla_nest_cancel(skb, nest); 1344 return -1; 1345 } 1346 1347 static struct tcf_proto_ops cls_u32_ops __read_mostly = { 1348 .kind = "u32", 1349 .classify = u32_classify, 1350 .init = u32_init, 1351 .destroy = u32_destroy, 1352 .get = u32_get, 1353 .change = u32_change, 1354 .delete = u32_delete, 1355 .walk = u32_walk, 1356 .dump = u32_dump, 1357 .bind_class = u32_bind_class, 1358 .owner = THIS_MODULE, 1359 }; 1360 1361 static int __init init_u32(void) 1362 { 1363 int i, ret; 1364 1365 pr_info("u32 classifier\n"); 1366 #ifdef CONFIG_CLS_U32_PERF 1367 pr_info(" Performance counters on\n"); 1368 #endif 1369 #ifdef CONFIG_NET_CLS_IND 1370 pr_info(" input device check on\n"); 1371 #endif 1372 #ifdef CONFIG_NET_CLS_ACT 1373 pr_info(" Actions configured\n"); 1374 #endif 1375 tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE, 1376 sizeof(struct hlist_head), 1377 GFP_KERNEL); 1378 if (!tc_u_common_hash) 1379 return -ENOMEM; 1380 1381 for (i = 0; i < U32_HASH_SIZE; i++) 1382 INIT_HLIST_HEAD(&tc_u_common_hash[i]); 1383 1384 ret = register_tcf_proto_ops(&cls_u32_ops); 1385 if (ret) 1386 kvfree(tc_u_common_hash); 1387 return ret; 1388 } 1389 1390 static void __exit exit_u32(void) 1391 { 1392 unregister_tcf_proto_ops(&cls_u32_ops); 1393 kvfree(tc_u_common_hash); 1394 } 1395 1396 module_init(init_u32) 1397 module_exit(exit_u32) 1398 MODULE_LICENSE("GPL"); 1399