1 /* 2 * net/sched/cls_flow.c Generic flow classifier 3 * 4 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/init.h> 14 #include <linux/list.h> 15 #include <linux/jhash.h> 16 #include <linux/random.h> 17 #include <linux/pkt_cls.h> 18 #include <linux/skbuff.h> 19 #include <linux/in.h> 20 #include <linux/ip.h> 21 #include <linux/ipv6.h> 22 #include <linux/if_vlan.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 26 #include <net/pkt_cls.h> 27 #include <net/ip.h> 28 #include <net/route.h> 29 #include <net/flow_keys.h> 30 31 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 32 #include <net/netfilter/nf_conntrack.h> 33 #endif 34 35 struct flow_head { 36 struct list_head filters; 37 }; 38 39 struct flow_filter { 40 struct list_head list; 41 struct tcf_exts exts; 42 struct tcf_ematch_tree ematches; 43 struct timer_list perturb_timer; 44 u32 perturb_period; 45 u32 handle; 46 47 u32 nkeys; 48 u32 keymask; 49 u32 mode; 50 u32 mask; 51 u32 xor; 52 u32 rshift; 53 u32 addend; 54 u32 divisor; 55 u32 baseclass; 56 u32 hashrnd; 57 }; 58 59 static inline u32 addr_fold(void *addr) 60 { 61 unsigned long a = (unsigned long)addr; 62 63 return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); 64 } 65 66 static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) 67 { 68 if (flow->src) 69 return ntohl(flow->src); 70 return addr_fold(skb->sk); 71 } 72 73 static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) 74 { 75 if (flow->dst) 76 return ntohl(flow->dst); 77 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 78 } 79 80 static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) 81 { 82 return flow->ip_proto; 83 } 84 85 static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) 86 { 87 if (flow->ports) 88 return ntohs(flow->port16[0]); 89 90 return addr_fold(skb->sk); 91 } 92 93 static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) 94 { 95 if (flow->ports) 96 return ntohs(flow->port16[1]); 97 98 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 99 } 100 101 static u32 flow_get_iif(const struct sk_buff *skb) 102 { 103 return skb->skb_iif; 104 } 105 106 static u32 flow_get_priority(const struct sk_buff *skb) 107 { 108 return skb->priority; 109 } 110 111 static u32 flow_get_mark(const struct sk_buff *skb) 112 { 113 return skb->mark; 114 } 115 116 static u32 flow_get_nfct(const struct sk_buff *skb) 117 { 118 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 119 return addr_fold(skb->nfct); 120 #else 121 return 0; 122 #endif 123 } 124 125 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 126 #define CTTUPLE(skb, member) \ 127 ({ \ 128 enum ip_conntrack_info ctinfo; \ 129 const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ 130 if (ct == NULL) \ 131 goto fallback; \ 132 ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ 133 }) 134 #else 135 #define CTTUPLE(skb, member) \ 136 ({ \ 137 goto fallback; \ 138 0; \ 139 }) 140 #endif 141 142 static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) 143 { 144 switch (skb->protocol) { 145 case htons(ETH_P_IP): 146 return ntohl(CTTUPLE(skb, src.u3.ip)); 147 case htons(ETH_P_IPV6): 148 return ntohl(CTTUPLE(skb, src.u3.ip6[3])); 149 } 150 fallback: 151 return flow_get_src(skb, flow); 152 } 153 154 static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) 155 { 156 switch (skb->protocol) { 157 case htons(ETH_P_IP): 158 return ntohl(CTTUPLE(skb, dst.u3.ip)); 159 case htons(ETH_P_IPV6): 160 return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); 161 } 162 fallback: 163 return flow_get_dst(skb, flow); 164 } 165 166 static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) 167 { 168 return ntohs(CTTUPLE(skb, src.u.all)); 169 fallback: 170 return flow_get_proto_src(skb, flow); 171 } 172 173 static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) 174 { 175 return ntohs(CTTUPLE(skb, dst.u.all)); 176 fallback: 177 return flow_get_proto_dst(skb, flow); 178 } 179 180 static u32 flow_get_rtclassid(const struct sk_buff *skb) 181 { 182 #ifdef CONFIG_IP_ROUTE_CLASSID 183 if (skb_dst(skb)) 184 return skb_dst(skb)->tclassid; 185 #endif 186 return 0; 187 } 188 189 static u32 flow_get_skuid(const struct sk_buff *skb) 190 { 191 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 192 kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; 193 return from_kuid(&init_user_ns, skuid); 194 } 195 return 0; 196 } 197 198 static u32 flow_get_skgid(const struct sk_buff *skb) 199 { 200 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 201 kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; 202 return from_kgid(&init_user_ns, skgid); 203 } 204 return 0; 205 } 206 207 static u32 flow_get_vlan_tag(const struct sk_buff *skb) 208 { 209 u16 uninitialized_var(tag); 210 211 if (vlan_get_tag(skb, &tag) < 0) 212 return 0; 213 return tag & VLAN_VID_MASK; 214 } 215 216 static u32 flow_get_rxhash(struct sk_buff *skb) 217 { 218 return skb_get_hash(skb); 219 } 220 221 static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) 222 { 223 switch (key) { 224 case FLOW_KEY_SRC: 225 return flow_get_src(skb, flow); 226 case FLOW_KEY_DST: 227 return flow_get_dst(skb, flow); 228 case FLOW_KEY_PROTO: 229 return flow_get_proto(skb, flow); 230 case FLOW_KEY_PROTO_SRC: 231 return flow_get_proto_src(skb, flow); 232 case FLOW_KEY_PROTO_DST: 233 return flow_get_proto_dst(skb, flow); 234 case FLOW_KEY_IIF: 235 return flow_get_iif(skb); 236 case FLOW_KEY_PRIORITY: 237 return flow_get_priority(skb); 238 case FLOW_KEY_MARK: 239 return flow_get_mark(skb); 240 case FLOW_KEY_NFCT: 241 return flow_get_nfct(skb); 242 case FLOW_KEY_NFCT_SRC: 243 return flow_get_nfct_src(skb, flow); 244 case FLOW_KEY_NFCT_DST: 245 return flow_get_nfct_dst(skb, flow); 246 case FLOW_KEY_NFCT_PROTO_SRC: 247 return flow_get_nfct_proto_src(skb, flow); 248 case FLOW_KEY_NFCT_PROTO_DST: 249 return flow_get_nfct_proto_dst(skb, flow); 250 case FLOW_KEY_RTCLASSID: 251 return flow_get_rtclassid(skb); 252 case FLOW_KEY_SKUID: 253 return flow_get_skuid(skb); 254 case FLOW_KEY_SKGID: 255 return flow_get_skgid(skb); 256 case FLOW_KEY_VLAN_TAG: 257 return flow_get_vlan_tag(skb); 258 case FLOW_KEY_RXHASH: 259 return flow_get_rxhash(skb); 260 default: 261 WARN_ON(1); 262 return 0; 263 } 264 } 265 266 #define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ 267 (1 << FLOW_KEY_DST) | \ 268 (1 << FLOW_KEY_PROTO) | \ 269 (1 << FLOW_KEY_PROTO_SRC) | \ 270 (1 << FLOW_KEY_PROTO_DST) | \ 271 (1 << FLOW_KEY_NFCT_SRC) | \ 272 (1 << FLOW_KEY_NFCT_DST) | \ 273 (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ 274 (1 << FLOW_KEY_NFCT_PROTO_DST)) 275 276 static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, 277 struct tcf_result *res) 278 { 279 struct flow_head *head = tp->root; 280 struct flow_filter *f; 281 u32 keymask; 282 u32 classid; 283 unsigned int n, key; 284 int r; 285 286 list_for_each_entry(f, &head->filters, list) { 287 u32 keys[FLOW_KEY_MAX + 1]; 288 struct flow_keys flow_keys; 289 290 if (!tcf_em_tree_match(skb, &f->ematches, NULL)) 291 continue; 292 293 keymask = f->keymask; 294 if (keymask & FLOW_KEYS_NEEDED) 295 skb_flow_dissect(skb, &flow_keys); 296 297 for (n = 0; n < f->nkeys; n++) { 298 key = ffs(keymask) - 1; 299 keymask &= ~(1 << key); 300 keys[n] = flow_key_get(skb, key, &flow_keys); 301 } 302 303 if (f->mode == FLOW_MODE_HASH) 304 classid = jhash2(keys, f->nkeys, f->hashrnd); 305 else { 306 classid = keys[0]; 307 classid = (classid & f->mask) ^ f->xor; 308 classid = (classid >> f->rshift) + f->addend; 309 } 310 311 if (f->divisor) 312 classid %= f->divisor; 313 314 res->class = 0; 315 res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); 316 317 r = tcf_exts_exec(skb, &f->exts, res); 318 if (r < 0) 319 continue; 320 return r; 321 } 322 return -1; 323 } 324 325 static void flow_perturbation(unsigned long arg) 326 { 327 struct flow_filter *f = (struct flow_filter *)arg; 328 329 get_random_bytes(&f->hashrnd, 4); 330 if (f->perturb_period) 331 mod_timer(&f->perturb_timer, jiffies + f->perturb_period); 332 } 333 334 static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { 335 [TCA_FLOW_KEYS] = { .type = NLA_U32 }, 336 [TCA_FLOW_MODE] = { .type = NLA_U32 }, 337 [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, 338 [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, 339 [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, 340 [TCA_FLOW_MASK] = { .type = NLA_U32 }, 341 [TCA_FLOW_XOR] = { .type = NLA_U32 }, 342 [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, 343 [TCA_FLOW_ACT] = { .type = NLA_NESTED }, 344 [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, 345 [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, 346 [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, 347 }; 348 349 static int flow_change(struct net *net, struct sk_buff *in_skb, 350 struct tcf_proto *tp, unsigned long base, 351 u32 handle, struct nlattr **tca, 352 unsigned long *arg) 353 { 354 struct flow_head *head = tp->root; 355 struct flow_filter *f; 356 struct nlattr *opt = tca[TCA_OPTIONS]; 357 struct nlattr *tb[TCA_FLOW_MAX + 1]; 358 struct tcf_exts e; 359 struct tcf_ematch_tree t; 360 unsigned int nkeys = 0; 361 unsigned int perturb_period = 0; 362 u32 baseclass = 0; 363 u32 keymask = 0; 364 u32 mode; 365 int err; 366 367 if (opt == NULL) 368 return -EINVAL; 369 370 err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy); 371 if (err < 0) 372 return err; 373 374 if (tb[TCA_FLOW_BASECLASS]) { 375 baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); 376 if (TC_H_MIN(baseclass) == 0) 377 return -EINVAL; 378 } 379 380 if (tb[TCA_FLOW_KEYS]) { 381 keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); 382 383 nkeys = hweight32(keymask); 384 if (nkeys == 0) 385 return -EINVAL; 386 387 if (fls(keymask) - 1 > FLOW_KEY_MAX) 388 return -EOPNOTSUPP; 389 390 if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && 391 sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) 392 return -EOPNOTSUPP; 393 } 394 395 tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); 396 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); 397 if (err < 0) 398 return err; 399 400 err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); 401 if (err < 0) 402 goto err1; 403 404 f = (struct flow_filter *)*arg; 405 if (f != NULL) { 406 err = -EINVAL; 407 if (f->handle != handle && handle) 408 goto err2; 409 410 mode = f->mode; 411 if (tb[TCA_FLOW_MODE]) 412 mode = nla_get_u32(tb[TCA_FLOW_MODE]); 413 if (mode != FLOW_MODE_HASH && nkeys > 1) 414 goto err2; 415 416 if (mode == FLOW_MODE_HASH) 417 perturb_period = f->perturb_period; 418 if (tb[TCA_FLOW_PERTURB]) { 419 if (mode != FLOW_MODE_HASH) 420 goto err2; 421 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 422 } 423 } else { 424 err = -EINVAL; 425 if (!handle) 426 goto err2; 427 if (!tb[TCA_FLOW_KEYS]) 428 goto err2; 429 430 mode = FLOW_MODE_MAP; 431 if (tb[TCA_FLOW_MODE]) 432 mode = nla_get_u32(tb[TCA_FLOW_MODE]); 433 if (mode != FLOW_MODE_HASH && nkeys > 1) 434 goto err2; 435 436 if (tb[TCA_FLOW_PERTURB]) { 437 if (mode != FLOW_MODE_HASH) 438 goto err2; 439 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 440 } 441 442 if (TC_H_MAJ(baseclass) == 0) 443 baseclass = TC_H_MAKE(tp->q->handle, baseclass); 444 if (TC_H_MIN(baseclass) == 0) 445 baseclass = TC_H_MAKE(baseclass, 1); 446 447 err = -ENOBUFS; 448 f = kzalloc(sizeof(*f), GFP_KERNEL); 449 if (f == NULL) 450 goto err2; 451 452 f->handle = handle; 453 f->mask = ~0U; 454 tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); 455 456 get_random_bytes(&f->hashrnd, 4); 457 f->perturb_timer.function = flow_perturbation; 458 f->perturb_timer.data = (unsigned long)f; 459 init_timer_deferrable(&f->perturb_timer); 460 } 461 462 tcf_exts_change(tp, &f->exts, &e); 463 tcf_em_tree_change(tp, &f->ematches, &t); 464 465 tcf_tree_lock(tp); 466 467 if (tb[TCA_FLOW_KEYS]) { 468 f->keymask = keymask; 469 f->nkeys = nkeys; 470 } 471 472 f->mode = mode; 473 474 if (tb[TCA_FLOW_MASK]) 475 f->mask = nla_get_u32(tb[TCA_FLOW_MASK]); 476 if (tb[TCA_FLOW_XOR]) 477 f->xor = nla_get_u32(tb[TCA_FLOW_XOR]); 478 if (tb[TCA_FLOW_RSHIFT]) 479 f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); 480 if (tb[TCA_FLOW_ADDEND]) 481 f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); 482 483 if (tb[TCA_FLOW_DIVISOR]) 484 f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); 485 if (baseclass) 486 f->baseclass = baseclass; 487 488 f->perturb_period = perturb_period; 489 del_timer(&f->perturb_timer); 490 if (perturb_period) 491 mod_timer(&f->perturb_timer, jiffies + perturb_period); 492 493 if (*arg == 0) 494 list_add_tail(&f->list, &head->filters); 495 496 tcf_tree_unlock(tp); 497 498 *arg = (unsigned long)f; 499 return 0; 500 501 err2: 502 tcf_em_tree_destroy(tp, &t); 503 err1: 504 tcf_exts_destroy(tp, &e); 505 return err; 506 } 507 508 static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) 509 { 510 del_timer_sync(&f->perturb_timer); 511 tcf_exts_destroy(tp, &f->exts); 512 tcf_em_tree_destroy(tp, &f->ematches); 513 kfree(f); 514 } 515 516 static int flow_delete(struct tcf_proto *tp, unsigned long arg) 517 { 518 struct flow_filter *f = (struct flow_filter *)arg; 519 520 tcf_tree_lock(tp); 521 list_del(&f->list); 522 tcf_tree_unlock(tp); 523 flow_destroy_filter(tp, f); 524 return 0; 525 } 526 527 static int flow_init(struct tcf_proto *tp) 528 { 529 struct flow_head *head; 530 531 head = kzalloc(sizeof(*head), GFP_KERNEL); 532 if (head == NULL) 533 return -ENOBUFS; 534 INIT_LIST_HEAD(&head->filters); 535 tp->root = head; 536 return 0; 537 } 538 539 static void flow_destroy(struct tcf_proto *tp) 540 { 541 struct flow_head *head = tp->root; 542 struct flow_filter *f, *next; 543 544 list_for_each_entry_safe(f, next, &head->filters, list) { 545 list_del(&f->list); 546 flow_destroy_filter(tp, f); 547 } 548 kfree(head); 549 } 550 551 static unsigned long flow_get(struct tcf_proto *tp, u32 handle) 552 { 553 struct flow_head *head = tp->root; 554 struct flow_filter *f; 555 556 list_for_each_entry(f, &head->filters, list) 557 if (f->handle == handle) 558 return (unsigned long)f; 559 return 0; 560 } 561 562 static void flow_put(struct tcf_proto *tp, unsigned long f) 563 { 564 } 565 566 static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, 567 struct sk_buff *skb, struct tcmsg *t) 568 { 569 struct flow_filter *f = (struct flow_filter *)fh; 570 struct nlattr *nest; 571 572 if (f == NULL) 573 return skb->len; 574 575 t->tcm_handle = f->handle; 576 577 nest = nla_nest_start(skb, TCA_OPTIONS); 578 if (nest == NULL) 579 goto nla_put_failure; 580 581 if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) || 582 nla_put_u32(skb, TCA_FLOW_MODE, f->mode)) 583 goto nla_put_failure; 584 585 if (f->mask != ~0 || f->xor != 0) { 586 if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) || 587 nla_put_u32(skb, TCA_FLOW_XOR, f->xor)) 588 goto nla_put_failure; 589 } 590 if (f->rshift && 591 nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift)) 592 goto nla_put_failure; 593 if (f->addend && 594 nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend)) 595 goto nla_put_failure; 596 597 if (f->divisor && 598 nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor)) 599 goto nla_put_failure; 600 if (f->baseclass && 601 nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass)) 602 goto nla_put_failure; 603 604 if (f->perturb_period && 605 nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) 606 goto nla_put_failure; 607 608 if (tcf_exts_dump(skb, &f->exts) < 0) 609 goto nla_put_failure; 610 #ifdef CONFIG_NET_EMATCH 611 if (f->ematches.hdr.nmatches && 612 tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) 613 goto nla_put_failure; 614 #endif 615 nla_nest_end(skb, nest); 616 617 if (tcf_exts_dump_stats(skb, &f->exts) < 0) 618 goto nla_put_failure; 619 620 return skb->len; 621 622 nla_put_failure: 623 nlmsg_trim(skb, nest); 624 return -1; 625 } 626 627 static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) 628 { 629 struct flow_head *head = tp->root; 630 struct flow_filter *f; 631 632 list_for_each_entry(f, &head->filters, list) { 633 if (arg->count < arg->skip) 634 goto skip; 635 if (arg->fn(tp, (unsigned long)f, arg) < 0) { 636 arg->stop = 1; 637 break; 638 } 639 skip: 640 arg->count++; 641 } 642 } 643 644 static struct tcf_proto_ops cls_flow_ops __read_mostly = { 645 .kind = "flow", 646 .classify = flow_classify, 647 .init = flow_init, 648 .destroy = flow_destroy, 649 .change = flow_change, 650 .delete = flow_delete, 651 .get = flow_get, 652 .put = flow_put, 653 .dump = flow_dump, 654 .walk = flow_walk, 655 .owner = THIS_MODULE, 656 }; 657 658 static int __init cls_flow_init(void) 659 { 660 return register_tcf_proto_ops(&cls_flow_ops); 661 } 662 663 static void __exit cls_flow_exit(void) 664 { 665 unregister_tcf_proto_ops(&cls_flow_ops); 666 } 667 668 module_init(cls_flow_init); 669 module_exit(cls_flow_exit); 670 671 MODULE_LICENSE("GPL"); 672 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 673 MODULE_DESCRIPTION("TC flow classifier"); 674