/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>

int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	atomic_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;
	r->fr_net = hold_net(ops->fro_net);

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache(ops);
}

static int __fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
	struct net *net;

	net = ops->fro_net;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	hold_net(net);
	list_add_tail_rcu(&ops->list, &net->rules_ops);
	err = 0;
errout:
	spin_unlock(&net->rules_mod_lock);

	return err;
}

struct fib_rules_ops *
fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}

EXPORT_SYMBOL_GPL(fib_rules_register);

void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		fib_rule_put(rule);
	}
}
EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);

static void fib_rules_put_rcu(struct rcu_head *head)
{
	struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
	struct net *net = ops->fro_net;

	release_net(net);
	kfree(ops);
}

void fib_rules_unregister(struct fib_rules_ops *ops)
{
	struct net *net = ops->fro_net;

	spin_lock(&net->rules_mod_lock);
	list_del_rcu(&ops->list);
	fib_rules_cleanup_ops(ops);
	spin_unlock(&net->rules_mod_lock);

	call_rcu(&ops->rcu, fib_rules_put_rcu);
}

EXPORT_SYMBOL_GPL(fib_rules_unregister);

static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags)
{
	int ret = 0;

	if (rule->iifindex && (rule->iifindex != fl->iif))
		goto out;

	if (rule->oifindex && (rule->oifindex != fl->oif))
		goto out;

	if ((rule->mark ^ fl->mark) & rule->mark_mask)
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (err != -EAGAIN) {
			fib_rule_get(rule);
			arg->rule = rule;
			goto out;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_lookup);

static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
			    struct fib_rules_ops *ops)
{
	int err = -EINVAL;

	if (frh->src_len)
		if (tb[FRA_SRC] == NULL ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size)
			goto errout;

	if (frh->dst_len)
		if (tb[FRA_DST] == NULL ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size)
			goto errout;

	err = 0;
errout:
	return err;
}

static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL, unresolved = 0;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	rule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (rule == NULL) {
		err = -ENOMEM;
		goto errout;
	}
	rule->fr_net = hold_net(net);

	if (tb[FRA_PRIORITY])
		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);

	if (tb[FRA_IIFNAME]) {
		struct net_device *dev;

		rule->iifindex = -1;
		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->iifname);
		if (dev)
			rule->iifindex = dev->ifindex;
	}

	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		rule->oifindex = -1;
		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->oifname);
		if (dev)
			rule->oifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (rule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			rule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	rule->action = frh->action;
	rule->flags = frh->flags;
	rule->table = frh_get_table(frh, tb);

	if (!tb[FRA_PRIORITY] && ops->default_pref)
		rule->pref = ops->default_pref(ops);

	err = -EINVAL;
	if (tb[FRA_GOTO]) {
		if (rule->action != FR_ACT_GOTO)
			goto errout_free;

		rule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (rule->target <= rule->pref)
			goto errout_free;

		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->pref == rule->target) {
				rule->ctarget = r;
				break;
			}
		}

		if (rule->ctarget == NULL)
			unresolved = 1;
	} else if (rule->action == FR_ACT_GOTO)
		goto errout_free;

	err = ops->configure(rule, skb, frh, tb);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	fib_rule_get(rule);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref) {
				BUG_ON(r->ctarget != NULL);
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	release_net(rule->fr_net);
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}

static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *tmp;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (frh->action && (frh->action != rule->action))
			continue;

		if (frh->table && (frh_get_table(frh, tb) != rule->table))
			continue;

		if (tb[FRA_PRIORITY] &&
		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
			continue;

		if (tb[FRA_IIFNAME] &&
		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
			continue;

		if (tb[FRA_OIFNAME] &&
		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
			continue;

		if (tb[FRA_FWMARK] &&
		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
			continue;

		if (tb[FRA_FWMASK] &&
		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
			continue;

		if (!ops->compare(rule, frh, tb))
			continue;

		if (rule->flags & FIB_RULE_PERMANENT) {
			err = -EPERM;
			goto errout;
		}

		list_del_rcu(&rule->list);

		if (rule->action == FR_ACT_GOTO)
			ops->nr_goto_rules--;

		/*
		 * Check if this rule is a target to any of them. If so,
		 * disable them. As this operation is eventually very
		 * expensive, it is only performed if goto rules have
		 * actually been added.
		 */
		if (ops->nr_goto_rules > 0) {
			list_for_each_entry(tmp, &ops->rules_list, list) {
				if (tmp->ctarget == rule) {
					rcu_assign_pointer(tmp->ctarget, NULL);
					ops->unresolved_rules++;
				}
			}
		}

		synchronize_rcu();
		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
				   NETLINK_CB(skb).pid);
		fib_rule_put(rule);
		flush_route_cache(ops);
		rules_ops_put(ops);
		return 0;
	}

	err = -ENOENT;
errout:
	rules_ops_put(ops);
	return err;
}

static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4); /* FRA_FWMASK */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->table = rule->table;
	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->iifname[0]) {
		NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);

		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);

		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	if (rule->pref)
		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);

	if (rule->mark)
		NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);

	if (rule->mark_mask || rule->mark)
		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);

	if (rule->target)
		NLA_PUT_U32(skb, FRA_GOTO, rule->target);

	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
				     NLM_F_MULTI, ops) < 0)
			break;
skip:
		idx++;
	}
	cb->args[1] = idx;
	rules_ops_put(ops);

	return skb->len;
}

static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(net, family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		return dump_rules(skb, cb, ops);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	net = ops->fro_net;
	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == -1 &&
		    strcmp(dev->name, rule->iifname) == 0)
			rule->iifindex = dev->ifindex;
		if (rule->oifindex == -1 &&
		    strcmp(dev->name, rule->oifname) == 0)
			rule->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
}


static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct fib_rules_ops *ops;

	ASSERT_RTNL();
	rcu_read_lock();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	rcu_read_unlock();

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

static int __net_init fib_rules_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->rules_ops);
	spin_lock_init(&net->rules_mod_lock);
	return 0;
}

static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
};

static int __init fib_rules_init(void)
{
	int err;
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto fail;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto fail_unregister;

	return 0;

fail_unregister:
	unregister_pernet_subsys(&fib_rules_net_ops);
fail:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);