1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <linux/module.h> 17 #include <asm/uaccess.h> 18 #include <asm/system.h> 19 #include <linux/bitops.h> 20 #include <linux/capability.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 #include <linux/mm.h> 24 #include <linux/string.h> 25 #include <linux/socket.h> 26 #include <linux/sockios.h> 27 #include <linux/errno.h> 28 #include <linux/in.h> 29 #include <linux/inet.h> 30 #include <linux/inetdevice.h> 31 #include <linux/netdevice.h> 32 #include <linux/if_addr.h> 33 #include <linux/if_arp.h> 34 #include <linux/skbuff.h> 35 #include <linux/init.h> 36 #include <linux/list.h> 37 38 #include <net/ip.h> 39 #include <net/protocol.h> 40 #include <net/route.h> 41 #include <net/tcp.h> 42 #include <net/sock.h> 43 #include <net/arp.h> 44 #include <net/ip_fib.h> 45 #include <net/rtnetlink.h> 46 47 #ifndef CONFIG_IP_MULTIPLE_TABLES 48 49 static int __net_init fib4_rules_init(struct net *net) 50 { 51 struct fib_table *local_table, *main_table; 52 53 local_table = fib_hash_table(RT_TABLE_LOCAL); 54 if (local_table == NULL) 55 return -ENOMEM; 56 57 main_table = fib_hash_table(RT_TABLE_MAIN); 58 if (main_table == NULL) 59 goto fail; 60 61 hlist_add_head_rcu(&local_table->tb_hlist, 62 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 63 hlist_add_head_rcu(&main_table->tb_hlist, 64 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 65 return 0; 66 67 fail: 68 kfree(local_table); 69 return -ENOMEM; 70 } 71 #else 72 73 struct fib_table *fib_new_table(struct net *net, u32 id) 74 { 75 struct fib_table *tb; 76 unsigned int h; 77 78 if (id == 0) 79 id = RT_TABLE_MAIN; 80 tb = fib_get_table(net, id); 81 if (tb) 82 return tb; 83 84 tb = fib_hash_table(id); 85 if (!tb) 86 return NULL; 87 h = id & (FIB_TABLE_HASHSZ - 1); 88 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 89 return tb; 90 } 91 92 struct fib_table *fib_get_table(struct net *net, u32 id) 93 { 94 struct fib_table *tb; 95 struct hlist_node *node; 96 struct hlist_head *head; 97 unsigned int h; 98 99 if (id == 0) 100 id = RT_TABLE_MAIN; 101 h = id & (FIB_TABLE_HASHSZ - 1); 102 103 rcu_read_lock(); 104 head = &net->ipv4.fib_table_hash[h]; 105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 106 if (tb->tb_id == id) { 107 rcu_read_unlock(); 108 return tb; 109 } 110 } 111 rcu_read_unlock(); 112 return NULL; 113 } 114 #endif /* CONFIG_IP_MULTIPLE_TABLES */ 115 116 void fib_select_default(struct net *net, 117 const struct flowi *flp, struct fib_result *res) 118 { 119 struct fib_table *tb; 120 int table = RT_TABLE_MAIN; 121 #ifdef CONFIG_IP_MULTIPLE_TABLES 122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) 123 return; 124 table = res->r->table; 125 #endif 126 tb = fib_get_table(net, table); 127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 128 tb->tb_select_default(tb, flp, res); 129 } 130 131 static void fib_flush(struct net *net) 132 { 133 int flushed = 0; 134 struct fib_table *tb; 135 struct hlist_node *node; 136 struct hlist_head *head; 137 unsigned int h; 138 139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 140 head = &net->ipv4.fib_table_hash[h]; 141 hlist_for_each_entry(tb, node, head, tb_hlist) 142 flushed += tb->tb_flush(tb); 143 } 144 145 if (flushed) 146 rt_cache_flush(net, -1); 147 } 148 149 /* 150 * Find the first device with a given source address. 151 */ 152 153 struct net_device * ip_dev_find(struct net *net, __be32 addr) 154 { 155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 156 struct fib_result res; 157 struct net_device *dev = NULL; 158 struct fib_table *local_table; 159 160 #ifdef CONFIG_IP_MULTIPLE_TABLES 161 res.r = NULL; 162 #endif 163 164 local_table = fib_get_table(net, RT_TABLE_LOCAL); 165 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 166 return NULL; 167 if (res.type != RTN_LOCAL) 168 goto out; 169 dev = FIB_RES_DEV(res); 170 171 if (dev) 172 dev_hold(dev); 173 out: 174 fib_res_put(&res); 175 return dev; 176 } 177 178 /* 179 * Find address type as if only "dev" was present in the system. If 180 * on_dev is NULL then all interfaces are taken into consideration. 181 */ 182 static inline unsigned __inet_dev_addr_type(struct net *net, 183 const struct net_device *dev, 184 __be32 addr) 185 { 186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 187 struct fib_result res; 188 unsigned ret = RTN_BROADCAST; 189 struct fib_table *local_table; 190 191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) 192 return RTN_BROADCAST; 193 if (ipv4_is_multicast(addr)) 194 return RTN_MULTICAST; 195 196 #ifdef CONFIG_IP_MULTIPLE_TABLES 197 res.r = NULL; 198 #endif 199 200 local_table = fib_get_table(net, RT_TABLE_LOCAL); 201 if (local_table) { 202 ret = RTN_UNICAST; 203 if (!local_table->tb_lookup(local_table, &fl, &res)) { 204 if (!dev || dev == res.fi->fib_dev) 205 ret = res.type; 206 fib_res_put(&res); 207 } 208 } 209 return ret; 210 } 211 212 unsigned int inet_addr_type(struct net *net, __be32 addr) 213 { 214 return __inet_dev_addr_type(net, NULL, addr); 215 } 216 217 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 218 __be32 addr) 219 { 220 return __inet_dev_addr_type(net, dev, addr); 221 } 222 223 /* Given (packet source, input interface) and optional (dst, oif, tos): 224 - (main) check, that source is valid i.e. not broadcast or our local 225 address. 226 - figure out what "logical" interface this packet arrived 227 and calculate "specific destination" address. 228 - check, that packet arrived from expected physical interface. 229 */ 230 231 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 232 struct net_device *dev, __be32 *spec_dst, u32 *itag) 233 { 234 struct in_device *in_dev; 235 struct flowi fl = { .nl_u = { .ip4_u = 236 { .daddr = src, 237 .saddr = dst, 238 .tos = tos } }, 239 .iif = oif }; 240 struct fib_result res; 241 int no_addr, rpf; 242 int ret; 243 struct net *net; 244 245 no_addr = rpf = 0; 246 rcu_read_lock(); 247 in_dev = __in_dev_get_rcu(dev); 248 if (in_dev) { 249 no_addr = in_dev->ifa_list == NULL; 250 rpf = IN_DEV_RPFILTER(in_dev); 251 } 252 rcu_read_unlock(); 253 254 if (in_dev == NULL) 255 goto e_inval; 256 257 net = dev_net(dev); 258 if (fib_lookup(net, &fl, &res)) 259 goto last_resort; 260 if (res.type != RTN_UNICAST) 261 goto e_inval_res; 262 *spec_dst = FIB_RES_PREFSRC(res); 263 fib_combine_itag(itag, &res); 264 #ifdef CONFIG_IP_ROUTE_MULTIPATH 265 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 266 #else 267 if (FIB_RES_DEV(res) == dev) 268 #endif 269 { 270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 271 fib_res_put(&res); 272 return ret; 273 } 274 fib_res_put(&res); 275 if (no_addr) 276 goto last_resort; 277 if (rpf == 1) 278 goto e_inval; 279 fl.oif = dev->ifindex; 280 281 ret = 0; 282 if (fib_lookup(net, &fl, &res) == 0) { 283 if (res.type == RTN_UNICAST) { 284 *spec_dst = FIB_RES_PREFSRC(res); 285 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 286 } 287 fib_res_put(&res); 288 } 289 return ret; 290 291 last_resort: 292 if (rpf) 293 goto e_inval; 294 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 295 *itag = 0; 296 return 0; 297 298 e_inval_res: 299 fib_res_put(&res); 300 e_inval: 301 return -EINVAL; 302 } 303 304 static inline __be32 sk_extract_addr(struct sockaddr *addr) 305 { 306 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 307 } 308 309 static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 310 { 311 struct nlattr *nla; 312 313 nla = (struct nlattr *) ((char *) mx + len); 314 nla->nla_type = type; 315 nla->nla_len = nla_attr_size(4); 316 *(u32 *) nla_data(nla) = value; 317 318 return len + nla_total_size(4); 319 } 320 321 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 322 struct fib_config *cfg) 323 { 324 __be32 addr; 325 int plen; 326 327 memset(cfg, 0, sizeof(*cfg)); 328 cfg->fc_nlinfo.nl_net = net; 329 330 if (rt->rt_dst.sa_family != AF_INET) 331 return -EAFNOSUPPORT; 332 333 /* 334 * Check mask for validity: 335 * a) it must be contiguous. 336 * b) destination must have all host bits clear. 337 * c) if application forgot to set correct family (AF_INET), 338 * reject request unless it is absolutely clear i.e. 339 * both family and mask are zero. 340 */ 341 plen = 32; 342 addr = sk_extract_addr(&rt->rt_dst); 343 if (!(rt->rt_flags & RTF_HOST)) { 344 __be32 mask = sk_extract_addr(&rt->rt_genmask); 345 346 if (rt->rt_genmask.sa_family != AF_INET) { 347 if (mask || rt->rt_genmask.sa_family) 348 return -EAFNOSUPPORT; 349 } 350 351 if (bad_mask(mask, addr)) 352 return -EINVAL; 353 354 plen = inet_mask_len(mask); 355 } 356 357 cfg->fc_dst_len = plen; 358 cfg->fc_dst = addr; 359 360 if (cmd != SIOCDELRT) { 361 cfg->fc_nlflags = NLM_F_CREATE; 362 cfg->fc_protocol = RTPROT_BOOT; 363 } 364 365 if (rt->rt_metric) 366 cfg->fc_priority = rt->rt_metric - 1; 367 368 if (rt->rt_flags & RTF_REJECT) { 369 cfg->fc_scope = RT_SCOPE_HOST; 370 cfg->fc_type = RTN_UNREACHABLE; 371 return 0; 372 } 373 374 cfg->fc_scope = RT_SCOPE_NOWHERE; 375 cfg->fc_type = RTN_UNICAST; 376 377 if (rt->rt_dev) { 378 char *colon; 379 struct net_device *dev; 380 char devname[IFNAMSIZ]; 381 382 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 383 return -EFAULT; 384 385 devname[IFNAMSIZ-1] = 0; 386 colon = strchr(devname, ':'); 387 if (colon) 388 *colon = 0; 389 dev = __dev_get_by_name(net, devname); 390 if (!dev) 391 return -ENODEV; 392 cfg->fc_oif = dev->ifindex; 393 if (colon) { 394 struct in_ifaddr *ifa; 395 struct in_device *in_dev = __in_dev_get_rtnl(dev); 396 if (!in_dev) 397 return -ENODEV; 398 *colon = ':'; 399 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 400 if (strcmp(ifa->ifa_label, devname) == 0) 401 break; 402 if (ifa == NULL) 403 return -ENODEV; 404 cfg->fc_prefsrc = ifa->ifa_local; 405 } 406 } 407 408 addr = sk_extract_addr(&rt->rt_gateway); 409 if (rt->rt_gateway.sa_family == AF_INET && addr) { 410 cfg->fc_gw = addr; 411 if (rt->rt_flags & RTF_GATEWAY && 412 inet_addr_type(net, addr) == RTN_UNICAST) 413 cfg->fc_scope = RT_SCOPE_UNIVERSE; 414 } 415 416 if (cmd == SIOCDELRT) 417 return 0; 418 419 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 420 return -EINVAL; 421 422 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 423 cfg->fc_scope = RT_SCOPE_LINK; 424 425 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 426 struct nlattr *mx; 427 int len = 0; 428 429 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 430 if (mx == NULL) 431 return -ENOMEM; 432 433 if (rt->rt_flags & RTF_MTU) 434 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 435 436 if (rt->rt_flags & RTF_WINDOW) 437 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 438 439 if (rt->rt_flags & RTF_IRTT) 440 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 441 442 cfg->fc_mx = mx; 443 cfg->fc_mx_len = len; 444 } 445 446 return 0; 447 } 448 449 /* 450 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 451 */ 452 453 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 454 { 455 struct fib_config cfg; 456 struct rtentry rt; 457 int err; 458 459 switch (cmd) { 460 case SIOCADDRT: /* Add a route */ 461 case SIOCDELRT: /* Delete a route */ 462 if (!capable(CAP_NET_ADMIN)) 463 return -EPERM; 464 465 if (copy_from_user(&rt, arg, sizeof(rt))) 466 return -EFAULT; 467 468 rtnl_lock(); 469 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 470 if (err == 0) { 471 struct fib_table *tb; 472 473 if (cmd == SIOCDELRT) { 474 tb = fib_get_table(net, cfg.fc_table); 475 if (tb) 476 err = tb->tb_delete(tb, &cfg); 477 else 478 err = -ESRCH; 479 } else { 480 tb = fib_new_table(net, cfg.fc_table); 481 if (tb) 482 err = tb->tb_insert(tb, &cfg); 483 else 484 err = -ENOBUFS; 485 } 486 487 /* allocated by rtentry_to_fib_config() */ 488 kfree(cfg.fc_mx); 489 } 490 rtnl_unlock(); 491 return err; 492 } 493 return -EINVAL; 494 } 495 496 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 497 [RTA_DST] = { .type = NLA_U32 }, 498 [RTA_SRC] = { .type = NLA_U32 }, 499 [RTA_IIF] = { .type = NLA_U32 }, 500 [RTA_OIF] = { .type = NLA_U32 }, 501 [RTA_GATEWAY] = { .type = NLA_U32 }, 502 [RTA_PRIORITY] = { .type = NLA_U32 }, 503 [RTA_PREFSRC] = { .type = NLA_U32 }, 504 [RTA_METRICS] = { .type = NLA_NESTED }, 505 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 506 [RTA_FLOW] = { .type = NLA_U32 }, 507 }; 508 509 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 510 struct nlmsghdr *nlh, struct fib_config *cfg) 511 { 512 struct nlattr *attr; 513 int err, remaining; 514 struct rtmsg *rtm; 515 516 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 517 if (err < 0) 518 goto errout; 519 520 memset(cfg, 0, sizeof(*cfg)); 521 522 rtm = nlmsg_data(nlh); 523 cfg->fc_dst_len = rtm->rtm_dst_len; 524 cfg->fc_tos = rtm->rtm_tos; 525 cfg->fc_table = rtm->rtm_table; 526 cfg->fc_protocol = rtm->rtm_protocol; 527 cfg->fc_scope = rtm->rtm_scope; 528 cfg->fc_type = rtm->rtm_type; 529 cfg->fc_flags = rtm->rtm_flags; 530 cfg->fc_nlflags = nlh->nlmsg_flags; 531 532 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 533 cfg->fc_nlinfo.nlh = nlh; 534 cfg->fc_nlinfo.nl_net = net; 535 536 if (cfg->fc_type > RTN_MAX) { 537 err = -EINVAL; 538 goto errout; 539 } 540 541 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 542 switch (nla_type(attr)) { 543 case RTA_DST: 544 cfg->fc_dst = nla_get_be32(attr); 545 break; 546 case RTA_OIF: 547 cfg->fc_oif = nla_get_u32(attr); 548 break; 549 case RTA_GATEWAY: 550 cfg->fc_gw = nla_get_be32(attr); 551 break; 552 case RTA_PRIORITY: 553 cfg->fc_priority = nla_get_u32(attr); 554 break; 555 case RTA_PREFSRC: 556 cfg->fc_prefsrc = nla_get_be32(attr); 557 break; 558 case RTA_METRICS: 559 cfg->fc_mx = nla_data(attr); 560 cfg->fc_mx_len = nla_len(attr); 561 break; 562 case RTA_MULTIPATH: 563 cfg->fc_mp = nla_data(attr); 564 cfg->fc_mp_len = nla_len(attr); 565 break; 566 case RTA_FLOW: 567 cfg->fc_flow = nla_get_u32(attr); 568 break; 569 case RTA_TABLE: 570 cfg->fc_table = nla_get_u32(attr); 571 break; 572 } 573 } 574 575 return 0; 576 errout: 577 return err; 578 } 579 580 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 581 { 582 struct net *net = sock_net(skb->sk); 583 struct fib_config cfg; 584 struct fib_table *tb; 585 int err; 586 587 err = rtm_to_fib_config(net, skb, nlh, &cfg); 588 if (err < 0) 589 goto errout; 590 591 tb = fib_get_table(net, cfg.fc_table); 592 if (tb == NULL) { 593 err = -ESRCH; 594 goto errout; 595 } 596 597 err = tb->tb_delete(tb, &cfg); 598 errout: 599 return err; 600 } 601 602 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 603 { 604 struct net *net = sock_net(skb->sk); 605 struct fib_config cfg; 606 struct fib_table *tb; 607 int err; 608 609 err = rtm_to_fib_config(net, skb, nlh, &cfg); 610 if (err < 0) 611 goto errout; 612 613 tb = fib_new_table(net, cfg.fc_table); 614 if (tb == NULL) { 615 err = -ENOBUFS; 616 goto errout; 617 } 618 619 err = tb->tb_insert(tb, &cfg); 620 errout: 621 return err; 622 } 623 624 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 625 { 626 struct net *net = sock_net(skb->sk); 627 unsigned int h, s_h; 628 unsigned int e = 0, s_e; 629 struct fib_table *tb; 630 struct hlist_node *node; 631 struct hlist_head *head; 632 int dumped = 0; 633 634 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 635 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 636 return ip_rt_dump(skb, cb); 637 638 s_h = cb->args[0]; 639 s_e = cb->args[1]; 640 641 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 642 e = 0; 643 head = &net->ipv4.fib_table_hash[h]; 644 hlist_for_each_entry(tb, node, head, tb_hlist) { 645 if (e < s_e) 646 goto next; 647 if (dumped) 648 memset(&cb->args[2], 0, sizeof(cb->args) - 649 2 * sizeof(cb->args[0])); 650 if (tb->tb_dump(tb, skb, cb) < 0) 651 goto out; 652 dumped = 1; 653 next: 654 e++; 655 } 656 } 657 out: 658 cb->args[1] = e; 659 cb->args[0] = h; 660 661 return skb->len; 662 } 663 664 /* Prepare and feed intra-kernel routing request. 665 Really, it should be netlink message, but :-( netlink 666 can be not configured, so that we feed it directly 667 to fib engine. It is legal, because all events occur 668 only when netlink is already locked. 669 */ 670 671 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 672 { 673 struct net *net = dev_net(ifa->ifa_dev->dev); 674 struct fib_table *tb; 675 struct fib_config cfg = { 676 .fc_protocol = RTPROT_KERNEL, 677 .fc_type = type, 678 .fc_dst = dst, 679 .fc_dst_len = dst_len, 680 .fc_prefsrc = ifa->ifa_local, 681 .fc_oif = ifa->ifa_dev->dev->ifindex, 682 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 683 .fc_nlinfo = { 684 .nl_net = net, 685 }, 686 }; 687 688 if (type == RTN_UNICAST) 689 tb = fib_new_table(net, RT_TABLE_MAIN); 690 else 691 tb = fib_new_table(net, RT_TABLE_LOCAL); 692 693 if (tb == NULL) 694 return; 695 696 cfg.fc_table = tb->tb_id; 697 698 if (type != RTN_LOCAL) 699 cfg.fc_scope = RT_SCOPE_LINK; 700 else 701 cfg.fc_scope = RT_SCOPE_HOST; 702 703 if (cmd == RTM_NEWROUTE) 704 tb->tb_insert(tb, &cfg); 705 else 706 tb->tb_delete(tb, &cfg); 707 } 708 709 void fib_add_ifaddr(struct in_ifaddr *ifa) 710 { 711 struct in_device *in_dev = ifa->ifa_dev; 712 struct net_device *dev = in_dev->dev; 713 struct in_ifaddr *prim = ifa; 714 __be32 mask = ifa->ifa_mask; 715 __be32 addr = ifa->ifa_local; 716 __be32 prefix = ifa->ifa_address&mask; 717 718 if (ifa->ifa_flags&IFA_F_SECONDARY) { 719 prim = inet_ifa_byprefix(in_dev, prefix, mask); 720 if (prim == NULL) { 721 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 722 return; 723 } 724 } 725 726 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 727 728 if (!(dev->flags&IFF_UP)) 729 return; 730 731 /* Add broadcast address, if it is explicitly assigned. */ 732 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 734 735 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 736 (prefix != addr || ifa->ifa_prefixlen < 32)) { 737 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 738 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 739 740 /* Add network specific broadcasts, when it takes a sense */ 741 if (ifa->ifa_prefixlen < 31) { 742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 743 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 744 } 745 } 746 } 747 748 static void fib_del_ifaddr(struct in_ifaddr *ifa) 749 { 750 struct in_device *in_dev = ifa->ifa_dev; 751 struct net_device *dev = in_dev->dev; 752 struct in_ifaddr *ifa1; 753 struct in_ifaddr *prim = ifa; 754 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 755 __be32 any = ifa->ifa_address&ifa->ifa_mask; 756 #define LOCAL_OK 1 757 #define BRD_OK 2 758 #define BRD0_OK 4 759 #define BRD1_OK 8 760 unsigned ok = 0; 761 762 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 763 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 764 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 765 else { 766 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 767 if (prim == NULL) { 768 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 769 return; 770 } 771 } 772 773 /* Deletion is more complicated than add. 774 We should take care of not to delete too much :-) 775 776 Scan address list to be sure that addresses are really gone. 777 */ 778 779 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 780 if (ifa->ifa_local == ifa1->ifa_local) 781 ok |= LOCAL_OK; 782 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 783 ok |= BRD_OK; 784 if (brd == ifa1->ifa_broadcast) 785 ok |= BRD1_OK; 786 if (any == ifa1->ifa_broadcast) 787 ok |= BRD0_OK; 788 } 789 790 if (!(ok&BRD_OK)) 791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 792 if (!(ok&BRD1_OK)) 793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 794 if (!(ok&BRD0_OK)) 795 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 796 if (!(ok&LOCAL_OK)) { 797 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 798 799 /* Check, that this local address finally disappeared. */ 800 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 801 /* And the last, but not the least thing. 802 We must flush stray FIB entries. 803 804 First of all, we scan fib_info list searching 805 for stray nexthop entries, then ignite fib_flush. 806 */ 807 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 808 fib_flush(dev_net(dev)); 809 } 810 } 811 #undef LOCAL_OK 812 #undef BRD_OK 813 #undef BRD0_OK 814 #undef BRD1_OK 815 } 816 817 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 818 { 819 820 struct fib_result res; 821 struct flowi fl = { .mark = frn->fl_mark, 822 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 823 .tos = frn->fl_tos, 824 .scope = frn->fl_scope } } }; 825 826 #ifdef CONFIG_IP_MULTIPLE_TABLES 827 res.r = NULL; 828 #endif 829 830 frn->err = -ENOENT; 831 if (tb) { 832 local_bh_disable(); 833 834 frn->tb_id = tb->tb_id; 835 frn->err = tb->tb_lookup(tb, &fl, &res); 836 837 if (!frn->err) { 838 frn->prefixlen = res.prefixlen; 839 frn->nh_sel = res.nh_sel; 840 frn->type = res.type; 841 frn->scope = res.scope; 842 fib_res_put(&res); 843 } 844 local_bh_enable(); 845 } 846 } 847 848 static void nl_fib_input(struct sk_buff *skb) 849 { 850 struct net *net; 851 struct fib_result_nl *frn; 852 struct nlmsghdr *nlh; 853 struct fib_table *tb; 854 u32 pid; 855 856 net = sock_net(skb->sk); 857 nlh = nlmsg_hdr(skb); 858 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 859 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 860 return; 861 862 skb = skb_clone(skb, GFP_KERNEL); 863 if (skb == NULL) 864 return; 865 nlh = nlmsg_hdr(skb); 866 867 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 868 tb = fib_get_table(net, frn->tb_id_in); 869 870 nl_fib_lookup(frn, tb); 871 872 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 873 NETLINK_CB(skb).pid = 0; /* from kernel */ 874 NETLINK_CB(skb).dst_group = 0; /* unicast */ 875 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 876 } 877 878 static int nl_fib_lookup_init(struct net *net) 879 { 880 struct sock *sk; 881 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 882 nl_fib_input, NULL, THIS_MODULE); 883 if (sk == NULL) 884 return -EAFNOSUPPORT; 885 net->ipv4.fibnl = sk; 886 return 0; 887 } 888 889 static void nl_fib_lookup_exit(struct net *net) 890 { 891 netlink_kernel_release(net->ipv4.fibnl); 892 net->ipv4.fibnl = NULL; 893 } 894 895 static void fib_disable_ip(struct net_device *dev, int force) 896 { 897 if (fib_sync_down_dev(dev, force)) 898 fib_flush(dev_net(dev)); 899 rt_cache_flush(dev_net(dev), 0); 900 arp_ifdown(dev); 901 } 902 903 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 904 { 905 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 906 struct net_device *dev = ifa->ifa_dev->dev; 907 908 switch (event) { 909 case NETDEV_UP: 910 fib_add_ifaddr(ifa); 911 #ifdef CONFIG_IP_ROUTE_MULTIPATH 912 fib_sync_up(dev); 913 #endif 914 rt_cache_flush(dev_net(dev), -1); 915 break; 916 case NETDEV_DOWN: 917 fib_del_ifaddr(ifa); 918 if (ifa->ifa_dev->ifa_list == NULL) { 919 /* Last address was deleted from this interface. 920 Disable IP. 921 */ 922 fib_disable_ip(dev, 1); 923 } else { 924 rt_cache_flush(dev_net(dev), -1); 925 } 926 break; 927 } 928 return NOTIFY_DONE; 929 } 930 931 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 932 { 933 struct net_device *dev = ptr; 934 struct in_device *in_dev = __in_dev_get_rtnl(dev); 935 936 if (event == NETDEV_UNREGISTER) { 937 fib_disable_ip(dev, 2); 938 return NOTIFY_DONE; 939 } 940 941 if (!in_dev) 942 return NOTIFY_DONE; 943 944 switch (event) { 945 case NETDEV_UP: 946 for_ifa(in_dev) { 947 fib_add_ifaddr(ifa); 948 } endfor_ifa(in_dev); 949 #ifdef CONFIG_IP_ROUTE_MULTIPATH 950 fib_sync_up(dev); 951 #endif 952 rt_cache_flush(dev_net(dev), -1); 953 break; 954 case NETDEV_DOWN: 955 fib_disable_ip(dev, 0); 956 break; 957 case NETDEV_CHANGEMTU: 958 case NETDEV_CHANGE: 959 rt_cache_flush(dev_net(dev), 0); 960 break; 961 } 962 return NOTIFY_DONE; 963 } 964 965 static struct notifier_block fib_inetaddr_notifier = { 966 .notifier_call = fib_inetaddr_event, 967 }; 968 969 static struct notifier_block fib_netdev_notifier = { 970 .notifier_call = fib_netdev_event, 971 }; 972 973 static int __net_init ip_fib_net_init(struct net *net) 974 { 975 int err; 976 unsigned int i; 977 978 net->ipv4.fib_table_hash = kzalloc( 979 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 980 if (net->ipv4.fib_table_hash == NULL) 981 return -ENOMEM; 982 983 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 984 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 985 986 err = fib4_rules_init(net); 987 if (err < 0) 988 goto fail; 989 return 0; 990 991 fail: 992 kfree(net->ipv4.fib_table_hash); 993 return err; 994 } 995 996 static void __net_exit ip_fib_net_exit(struct net *net) 997 { 998 unsigned int i; 999 1000 #ifdef CONFIG_IP_MULTIPLE_TABLES 1001 fib4_rules_exit(net); 1002 #endif 1003 1004 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1005 struct fib_table *tb; 1006 struct hlist_head *head; 1007 struct hlist_node *node, *tmp; 1008 1009 head = &net->ipv4.fib_table_hash[i]; 1010 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1011 hlist_del(node); 1012 tb->tb_flush(tb); 1013 kfree(tb); 1014 } 1015 } 1016 kfree(net->ipv4.fib_table_hash); 1017 } 1018 1019 static int __net_init fib_net_init(struct net *net) 1020 { 1021 int error; 1022 1023 error = ip_fib_net_init(net); 1024 if (error < 0) 1025 goto out; 1026 error = nl_fib_lookup_init(net); 1027 if (error < 0) 1028 goto out_nlfl; 1029 error = fib_proc_init(net); 1030 if (error < 0) 1031 goto out_proc; 1032 out: 1033 return error; 1034 1035 out_proc: 1036 nl_fib_lookup_exit(net); 1037 out_nlfl: 1038 ip_fib_net_exit(net); 1039 goto out; 1040 } 1041 1042 static void __net_exit fib_net_exit(struct net *net) 1043 { 1044 fib_proc_exit(net); 1045 nl_fib_lookup_exit(net); 1046 ip_fib_net_exit(net); 1047 } 1048 1049 static struct pernet_operations fib_net_ops = { 1050 .init = fib_net_init, 1051 .exit = fib_net_exit, 1052 }; 1053 1054 void __init ip_fib_init(void) 1055 { 1056 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1057 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1058 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1059 1060 register_pernet_subsys(&fib_net_ops); 1061 register_netdevice_notifier(&fib_netdev_notifier); 1062 register_inetaddr_notifier(&fib_inetaddr_notifier); 1063 1064 fib_hash_init(); 1065 } 1066 1067 EXPORT_SYMBOL(inet_addr_type); 1068 EXPORT_SYMBOL(inet_dev_addr_type); 1069 EXPORT_SYMBOL(ip_dev_find); 1070