/*
 * NET3 IP device support routines.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Derived from the IP parts of dev.c 1.0.19
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 * Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */


#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/slab.h>
#include <linux/hash.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>
#include <linux/netconf.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/addrconf.h>

#include "fib_lookup.h"

/* NOTE(review): presumably the initial templates for the per-netns
 * "all"/"default" devconf sysctls — confirm against the ipv4 netns init
 * code (not visible in this chunk).
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};

static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};

#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

/* Netlink attribute policy for RTM_NEWADDR/RTM_DELADDR parsing. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};

#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/* Global hash of all in_ifaddr entries, keyed by local address; written
 * under RTNL (see ASSERT_RTNL in insert/remove), read under RCU.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];

/* Hash a local address into inet_addr_lst[], perturbed per-netns so
 * identical addresses in different namespaces spread across buckets.
 */
static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
	u32 val = (__force u32) addr ^ net_hash_mix(net);

	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}

/* Insert @ifa into the global address hash.  Caller must hold RTNL. */
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	u32 hash = inet_addr_hash(net, ifa->ifa_local);

	ASSERT_RTNL();
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
}

/* Remove @ifa from the global address hash.  Caller must hold RTNL. */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}

/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	u32 hash = inet_addr_hash(net, addr);
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
		if (ifa->ifa_local == addr) {
			struct net_device *dev = ifa->ifa_dev->dev;

			/* hash is global; skip entries from other netns */
			if (!net_eq(dev_net(dev), net))
				continue;
			result = dev;
			break;
		}
	}
	if (!result) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	}
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);

static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif

/* Locks all the inet devices. */

/* Allocate a zeroed in_ifaddr.  Freed via inet_free_ifa() (RCU). */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}

/* RCU callback: drop the in_device reference (if any) and free the ifa. */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}

/* Free @ifa after an RCU grace period, so concurrent RCU readers of the
 * address hash / ifa_list are safe.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}

/* Final teardown of an in_device once its refcount hits zero.  Expects
 * the device to be marked dead and its address/multicast lists empty.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);

/* Create and attach an in_device to @dev, seeding its config from the
 * netns defaults.  Caller must hold RTNL.  Returns the new in_device or
 * an ERR_PTR on failure.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}

/* RCU callback: release the in_device reference deferred by
 * inetdev_destroy().
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}

/* Tear down an in_device: delete all its addresses, detach it from the
 * net_device, and release it after a grace period.  Caller holds RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}

/* Return 1 if address @a (and @b, when nonzero) fall within the subnet
 * of some primary address on @in_dev, else 0.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}

/* Delete *@ifap from @in_dev, handling secondary-address cleanup or
 * promotion, netlink notification, and (if @destroy) freeing the ifa.
 * Caller holds RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* move the promoted entry right after the last
			 * primary of equal-or-wider scope
			 */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
				continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}

/* Convenience wrapper for __inet_del_ifa() without netlink attribution. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

static void check_lifetime(struct work_struct *work);

static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);

/* Insert @ifa into its device's address list (classifying it as primary
 * or secondary), hash it, kick the lifetime checker, and notify.
 * Consumes @ifa on error.  Caller holds RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			/* same subnet as an existing address: secondary */
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}

/* Insert @ifa without netlink attribution. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}

/* Bind @ifa to @dev's in_device (taking a reference) and insert it.
 * Consumes @ifa on failure.  Caller holds RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}

/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);

/* Called only from RTNL semaphored context. No locks. */

/* Find the primary address on @in_dev matching @prefix/@mask, or NULL. */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}

/* Join or leave the multicast group given by @ifa's address on its
 * device, via socket @sk.  Caller holds RTNL.
 */
static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
{
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr = ifa->ifa_address,
		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
	};
	int ret;

	ASSERT_RTNL();

	lock_sock(sk);
	if (join)
		ret = ip_mc_join_group(sk, &mreq);
	else
		ret = ip_mc_leave_group(sk, &mreq);
	release_sock(sk);

	return ret;
}

/* RTM_DELADDR handler: find the address matching the request's local
 * address / label / prefix attributes and delete it.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		/* drop any autojoined multicast membership first */
		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}

#define INFINITY_LIFE_TIME	0xFFFFFFFF

/* Periodic worker: expire addresses whose valid lifetime has passed and
 * flag as deprecated those past their preferred lifetime, then reschedule
 * itself.  First pass scans under RCU; changes are applied under RTNL.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* second pass under RTNL to actually delete/deprecate */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}

/* Record valid/preferred lifetimes on @ifa, mapping infinite values to
 * IFA_F_PERMANENT and a zero preferred lifetime to IFA_F_DEPRECATED.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}

/* Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr bound
 * to its in_device (reference taken).  Lifetimes from IFA_CACHEINFO are
 * returned through @pvalid_lft/@pprefered_lft.  Returns ERR_PTR on error.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroy with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	in_dev_hold(in_dev);

	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* preferred lifetime must not exceed valid lifetime */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}

/* Return the existing address on @ifa's device with the same local
 * address and subnet as @ifa, or NULL if there is none.
 */
static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap;

	if (!ifa->ifa_local)
		return NULL;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa) &&
		    ifa1->ifa_local == ifa->ifa_local)
			return ifa1;
	}
	return NULL;
}

/* RTM_NEWADDR handler: add a new address, or update the lifetimes of an
 * existing one when NLM_F_REPLACE is given.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}

/*
 *	Determine a default network mask, based on the IP address.
 */

static int inet_abc_len(__be32 addr)
{
	int rc = -1;	/* Something else, probably a multicast. */

	if (ipv4_is_zeronet(addr))
		rc = 0;
	else {
		__u32 haddr = ntohl(addr);

		if (IN_CLASSA(haddr))
			rc = 8;
		else if (IN_CLASSB(haddr))
			rc = 16;
		else if (IN_CLASSC(haddr))
			rc = 24;
	}

	return rc;
}


/* Handle the classic SIOC[GS]IF* address ioctls for IPv4, including
 * 4.4BSD-style "label:alias" matching.  Takes RTNL for the duration of
 * the lookup and modification.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* clearing IFF_UP on an alias deletes the alias */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}

/* SIOCGIFCONF helper: copy one ifreq per address on @dev into @buf.
 * With @buf == NULL just count the bytes needed.  Returns bytes written
 * (or required), or -EFAULT on copy failure.
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			done += sizeof(ifr);
			continue;
		}
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}

/* Select a source address on @dev suitable for talking to @dst within
 * @scope, falling back to a scan of all devices sharing @dev's L3 master
 * when @dev itself has no usable address.  Returns 0 if none found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);

/* Worker for inet_confirm_addr(): check whether @in_dev has an address
 * matching the @local/@dst/@scope wildcards (0 = any).  Returns the
 * confirmed address or 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}

/*
 * Confirm that local IP address exists using wildcards:
 * - net: netns to check, cannot be NULL
 * - in_dev: only on this interface, NULL=any interface
 * - dst: only in the same subnet as dst, 0=any dst
 * - local: address, 0=autoselect the local address
 * - scope: maximum allowed scope value for the local address
 */
__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
			 __be32 dst, __be32 local, int scope)
{
	__be32 addr = 0;
	struct net_device *dev;

	if (in_dev)
		return confirm_addr_indev(in_dev, dst, local, scope);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			addr = confirm_addr_indev(in_dev, dst, local, scope);
			if (addr)
				break;
		}
	}
	rcu_read_unlock();

	return addr;
}
EXPORT_SYMBOL(inet_confirm_addr);

/*
 *	Device notifier
 */

int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);

int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);

/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
1333 */ 1334 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1335 { 1336 struct in_ifaddr *ifa; 1337 int named = 0; 1338 1339 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1340 char old[IFNAMSIZ], *dot; 1341 1342 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1343 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1344 if (named++ == 0) 1345 goto skip; 1346 dot = strchr(old, ':'); 1347 if (!dot) { 1348 sprintf(old, ":%d", named); 1349 dot = old; 1350 } 1351 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1352 strcat(ifa->ifa_label, dot); 1353 else 1354 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1355 skip: 1356 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1357 } 1358 } 1359 1360 static bool inetdev_valid_mtu(unsigned int mtu) 1361 { 1362 return mtu >= 68; 1363 } 1364 1365 static void inetdev_send_gratuitous_arp(struct net_device *dev, 1366 struct in_device *in_dev) 1367 1368 { 1369 struct in_ifaddr *ifa; 1370 1371 for (ifa = in_dev->ifa_list; ifa; 1372 ifa = ifa->ifa_next) { 1373 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1374 ifa->ifa_local, dev, 1375 ifa->ifa_local, NULL, 1376 dev->dev_addr, NULL); 1377 } 1378 } 1379 1380 /* Called only under RTNL semaphore */ 1381 1382 static int inetdev_event(struct notifier_block *this, unsigned long event, 1383 void *ptr) 1384 { 1385 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1386 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1387 1388 ASSERT_RTNL(); 1389 1390 if (!in_dev) { 1391 if (event == NETDEV_REGISTER) { 1392 in_dev = inetdev_init(dev); 1393 if (IS_ERR(in_dev)) 1394 return notifier_from_errno(PTR_ERR(in_dev)); 1395 if (dev->flags & IFF_LOOPBACK) { 1396 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1397 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1398 } 1399 } else if (event == NETDEV_CHANGEMTU) { 1400 /* Re-enabling IP */ 1401 if (inetdev_valid_mtu(dev->mtu)) 1402 in_dev = inetdev_init(dev); 1403 } 1404 goto out; 1405 } 1406 1407 switch (event) { 1408 case NETDEV_REGISTER: 1409 
pr_debug("%s: bug\n", __func__); 1410 RCU_INIT_POINTER(dev->ip_ptr, NULL); 1411 break; 1412 case NETDEV_UP: 1413 if (!inetdev_valid_mtu(dev->mtu)) 1414 break; 1415 if (dev->flags & IFF_LOOPBACK) { 1416 struct in_ifaddr *ifa = inet_alloc_ifa(); 1417 1418 if (ifa) { 1419 INIT_HLIST_NODE(&ifa->hash); 1420 ifa->ifa_local = 1421 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1422 ifa->ifa_prefixlen = 8; 1423 ifa->ifa_mask = inet_make_mask(8); 1424 in_dev_hold(in_dev); 1425 ifa->ifa_dev = in_dev; 1426 ifa->ifa_scope = RT_SCOPE_HOST; 1427 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1428 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, 1429 INFINITY_LIFE_TIME); 1430 ipv4_devconf_setall(in_dev); 1431 neigh_parms_data_state_setall(in_dev->arp_parms); 1432 inet_insert_ifa(ifa); 1433 } 1434 } 1435 ip_mc_up(in_dev); 1436 /* fall through */ 1437 case NETDEV_CHANGEADDR: 1438 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1439 break; 1440 /* fall through */ 1441 case NETDEV_NOTIFY_PEERS: 1442 /* Send gratuitous ARP to notify of link change */ 1443 inetdev_send_gratuitous_arp(dev, in_dev); 1444 break; 1445 case NETDEV_DOWN: 1446 ip_mc_down(in_dev); 1447 break; 1448 case NETDEV_PRE_TYPE_CHANGE: 1449 ip_mc_unmap(in_dev); 1450 break; 1451 case NETDEV_POST_TYPE_CHANGE: 1452 ip_mc_remap(in_dev); 1453 break; 1454 case NETDEV_CHANGEMTU: 1455 if (inetdev_valid_mtu(dev->mtu)) 1456 break; 1457 /* disable IP when MTU is not enough */ 1458 case NETDEV_UNREGISTER: 1459 inetdev_destroy(in_dev); 1460 break; 1461 case NETDEV_CHANGENAME: 1462 /* Do not notify about label change, this event is 1463 * not interesting to applications using netlink. 
1464 */ 1465 inetdev_changename(dev, in_dev); 1466 1467 devinet_sysctl_unregister(in_dev); 1468 devinet_sysctl_register(in_dev); 1469 break; 1470 } 1471 out: 1472 return NOTIFY_DONE; 1473 } 1474 1475 static struct notifier_block ip_netdev_notifier = { 1476 .notifier_call = inetdev_event, 1477 }; 1478 1479 static size_t inet_nlmsg_size(void) 1480 { 1481 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1482 + nla_total_size(4) /* IFA_ADDRESS */ 1483 + nla_total_size(4) /* IFA_LOCAL */ 1484 + nla_total_size(4) /* IFA_BROADCAST */ 1485 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1486 + nla_total_size(4) /* IFA_FLAGS */ 1487 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ 1488 } 1489 1490 static inline u32 cstamp_delta(unsigned long cstamp) 1491 { 1492 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; 1493 } 1494 1495 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, 1496 unsigned long tstamp, u32 preferred, u32 valid) 1497 { 1498 struct ifa_cacheinfo ci; 1499 1500 ci.cstamp = cstamp_delta(cstamp); 1501 ci.tstamp = cstamp_delta(tstamp); 1502 ci.ifa_prefered = preferred; 1503 ci.ifa_valid = valid; 1504 1505 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); 1506 } 1507 1508 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1509 u32 portid, u32 seq, int event, unsigned int flags) 1510 { 1511 struct ifaddrmsg *ifm; 1512 struct nlmsghdr *nlh; 1513 u32 preferred, valid; 1514 1515 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); 1516 if (!nlh) 1517 return -EMSGSIZE; 1518 1519 ifm = nlmsg_data(nlh); 1520 ifm->ifa_family = AF_INET; 1521 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1522 ifm->ifa_flags = ifa->ifa_flags; 1523 ifm->ifa_scope = ifa->ifa_scope; 1524 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1525 1526 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { 1527 preferred = ifa->ifa_preferred_lft; 1528 valid = ifa->ifa_valid_lft; 1529 if (preferred != INFINITY_LIFE_TIME) { 1530 long tval = (jiffies - 
ifa->ifa_tstamp) / HZ; 1531 1532 if (preferred > tval) 1533 preferred -= tval; 1534 else 1535 preferred = 0; 1536 if (valid != INFINITY_LIFE_TIME) { 1537 if (valid > tval) 1538 valid -= tval; 1539 else 1540 valid = 0; 1541 } 1542 } 1543 } else { 1544 preferred = INFINITY_LIFE_TIME; 1545 valid = INFINITY_LIFE_TIME; 1546 } 1547 if ((ifa->ifa_address && 1548 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || 1549 (ifa->ifa_local && 1550 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || 1551 (ifa->ifa_broadcast && 1552 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || 1553 (ifa->ifa_label[0] && 1554 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || 1555 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || 1556 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, 1557 preferred, valid)) 1558 goto nla_put_failure; 1559 1560 nlmsg_end(skb, nlh); 1561 return 0; 1562 1563 nla_put_failure: 1564 nlmsg_cancel(skb, nlh); 1565 return -EMSGSIZE; 1566 } 1567 1568 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1569 { 1570 struct net *net = sock_net(skb->sk); 1571 int h, s_h; 1572 int idx, s_idx; 1573 int ip_idx, s_ip_idx; 1574 struct net_device *dev; 1575 struct in_device *in_dev; 1576 struct in_ifaddr *ifa; 1577 struct hlist_head *head; 1578 1579 s_h = cb->args[0]; 1580 s_idx = idx = cb->args[1]; 1581 s_ip_idx = ip_idx = cb->args[2]; 1582 1583 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1584 idx = 0; 1585 head = &net->dev_index_head[h]; 1586 rcu_read_lock(); 1587 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1588 net->dev_base_seq; 1589 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1590 if (idx < s_idx) 1591 goto cont; 1592 if (h > s_h || idx > s_idx) 1593 s_ip_idx = 0; 1594 in_dev = __in_dev_get_rcu(dev); 1595 if (!in_dev) 1596 goto cont; 1597 1598 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1599 ifa = ifa->ifa_next, ip_idx++) { 1600 if (ip_idx < s_ip_idx) 1601 continue; 1602 if (inet_fill_ifaddr(skb, ifa, 1603 
NETLINK_CB(cb->skb).portid, 1604 cb->nlh->nlmsg_seq, 1605 RTM_NEWADDR, NLM_F_MULTI) < 0) { 1606 rcu_read_unlock(); 1607 goto done; 1608 } 1609 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1610 } 1611 cont: 1612 idx++; 1613 } 1614 rcu_read_unlock(); 1615 } 1616 1617 done: 1618 cb->args[0] = h; 1619 cb->args[1] = idx; 1620 cb->args[2] = ip_idx; 1621 1622 return skb->len; 1623 } 1624 1625 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1626 u32 portid) 1627 { 1628 struct sk_buff *skb; 1629 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1630 int err = -ENOBUFS; 1631 struct net *net; 1632 1633 net = dev_net(ifa->ifa_dev->dev); 1634 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1635 if (!skb) 1636 goto errout; 1637 1638 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); 1639 if (err < 0) { 1640 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1641 WARN_ON(err == -EMSGSIZE); 1642 kfree_skb(skb); 1643 goto errout; 1644 } 1645 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1646 return; 1647 errout: 1648 if (err < 0) 1649 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1650 } 1651 1652 static size_t inet_get_link_af_size(const struct net_device *dev, 1653 u32 ext_filter_mask) 1654 { 1655 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1656 1657 if (!in_dev) 1658 return 0; 1659 1660 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1661 } 1662 1663 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, 1664 u32 ext_filter_mask) 1665 { 1666 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1667 struct nlattr *nla; 1668 int i; 1669 1670 if (!in_dev) 1671 return -ENODATA; 1672 1673 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1674 if (!nla) 1675 return -EMSGSIZE; 1676 1677 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1678 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1679 1680 return 0; 1681 } 1682 1683 static const struct nla_policy 
inet_af_policy[IFLA_INET_MAX+1] = { 1684 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1685 }; 1686 1687 static int inet_validate_link_af(const struct net_device *dev, 1688 const struct nlattr *nla) 1689 { 1690 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1691 int err, rem; 1692 1693 if (dev && !__in_dev_get_rtnl(dev)) 1694 return -EAFNOSUPPORT; 1695 1696 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1697 if (err < 0) 1698 return err; 1699 1700 if (tb[IFLA_INET_CONF]) { 1701 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1702 int cfgid = nla_type(a); 1703 1704 if (nla_len(a) < 4) 1705 return -EINVAL; 1706 1707 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1708 return -EINVAL; 1709 } 1710 } 1711 1712 return 0; 1713 } 1714 1715 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1716 { 1717 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1718 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1719 int rem; 1720 1721 if (!in_dev) 1722 return -EAFNOSUPPORT; 1723 1724 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1725 BUG(); 1726 1727 if (tb[IFLA_INET_CONF]) { 1728 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1729 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1730 } 1731 1732 return 0; 1733 } 1734 1735 static int inet_netconf_msgsize_devconf(int type) 1736 { 1737 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1738 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1739 1740 /* type -1 is used for ALL */ 1741 if (type == -1 || type == NETCONFA_FORWARDING) 1742 size += nla_total_size(4); 1743 if (type == -1 || type == NETCONFA_RP_FILTER) 1744 size += nla_total_size(4); 1745 if (type == -1 || type == NETCONFA_MC_FORWARDING) 1746 size += nla_total_size(4); 1747 if (type == -1 || type == NETCONFA_PROXY_NEIGH) 1748 size += nla_total_size(4); 1749 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1750 size += nla_total_size(4); 1751 1752 return size; 1753 } 1754 1755 static int inet_netconf_fill_devconf(struct 
sk_buff *skb, int ifindex, 1756 struct ipv4_devconf *devconf, u32 portid, 1757 u32 seq, int event, unsigned int flags, 1758 int type) 1759 { 1760 struct nlmsghdr *nlh; 1761 struct netconfmsg *ncm; 1762 1763 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1764 flags); 1765 if (!nlh) 1766 return -EMSGSIZE; 1767 1768 ncm = nlmsg_data(nlh); 1769 ncm->ncm_family = AF_INET; 1770 1771 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1772 goto nla_put_failure; 1773 1774 /* type -1 is used for ALL */ 1775 if ((type == -1 || type == NETCONFA_FORWARDING) && 1776 nla_put_s32(skb, NETCONFA_FORWARDING, 1777 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1778 goto nla_put_failure; 1779 if ((type == -1 || type == NETCONFA_RP_FILTER) && 1780 nla_put_s32(skb, NETCONFA_RP_FILTER, 1781 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1782 goto nla_put_failure; 1783 if ((type == -1 || type == NETCONFA_MC_FORWARDING) && 1784 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1785 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1786 goto nla_put_failure; 1787 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && 1788 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1789 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1790 goto nla_put_failure; 1791 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1792 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1793 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1794 goto nla_put_failure; 1795 1796 nlmsg_end(skb, nlh); 1797 return 0; 1798 1799 nla_put_failure: 1800 nlmsg_cancel(skb, nlh); 1801 return -EMSGSIZE; 1802 } 1803 1804 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, 1805 struct ipv4_devconf *devconf) 1806 { 1807 struct sk_buff *skb; 1808 int err = -ENOBUFS; 1809 1810 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); 1811 if (!skb) 1812 goto errout; 1813 1814 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, 1815 RTM_NEWNETCONF, 0, type); 1816 if (err < 0) { 1817 /* -EMSGSIZE 
implies BUG in inet_netconf_msgsize_devconf() */ 1818 WARN_ON(err == -EMSGSIZE); 1819 kfree_skb(skb); 1820 goto errout; 1821 } 1822 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); 1823 return; 1824 errout: 1825 if (err < 0) 1826 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); 1827 } 1828 1829 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { 1830 [NETCONFA_IFINDEX] = { .len = sizeof(int) }, 1831 [NETCONFA_FORWARDING] = { .len = sizeof(int) }, 1832 [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, 1833 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, 1834 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 1835 }; 1836 1837 static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1838 struct nlmsghdr *nlh) 1839 { 1840 struct net *net = sock_net(in_skb->sk); 1841 struct nlattr *tb[NETCONFA_MAX+1]; 1842 struct netconfmsg *ncm; 1843 struct sk_buff *skb; 1844 struct ipv4_devconf *devconf; 1845 struct in_device *in_dev; 1846 struct net_device *dev; 1847 int ifindex; 1848 int err; 1849 1850 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 1851 devconf_ipv4_policy); 1852 if (err < 0) 1853 goto errout; 1854 1855 err = -EINVAL; 1856 if (!tb[NETCONFA_IFINDEX]) 1857 goto errout; 1858 1859 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 1860 switch (ifindex) { 1861 case NETCONFA_IFINDEX_ALL: 1862 devconf = net->ipv4.devconf_all; 1863 break; 1864 case NETCONFA_IFINDEX_DEFAULT: 1865 devconf = net->ipv4.devconf_dflt; 1866 break; 1867 default: 1868 dev = __dev_get_by_index(net, ifindex); 1869 if (!dev) 1870 goto errout; 1871 in_dev = __in_dev_get_rtnl(dev); 1872 if (!in_dev) 1873 goto errout; 1874 devconf = &in_dev->cnf; 1875 break; 1876 } 1877 1878 err = -ENOBUFS; 1879 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); 1880 if (!skb) 1881 goto errout; 1882 1883 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1884 NETLINK_CB(in_skb).portid, 1885 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1886 -1); 1887 if (err < 0) 
{ 1888 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1889 WARN_ON(err == -EMSGSIZE); 1890 kfree_skb(skb); 1891 goto errout; 1892 } 1893 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 1894 errout: 1895 return err; 1896 } 1897 1898 static int inet_netconf_dump_devconf(struct sk_buff *skb, 1899 struct netlink_callback *cb) 1900 { 1901 struct net *net = sock_net(skb->sk); 1902 int h, s_h; 1903 int idx, s_idx; 1904 struct net_device *dev; 1905 struct in_device *in_dev; 1906 struct hlist_head *head; 1907 1908 s_h = cb->args[0]; 1909 s_idx = idx = cb->args[1]; 1910 1911 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1912 idx = 0; 1913 head = &net->dev_index_head[h]; 1914 rcu_read_lock(); 1915 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1916 net->dev_base_seq; 1917 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1918 if (idx < s_idx) 1919 goto cont; 1920 in_dev = __in_dev_get_rcu(dev); 1921 if (!in_dev) 1922 goto cont; 1923 1924 if (inet_netconf_fill_devconf(skb, dev->ifindex, 1925 &in_dev->cnf, 1926 NETLINK_CB(cb->skb).portid, 1927 cb->nlh->nlmsg_seq, 1928 RTM_NEWNETCONF, 1929 NLM_F_MULTI, 1930 -1) < 0) { 1931 rcu_read_unlock(); 1932 goto done; 1933 } 1934 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1935 cont: 1936 idx++; 1937 } 1938 rcu_read_unlock(); 1939 } 1940 if (h == NETDEV_HASHENTRIES) { 1941 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, 1942 net->ipv4.devconf_all, 1943 NETLINK_CB(cb->skb).portid, 1944 cb->nlh->nlmsg_seq, 1945 RTM_NEWNETCONF, NLM_F_MULTI, 1946 -1) < 0) 1947 goto done; 1948 else 1949 h++; 1950 } 1951 if (h == NETDEV_HASHENTRIES + 1) { 1952 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, 1953 net->ipv4.devconf_dflt, 1954 NETLINK_CB(cb->skb).portid, 1955 cb->nlh->nlmsg_seq, 1956 RTM_NEWNETCONF, NLM_F_MULTI, 1957 -1) < 0) 1958 goto done; 1959 else 1960 h++; 1961 } 1962 done: 1963 cb->args[0] = h; 1964 cb->args[1] = idx; 1965 1966 return skb->len; 1967 } 1968 1969 #ifdef 
CONFIG_SYSCTL 1970 1971 static void devinet_copy_dflt_conf(struct net *net, int i) 1972 { 1973 struct net_device *dev; 1974 1975 rcu_read_lock(); 1976 for_each_netdev_rcu(net, dev) { 1977 struct in_device *in_dev; 1978 1979 in_dev = __in_dev_get_rcu(dev); 1980 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1981 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1982 } 1983 rcu_read_unlock(); 1984 } 1985 1986 /* called with RTNL locked */ 1987 static void inet_forward_change(struct net *net) 1988 { 1989 struct net_device *dev; 1990 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 1991 1992 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1993 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1994 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 1995 NETCONFA_IFINDEX_ALL, 1996 net->ipv4.devconf_all); 1997 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 1998 NETCONFA_IFINDEX_DEFAULT, 1999 net->ipv4.devconf_dflt); 2000 2001 for_each_netdev(net, dev) { 2002 struct in_device *in_dev; 2003 if (on) 2004 dev_disable_lro(dev); 2005 rcu_read_lock(); 2006 in_dev = __in_dev_get_rcu(dev); 2007 if (in_dev) { 2008 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 2009 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2010 dev->ifindex, &in_dev->cnf); 2011 } 2012 rcu_read_unlock(); 2013 } 2014 } 2015 2016 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) 2017 { 2018 if (cnf == net->ipv4.devconf_dflt) 2019 return NETCONFA_IFINDEX_DEFAULT; 2020 else if (cnf == net->ipv4.devconf_all) 2021 return NETCONFA_IFINDEX_ALL; 2022 else { 2023 struct in_device *idev 2024 = container_of(cnf, struct in_device, cnf); 2025 return idev->dev->ifindex; 2026 } 2027 } 2028 2029 static int devinet_conf_proc(struct ctl_table *ctl, int write, 2030 void __user *buffer, 2031 size_t *lenp, loff_t *ppos) 2032 { 2033 int old_value = *(int *)ctl->data; 2034 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2035 int new_value = *(int *)ctl->data; 2036 2037 if (write) { 2038 struct 
ipv4_devconf *cnf = ctl->extra1; 2039 struct net *net = ctl->extra2; 2040 int i = (int *)ctl->data - cnf->data; 2041 int ifindex; 2042 2043 set_bit(i, cnf->state); 2044 2045 if (cnf == net->ipv4.devconf_dflt) 2046 devinet_copy_dflt_conf(net, i); 2047 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || 2048 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 2049 if ((new_value == 0) && (old_value != 0)) 2050 rt_cache_flush(net); 2051 2052 if (i == IPV4_DEVCONF_RP_FILTER - 1 && 2053 new_value != old_value) { 2054 ifindex = devinet_conf_ifindex(net, cnf); 2055 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, 2056 ifindex, cnf); 2057 } 2058 if (i == IPV4_DEVCONF_PROXY_ARP - 1 && 2059 new_value != old_value) { 2060 ifindex = devinet_conf_ifindex(net, cnf); 2061 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, 2062 ifindex, cnf); 2063 } 2064 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && 2065 new_value != old_value) { 2066 ifindex = devinet_conf_ifindex(net, cnf); 2067 inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 2068 ifindex, cnf); 2069 } 2070 } 2071 2072 return ret; 2073 } 2074 2075 static int devinet_sysctl_forward(struct ctl_table *ctl, int write, 2076 void __user *buffer, 2077 size_t *lenp, loff_t *ppos) 2078 { 2079 int *valp = ctl->data; 2080 int val = *valp; 2081 loff_t pos = *ppos; 2082 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2083 2084 if (write && *valp != val) { 2085 struct net *net = ctl->extra2; 2086 2087 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 2088 if (!rtnl_trylock()) { 2089 /* Restore the original values before restarting */ 2090 *valp = val; 2091 *ppos = pos; 2092 return restart_syscall(); 2093 } 2094 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 2095 inet_forward_change(net); 2096 } else { 2097 struct ipv4_devconf *cnf = ctl->extra1; 2098 struct in_device *idev = 2099 container_of(cnf, struct in_device, cnf); 2100 if (*valp) 2101 dev_disable_lro(idev->dev); 2102 inet_netconf_notify_devconf(net, 2103 
NETCONFA_FORWARDING, 2104 idev->dev->ifindex, 2105 cnf); 2106 } 2107 rtnl_unlock(); 2108 rt_cache_flush(net); 2109 } else 2110 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2111 NETCONFA_IFINDEX_DEFAULT, 2112 net->ipv4.devconf_dflt); 2113 } 2114 2115 return ret; 2116 } 2117 2118 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, 2119 void __user *buffer, 2120 size_t *lenp, loff_t *ppos) 2121 { 2122 int *valp = ctl->data; 2123 int val = *valp; 2124 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2125 struct net *net = ctl->extra2; 2126 2127 if (write && *valp != val) 2128 rt_cache_flush(net); 2129 2130 return ret; 2131 } 2132 2133 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 2134 { \ 2135 .procname = name, \ 2136 .data = ipv4_devconf.data + \ 2137 IPV4_DEVCONF_ ## attr - 1, \ 2138 .maxlen = sizeof(int), \ 2139 .mode = mval, \ 2140 .proc_handler = proc, \ 2141 .extra1 = &ipv4_devconf, \ 2142 } 2143 2144 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 2145 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 2146 2147 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 2148 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 2149 2150 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 2151 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 2152 2153 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 2154 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 2155 2156 static struct devinet_sysctl_table { 2157 struct ctl_table_header *sysctl_header; 2158 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 2159 } devinet_sysctl = { 2160 .devinet_vars = { 2161 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 2162 devinet_sysctl_forward), 2163 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 2164 2165 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 2166 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 2167 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 2168 
DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 2169 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 2170 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 2171 "accept_source_route"), 2172 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 2173 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 2174 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 2175 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 2176 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 2177 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 2178 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 2179 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 2180 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 2181 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 2182 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2183 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2184 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2185 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, 2186 "force_igmp_version"), 2187 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, 2188 "igmpv2_unsolicited_report_interval"), 2189 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, 2190 "igmpv3_unsolicited_report_interval"), 2191 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2192 "ignore_routes_with_linkdown"), 2193 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, 2194 "drop_gratuitous_arp"), 2195 2196 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2197 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2198 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2199 "promote_secondaries"), 2200 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2201 "route_localnet"), 2202 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, 2203 "drop_unicast_in_l2_multicast"), 2204 }, 2205 }; 2206 2207 static int __devinet_sysctl_register(struct net *net, char *dev_name, 2208 struct ipv4_devconf *p) 2209 { 2210 int i; 2211 struct devinet_sysctl_table *t; 2212 char 
path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; 2213 2214 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 2215 if (!t) 2216 goto out; 2217 2218 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 2219 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 2220 t->devinet_vars[i].extra1 = p; 2221 t->devinet_vars[i].extra2 = net; 2222 } 2223 2224 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); 2225 2226 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); 2227 if (!t->sysctl_header) 2228 goto free; 2229 2230 p->sysctl = t; 2231 return 0; 2232 2233 free: 2234 kfree(t); 2235 out: 2236 return -ENOBUFS; 2237 } 2238 2239 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 2240 { 2241 struct devinet_sysctl_table *t = cnf->sysctl; 2242 2243 if (!t) 2244 return; 2245 2246 cnf->sysctl = NULL; 2247 unregister_net_sysctl_table(t->sysctl_header); 2248 kfree(t); 2249 } 2250 2251 static int devinet_sysctl_register(struct in_device *idev) 2252 { 2253 int err; 2254 2255 if (!sysctl_dev_name_is_allowed(idev->dev->name)) 2256 return -EINVAL; 2257 2258 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); 2259 if (err) 2260 return err; 2261 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 2262 &idev->cnf); 2263 if (err) 2264 neigh_sysctl_unregister(idev->arp_parms); 2265 return err; 2266 } 2267 2268 static void devinet_sysctl_unregister(struct in_device *idev) 2269 { 2270 __devinet_sysctl_unregister(&idev->cnf); 2271 neigh_sysctl_unregister(idev->arp_parms); 2272 } 2273 2274 static struct ctl_table ctl_forward_entry[] = { 2275 { 2276 .procname = "ip_forward", 2277 .data = &ipv4_devconf.data[ 2278 IPV4_DEVCONF_FORWARDING - 1], 2279 .maxlen = sizeof(int), 2280 .mode = 0644, 2281 .proc_handler = devinet_sysctl_forward, 2282 .extra1 = &ipv4_devconf, 2283 .extra2 = &init_net, 2284 }, 2285 { }, 2286 }; 2287 #endif 2288 2289 static __net_init int devinet_init_net(struct net *net) 2290 { 2291 int err; 2292 struct 
ipv4_devconf *all, *dflt; 2293 #ifdef CONFIG_SYSCTL 2294 struct ctl_table *tbl = ctl_forward_entry; 2295 struct ctl_table_header *forw_hdr; 2296 #endif 2297 2298 err = -ENOMEM; 2299 all = &ipv4_devconf; 2300 dflt = &ipv4_devconf_dflt; 2301 2302 if (!net_eq(net, &init_net)) { 2303 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 2304 if (!all) 2305 goto err_alloc_all; 2306 2307 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 2308 if (!dflt) 2309 goto err_alloc_dflt; 2310 2311 #ifdef CONFIG_SYSCTL 2312 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 2313 if (!tbl) 2314 goto err_alloc_ctl; 2315 2316 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 2317 tbl[0].extra1 = all; 2318 tbl[0].extra2 = net; 2319 #endif 2320 } 2321 2322 #ifdef CONFIG_SYSCTL 2323 err = __devinet_sysctl_register(net, "all", all); 2324 if (err < 0) 2325 goto err_reg_all; 2326 2327 err = __devinet_sysctl_register(net, "default", dflt); 2328 if (err < 0) 2329 goto err_reg_dflt; 2330 2331 err = -ENOMEM; 2332 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); 2333 if (!forw_hdr) 2334 goto err_reg_ctl; 2335 net->ipv4.forw_hdr = forw_hdr; 2336 #endif 2337 2338 net->ipv4.devconf_all = all; 2339 net->ipv4.devconf_dflt = dflt; 2340 return 0; 2341 2342 #ifdef CONFIG_SYSCTL 2343 err_reg_ctl: 2344 __devinet_sysctl_unregister(dflt); 2345 err_reg_dflt: 2346 __devinet_sysctl_unregister(all); 2347 err_reg_all: 2348 if (tbl != ctl_forward_entry) 2349 kfree(tbl); 2350 err_alloc_ctl: 2351 #endif 2352 if (dflt != &ipv4_devconf_dflt) 2353 kfree(dflt); 2354 err_alloc_dflt: 2355 if (all != &ipv4_devconf) 2356 kfree(all); 2357 err_alloc_all: 2358 return err; 2359 } 2360 2361 static __net_exit void devinet_exit_net(struct net *net) 2362 { 2363 #ifdef CONFIG_SYSCTL 2364 struct ctl_table *tbl; 2365 2366 tbl = net->ipv4.forw_hdr->ctl_table_arg; 2367 unregister_net_sysctl_table(net->ipv4.forw_hdr); 2368 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 2369 
__devinet_sysctl_unregister(net->ipv4.devconf_all); 2370 kfree(tbl); 2371 #endif 2372 kfree(net->ipv4.devconf_dflt); 2373 kfree(net->ipv4.devconf_all); 2374 } 2375 2376 static __net_initdata struct pernet_operations devinet_ops = { 2377 .init = devinet_init_net, 2378 .exit = devinet_exit_net, 2379 }; 2380 2381 static struct rtnl_af_ops inet_af_ops __read_mostly = { 2382 .family = AF_INET, 2383 .fill_link_af = inet_fill_link_af, 2384 .get_link_af_size = inet_get_link_af_size, 2385 .validate_link_af = inet_validate_link_af, 2386 .set_link_af = inet_set_link_af, 2387 }; 2388 2389 void __init devinet_init(void) 2390 { 2391 int i; 2392 2393 for (i = 0; i < IN4_ADDR_HSIZE; i++) 2394 INIT_HLIST_HEAD(&inet_addr_lst[i]); 2395 2396 register_pernet_subsys(&devinet_ops); 2397 2398 register_gifconf(PF_INET, inet_gifconf); 2399 register_netdevice_notifier(&ip_netdev_notifier); 2400 2401 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 2402 2403 rtnl_af_register(&inet_af_ops); 2404 2405 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 2406 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 2407 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 2408 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2409 inet_netconf_dump_devconf, NULL); 2410 } 2411