xref: /openbmc/linux/net/ipv4/devinet.c (revision e190bfe5)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 static struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
/*
 * Read attribute "attr" from the default devconf block that network
 * namespace "net" points at (net->ipv4.devconf_dflt).
 *
 * "net" is parenthesised so that any pointer-valued expression can be passed
 * without operator-precedence surprises in the expansion.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87 
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 	[IFA_LOCAL]     	= { .type = NLA_U32 },
90 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111 
112 /* Locks all the inet devices. */
113 
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	WARN_ON(idev->ifa_list);
137 	WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		pr_err("Freeing alive in_device %p\n", idev);
145 	else
146 		kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 			sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 	if (!in_dev->arp_parms)
165 		goto out_kfree;
166 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 		dev_disable_lro(dev);
168 	/* Reference in_dev->dev */
169 	dev_hold(dev);
170 	/* Account for reference dev->ip_ptr (below) */
171 	in_dev_hold(in_dev);
172 
173 	devinet_sysctl_register(in_dev);
174 	ip_mc_init_dev(in_dev);
175 	if (dev->flags & IFF_UP)
176 		ip_mc_up(in_dev);
177 
178 	/* we can receive as soon as ip_ptr is set -- do this last */
179 	rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181 	return in_dev;
182 out_kfree:
183 	kfree(in_dev);
184 	in_dev = NULL;
185 	goto out;
186 }
187 
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191 	in_dev_put(idev);
192 }
193 
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196 	struct in_ifaddr *ifa;
197 	struct net_device *dev;
198 
199 	ASSERT_RTNL();
200 
201 	dev = in_dev->dev;
202 
203 	in_dev->dead = 1;
204 
205 	ip_mc_destroy_dev(in_dev);
206 
207 	while ((ifa = in_dev->ifa_list) != NULL) {
208 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 		inet_free_ifa(ifa);
210 	}
211 
212 	dev->ip_ptr = NULL;
213 
214 	devinet_sysctl_unregister(in_dev);
215 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 	arp_ifdown(dev);
217 
218 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220 
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223 	rcu_read_lock();
224 	for_primary_ifa(in_dev) {
225 		if (inet_ifa_match(a, ifa)) {
226 			if (!b || inet_ifa_match(b, ifa)) {
227 				rcu_read_unlock();
228 				return 1;
229 			}
230 		}
231 	} endfor_ifa(in_dev);
232 	rcu_read_unlock();
233 	return 0;
234 }
235 
/*
 * Remove the address that *ifap points at from in_dev's address list and
 * announce the removal.  Requires RTNL.
 *
 * @in_dev:  device whose address list is being edited
 * @ifap:    list link (pointer-to-pointer) currently pointing at the address
 * @destroy: when non-zero the removed address is passed to inet_free_ifa()
 * @nlh:     forwarded unchanged to rtmsg_ifa()
 * @pid:     forwarded unchanged to rtmsg_ifa()
 *
 * When a primary address is removed, its secondaries (same mask, matching
 * subnet) are either removed too or, if IN_DEV_PROMOTE_SECONDARIES() is
 * set, the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* Last primary seen whose scope is not below ifa1's; promotion target. */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* Entry directly in front of the promotion candidate, if any. */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Not a secondary of ifa1: step over it. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Move the candidate from behind prev_prom to just after
		 * last_prim.  With no prev_prom it stays where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for every later entry that shares
		 * the deleted address's mask and matches its subnet.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
321 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
327 
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329 			     u32 pid)
330 {
331 	struct in_device *in_dev = ifa->ifa_dev;
332 	struct in_ifaddr *ifa1, **ifap, **last_primary;
333 
334 	ASSERT_RTNL();
335 
336 	if (!ifa->ifa_local) {
337 		inet_free_ifa(ifa);
338 		return 0;
339 	}
340 
341 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 	last_primary = &in_dev->ifa_list;
343 
344 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 	     ifap = &ifa1->ifa_next) {
346 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 		    ifa->ifa_scope <= ifa1->ifa_scope)
348 			last_primary = &ifa1->ifa_next;
349 		if (ifa1->ifa_mask == ifa->ifa_mask &&
350 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
351 			if (ifa1->ifa_local == ifa->ifa_local) {
352 				inet_free_ifa(ifa);
353 				return -EEXIST;
354 			}
355 			if (ifa1->ifa_scope != ifa->ifa_scope) {
356 				inet_free_ifa(ifa);
357 				return -EINVAL;
358 			}
359 			ifa->ifa_flags |= IFA_F_SECONDARY;
360 		}
361 	}
362 
363 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 		net_srandom(ifa->ifa_local);
365 		ifap = last_primary;
366 	}
367 
368 	ifa->ifa_next = *ifap;
369 	*ifap = ifa;
370 
371 	/* Send message first, then call notifier.
372 	   Notifier will trigger FIB update, so that
373 	   listeners of netlink will know about new ifaddr */
374 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376 
377 	return 0;
378 }
379 
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382 	return __inet_insert_ifa(ifa, NULL, 0);
383 }
384 
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388 
389 	ASSERT_RTNL();
390 
391 	if (!in_dev) {
392 		inet_free_ifa(ifa);
393 		return -ENOBUFS;
394 	}
395 	ipv4_devconf_setall(in_dev);
396 	if (ifa->ifa_dev != in_dev) {
397 		WARN_ON(ifa->ifa_dev);
398 		in_dev_hold(in_dev);
399 		ifa->ifa_dev = in_dev;
400 	}
401 	if (ipv4_is_loopback(ifa->ifa_local))
402 		ifa->ifa_scope = RT_SCOPE_HOST;
403 	return inet_insert_ifa(ifa);
404 }
405 
406 struct in_device *inetdev_by_index(struct net *net, int ifindex)
407 {
408 	struct net_device *dev;
409 	struct in_device *in_dev = NULL;
410 
411 	rcu_read_lock();
412 	dev = dev_get_by_index_rcu(net, ifindex);
413 	if (dev)
414 		in_dev = in_dev_get(dev);
415 	rcu_read_unlock();
416 	return in_dev;
417 }
418 EXPORT_SYMBOL(inetdev_by_index);
419 
420 /* Called only from RTNL semaphored context. No locks. */
421 
422 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
423 				    __be32 mask)
424 {
425 	ASSERT_RTNL();
426 
427 	for_primary_ifa(in_dev) {
428 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429 			return ifa;
430 	} endfor_ifa(in_dev);
431 	return NULL;
432 }
433 
434 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 {
436 	struct net *net = sock_net(skb->sk);
437 	struct nlattr *tb[IFA_MAX+1];
438 	struct in_device *in_dev;
439 	struct ifaddrmsg *ifm;
440 	struct in_ifaddr *ifa, **ifap;
441 	int err = -EINVAL;
442 
443 	ASSERT_RTNL();
444 
445 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446 	if (err < 0)
447 		goto errout;
448 
449 	ifm = nlmsg_data(nlh);
450 	in_dev = inetdev_by_index(net, ifm->ifa_index);
451 	if (in_dev == NULL) {
452 		err = -ENODEV;
453 		goto errout;
454 	}
455 
456 	__in_dev_put(in_dev);
457 
458 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459 	     ifap = &ifa->ifa_next) {
460 		if (tb[IFA_LOCAL] &&
461 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
462 			continue;
463 
464 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
465 			continue;
466 
467 		if (tb[IFA_ADDRESS] &&
468 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
470 			continue;
471 
472 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473 		return 0;
474 	}
475 
476 	err = -EADDRNOTAVAIL;
477 errout:
478 	return err;
479 }
480 
481 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482 {
483 	struct nlattr *tb[IFA_MAX+1];
484 	struct in_ifaddr *ifa;
485 	struct ifaddrmsg *ifm;
486 	struct net_device *dev;
487 	struct in_device *in_dev;
488 	int err;
489 
490 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491 	if (err < 0)
492 		goto errout;
493 
494 	ifm = nlmsg_data(nlh);
495 	err = -EINVAL;
496 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
497 		goto errout;
498 
499 	dev = __dev_get_by_index(net, ifm->ifa_index);
500 	err = -ENODEV;
501 	if (dev == NULL)
502 		goto errout;
503 
504 	in_dev = __in_dev_get_rtnl(dev);
505 	err = -ENOBUFS;
506 	if (in_dev == NULL)
507 		goto errout;
508 
509 	ifa = inet_alloc_ifa();
510 	if (ifa == NULL)
511 		/*
512 		 * A potential indev allocation can be left alive, it stays
513 		 * assigned to its device and is destroy with it.
514 		 */
515 		goto errout;
516 
517 	ipv4_devconf_setall(in_dev);
518 	in_dev_hold(in_dev);
519 
520 	if (tb[IFA_ADDRESS] == NULL)
521 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522 
523 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525 	ifa->ifa_flags = ifm->ifa_flags;
526 	ifa->ifa_scope = ifm->ifa_scope;
527 	ifa->ifa_dev = in_dev;
528 
529 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531 
532 	if (tb[IFA_BROADCAST])
533 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
534 
535 	if (tb[IFA_LABEL])
536 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537 	else
538 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
539 
540 	return ifa;
541 
542 errout:
543 	return ERR_PTR(err);
544 }
545 
546 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547 {
548 	struct net *net = sock_net(skb->sk);
549 	struct in_ifaddr *ifa;
550 
551 	ASSERT_RTNL();
552 
553 	ifa = rtm_to_ifaddr(net, nlh);
554 	if (IS_ERR(ifa))
555 		return PTR_ERR(ifa);
556 
557 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558 }
559 
560 /*
561  *	Determine a default network mask, based on the IP address.
562  */
563 
564 static inline int inet_abc_len(__be32 addr)
565 {
566 	int rc = -1;	/* Something else, probably a multicast. */
567 
568 	if (ipv4_is_zeronet(addr))
569 		rc = 0;
570 	else {
571 		__u32 haddr = ntohl(addr);
572 
573 		if (IN_CLASSA(haddr))
574 			rc = 8;
575 		else if (IN_CLASSB(haddr))
576 			rc = 16;
577 		else if (IN_CLASSC(haddr))
578 			rc = 24;
579 	}
580 
581 	return rc;
582 }
583 
584 
/*
 * Handle the legacy IPv4 interface ioctls: the SIOCGIF and SIOCSIF
 * ADDR / BRDADDR / DSTADDR / NETMASK requests plus SIOCSIFFLAGS.
 *
 * @net: network namespace used for the device lookup
 * @cmd: ioctl number; anything not listed above yields -EINVAL
 * @arg: user pointer to a struct ifreq (read, and for "get" requests
 *       written back)
 *
 * "Set" requests and SIOCSIFFLAGS need CAP_NET_ADMIN (-EACCES otherwise);
 * the address "set" requests also require sin_family == AF_INET.  The
 * device lookup and all list edits run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() /
 * inet_set_ifa() report).
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination: the name comes straight from user space. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias label ("name:suffix") down to the device
	 * name for the module load and device lookup; restored further down.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	/* First switch: permission and argument checks only, no locking. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* The reply is built in place inside ifr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Put the alias suffix back so ifr_name is the full label again. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address; every other request needs one.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second switch: perform the request.  "Get" cases leave through
	 * rarok, which unlocks and copies ifr back to user space.
	 */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias label, clearing IFF_UP deletes that address;
		 * the device flags themselves are left untouched.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; it is re-inserted below. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Classful default mask, or /32 on point-to-point links. */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Remove, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" success path: unlock before touching user memory. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
824 
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
828 	struct in_ifaddr *ifa;
829 	struct ifreq ifr;
830 	int done = 0;
831 
832 	if (!in_dev)
833 		goto out;
834 
835 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
836 		if (!buf) {
837 			done += sizeof(ifr);
838 			continue;
839 		}
840 		if (len < (int) sizeof(ifr))
841 			break;
842 		memset(&ifr, 0, sizeof(struct ifreq));
843 		if (ifa->ifa_label)
844 			strcpy(ifr.ifr_name, ifa->ifa_label);
845 		else
846 			strcpy(ifr.ifr_name, dev->name);
847 
848 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850 								ifa->ifa_local;
851 
852 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853 			done = -EFAULT;
854 			break;
855 		}
856 		buf  += sizeof(struct ifreq);
857 		len  -= sizeof(struct ifreq);
858 		done += sizeof(struct ifreq);
859 	}
860 out:
861 	return done;
862 }
863 
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866 	__be32 addr = 0;
867 	struct in_device *in_dev;
868 	struct net *net = dev_net(dev);
869 
870 	rcu_read_lock();
871 	in_dev = __in_dev_get_rcu(dev);
872 	if (!in_dev)
873 		goto no_in_dev;
874 
875 	for_primary_ifa(in_dev) {
876 		if (ifa->ifa_scope > scope)
877 			continue;
878 		if (!dst || inet_ifa_match(dst, ifa)) {
879 			addr = ifa->ifa_local;
880 			break;
881 		}
882 		if (!addr)
883 			addr = ifa->ifa_local;
884 	} endfor_ifa(in_dev);
885 
886 	if (addr)
887 		goto out_unlock;
888 no_in_dev:
889 
890 	/* Not loopback addresses on loopback should be preferred
891 	   in this case. It is importnat that lo is the first interface
892 	   in dev_base list.
893 	 */
894 	for_each_netdev_rcu(net, dev) {
895 		in_dev = __in_dev_get_rcu(dev);
896 		if (!in_dev)
897 			continue;
898 
899 		for_primary_ifa(in_dev) {
900 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
901 			    ifa->ifa_scope <= scope) {
902 				addr = ifa->ifa_local;
903 				goto out_unlock;
904 			}
905 		} endfor_ifa(in_dev);
906 	}
907 out_unlock:
908 	rcu_read_unlock();
909 	return addr;
910 }
911 EXPORT_SYMBOL(inet_select_addr);
912 
/*
 * Search the addresses of one in_device for a usable local address.
 *
 * @in_dev: device whose addresses are walked with for_ifa()
 * @dst:    when non-zero, some address must match it via inet_ifa_match()
 * @local:  when non-zero, the wanted local address; 0 lets the function
 *          choose
 * @scope:  largest acceptable ifa_scope for the returned address
 *
 * Two things are tracked while walking: "addr", the first address that
 * equals @local (or any address when @local is 0) with an acceptable scope,
 * and "same", whether an entry matching both @local and @dst has been
 * seen.  An address is returned only when "same" ends up set; otherwise
 * the result is 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Latch the first candidate local address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Only the "autoselect for a given dst"
				 * case needs the extra checks below.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
949 
950 /*
951  * Confirm that local IP address exists using wildcards:
952  * - in_dev: only on this interface, 0=any interface
953  * - dst: only in the same subnet as dst, 0=any dst
954  * - local: address, 0=autoselect the local address
955  * - scope: maximum allowed scope value for the local address
956  */
957 __be32 inet_confirm_addr(struct in_device *in_dev,
958 			 __be32 dst, __be32 local, int scope)
959 {
960 	__be32 addr = 0;
961 	struct net_device *dev;
962 	struct net *net;
963 
964 	if (scope != RT_SCOPE_LINK)
965 		return confirm_addr_indev(in_dev, dst, local, scope);
966 
967 	net = dev_net(in_dev->dev);
968 	rcu_read_lock();
969 	for_each_netdev_rcu(net, dev) {
970 		in_dev = __in_dev_get_rcu(dev);
971 		if (in_dev) {
972 			addr = confirm_addr_indev(in_dev, dst, local, scope);
973 			if (addr)
974 				break;
975 		}
976 	}
977 	rcu_read_unlock();
978 
979 	return addr;
980 }
981 
982 /*
983  *	Device notifier
984  */
985 
986 int register_inetaddr_notifier(struct notifier_block *nb)
987 {
988 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
989 }
990 EXPORT_SYMBOL(register_inetaddr_notifier);
991 
992 int unregister_inetaddr_notifier(struct notifier_block *nb)
993 {
994 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995 }
996 EXPORT_SYMBOL(unregister_inetaddr_notifier);
997 
998 /* Rename ifa_labels for a device name change. Make some effort to preserve
999  * existing alias numbering and to create unique labels if possible.
1000 */
1001 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002 {
1003 	struct in_ifaddr *ifa;
1004 	int named = 0;
1005 
1006 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007 		char old[IFNAMSIZ], *dot;
1008 
1009 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1011 		if (named++ == 0)
1012 			goto skip;
1013 		dot = strchr(old, ':');
1014 		if (dot == NULL) {
1015 			sprintf(old, ":%d", named);
1016 			dot = old;
1017 		}
1018 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019 			strcat(ifa->ifa_label, dot);
1020 		else
1021 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022 skip:
1023 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1024 	}
1025 }
1026 
/*
 * Smallest MTU on which IPv4 is kept enabled: RFC 791 requires every
 * internet module to forward a 68-octet datagram without fragmentation.
 */
#define INETDEV_MIN_MTU	68U

/* Return true when @mtu is large enough for IPv4 to run on the device. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return mtu >= INETDEV_MIN_MTU;
}
1031 
1032 /* Called only under RTNL semaphore */
1033 
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * @this:  unused here
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Creates the in_device on NETDEV_REGISTER (or on NETDEV_CHANGEMTU once the
 * MTU is valid again), destroys it on NETDEV_UNREGISTER or when the MTU
 * becomes too small, and keeps multicast, ARP and label state in step with
 * the remaining events.  Requires RTNL.
 *
 * Returns NOTIFY_DONE, except when in_device creation fails during
 * NETDEV_REGISTER, which returns notifier_from_errno(-ENOMEM).
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		dev->ip_ptr = NULL;
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback devices get 127.0.0.1/8 automatically. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* Send gratuitous ARP to notify of link change */
		if (IN_DEV_ARP_NOTIFY(in_dev)) {
			struct in_ifaddr *ifa = in_dev->ifa_list;

			if (ifa)
				arp_send(ARPOP_REQUEST, ETH_P_ARP,
					 ifa->ifa_address, dev,
					 ifa->ifa_address, NULL,
					 dev->dev_addr, NULL);
		}
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 * NOTE(review): inetdev_changename() does send an
		 * RTM_NEWADDR per address - reconcile this comment.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1125 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1129 
1130 static inline size_t inet_nlmsg_size(void)
1131 {
1132 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1133 	       + nla_total_size(4) /* IFA_ADDRESS */
1134 	       + nla_total_size(4) /* IFA_LOCAL */
1135 	       + nla_total_size(4) /* IFA_BROADCAST */
1136 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1137 }
1138 
1139 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1140 			    u32 pid, u32 seq, int event, unsigned int flags)
1141 {
1142 	struct ifaddrmsg *ifm;
1143 	struct nlmsghdr  *nlh;
1144 
1145 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1146 	if (nlh == NULL)
1147 		return -EMSGSIZE;
1148 
1149 	ifm = nlmsg_data(nlh);
1150 	ifm->ifa_family = AF_INET;
1151 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1152 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1153 	ifm->ifa_scope = ifa->ifa_scope;
1154 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1155 
1156 	if (ifa->ifa_address)
1157 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1158 
1159 	if (ifa->ifa_local)
1160 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1161 
1162 	if (ifa->ifa_broadcast)
1163 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1164 
1165 	if (ifa->ifa_label[0])
1166 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1167 
1168 	return nlmsg_end(skb, nlh);
1169 
1170 nla_put_failure:
1171 	nlmsg_cancel(skb, nlh);
1172 	return -EMSGSIZE;
1173 }
1174 
1175 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1176 {
1177 	struct net *net = sock_net(skb->sk);
1178 	int h, s_h;
1179 	int idx, s_idx;
1180 	int ip_idx, s_ip_idx;
1181 	struct net_device *dev;
1182 	struct in_device *in_dev;
1183 	struct in_ifaddr *ifa;
1184 	struct hlist_head *head;
1185 	struct hlist_node *node;
1186 
1187 	s_h = cb->args[0];
1188 	s_idx = idx = cb->args[1];
1189 	s_ip_idx = ip_idx = cb->args[2];
1190 
1191 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1192 		idx = 0;
1193 		head = &net->dev_index_head[h];
1194 		rcu_read_lock();
1195 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1196 			if (idx < s_idx)
1197 				goto cont;
1198 			if (h > s_h || idx > s_idx)
1199 				s_ip_idx = 0;
1200 			in_dev = __in_dev_get_rcu(dev);
1201 			if (!in_dev)
1202 				goto cont;
1203 
1204 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1205 			     ifa = ifa->ifa_next, ip_idx++) {
1206 				if (ip_idx < s_ip_idx)
1207 					continue;
1208 				if (inet_fill_ifaddr(skb, ifa,
1209 					     NETLINK_CB(cb->skb).pid,
1210 					     cb->nlh->nlmsg_seq,
1211 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1212 					rcu_read_unlock();
1213 					goto done;
1214 				}
1215 			}
1216 cont:
1217 			idx++;
1218 		}
1219 		rcu_read_unlock();
1220 	}
1221 
1222 done:
1223 	cb->args[0] = h;
1224 	cb->args[1] = idx;
1225 	cb->args[2] = ip_idx;
1226 
1227 	return skb->len;
1228 }
1229 
1230 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1231 		      u32 pid)
1232 {
1233 	struct sk_buff *skb;
1234 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1235 	int err = -ENOBUFS;
1236 	struct net *net;
1237 
1238 	net = dev_net(ifa->ifa_dev->dev);
1239 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1240 	if (skb == NULL)
1241 		goto errout;
1242 
1243 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1244 	if (err < 0) {
1245 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1246 		WARN_ON(err == -EMSGSIZE);
1247 		kfree_skb(skb);
1248 		goto errout;
1249 	}
1250 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1251 	return;
1252 errout:
1253 	if (err < 0)
1254 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1255 }
1256 
1257 #ifdef CONFIG_SYSCTL
1258 
1259 static void devinet_copy_dflt_conf(struct net *net, int i)
1260 {
1261 	struct net_device *dev;
1262 
1263 	rcu_read_lock();
1264 	for_each_netdev_rcu(net, dev) {
1265 		struct in_device *in_dev;
1266 
1267 		in_dev = __in_dev_get_rcu(dev);
1268 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1269 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1270 	}
1271 	rcu_read_unlock();
1272 }
1273 
1274 /* called with RTNL locked */
1275 static void inet_forward_change(struct net *net)
1276 {
1277 	struct net_device *dev;
1278 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1279 
1280 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1281 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1282 
1283 	for_each_netdev(net, dev) {
1284 		struct in_device *in_dev;
1285 		if (on)
1286 			dev_disable_lro(dev);
1287 		rcu_read_lock();
1288 		in_dev = __in_dev_get_rcu(dev);
1289 		if (in_dev)
1290 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1291 		rcu_read_unlock();
1292 	}
1293 }
1294 
1295 static int devinet_conf_proc(ctl_table *ctl, int write,
1296 			     void __user *buffer,
1297 			     size_t *lenp, loff_t *ppos)
1298 {
1299 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1300 
1301 	if (write) {
1302 		struct ipv4_devconf *cnf = ctl->extra1;
1303 		struct net *net = ctl->extra2;
1304 		int i = (int *)ctl->data - cnf->data;
1305 
1306 		set_bit(i, cnf->state);
1307 
1308 		if (cnf == net->ipv4.devconf_dflt)
1309 			devinet_copy_dflt_conf(net, i);
1310 	}
1311 
1312 	return ret;
1313 }
1314 
1315 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1316 				  void __user *buffer,
1317 				  size_t *lenp, loff_t *ppos)
1318 {
1319 	int *valp = ctl->data;
1320 	int val = *valp;
1321 	loff_t pos = *ppos;
1322 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1323 
1324 	if (write && *valp != val) {
1325 		struct net *net = ctl->extra2;
1326 
1327 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1328 			if (!rtnl_trylock()) {
1329 				/* Restore the original values before restarting */
1330 				*valp = val;
1331 				*ppos = pos;
1332 				return restart_syscall();
1333 			}
1334 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1335 				inet_forward_change(net);
1336 			} else if (*valp) {
1337 				struct ipv4_devconf *cnf = ctl->extra1;
1338 				struct in_device *idev =
1339 					container_of(cnf, struct in_device, cnf);
1340 				dev_disable_lro(idev->dev);
1341 			}
1342 			rtnl_unlock();
1343 			rt_cache_flush(net, 0);
1344 		}
1345 	}
1346 
1347 	return ret;
1348 }
1349 
1350 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1351 			 void __user *buffer,
1352 			 size_t *lenp, loff_t *ppos)
1353 {
1354 	int *valp = ctl->data;
1355 	int val = *valp;
1356 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1357 	struct net *net = ctl->extra2;
1358 
1359 	if (write && *valp != val)
1360 		rt_cache_flush(net, 0);
1361 
1362 	return ret;
1363 }
1364 
/*
 * Initializer for one ctl_table entry backed by slot IPV4_DEVCONF_<attr>
 * of the global ipv4_devconf.  .data and .extra1 point into the global
 * here; __devinet_sysctl_register() rebases them for each registered copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1387 
/*
 * Template for the per-devconf sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and then
 * fills in the copy: sysctl_header receives the registration handle,
 * dev_name a private copy of the directory name, and each entry's
 * data/extra1/extra2 are pointed at the devconf being registered.
 *
 * Array slots without an initializer below are zero-filled.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* "forwarding" has its own handler. */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* Read-only (mode 0444). */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* Plain read/write integers handled by devinet_conf_proc(). */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* A changed value also flushes the route cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1427 
1428 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1429 					struct ipv4_devconf *p)
1430 {
1431 	int i;
1432 	struct devinet_sysctl_table *t;
1433 
1434 #define DEVINET_CTL_PATH_DEV	3
1435 
1436 	struct ctl_path devinet_ctl_path[] = {
1437 		{ .procname = "net",  },
1438 		{ .procname = "ipv4", },
1439 		{ .procname = "conf", },
1440 		{ /* to be set */ },
1441 		{ },
1442 	};
1443 
1444 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1445 	if (!t)
1446 		goto out;
1447 
1448 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1449 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1450 		t->devinet_vars[i].extra1 = p;
1451 		t->devinet_vars[i].extra2 = net;
1452 	}
1453 
1454 	/*
1455 	 * Make a copy of dev_name, because '.procname' is regarded as const
1456 	 * by sysctl and we wouldn't want anyone to change it under our feet
1457 	 * (see SIOCSIFNAME).
1458 	 */
1459 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1460 	if (!t->dev_name)
1461 		goto free;
1462 
1463 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1464 
1465 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1466 			t->devinet_vars);
1467 	if (!t->sysctl_header)
1468 		goto free_procname;
1469 
1470 	p->sysctl = t;
1471 	return 0;
1472 
1473 free_procname:
1474 	kfree(t->dev_name);
1475 free:
1476 	kfree(t);
1477 out:
1478 	return -ENOBUFS;
1479 }
1480 
1481 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1482 {
1483 	struct devinet_sysctl_table *t = cnf->sysctl;
1484 
1485 	if (t == NULL)
1486 		return;
1487 
1488 	cnf->sysctl = NULL;
1489 	unregister_sysctl_table(t->sysctl_header);
1490 	kfree(t->dev_name);
1491 	kfree(t);
1492 }
1493 
1494 static void devinet_sysctl_register(struct in_device *idev)
1495 {
1496 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1497 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1498 					&idev->cnf);
1499 }
1500 
1501 static void devinet_sysctl_unregister(struct in_device *idev)
1502 {
1503 	__devinet_sysctl_unregister(&idev->cnf);
1504 	neigh_sysctl_unregister(idev->arp_parms);
1505 }
1506 
1507 static struct ctl_table ctl_forward_entry[] = {
1508 	{
1509 		.procname	= "ip_forward",
1510 		.data		= &ipv4_devconf.data[
1511 					IPV4_DEVCONF_FORWARDING - 1],
1512 		.maxlen		= sizeof(int),
1513 		.mode		= 0644,
1514 		.proc_handler	= devinet_sysctl_forward,
1515 		.extra1		= &ipv4_devconf,
1516 		.extra2		= &init_net,
1517 	},
1518 	{ },
1519 };
1520 
1521 static __net_initdata struct ctl_path net_ipv4_path[] = {
1522 	{ .procname = "net", },
1523 	{ .procname = "ipv4", },
1524 	{ },
1525 };
1526 #endif
1527 
1528 static __net_init int devinet_init_net(struct net *net)
1529 {
1530 	int err;
1531 	struct ipv4_devconf *all, *dflt;
1532 #ifdef CONFIG_SYSCTL
1533 	struct ctl_table *tbl = ctl_forward_entry;
1534 	struct ctl_table_header *forw_hdr;
1535 #endif
1536 
1537 	err = -ENOMEM;
1538 	all = &ipv4_devconf;
1539 	dflt = &ipv4_devconf_dflt;
1540 
1541 	if (!net_eq(net, &init_net)) {
1542 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1543 		if (all == NULL)
1544 			goto err_alloc_all;
1545 
1546 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1547 		if (dflt == NULL)
1548 			goto err_alloc_dflt;
1549 
1550 #ifdef CONFIG_SYSCTL
1551 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1552 		if (tbl == NULL)
1553 			goto err_alloc_ctl;
1554 
1555 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1556 		tbl[0].extra1 = all;
1557 		tbl[0].extra2 = net;
1558 #endif
1559 	}
1560 
1561 #ifdef CONFIG_SYSCTL
1562 	err = __devinet_sysctl_register(net, "all", all);
1563 	if (err < 0)
1564 		goto err_reg_all;
1565 
1566 	err = __devinet_sysctl_register(net, "default", dflt);
1567 	if (err < 0)
1568 		goto err_reg_dflt;
1569 
1570 	err = -ENOMEM;
1571 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1572 	if (forw_hdr == NULL)
1573 		goto err_reg_ctl;
1574 	net->ipv4.forw_hdr = forw_hdr;
1575 #endif
1576 
1577 	net->ipv4.devconf_all = all;
1578 	net->ipv4.devconf_dflt = dflt;
1579 	return 0;
1580 
1581 #ifdef CONFIG_SYSCTL
1582 err_reg_ctl:
1583 	__devinet_sysctl_unregister(dflt);
1584 err_reg_dflt:
1585 	__devinet_sysctl_unregister(all);
1586 err_reg_all:
1587 	if (tbl != ctl_forward_entry)
1588 		kfree(tbl);
1589 err_alloc_ctl:
1590 #endif
1591 	if (dflt != &ipv4_devconf_dflt)
1592 		kfree(dflt);
1593 err_alloc_dflt:
1594 	if (all != &ipv4_devconf)
1595 		kfree(all);
1596 err_alloc_all:
1597 	return err;
1598 }
1599 
1600 static __net_exit void devinet_exit_net(struct net *net)
1601 {
1602 #ifdef CONFIG_SYSCTL
1603 	struct ctl_table *tbl;
1604 
1605 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1606 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1607 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1608 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1609 	kfree(tbl);
1610 #endif
1611 	kfree(net->ipv4.devconf_dflt);
1612 	kfree(net->ipv4.devconf_all);
1613 }
1614 
1615 static __net_initdata struct pernet_operations devinet_ops = {
1616 	.init = devinet_init_net,
1617 	.exit = devinet_exit_net,
1618 };
1619 
/*
 * Boot-time initialisation of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink address handlers.  None of the registration
 * return values are checked here.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* RTM_NEWADDR / RTM_DELADDR get a doit handler, RTM_GETADDR a dump handler. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1631 
1632