xref: /openbmc/linux/net/ipv4/devinet.c (revision 643d1f7f)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
/*
 * Built-in IPv4 device configuration block.  The .data array is indexed by
 * (NET_IPV4_CONF_* - 1); the four entries below start at 1 and every other
 * slot is left zero by the static initializer.
 * NOTE(review): presumably the global ("all") settings -- the users of this
 * object are outside the part of the file visible here; confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
	},
};
75 
/*
 * Built-in "default" IPv4 device configuration block.  It has the same
 * entries as ipv4_devconf plus ACCEPT_SOURCE_ROUTE, all set to 1; the
 * remaining slots of .data are zero.
 * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into each new
 * in_device; presumably that pointer refers to this object for the initial
 * namespace -- the assignment is not visible here, confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
85 
/*
 * Read attribute @attr from the default IPv4 device configuration that
 * belongs to network namespace @net.
 *
 * @net is parenthesized before "->" is applied so that any pointer-valued
 * expression (not only a plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers below: the four address attributes are 32-bit
 * values and the label is a string of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_ANYCAST]   	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
96 
/* Netlink address notification helper; defined further down in this file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or removed.  Subscribers use
 * register_inetaddr_notifier() / unregister_inetaddr_notifier().
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: needed by inetdev_destroy(), defined before the body. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations live elsewhere in this file when sysctl is enabled. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL both hooks compile down to nothing. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
113 
114 /* Locks all the inet devices. */
115 
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119 
120 	if (ifa) {
121 		INIT_RCU_HEAD(&ifa->rcu_head);
122 	}
123 
124 	return ifa;
125 }
126 
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130 	if (ifa->ifa_dev)
131 		in_dev_put(ifa->ifa_dev);
132 	kfree(ifa);
133 }
134 
/*
 * Queue @ifa for deferred release: inet_rcu_free_ifa() is registered with
 * call_rcu() and performs the actual in_dev_put()/kfree().
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
139 
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142 	struct net_device *dev = idev->dev;
143 
144 	BUG_TRAP(!idev->ifa_list);
145 	BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148 	       idev, dev ? dev->name : "NIL");
149 #endif
150 	dev_put(dev);
151 	if (!idev->dead)
152 		printk("Freeing alive in_device %p\n", idev);
153 	else {
154 		kfree(idev);
155 	}
156 }
157 
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160 	struct in_device *in_dev;
161 
162 	ASSERT_RTNL();
163 
164 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165 	if (!in_dev)
166 		goto out;
167 	INIT_RCU_HEAD(&in_dev->rcu_head);
168 	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169 			sizeof(in_dev->cnf));
170 	in_dev->cnf.sysctl = NULL;
171 	in_dev->dev = dev;
172 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173 		goto out_kfree;
174 	/* Reference in_dev->dev */
175 	dev_hold(dev);
176 	/* Account for reference dev->ip_ptr (below) */
177 	in_dev_hold(in_dev);
178 
179 	devinet_sysctl_register(in_dev);
180 	ip_mc_init_dev(in_dev);
181 	if (dev->flags & IFF_UP)
182 		ip_mc_up(in_dev);
183 
184 	/* we can receive as soon as ip_ptr is set -- do this last */
185 	rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187 	return in_dev;
188 out_kfree:
189 	kfree(in_dev);
190 	in_dev = NULL;
191 	goto out;
192 }
193 
/*
 * RCU callback queued by inetdev_destroy(): recovers the in_device from
 * its embedded rcu_head and drops one reference on it.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
199 
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202 	struct in_ifaddr *ifa;
203 	struct net_device *dev;
204 
205 	ASSERT_RTNL();
206 
207 	dev = in_dev->dev;
208 
209 	in_dev->dead = 1;
210 
211 	ip_mc_destroy_dev(in_dev);
212 
213 	while ((ifa = in_dev->ifa_list) != NULL) {
214 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 		inet_free_ifa(ifa);
216 	}
217 
218 	dev->ip_ptr = NULL;
219 
220 	devinet_sysctl_unregister(in_dev);
221 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222 	arp_ifdown(dev);
223 
224 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226 
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229 	rcu_read_lock();
230 	for_primary_ifa(in_dev) {
231 		if (inet_ifa_match(a, ifa)) {
232 			if (!b || inet_ifa_match(b, ifa)) {
233 				rcu_read_unlock();
234 				return 1;
235 			}
236 		}
237 	} endfor_ifa(in_dev);
238 	rcu_read_unlock();
239 	return 0;
240 }
241 
/*
 * Remove one address from an in_device and announce the removal.
 * Must be called with RTNL held.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    pointer to the list link that currently points at the address
 *           to remove (*ifap is the victim)
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh/@pid: forwarded unchanged to rtmsg_ifa() for the netlink messages
 *
 * When the victim is a primary address, the secondaries in the same
 * subnet (same mask, matching prefix) are handled as well: without
 * IN_DEV_PROMOTE_SECONDARIES they are all deleted, with it the first one
 * found is promoted to primary instead.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary that the promoted address
			 * will later be linked behind. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet; remember the predecessor for unlinking. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink and free this secondary; ifap1 is
				 * not advanced because *ifap1 already points
				 * at the next entry. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new
				 * primary; stop scanning. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Move the promoted entry right behind the last primary;
		 * with no predecessor recorded it stays where it is. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add FIB entries for the remaining addresses of the
		 * deleted primary's subnet that follow the promoted one. */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
327 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request at hand: passes a NULL header and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
333 
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 * Must be called with RTNL held.  @nlh and @pid are forwarded to
 * rtmsg_ifa().
 *
 * The address is classified while walking the list: if an existing entry
 * shares its mask and subnet, @ifa becomes a secondary and is appended at
 * the end; otherwise it is a primary and is inserted after the last
 * primary whose scope is not smaller than its own.
 *
 * On every early-exit path @ifa is consumed (inet_free_ifa()).
 *
 * Returns 0 on success or when ifa_local is zero (address silently
 * dropped), -EEXIST if the same local address already exists in the
 * subnet, -EINVAL if the scope differs from the existing subnet entry.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start out as primary; the scan below may demote to secondary. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* After the loop ifap points at the terminating link (list tail);
	 * primaries are redirected to the slot after the last primary. */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
385 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers without a
 * netlink request: passes a NULL header and pid 0 and returns its result.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
390 
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
394 
395 	ASSERT_RTNL();
396 
397 	if (!in_dev) {
398 		inet_free_ifa(ifa);
399 		return -ENOBUFS;
400 	}
401 	ipv4_devconf_setall(in_dev);
402 	if (ifa->ifa_dev != in_dev) {
403 		BUG_TRAP(!ifa->ifa_dev);
404 		in_dev_hold(in_dev);
405 		ifa->ifa_dev = in_dev;
406 	}
407 	if (ipv4_is_loopback(ifa->ifa_local))
408 		ifa->ifa_scope = RT_SCOPE_HOST;
409 	return inet_insert_ifa(ifa);
410 }
411 
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414 	struct net_device *dev;
415 	struct in_device *in_dev = NULL;
416 	read_lock(&dev_base_lock);
417 	dev = __dev_get_by_index(net, ifindex);
418 	if (dev)
419 		in_dev = in_dev_get(dev);
420 	read_unlock(&dev_base_lock);
421 	return in_dev;
422 }
423 
424 /* Called only from RTNL semaphored context. No locks. */
425 
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427 				    __be32 mask)
428 {
429 	ASSERT_RTNL();
430 
431 	for_primary_ifa(in_dev) {
432 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433 			return ifa;
434 	} endfor_ifa(in_dev);
435 	return NULL;
436 }
437 
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440 	struct net *net = skb->sk->sk_net;
441 	struct nlattr *tb[IFA_MAX+1];
442 	struct in_device *in_dev;
443 	struct ifaddrmsg *ifm;
444 	struct in_ifaddr *ifa, **ifap;
445 	int err = -EINVAL;
446 
447 	ASSERT_RTNL();
448 
449 	if (net != &init_net)
450 		return -EINVAL;
451 
452 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 	if (err < 0)
454 		goto errout;
455 
456 	ifm = nlmsg_data(nlh);
457 	in_dev = inetdev_by_index(net, ifm->ifa_index);
458 	if (in_dev == NULL) {
459 		err = -ENODEV;
460 		goto errout;
461 	}
462 
463 	__in_dev_put(in_dev);
464 
465 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466 	     ifap = &ifa->ifa_next) {
467 		if (tb[IFA_LOCAL] &&
468 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469 			continue;
470 
471 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472 			continue;
473 
474 		if (tb[IFA_ADDRESS] &&
475 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477 			continue;
478 
479 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 		return 0;
481 	}
482 
483 	err = -EADDRNOTAVAIL;
484 errout:
485 	return err;
486 }
487 
488 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489 {
490 	struct nlattr *tb[IFA_MAX+1];
491 	struct in_ifaddr *ifa;
492 	struct ifaddrmsg *ifm;
493 	struct net_device *dev;
494 	struct in_device *in_dev;
495 	int err;
496 
497 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 	if (err < 0)
499 		goto errout;
500 
501 	ifm = nlmsg_data(nlh);
502 	err = -EINVAL;
503 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504 		goto errout;
505 
506 	dev = __dev_get_by_index(net, ifm->ifa_index);
507 	err = -ENODEV;
508 	if (dev == NULL)
509 		goto errout;
510 
511 	in_dev = __in_dev_get_rtnl(dev);
512 	err = -ENOBUFS;
513 	if (in_dev == NULL)
514 		goto errout;
515 
516 	ifa = inet_alloc_ifa();
517 	if (ifa == NULL)
518 		/*
519 		 * A potential indev allocation can be left alive, it stays
520 		 * assigned to its device and is destroy with it.
521 		 */
522 		goto errout;
523 
524 	ipv4_devconf_setall(in_dev);
525 	in_dev_hold(in_dev);
526 
527 	if (tb[IFA_ADDRESS] == NULL)
528 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
529 
530 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
531 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
532 	ifa->ifa_flags = ifm->ifa_flags;
533 	ifa->ifa_scope = ifm->ifa_scope;
534 	ifa->ifa_dev = in_dev;
535 
536 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
537 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
538 
539 	if (tb[IFA_BROADCAST])
540 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
541 
542 	if (tb[IFA_ANYCAST])
543 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
544 
545 	if (tb[IFA_LABEL])
546 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
547 	else
548 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
549 
550 	return ifa;
551 
552 errout:
553 	return ERR_PTR(err);
554 }
555 
556 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
557 {
558 	struct net *net = skb->sk->sk_net;
559 	struct in_ifaddr *ifa;
560 
561 	ASSERT_RTNL();
562 
563 	if (net != &init_net)
564 		return -EINVAL;
565 
566 	ifa = rtm_to_ifaddr(net, nlh);
567 	if (IS_ERR(ifa))
568 		return PTR_ERR(ifa);
569 
570 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
571 }
572 
573 /*
574  *	Determine a default network mask, based on the IP address.
575  */
576 
577 static __inline__ int inet_abc_len(__be32 addr)
578 {
579 	int rc = -1;	/* Something else, probably a multicast. */
580 
581 	if (ipv4_is_zeronet(addr))
582 		rc = 0;
583 	else {
584 		__u32 haddr = ntohl(addr);
585 
586 		if (IN_CLASSA(haddr))
587 			rc = 8;
588 		else if (IN_CLASSB(haddr))
589 			rc = 16;
590 		else if (IN_CLASSC(haddr))
591 			rc = 24;
592 	}
593 
594 	return rc;
595 }
596 
597 
/*
 * IPv4 address ioctl handler (SIOC[GS]IFADDR, SIOC[GS]IFBRDADDR,
 * SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK and SIOCSIFFLAGS).
 *
 * @cmd: ioctl command
 * @arg: user pointer to a struct ifreq; read on entry and, for the
 *       "get" commands, written back with the requested address
 *
 * The interface name may carry an alias suffix ("eth0:1").  The device is
 * looked up by the part before the colon, the address by the full label.
 * RTNL is taken for the duration of the lookup and the modification.
 *
 * Returns 0 on success, or -EFAULT (user copy failed), -EACCES (missing
 * CAP_NET_ADMIN for a "set" command), -EINVAL (unknown command, wrong
 * address family, invalid address or mask), -ENODEV (no such device),
 * -EADDRNOTAVAIL (no matching address), -ENOBUFS (allocation failure or
 * device without in_device), or the result of dev_change_flags() /
 * inet_set_ifa().
 */
int devinet_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination: the name is used with string functions below. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off the alias suffix so that the plain device
	 * name can be used for module loading and device lookup. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

#ifdef CONFIG_KMOD
	dev_load(&init_net, ifr.ifr_name);
#endif

	/* First pass over cmd: permission and argument checks only. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* The reply is built in place inside ifr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
		goto done;

	/* Restore the full label for the address lookup below. */
	if (colon)
		*colon = ':';

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (creates the address) and SIOCSIFFLAGS (may act
	 * on the device itself) can proceed without an existing address. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass over cmd: perform the operation. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* With an alias label the flags apply to that address only:
		 * clearing IFF_UP deletes it, anything else is a no-op. */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			if ((ifa = inet_alloc_ifa()) == NULL)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; the same object is
			 * re-inserted below with the new address. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_anycast = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Classful defaults for mask and broadcast, unless the
		 * device is point-to-point (then a /32 is used). */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Remove, modify, re-insert so that listeners see
			 * a delete followed by an add.  The result of the
			 * re-insertion is not propagated to the caller. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: release RTNL before touching user memory. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
836 
837 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
838 {
839 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
840 	struct in_ifaddr *ifa;
841 	struct ifreq ifr;
842 	int done = 0;
843 
844 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
845 		goto out;
846 
847 	for (; ifa; ifa = ifa->ifa_next) {
848 		if (!buf) {
849 			done += sizeof(ifr);
850 			continue;
851 		}
852 		if (len < (int) sizeof(ifr))
853 			break;
854 		memset(&ifr, 0, sizeof(struct ifreq));
855 		if (ifa->ifa_label)
856 			strcpy(ifr.ifr_name, ifa->ifa_label);
857 		else
858 			strcpy(ifr.ifr_name, dev->name);
859 
860 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
861 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
862 								ifa->ifa_local;
863 
864 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
865 			done = -EFAULT;
866 			break;
867 		}
868 		buf  += sizeof(struct ifreq);
869 		len  -= sizeof(struct ifreq);
870 		done += sizeof(struct ifreq);
871 	}
872 out:
873 	return done;
874 }
875 
/*
 * Choose a local source address.
 *
 * @dev:   preferred device
 * @dst:   destination the address is selected for, 0 for "any"
 * @scope: largest acceptable ifa_scope value
 *
 * First the primary addresses of @dev with ifa_scope <= @scope are
 * examined: one whose subnet contains @dst (or simply the first one when
 * @dst is 0) wins immediately, otherwise the first eligible address is
 * kept as a fallback.  If @dev yields nothing, every device of init_net
 * is scanned for a primary address that is not link scope and satisfies
 * @scope.
 *
 * Returns the selected address or 0 if none was found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);
no_in_dev:
	rcu_read_unlock();

	if (addr)
		goto out;

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	read_lock(&dev_base_lock);
	rcu_read_lock();
	/* From here on dev is reused as the iteration cursor. */
	for_each_netdev(&init_net, dev) {
		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock_both;
			}
		} endfor_ifa(in_dev);
	}
out_unlock_both:
	read_unlock(&dev_base_lock);
	rcu_read_unlock();
out:
	return addr;
}
926 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks all addresses of @in_dev while tracking two things:
 *  - addr: the first address whose ifa_local equals @local (any address
 *          when @local is 0) and whose scope is <= @scope;
 *  - same: whether an address was seen whose subnet contains @local
 *          (if non-zero) and @dst (if non-zero).
 *
 * Returns addr when a subnet match was found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet match already known: both parts found. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same? addr : 0;
}
963 
964 /*
965  * Confirm that local IP address exists using wildcards:
966  * - in_dev: only on this interface, 0=any interface
967  * - dst: only in the same subnet as dst, 0=any dst
968  * - local: address, 0=autoselect the local address
969  * - scope: maximum allowed scope value for the local address
970  */
971 __be32 inet_confirm_addr(struct in_device *in_dev,
972 			 __be32 dst, __be32 local, int scope)
973 {
974 	__be32 addr = 0;
975 	struct net_device *dev;
976 	struct net *net;
977 
978 	if (scope != RT_SCOPE_LINK)
979 		return confirm_addr_indev(in_dev, dst, local, scope);
980 
981 	net = in_dev->dev->nd_net;
982 	read_lock(&dev_base_lock);
983 	rcu_read_lock();
984 	for_each_netdev(net, dev) {
985 		if ((in_dev = __in_dev_get_rcu(dev))) {
986 			addr = confirm_addr_indev(in_dev, dst, local, scope);
987 			if (addr)
988 				break;
989 		}
990 	}
991 	rcu_read_unlock();
992 	read_unlock(&dev_base_lock);
993 
994 	return addr;
995 }
996 
997 /*
998  *	Device notifier
999  */
1000 
/*
 * Subscribe @nb to inetaddr_chain, the chain called with NETDEV_UP /
 * NETDEV_DOWN when IPv4 addresses are inserted or removed.  Returns the
 * result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
1005 
/*
 * Remove @nb from inetaddr_chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
1010 
1011 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1012  * alias numbering and to create unique labels if possible.
1013 */
1014 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1015 {
1016 	struct in_ifaddr *ifa;
1017 	int named = 0;
1018 
1019 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1020 		char old[IFNAMSIZ], *dot;
1021 
1022 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1023 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1024 		if (named++ == 0)
1025 			continue;
1026 		dot = strchr(old, ':');
1027 		if (dot == NULL) {
1028 			sprintf(old, ":%d", named);
1029 			dot = old;
1030 		}
1031 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1032 			strcat(ifa->ifa_label, dot);
1033 		} else {
1034 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1035 		}
1036 	}
1037 }
1038 
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 * Called only under RTNL semaphore.
 *
 * @this:  the notifier block (unused)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Devices outside init_net are ignored.  A device without IPv4 state gets
 * one created on NETDEV_REGISTER; all other events for such a device are
 * ignored.  Returns NOTIFY_DONE, except when creating the IPv4 state
 * fails, in which case -ENOMEM is reported through notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* A freshly registered device should not have IPv4 state
		 * yet; log it and detach the stale pointer. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		dev->ip_ptr = NULL;
		break;
	case NETDEV_UP:
		if (dev->mtu < 68)
			break;
		/* Loopback devices get 127.0.0.1/8 configured on "up". */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa;
			if ((ifa = inet_alloc_ifa()) != NULL) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (dev->mtu >= 68)
			break;
		/* MTU fell below 68, disable IP */
		/* intentional fall through to NETDEV_UNREGISTER */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1112 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call =inetdev_event,
};
1116 
1117 static inline size_t inet_nlmsg_size(void)
1118 {
1119 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1120 	       + nla_total_size(4) /* IFA_ADDRESS */
1121 	       + nla_total_size(4) /* IFA_LOCAL */
1122 	       + nla_total_size(4) /* IFA_BROADCAST */
1123 	       + nla_total_size(4) /* IFA_ANYCAST */
1124 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1125 }
1126 
/*
 * Append one netlink address message describing @ifa to @skb.
 *
 * @pid, @seq, @event and @flags are placed into the netlink header by
 * nlmsg_put().  Address attributes are only emitted when their value is
 * non-zero, the label only when it is non-empty.  IFA_F_PERMANENT is
 * always set in the reported flags.
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit (the partial message is cancelled).
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 pid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	/* The NLA_PUT_* macros jump to nla_put_failure when out of room. */
	if (ifa->ifa_address)
		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

	if (ifa->ifa_local)
		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

	if (ifa->ifa_broadcast)
		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

	if (ifa->ifa_anycast)
		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);

	if (ifa->ifa_label[0])
		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1165 
1166 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1167 {
1168 	struct net *net = skb->sk->sk_net;
1169 	int idx, ip_idx;
1170 	struct net_device *dev;
1171 	struct in_device *in_dev;
1172 	struct in_ifaddr *ifa;
1173 	int s_ip_idx, s_idx = cb->args[0];
1174 
1175 	if (net != &init_net)
1176 		return 0;
1177 
1178 	s_ip_idx = ip_idx = cb->args[1];
1179 	idx = 0;
1180 	for_each_netdev(net, dev) {
1181 		if (idx < s_idx)
1182 			goto cont;
1183 		if (idx > s_idx)
1184 			s_ip_idx = 0;
1185 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1186 			goto cont;
1187 
1188 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1189 		     ifa = ifa->ifa_next, ip_idx++) {
1190 			if (ip_idx < s_ip_idx)
1191 				continue;
1192 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1193 					     cb->nlh->nlmsg_seq,
1194 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1195 				goto done;
1196 		}
1197 cont:
1198 		idx++;
1199 	}
1200 
1201 done:
1202 	cb->args[0] = idx;
1203 	cb->args[1] = ip_idx;
1204 
1205 	return skb->len;
1206 }
1207 
1208 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1209 		      u32 pid)
1210 {
1211 	struct sk_buff *skb;
1212 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1213 	int err = -ENOBUFS;
1214 	struct net *net;
1215 
1216 	net = ifa->ifa_dev->dev->nd_net;
1217 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1218 	if (skb == NULL)
1219 		goto errout;
1220 
1221 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1222 	if (err < 0) {
1223 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1224 		WARN_ON(err == -EMSGSIZE);
1225 		kfree_skb(skb);
1226 		goto errout;
1227 	}
1228 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1229 errout:
1230 	if (err < 0)
1231 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1232 }
1233 
1234 #ifdef CONFIG_SYSCTL
1235 
1236 static void devinet_copy_dflt_conf(struct net *net, int i)
1237 {
1238 	struct net_device *dev;
1239 
1240 	read_lock(&dev_base_lock);
1241 	for_each_netdev(net, dev) {
1242 		struct in_device *in_dev;
1243 		rcu_read_lock();
1244 		in_dev = __in_dev_get_rcu(dev);
1245 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1246 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1247 		rcu_read_unlock();
1248 	}
1249 	read_unlock(&dev_base_lock);
1250 }
1251 
1252 static void inet_forward_change(struct net *net)
1253 {
1254 	struct net_device *dev;
1255 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1256 
1257 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1258 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1259 
1260 	read_lock(&dev_base_lock);
1261 	for_each_netdev(net, dev) {
1262 		struct in_device *in_dev;
1263 		rcu_read_lock();
1264 		in_dev = __in_dev_get_rcu(dev);
1265 		if (in_dev)
1266 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1267 		rcu_read_unlock();
1268 	}
1269 	read_unlock(&dev_base_lock);
1270 
1271 	rt_cache_flush(0);
1272 }
1273 
1274 static int devinet_conf_proc(ctl_table *ctl, int write,
1275 			     struct file* filp, void __user *buffer,
1276 			     size_t *lenp, loff_t *ppos)
1277 {
1278 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1279 
1280 	if (write) {
1281 		struct ipv4_devconf *cnf = ctl->extra1;
1282 		struct net *net = ctl->extra2;
1283 		int i = (int *)ctl->data - cnf->data;
1284 
1285 		set_bit(i, cnf->state);
1286 
1287 		if (cnf == net->ipv4.devconf_dflt)
1288 			devinet_copy_dflt_conf(net, i);
1289 	}
1290 
1291 	return ret;
1292 }
1293 
1294 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1295 			       void __user *oldval, size_t __user *oldlenp,
1296 			       void __user *newval, size_t newlen)
1297 {
1298 	struct ipv4_devconf *cnf;
1299 	struct net *net;
1300 	int *valp = table->data;
1301 	int new;
1302 	int i;
1303 
1304 	if (!newval || !newlen)
1305 		return 0;
1306 
1307 	if (newlen != sizeof(int))
1308 		return -EINVAL;
1309 
1310 	if (get_user(new, (int __user *)newval))
1311 		return -EFAULT;
1312 
1313 	if (new == *valp)
1314 		return 0;
1315 
1316 	if (oldval && oldlenp) {
1317 		size_t len;
1318 
1319 		if (get_user(len, oldlenp))
1320 			return -EFAULT;
1321 
1322 		if (len) {
1323 			if (len > table->maxlen)
1324 				len = table->maxlen;
1325 			if (copy_to_user(oldval, valp, len))
1326 				return -EFAULT;
1327 			if (put_user(len, oldlenp))
1328 				return -EFAULT;
1329 		}
1330 	}
1331 
1332 	*valp = new;
1333 
1334 	cnf = table->extra1;
1335 	net = table->extra2;
1336 	i = (int *)table->data - cnf->data;
1337 
1338 	set_bit(i, cnf->state);
1339 
1340 	if (cnf == net->ipv4.devconf_dflt)
1341 		devinet_copy_dflt_conf(net, i);
1342 
1343 	return 1;
1344 }
1345 
1346 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347 				  struct file* filp, void __user *buffer,
1348 				  size_t *lenp, loff_t *ppos)
1349 {
1350 	int *valp = ctl->data;
1351 	int val = *valp;
1352 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1353 
1354 	if (write && *valp != val) {
1355 		struct net *net = ctl->extra2;
1356 
1357 		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1358 			inet_forward_change(net);
1359 		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1360 			rt_cache_flush(0);
1361 	}
1362 
1363 	return ret;
1364 }
1365 
1366 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1367 			 struct file* filp, void __user *buffer,
1368 			 size_t *lenp, loff_t *ppos)
1369 {
1370 	int *valp = ctl->data;
1371 	int val = *valp;
1372 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1373 
1374 	if (write && *valp != val)
1375 		rt_cache_flush(0);
1376 
1377 	return ret;
1378 }
1379 
1380 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1381 				  void __user *oldval, size_t __user *oldlenp,
1382 				  void __user *newval, size_t newlen)
1383 {
1384 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1385 				      newval, newlen);
1386 
1387 	if (ret == 1)
1388 		rt_cache_flush(0);
1389 
1390 	return ret;
1391 }
1392 
1393 
/*
 * Initializer for one ctl_table entry describing devconf slot
 * NET_IPV4_CONF_<id>.  .data and .extra1 refer to the global
 * ipv4_devconf; slot numbers are 1-based, hence the "- 1".
 */
#define DEVINET_SYSCTL_ENTRY(id, pname, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## id, \
		.procname	= pname, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## id - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(id, pname) \
	DEVINET_SYSCTL_ENTRY(id, pname, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-only (0444) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(id, pname) \
	DEVINET_SYSCTL_ENTRY(id, pname, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-write (0644) entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, pname, handler, strat) \
	DEVINET_SYSCTL_ENTRY(id, pname, 0644, handler, strat)

/* Read-write entry whose handlers also flush the routing cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, pname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(id, pname, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1421 
1422 static struct devinet_sysctl_table {
1423 	struct ctl_table_header *sysctl_header;
1424 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1425 	char *dev_name;
1426 } devinet_sysctl = {
1427 	.devinet_vars = {
1428 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1429 					     devinet_sysctl_forward,
1430 					     devinet_conf_sysctl),
1431 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1432 
1433 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1434 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1435 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1436 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1437 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1438 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1439 					"accept_source_route"),
1440 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1441 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1442 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1443 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1444 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1445 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1446 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1447 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1449 
1450 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1451 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1452 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1453 					      "force_igmp_version"),
1454 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1455 					      "promote_secondaries"),
1456 	},
1457 };
1458 
1459 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1460 		int ctl_name, struct ipv4_devconf *p)
1461 {
1462 	int i;
1463 	struct devinet_sysctl_table *t;
1464 
1465 #define DEVINET_CTL_PATH_DEV	3
1466 
1467 	struct ctl_path devinet_ctl_path[] = {
1468 		{ .procname = "net", .ctl_name = CTL_NET, },
1469 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1470 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471 		{ /* to be set */ },
1472 		{ },
1473 	};
1474 
1475 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1476 	if (!t)
1477 		goto out;
1478 
1479 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1480 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1481 		t->devinet_vars[i].extra1 = p;
1482 		t->devinet_vars[i].extra2 = net;
1483 	}
1484 
1485 	/*
1486 	 * Make a copy of dev_name, because '.procname' is regarded as const
1487 	 * by sysctl and we wouldn't want anyone to change it under our feet
1488 	 * (see SIOCSIFNAME).
1489 	 */
1490 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491 	if (!t->dev_name)
1492 		goto free;
1493 
1494 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1495 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1496 
1497 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498 			t->devinet_vars);
1499 	if (!t->sysctl_header)
1500 		goto free_procname;
1501 
1502 	p->sysctl = t;
1503 	return 0;
1504 
1505 free_procname:
1506 	kfree(t->dev_name);
1507 free:
1508 	kfree(t);
1509 out:
1510 	return -ENOBUFS;
1511 }
1512 
1513 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514 {
1515 	struct devinet_sysctl_table *t = cnf->sysctl;
1516 
1517 	if (t == NULL)
1518 		return;
1519 
1520 	cnf->sysctl = NULL;
1521 	unregister_sysctl_table(t->sysctl_header);
1522 	kfree(t->dev_name);
1523 	kfree(t);
1524 }
1525 
1526 static void devinet_sysctl_register(struct in_device *idev)
1527 {
1528 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1529 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1530 	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1531 			idev->dev->ifindex, &idev->cnf);
1532 }
1533 
1534 static void devinet_sysctl_unregister(struct in_device *idev)
1535 {
1536 	__devinet_sysctl_unregister(&idev->cnf);
1537 	neigh_sysctl_unregister(idev->arp_parms);
1538 }
1539 
1540 static struct ctl_table ctl_forward_entry[] = {
1541 	{
1542 		.ctl_name	= NET_IPV4_FORWARD,
1543 		.procname	= "ip_forward",
1544 		.data		= &ipv4_devconf.data[
1545 					NET_IPV4_CONF_FORWARDING - 1],
1546 		.maxlen		= sizeof(int),
1547 		.mode		= 0644,
1548 		.proc_handler	= devinet_sysctl_forward,
1549 		.strategy	= devinet_conf_sysctl,
1550 		.extra1		= &ipv4_devconf,
1551 		.extra2		= &init_net,
1552 	},
1553 	{ },
1554 };
1555 
1556 static __net_initdata struct ctl_path net_ipv4_path[] = {
1557 	{ .procname = "net", .ctl_name = CTL_NET, },
1558 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1559 	{ },
1560 };
1561 #endif
1562 
1563 static __net_init int devinet_init_net(struct net *net)
1564 {
1565 	int err;
1566 	struct ipv4_devconf *all, *dflt;
1567 #ifdef CONFIG_SYSCTL
1568 	struct ctl_table *tbl = ctl_forward_entry;
1569 	struct ctl_table_header *forw_hdr;
1570 #endif
1571 
1572 	err = -ENOMEM;
1573 	all = &ipv4_devconf;
1574 	dflt = &ipv4_devconf_dflt;
1575 
1576 	if (net != &init_net) {
1577 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578 		if (all == NULL)
1579 			goto err_alloc_all;
1580 
1581 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582 		if (dflt == NULL)
1583 			goto err_alloc_dflt;
1584 
1585 #ifdef CONFIG_SYSCTL
1586 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587 		if (tbl == NULL)
1588 			goto err_alloc_ctl;
1589 
1590 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591 		tbl[0].extra1 = all;
1592 		tbl[0].extra2 = net;
1593 #endif
1594 	}
1595 
1596 #ifdef CONFIG_SYSCTL
1597 	err = __devinet_sysctl_register(net, "all",
1598 			NET_PROTO_CONF_ALL, all);
1599 	if (err < 0)
1600 		goto err_reg_all;
1601 
1602 	err = __devinet_sysctl_register(net, "default",
1603 			NET_PROTO_CONF_DEFAULT, dflt);
1604 	if (err < 0)
1605 		goto err_reg_dflt;
1606 
1607 	err = -ENOMEM;
1608 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609 	if (forw_hdr == NULL)
1610 		goto err_reg_ctl;
1611 	net->ipv4.forw_hdr = forw_hdr;
1612 #endif
1613 
1614 	net->ipv4.devconf_all = all;
1615 	net->ipv4.devconf_dflt = dflt;
1616 	return 0;
1617 
1618 #ifdef CONFIG_SYSCTL
1619 err_reg_ctl:
1620 	__devinet_sysctl_unregister(dflt);
1621 err_reg_dflt:
1622 	__devinet_sysctl_unregister(all);
1623 err_reg_all:
1624 	if (tbl != ctl_forward_entry)
1625 		kfree(tbl);
1626 err_alloc_ctl:
1627 #endif
1628 	if (dflt != &ipv4_devconf_dflt)
1629 		kfree(dflt);
1630 err_alloc_dflt:
1631 	if (all != &ipv4_devconf)
1632 		kfree(all);
1633 err_alloc_all:
1634 	return err;
1635 }
1636 
1637 static __net_exit void devinet_exit_net(struct net *net)
1638 {
1639 #ifdef CONFIG_SYSCTL
1640 	struct ctl_table *tbl;
1641 
1642 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1646 	kfree(tbl);
1647 #endif
1648 	kfree(net->ipv4.devconf_dflt);
1649 	kfree(net->ipv4.devconf_all);
1650 }
1651 
1652 static __net_initdata struct pernet_operations devinet_ops = {
1653 	.init = devinet_init_net,
1654 	.exit = devinet_exit_net,
1655 };
1656 
1657 void __init devinet_init(void)
1658 {
1659 	register_pernet_subsys(&devinet_ops);
1660 
1661 	register_gifconf(PF_INET, inet_gifconf);
1662 	register_netdevice_notifier(&ip_netdev_notifier);
1663 
1664 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1665 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1666 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1667 }
1668 
/* Symbols made available to modules. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1674