xref: /openbmc/linux/net/ipv4/devinet.c (revision f42b3800)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 static struct ipv4_devconf ipv4_devconf = {
68 	.data = {
69 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73 	},
74 };
75 
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 	.data = {
78 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83 	},
84 };
85 
/*
 * Read option 'attr' from the devconf_dflt block that hangs off the given
 * network namespace.  'net' is parenthesised so that any pointer-valued
 * expression (not just a plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88 
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 	[IFA_LOCAL]     	= { .type = NLA_U32 },
91 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
94 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96 
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98 
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 			 int destroy);
102 #ifdef CONFIG_SYSCTL
103 static void devinet_sysctl_register(struct in_device *idev);
104 static void devinet_sysctl_unregister(struct in_device *idev);
105 #else
106 static inline void devinet_sysctl_register(struct in_device *idev)
107 {
108 }
109 static inline void devinet_sysctl_unregister(struct in_device *idev)
110 {
111 }
112 #endif
113 
114 /* Locks all the inet devices. */
115 
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119 
120 	if (ifa) {
121 		INIT_RCU_HEAD(&ifa->rcu_head);
122 	}
123 
124 	return ifa;
125 }
126 
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130 	if (ifa->ifa_dev)
131 		in_dev_put(ifa->ifa_dev);
132 	kfree(ifa);
133 }
134 
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139 
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142 	struct net_device *dev = idev->dev;
143 
144 	BUG_TRAP(!idev->ifa_list);
145 	BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148 	       idev, dev ? dev->name : "NIL");
149 #endif
150 	dev_put(dev);
151 	if (!idev->dead)
152 		printk("Freeing alive in_device %p\n", idev);
153 	else {
154 		kfree(idev);
155 	}
156 }
157 
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160 	struct in_device *in_dev;
161 
162 	ASSERT_RTNL();
163 
164 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165 	if (!in_dev)
166 		goto out;
167 	INIT_RCU_HEAD(&in_dev->rcu_head);
168 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
169 			sizeof(in_dev->cnf));
170 	in_dev->cnf.sysctl = NULL;
171 	in_dev->dev = dev;
172 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173 		goto out_kfree;
174 	/* Reference in_dev->dev */
175 	dev_hold(dev);
176 	/* Account for reference dev->ip_ptr (below) */
177 	in_dev_hold(in_dev);
178 
179 	devinet_sysctl_register(in_dev);
180 	ip_mc_init_dev(in_dev);
181 	if (dev->flags & IFF_UP)
182 		ip_mc_up(in_dev);
183 
184 	/* we can receive as soon as ip_ptr is set -- do this last */
185 	rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187 	return in_dev;
188 out_kfree:
189 	kfree(in_dev);
190 	in_dev = NULL;
191 	goto out;
192 }
193 
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
197 	in_dev_put(idev);
198 }
199 
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202 	struct in_ifaddr *ifa;
203 	struct net_device *dev;
204 
205 	ASSERT_RTNL();
206 
207 	dev = in_dev->dev;
208 
209 	in_dev->dead = 1;
210 
211 	ip_mc_destroy_dev(in_dev);
212 
213 	while ((ifa = in_dev->ifa_list) != NULL) {
214 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 		inet_free_ifa(ifa);
216 	}
217 
218 	dev->ip_ptr = NULL;
219 
220 	devinet_sysctl_unregister(in_dev);
221 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222 	arp_ifdown(dev);
223 
224 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226 
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229 	rcu_read_lock();
230 	for_primary_ifa(in_dev) {
231 		if (inet_ifa_match(a, ifa)) {
232 			if (!b || inet_ifa_match(b, ifa)) {
233 				rcu_read_unlock();
234 				return 1;
235 			}
236 		}
237 	} endfor_ifa(in_dev);
238 	rcu_read_unlock();
239 	return 0;
240 }
241 
/*
 * Remove the address *ifap from in_dev's address list (RTNL must be held).
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (previous entry's ifa_next, or &in_dev->ifa_list) that
 *           currently points at the address to delete
 * @destroy: when non-zero the deleted address is freed via inet_free_ifa()
 * @nlh:     netlink request forwarded to rtmsg_ifa(), may be NULL
 * @pid:     netlink pid forwarded to rtmsg_ifa()
 *
 * If the address is a primary, the entries after it are scanned for
 * secondaries with the same mask whose subnet contains its address.
 * Without promotion each of them is unlinked, announced (RTM_DELADDR and
 * NETDEV_DOWN) and freed.  With promotion enabled the scan stops at the
 * first such secondary, which is later turned into a primary, announced
 * (RTM_NEWADDR and NETDEV_UP), and fib_add_ifaddr() is called for the
 * matching entries that follow it.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243 			 int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245 	struct in_ifaddr *promote = NULL;
246 	struct in_ifaddr *ifa, *ifa1 = *ifap;
247 	struct in_ifaddr *last_prim = in_dev->ifa_list;
248 	struct in_ifaddr *prev_prom = NULL;
249 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250 
251 	ASSERT_RTNL();
252 
253 	/* 1. Deleting primary ifaddr forces deletion all secondaries
254 	 * unless alias promotion is set
255 	 **/
256 
257 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259 
260 		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope value is not below ifa1's. */
261 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262 			    ifa1->ifa_scope <= ifa->ifa_scope)
263 				last_prim = ifa;
264 
			/* Not a secondary of ifa1's subnet: step over it. */
265 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266 			    ifa1->ifa_mask != ifa->ifa_mask ||
267 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
268 				ifap1 = &ifa->ifa_next;
269 				prev_prom = ifa;
270 				continue;
271 			}
272 
273 			if (!do_promote) {
274 				*ifap1 = ifa->ifa_next;
275 
276 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277 				blocking_notifier_call_chain(&inetaddr_chain,
278 						NETDEV_DOWN, ifa);
279 				inet_free_ifa(ifa);
280 			} else {
281 				promote = ifa;
282 				break;
283 			}
284 		}
285 	}
286 
287 	/* 2. Unlink it */
288 
289 	*ifap = ifa1->ifa_next;
290 
291 	/* 3. Announce address deletion */
292 
293 	/* Send message first, then call notifier.
294 	   At first sight, FIB update triggered by notifier
295 	   will refer to already deleted ifaddr, that could confuse
296 	   netlink listeners. It is not true: look, gated sees
297 	   that route deleted and if it still thinks that ifaddr
298 	   is valid, it will try to restore deleted routes... Grr.
299 	   So that, this order is correct.
300 	 */
301 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303 
304 	if (promote) {
305 
		/*
		 * prev_prom is only set when at least one entry was stepped
		 * over before reaching the promoted one; in that case the
		 * promoted entry is relinked directly behind last_prim.
		 */
306 		if (prev_prom) {
307 			prev_prom->ifa_next = promote->ifa_next;
308 			promote->ifa_next = last_prim->ifa_next;
309 			last_prim->ifa_next = promote;
310 		}
311 
312 		promote->ifa_flags &= ~IFA_F_SECONDARY;
313 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314 		blocking_notifier_call_chain(&inetaddr_chain,
315 				NETDEV_UP, promote);
316 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317 			if (ifa1->ifa_mask != ifa->ifa_mask ||
318 			    !inet_ifa_match(ifa1->ifa_address, ifa))
319 					continue;
320 			fib_add_ifaddr(ifa);
321 		}
322 
323 	}
324 	if (destroy)
325 		inet_free_ifa(ifa1);
326 }
327 
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329 			 int destroy)
330 {
331 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333 
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335 			     u32 pid)
336 {
337 	struct in_device *in_dev = ifa->ifa_dev;
338 	struct in_ifaddr *ifa1, **ifap, **last_primary;
339 
340 	ASSERT_RTNL();
341 
342 	if (!ifa->ifa_local) {
343 		inet_free_ifa(ifa);
344 		return 0;
345 	}
346 
347 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
348 	last_primary = &in_dev->ifa_list;
349 
350 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351 	     ifap = &ifa1->ifa_next) {
352 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353 		    ifa->ifa_scope <= ifa1->ifa_scope)
354 			last_primary = &ifa1->ifa_next;
355 		if (ifa1->ifa_mask == ifa->ifa_mask &&
356 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
357 			if (ifa1->ifa_local == ifa->ifa_local) {
358 				inet_free_ifa(ifa);
359 				return -EEXIST;
360 			}
361 			if (ifa1->ifa_scope != ifa->ifa_scope) {
362 				inet_free_ifa(ifa);
363 				return -EINVAL;
364 			}
365 			ifa->ifa_flags |= IFA_F_SECONDARY;
366 		}
367 	}
368 
369 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370 		net_srandom(ifa->ifa_local);
371 		ifap = last_primary;
372 	}
373 
374 	ifa->ifa_next = *ifap;
375 	*ifap = ifa;
376 
377 	/* Send message first, then call notifier.
378 	   Notifier will trigger FIB update, so that
379 	   listeners of netlink will know about new ifaddr */
380 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382 
383 	return 0;
384 }
385 
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388 	return __inet_insert_ifa(ifa, NULL, 0);
389 }
390 
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
394 
395 	ASSERT_RTNL();
396 
397 	if (!in_dev) {
398 		inet_free_ifa(ifa);
399 		return -ENOBUFS;
400 	}
401 	ipv4_devconf_setall(in_dev);
402 	if (ifa->ifa_dev != in_dev) {
403 		BUG_TRAP(!ifa->ifa_dev);
404 		in_dev_hold(in_dev);
405 		ifa->ifa_dev = in_dev;
406 	}
407 	if (ipv4_is_loopback(ifa->ifa_local))
408 		ifa->ifa_scope = RT_SCOPE_HOST;
409 	return inet_insert_ifa(ifa);
410 }
411 
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414 	struct net_device *dev;
415 	struct in_device *in_dev = NULL;
416 	read_lock(&dev_base_lock);
417 	dev = __dev_get_by_index(net, ifindex);
418 	if (dev)
419 		in_dev = in_dev_get(dev);
420 	read_unlock(&dev_base_lock);
421 	return in_dev;
422 }
423 
424 /* Called only from RTNL semaphored context. No locks. */
425 
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427 				    __be32 mask)
428 {
429 	ASSERT_RTNL();
430 
431 	for_primary_ifa(in_dev) {
432 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433 			return ifa;
434 	} endfor_ifa(in_dev);
435 	return NULL;
436 }
437 
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440 	struct net *net = sock_net(skb->sk);
441 	struct nlattr *tb[IFA_MAX+1];
442 	struct in_device *in_dev;
443 	struct ifaddrmsg *ifm;
444 	struct in_ifaddr *ifa, **ifap;
445 	int err = -EINVAL;
446 
447 	ASSERT_RTNL();
448 
449 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450 	if (err < 0)
451 		goto errout;
452 
453 	ifm = nlmsg_data(nlh);
454 	in_dev = inetdev_by_index(net, ifm->ifa_index);
455 	if (in_dev == NULL) {
456 		err = -ENODEV;
457 		goto errout;
458 	}
459 
460 	__in_dev_put(in_dev);
461 
462 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463 	     ifap = &ifa->ifa_next) {
464 		if (tb[IFA_LOCAL] &&
465 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
466 			continue;
467 
468 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
469 			continue;
470 
471 		if (tb[IFA_ADDRESS] &&
472 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
474 			continue;
475 
476 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477 		return 0;
478 	}
479 
480 	err = -EADDRNOTAVAIL;
481 errout:
482 	return err;
483 }
484 
485 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
486 {
487 	struct nlattr *tb[IFA_MAX+1];
488 	struct in_ifaddr *ifa;
489 	struct ifaddrmsg *ifm;
490 	struct net_device *dev;
491 	struct in_device *in_dev;
492 	int err;
493 
494 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495 	if (err < 0)
496 		goto errout;
497 
498 	ifm = nlmsg_data(nlh);
499 	err = -EINVAL;
500 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
501 		goto errout;
502 
503 	dev = __dev_get_by_index(net, ifm->ifa_index);
504 	err = -ENODEV;
505 	if (dev == NULL)
506 		goto errout;
507 
508 	in_dev = __in_dev_get_rtnl(dev);
509 	err = -ENOBUFS;
510 	if (in_dev == NULL)
511 		goto errout;
512 
513 	ifa = inet_alloc_ifa();
514 	if (ifa == NULL)
515 		/*
516 		 * A potential indev allocation can be left alive, it stays
517 		 * assigned to its device and is destroy with it.
518 		 */
519 		goto errout;
520 
521 	ipv4_devconf_setall(in_dev);
522 	in_dev_hold(in_dev);
523 
524 	if (tb[IFA_ADDRESS] == NULL)
525 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
526 
527 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
528 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
529 	ifa->ifa_flags = ifm->ifa_flags;
530 	ifa->ifa_scope = ifm->ifa_scope;
531 	ifa->ifa_dev = in_dev;
532 
533 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
534 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
535 
536 	if (tb[IFA_BROADCAST])
537 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
538 
539 	if (tb[IFA_ANYCAST])
540 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
541 
542 	if (tb[IFA_LABEL])
543 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
544 	else
545 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
546 
547 	return ifa;
548 
549 errout:
550 	return ERR_PTR(err);
551 }
552 
553 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
554 {
555 	struct net *net = sock_net(skb->sk);
556 	struct in_ifaddr *ifa;
557 
558 	ASSERT_RTNL();
559 
560 	ifa = rtm_to_ifaddr(net, nlh);
561 	if (IS_ERR(ifa))
562 		return PTR_ERR(ifa);
563 
564 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
565 }
566 
567 /*
568  *	Determine a default network mask, based on the IP address.
569  */
570 
571 static __inline__ int inet_abc_len(__be32 addr)
572 {
573 	int rc = -1;	/* Something else, probably a multicast. */
574 
575 	if (ipv4_is_zeronet(addr))
576 		rc = 0;
577 	else {
578 		__u32 haddr = ntohl(addr);
579 
580 		if (IN_CLASSA(haddr))
581 			rc = 8;
582 		else if (IN_CLASSB(haddr))
583 			rc = 16;
584 		else if (IN_CLASSC(haddr))
585 			rc = 24;
586 	}
587 
588 	return rc;
589 }
590 
591 
/*
 * Handle the IPv4 address ioctls (SIOC[GS]IFADDR, SIOC[GS]IFBRDADDR,
 * SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK) and SIOCSIFFLAGS.
 *
 * @net: network namespace used for the device lookup
 * @cmd: ioctl number
 * @arg: user pointer to a struct ifreq; read on entry and, for the "get"
 *       commands, written back with the requested address
 *
 * The interface name may carry an alias suffix ("name:label").  The part
 * before the colon selects the device, the full name is compared against
 * the address labels.  For the "get" commands with an AF_INET address
 * supplied, label and address are matched together first; otherwise (or
 * if that finds nothing) the label alone is used.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        copying the ifreq from or to user space failed
 *   -EACCES        a "set" command without CAP_NET_ADMIN
 *   -EINVAL        unknown command, non-AF_INET address on a "set"
 *                  command, or an address/mask rejected by
 *                  inet_abc_len()/bad_mask()
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no matching address where one is required
 *   -ENOBUFS       a new address could not be allocated
 * plus whatever dev_change_flags() or inet_set_ifa() return.
 */
592 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
593 {
594 	struct ifreq ifr;
595 	struct sockaddr_in sin_orig;
596 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
597 	struct in_device *in_dev;
598 	struct in_ifaddr **ifap = NULL;
599 	struct in_ifaddr *ifa = NULL;
600 	struct net_device *dev;
601 	char *colon;
602 	int ret = -EFAULT;
603 	int tryaddrmatch = 0;
604 
605 	/*
606 	 *	Fetch the caller's info block into kernel space
607 	 */
608 
609 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
610 		goto out;
611 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
612 
613 	/* save original address for comparison */
614 	memcpy(&sin_orig, sin, sizeof(*sin));
615 
	/* Temporarily cut the alias suffix so the bare device name is used. */
616 	colon = strchr(ifr.ifr_name, ':');
617 	if (colon)
618 		*colon = 0;
619 
620 #ifdef CONFIG_KMOD
621 	dev_load(net, ifr.ifr_name);
622 #endif
623 
	/* First pass: permission and argument checks, done before taking RTNL. */
624 	switch (cmd) {
625 	case SIOCGIFADDR:	/* Get interface address */
626 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
627 	case SIOCGIFDSTADDR:	/* Get the destination address */
628 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
629 		/* Note that these ioctls will not sleep,
630 		   so that we do not impose a lock.
631 		   One day we will be forced to put shlock here (I mean SMP)
632 		 */
633 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
634 		memset(sin, 0, sizeof(*sin));
635 		sin->sin_family = AF_INET;
636 		break;
637 
638 	case SIOCSIFFLAGS:
639 		ret = -EACCES;
640 		if (!capable(CAP_NET_ADMIN))
641 			goto out;
642 		break;
643 	case SIOCSIFADDR:	/* Set interface address (and family) */
644 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
645 	case SIOCSIFDSTADDR:	/* Set the destination address */
646 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
647 		ret = -EACCES;
648 		if (!capable(CAP_NET_ADMIN))
649 			goto out;
650 		ret = -EINVAL;
651 		if (sin->sin_family != AF_INET)
652 			goto out;
653 		break;
654 	default:
655 		ret = -EINVAL;
656 		goto out;
657 	}
658 
659 	rtnl_lock();
660 
661 	ret = -ENODEV;
662 	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
663 		goto done;
664 
	/* Restore the full "name:label" form for the label comparisons. */
665 	if (colon)
666 		*colon = ':';
667 
668 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
669 		if (tryaddrmatch) {
670 			/* Matthias Andree */
671 			/* compare label and address (4.4BSD style) */
672 			/* note: we only do this for a limited set of ioctls
673 			   and only if the original address family was AF_INET.
674 			   This is checked above. */
675 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676 			     ifap = &ifa->ifa_next) {
677 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
678 				    sin_orig.sin_addr.s_addr ==
679 							ifa->ifa_address) {
680 					break; /* found */
681 				}
682 			}
683 		}
684 		/* we didn't get a match, maybe the application is
685 		   4.3BSD-style and passed in junk so we fall back to
686 		   comparing just the label */
687 		if (!ifa) {
688 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
689 			     ifap = &ifa->ifa_next)
690 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
691 					break;
692 		}
693 	}
694 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an address. */
695 	ret = -EADDRNOTAVAIL;
696 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
697 		goto done;
698 
699 	switch (cmd) {
700 	case SIOCGIFADDR:	/* Get interface address */
701 		sin->sin_addr.s_addr = ifa->ifa_local;
702 		goto rarok;
703 
704 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
705 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
706 		goto rarok;
707 
708 	case SIOCGIFDSTADDR:	/* Get the destination address */
709 		sin->sin_addr.s_addr = ifa->ifa_address;
710 		goto rarok;
711 
712 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
713 		sin->sin_addr.s_addr = ifa->ifa_mask;
714 		goto rarok;
715 
716 	case SIOCSIFFLAGS:
		/*
		 * With an alias name only that address is affected: clearing
		 * IFF_UP deletes it.  Without a colon the flags are applied
		 * to the device itself.
		 */
717 		if (colon) {
718 			ret = -EADDRNOTAVAIL;
719 			if (!ifa)
720 				break;
721 			ret = 0;
722 			if (!(ifr.ifr_flags & IFF_UP))
723 				inet_del_ifa(in_dev, ifap, 1);
724 			break;
725 		}
726 		ret = dev_change_flags(dev, ifr.ifr_flags);
727 		break;
728 
729 	case SIOCSIFADDR:	/* Set interface address (and family) */
730 		ret = -EINVAL;
731 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
732 			break;
733 
734 		if (!ifa) {
735 			ret = -ENOBUFS;
736 			if ((ifa = inet_alloc_ifa()) == NULL)
737 				break;
738 			if (colon)
739 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
740 			else
741 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
742 		} else {
743 			ret = 0;
744 			if (ifa->ifa_local == sin->sin_addr.s_addr)
745 				break;
			/* Unlink without freeing; the entry is reused below. */
746 			inet_del_ifa(in_dev, ifap, 0);
747 			ifa->ifa_broadcast = 0;
748 			ifa->ifa_anycast = 0;
749 			ifa->ifa_scope = 0;
750 		}
751 
752 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
753 
		/* Non point-to-point: classful mask; point-to-point: /32. */
754 		if (!(dev->flags & IFF_POINTOPOINT)) {
755 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
756 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
757 			if ((dev->flags & IFF_BROADCAST) &&
758 			    ifa->ifa_prefixlen < 31)
759 				ifa->ifa_broadcast = ifa->ifa_address |
760 						     ~ifa->ifa_mask;
761 		} else {
762 			ifa->ifa_prefixlen = 32;
763 			ifa->ifa_mask = inet_make_mask(32);
764 		}
765 		ret = inet_set_ifa(dev, ifa);
766 		break;
767 
768 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
769 		ret = 0;
770 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Changes are applied by unlinking and re-inserting. */
771 			inet_del_ifa(in_dev, ifap, 0);
772 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
773 			inet_insert_ifa(ifa);
774 		}
775 		break;
776 
777 	case SIOCSIFDSTADDR:	/* Set the destination address */
778 		ret = 0;
779 		if (ifa->ifa_address == sin->sin_addr.s_addr)
780 			break;
781 		ret = -EINVAL;
782 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
783 			break;
784 		ret = 0;
785 		inet_del_ifa(in_dev, ifap, 0);
786 		ifa->ifa_address = sin->sin_addr.s_addr;
787 		inet_insert_ifa(ifa);
788 		break;
789 
790 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
791 
792 		/*
793 		 *	The mask we set must be legal.
794 		 */
795 		ret = -EINVAL;
796 		if (bad_mask(sin->sin_addr.s_addr, 0))
797 			break;
798 		ret = 0;
799 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
800 			__be32 old_mask = ifa->ifa_mask;
801 			inet_del_ifa(in_dev, ifap, 0);
802 			ifa->ifa_mask = sin->sin_addr.s_addr;
803 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
804 
805 			/* See if current broadcast address matches
806 			 * with current netmask, then recalculate
807 			 * the broadcast address. Otherwise it's a
808 			 * funny address, so don't touch it since
809 			 * the user seems to know what (s)he's doing...
810 			 */
811 			if ((dev->flags & IFF_BROADCAST) &&
812 			    (ifa->ifa_prefixlen < 31) &&
813 			    (ifa->ifa_broadcast ==
814 			     (ifa->ifa_local|~old_mask))) {
815 				ifa->ifa_broadcast = (ifa->ifa_local |
816 						      ~sin->sin_addr.s_addr);
817 			}
818 			inet_insert_ifa(ifa);
819 		}
820 		break;
821 	}
822 done:
823 	rtnl_unlock();
824 out:
825 	return ret;
	/* "get" commands: drop RTNL first, then copy the result to user space. */
826 rarok:
827 	rtnl_unlock();
828 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
829 	goto out;
830 }
831 
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
833 {
834 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
835 	struct in_ifaddr *ifa;
836 	struct ifreq ifr;
837 	int done = 0;
838 
839 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
840 		goto out;
841 
842 	for (; ifa; ifa = ifa->ifa_next) {
843 		if (!buf) {
844 			done += sizeof(ifr);
845 			continue;
846 		}
847 		if (len < (int) sizeof(ifr))
848 			break;
849 		memset(&ifr, 0, sizeof(struct ifreq));
850 		if (ifa->ifa_label)
851 			strcpy(ifr.ifr_name, ifa->ifa_label);
852 		else
853 			strcpy(ifr.ifr_name, dev->name);
854 
855 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
857 								ifa->ifa_local;
858 
859 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
860 			done = -EFAULT;
861 			break;
862 		}
863 		buf  += sizeof(struct ifreq);
864 		len  -= sizeof(struct ifreq);
865 		done += sizeof(struct ifreq);
866 	}
867 out:
868 	return done;
869 }
870 
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
872 {
873 	__be32 addr = 0;
874 	struct in_device *in_dev;
875 	struct net *net = dev_net(dev);
876 
877 	rcu_read_lock();
878 	in_dev = __in_dev_get_rcu(dev);
879 	if (!in_dev)
880 		goto no_in_dev;
881 
882 	for_primary_ifa(in_dev) {
883 		if (ifa->ifa_scope > scope)
884 			continue;
885 		if (!dst || inet_ifa_match(dst, ifa)) {
886 			addr = ifa->ifa_local;
887 			break;
888 		}
889 		if (!addr)
890 			addr = ifa->ifa_local;
891 	} endfor_ifa(in_dev);
892 no_in_dev:
893 	rcu_read_unlock();
894 
895 	if (addr)
896 		goto out;
897 
898 	/* Not loopback addresses on loopback should be preferred
899 	   in this case. It is importnat that lo is the first interface
900 	   in dev_base list.
901 	 */
902 	read_lock(&dev_base_lock);
903 	rcu_read_lock();
904 	for_each_netdev(net, dev) {
905 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
906 			continue;
907 
908 		for_primary_ifa(in_dev) {
909 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
910 			    ifa->ifa_scope <= scope) {
911 				addr = ifa->ifa_local;
912 				goto out_unlock_both;
913 			}
914 		} endfor_ifa(in_dev);
915 	}
916 out_unlock_both:
917 	read_unlock(&dev_base_lock);
918 	rcu_read_unlock();
919 out:
920 	return addr;
921 }
922 
/*
 * Search the address list of one in_device for a local address that
 * satisfies the (dst, local, scope) wildcards; a zero @dst or @local
 * matches anything.
 *
 * Two things are tracked while walking the list:
 *   addr - the first ifa_local that equals @local (or any, if @local is 0)
 *          and whose ifa_scope does not exceed @scope
 *   same - whether some entry's subnet contains both @local and @dst
 *
 * Returns the chosen address only if 'same' ended up set, otherwise 0.
 */
923 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
924 			      __be32 local, int scope)
925 {
926 	int same = 0;
927 	__be32 addr = 0;
928 
929 	for_ifa(in_dev) {
930 		if (!addr &&
931 		    (local == ifa->ifa_local || !local) &&
932 		    ifa->ifa_scope <= scope) {
933 			addr = ifa->ifa_local;
			/* Subnet match already seen earlier: nothing left to find. */
934 			if (same)
935 				break;
936 		}
937 		if (!same) {
938 			same = (!local || inet_ifa_match(local, ifa)) &&
939 				(!dst || inet_ifa_match(dst, ifa));
940 			if (same && addr) {
941 				if (local || !dst)
942 					break;
943 				/* Is the selected addr into dst subnet? */
944 				if (inet_ifa_match(addr, ifa))
945 					break;
946 				/* No, then can we use new local src? */
947 				if (ifa->ifa_scope <= scope) {
948 					addr = ifa->ifa_local;
949 					break;
950 				}
951 				/* search for large dst subnet for addr */
952 				same = 0;
953 			}
954 		}
955 	} endfor_ifa(in_dev);
956 
957 	return same? addr : 0;
958 }
959 
960 /*
961  * Confirm that local IP address exists using wildcards:
962  * - in_dev: only on this interface (must not be NULL); with scope RT_SCOPE_LINK all interfaces of its namespace are searched
963  * - dst: only in the same subnet as dst, 0=any dst
964  * - local: address, 0=autoselect the local address
965  * - scope: maximum allowed scope value for the local address
966  */
967 __be32 inet_confirm_addr(struct in_device *in_dev,
968 			 __be32 dst, __be32 local, int scope)
969 {
970 	__be32 addr = 0;
971 	struct net_device *dev;
972 	struct net *net;
973 
974 	if (scope != RT_SCOPE_LINK)
975 		return confirm_addr_indev(in_dev, dst, local, scope);
976 
977 	net = dev_net(in_dev->dev);
978 	read_lock(&dev_base_lock);
979 	rcu_read_lock();
980 	for_each_netdev(net, dev) {
981 		if ((in_dev = __in_dev_get_rcu(dev))) {
982 			addr = confirm_addr_indev(in_dev, dst, local, scope);
983 			if (addr)
984 				break;
985 		}
986 	}
987 	rcu_read_unlock();
988 	read_unlock(&dev_base_lock);
989 
990 	return addr;
991 }
992 
993 /*
994  *	Device notifier
995  */
996 
997 int register_inetaddr_notifier(struct notifier_block *nb)
998 {
999 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1000 }
1001 
1002 int unregister_inetaddr_notifier(struct notifier_block *nb)
1003 {
1004 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1005 }
1006 
1007 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1008  * alias numbering and to create unique labels if possible.
1009 */
1010 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1011 {
1012 	struct in_ifaddr *ifa;
1013 	int named = 0;
1014 
1015 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1016 		char old[IFNAMSIZ], *dot;
1017 
1018 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1019 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1020 		if (named++ == 0)
1021 			continue;
1022 		dot = strchr(old, ':');
1023 		if (dot == NULL) {
1024 			sprintf(old, ":%d", named);
1025 			dot = old;
1026 		}
1027 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1028 			strcat(ifa->ifa_label, dot);
1029 		} else {
1030 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1031 		}
1032 	}
1033 }
1034 
1035 /* Called only under RTNL semaphore */
1036 
1037 static int inetdev_event(struct notifier_block *this, unsigned long event,
1038 			 void *ptr)
1039 {
1040 	struct net_device *dev = ptr;
1041 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1042 
1043 	ASSERT_RTNL();
1044 
1045 	if (!in_dev) {
1046 		if (event == NETDEV_REGISTER) {
1047 			in_dev = inetdev_init(dev);
1048 			if (!in_dev)
1049 				return notifier_from_errno(-ENOMEM);
1050 			if (dev->flags & IFF_LOOPBACK) {
1051 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1052 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1053 			}
1054 		}
1055 		goto out;
1056 	}
1057 
1058 	switch (event) {
1059 	case NETDEV_REGISTER:
1060 		printk(KERN_DEBUG "inetdev_event: bug\n");
1061 		dev->ip_ptr = NULL;
1062 		break;
1063 	case NETDEV_UP:
1064 		if (dev->mtu < 68)
1065 			break;
1066 		if (dev->flags & IFF_LOOPBACK) {
1067 			struct in_ifaddr *ifa;
1068 			if ((ifa = inet_alloc_ifa()) != NULL) {
1069 				ifa->ifa_local =
1070 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1071 				ifa->ifa_prefixlen = 8;
1072 				ifa->ifa_mask = inet_make_mask(8);
1073 				in_dev_hold(in_dev);
1074 				ifa->ifa_dev = in_dev;
1075 				ifa->ifa_scope = RT_SCOPE_HOST;
1076 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1077 				inet_insert_ifa(ifa);
1078 			}
1079 		}
1080 		ip_mc_up(in_dev);
1081 		break;
1082 	case NETDEV_DOWN:
1083 		ip_mc_down(in_dev);
1084 		break;
1085 	case NETDEV_CHANGEMTU:
1086 		if (dev->mtu >= 68)
1087 			break;
1088 		/* MTU falled under 68, disable IP */
1089 	case NETDEV_UNREGISTER:
1090 		inetdev_destroy(in_dev);
1091 		break;
1092 	case NETDEV_CHANGENAME:
1093 		/* Do not notify about label change, this event is
1094 		 * not interesting to applications using netlink.
1095 		 */
1096 		inetdev_changename(dev, in_dev);
1097 
1098 		devinet_sysctl_unregister(in_dev);
1099 		devinet_sysctl_register(in_dev);
1100 		break;
1101 	}
1102 out:
1103 	return NOTIFY_DONE;
1104 }
1105 
1106 static struct notifier_block ip_netdev_notifier = {
1107 	.notifier_call =inetdev_event,
1108 };
1109 
1110 static inline size_t inet_nlmsg_size(void)
1111 {
1112 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1113 	       + nla_total_size(4) /* IFA_ADDRESS */
1114 	       + nla_total_size(4) /* IFA_LOCAL */
1115 	       + nla_total_size(4) /* IFA_BROADCAST */
1116 	       + nla_total_size(4) /* IFA_ANYCAST */
1117 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1118 }
1119 
1120 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1121 			    u32 pid, u32 seq, int event, unsigned int flags)
1122 {
1123 	struct ifaddrmsg *ifm;
1124 	struct nlmsghdr  *nlh;
1125 
1126 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1127 	if (nlh == NULL)
1128 		return -EMSGSIZE;
1129 
1130 	ifm = nlmsg_data(nlh);
1131 	ifm->ifa_family = AF_INET;
1132 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1133 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1134 	ifm->ifa_scope = ifa->ifa_scope;
1135 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1136 
1137 	if (ifa->ifa_address)
1138 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1139 
1140 	if (ifa->ifa_local)
1141 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1142 
1143 	if (ifa->ifa_broadcast)
1144 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1145 
1146 	if (ifa->ifa_anycast)
1147 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1148 
1149 	if (ifa->ifa_label[0])
1150 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151 
1152 	return nlmsg_end(skb, nlh);
1153 
1154 nla_put_failure:
1155 	nlmsg_cancel(skb, nlh);
1156 	return -EMSGSIZE;
1157 }
1158 
1159 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1160 {
1161 	struct net *net = sock_net(skb->sk);
1162 	int idx, ip_idx;
1163 	struct net_device *dev;
1164 	struct in_device *in_dev;
1165 	struct in_ifaddr *ifa;
1166 	int s_ip_idx, s_idx = cb->args[0];
1167 
1168 	s_ip_idx = ip_idx = cb->args[1];
1169 	idx = 0;
1170 	for_each_netdev(net, dev) {
1171 		if (idx < s_idx)
1172 			goto cont;
1173 		if (idx > s_idx)
1174 			s_ip_idx = 0;
1175 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1176 			goto cont;
1177 
1178 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1179 		     ifa = ifa->ifa_next, ip_idx++) {
1180 			if (ip_idx < s_ip_idx)
1181 				continue;
1182 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1183 					     cb->nlh->nlmsg_seq,
1184 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1185 				goto done;
1186 		}
1187 cont:
1188 		idx++;
1189 	}
1190 
1191 done:
1192 	cb->args[0] = idx;
1193 	cb->args[1] = ip_idx;
1194 
1195 	return skb->len;
1196 }
1197 
1198 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1199 		      u32 pid)
1200 {
1201 	struct sk_buff *skb;
1202 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1203 	int err = -ENOBUFS;
1204 	struct net *net;
1205 
1206 	net = dev_net(ifa->ifa_dev->dev);
1207 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1208 	if (skb == NULL)
1209 		goto errout;
1210 
1211 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1212 	if (err < 0) {
1213 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1214 		WARN_ON(err == -EMSGSIZE);
1215 		kfree_skb(skb);
1216 		goto errout;
1217 	}
1218 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1219 errout:
1220 	if (err < 0)
1221 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1222 }
1223 
1224 #ifdef CONFIG_SYSCTL
1225 
1226 static void devinet_copy_dflt_conf(struct net *net, int i)
1227 {
1228 	struct net_device *dev;
1229 
1230 	read_lock(&dev_base_lock);
1231 	for_each_netdev(net, dev) {
1232 		struct in_device *in_dev;
1233 		rcu_read_lock();
1234 		in_dev = __in_dev_get_rcu(dev);
1235 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1236 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1237 		rcu_read_unlock();
1238 	}
1239 	read_unlock(&dev_base_lock);
1240 }
1241 
1242 static void inet_forward_change(struct net *net)
1243 {
1244 	struct net_device *dev;
1245 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1246 
1247 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1248 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1249 
1250 	read_lock(&dev_base_lock);
1251 	for_each_netdev(net, dev) {
1252 		struct in_device *in_dev;
1253 		rcu_read_lock();
1254 		in_dev = __in_dev_get_rcu(dev);
1255 		if (in_dev)
1256 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1257 		rcu_read_unlock();
1258 	}
1259 	read_unlock(&dev_base_lock);
1260 
1261 	rt_cache_flush(0);
1262 }
1263 
1264 static int devinet_conf_proc(ctl_table *ctl, int write,
1265 			     struct file* filp, void __user *buffer,
1266 			     size_t *lenp, loff_t *ppos)
1267 {
1268 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1269 
1270 	if (write) {
1271 		struct ipv4_devconf *cnf = ctl->extra1;
1272 		struct net *net = ctl->extra2;
1273 		int i = (int *)ctl->data - cnf->data;
1274 
1275 		set_bit(i, cnf->state);
1276 
1277 		if (cnf == net->ipv4.devconf_dflt)
1278 			devinet_copy_dflt_conf(net, i);
1279 	}
1280 
1281 	return ret;
1282 }
1283 
1284 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1285 			       void __user *oldval, size_t __user *oldlenp,
1286 			       void __user *newval, size_t newlen)
1287 {
1288 	struct ipv4_devconf *cnf;
1289 	struct net *net;
1290 	int *valp = table->data;
1291 	int new;
1292 	int i;
1293 
1294 	if (!newval || !newlen)
1295 		return 0;
1296 
1297 	if (newlen != sizeof(int))
1298 		return -EINVAL;
1299 
1300 	if (get_user(new, (int __user *)newval))
1301 		return -EFAULT;
1302 
1303 	if (new == *valp)
1304 		return 0;
1305 
1306 	if (oldval && oldlenp) {
1307 		size_t len;
1308 
1309 		if (get_user(len, oldlenp))
1310 			return -EFAULT;
1311 
1312 		if (len) {
1313 			if (len > table->maxlen)
1314 				len = table->maxlen;
1315 			if (copy_to_user(oldval, valp, len))
1316 				return -EFAULT;
1317 			if (put_user(len, oldlenp))
1318 				return -EFAULT;
1319 		}
1320 	}
1321 
1322 	*valp = new;
1323 
1324 	cnf = table->extra1;
1325 	net = table->extra2;
1326 	i = (int *)table->data - cnf->data;
1327 
1328 	set_bit(i, cnf->state);
1329 
1330 	if (cnf == net->ipv4.devconf_dflt)
1331 		devinet_copy_dflt_conf(net, i);
1332 
1333 	return 1;
1334 }
1335 
1336 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1337 				  struct file* filp, void __user *buffer,
1338 				  size_t *lenp, loff_t *ppos)
1339 {
1340 	int *valp = ctl->data;
1341 	int val = *valp;
1342 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1343 
1344 	if (write && *valp != val) {
1345 		struct net *net = ctl->extra2;
1346 
1347 		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1348 			inet_forward_change(net);
1349 		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1350 			rt_cache_flush(0);
1351 	}
1352 
1353 	return ret;
1354 }
1355 
1356 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1357 			 struct file* filp, void __user *buffer,
1358 			 size_t *lenp, loff_t *ppos)
1359 {
1360 	int *valp = ctl->data;
1361 	int val = *valp;
1362 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1363 
1364 	if (write && *valp != val)
1365 		rt_cache_flush(0);
1366 
1367 	return ret;
1368 }
1369 
1370 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1371 				  void __user *oldval, size_t __user *oldlenp,
1372 				  void __user *newval, size_t newlen)
1373 {
1374 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1375 				      newval, newlen);
1376 
1377 	if (ret == 1)
1378 		rt_cache_flush(0);
1379 
1380 	return ret;
1381 }
1382 
1383 
/*
 * Helpers for building the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY expands to one ctl_table initializer whose data
 * pointer and extra1 refer to the global ipv4_devconf.  The remaining
 * macros are shorthands for common mode/handler combinations.
 *
 * Macro parameter names are deliberately distinct from the ctl_table
 * field names so that the designators are not substituted.
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, phandler, strat)	\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= pname,				\
		.data		= ipv4_devconf.data +			\
				  NET_IPV4_CONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= perm,					\
		.proc_handler	= phandler,				\
		.strategy	= strat,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, pname)				\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, pname)				\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, phandler, strat)	\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, phandler, strat)

/* Mode 0644 entry using the handlers that also flush the routing cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1411 
1412 static struct devinet_sysctl_table {
1413 	struct ctl_table_header *sysctl_header;
1414 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1415 	char *dev_name;
1416 } devinet_sysctl = {
1417 	.devinet_vars = {
1418 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1419 					     devinet_sysctl_forward,
1420 					     devinet_conf_sysctl),
1421 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1422 
1423 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1424 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1425 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1426 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1427 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1428 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1429 					"accept_source_route"),
1430 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1431 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1432 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1433 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1434 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1435 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1436 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1437 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1438 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1439 
1440 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1441 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1442 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1443 					      "force_igmp_version"),
1444 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1445 					      "promote_secondaries"),
1446 	},
1447 };
1448 
1449 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1450 		int ctl_name, struct ipv4_devconf *p)
1451 {
1452 	int i;
1453 	struct devinet_sysctl_table *t;
1454 
1455 #define DEVINET_CTL_PATH_DEV	3
1456 
1457 	struct ctl_path devinet_ctl_path[] = {
1458 		{ .procname = "net", .ctl_name = CTL_NET, },
1459 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1460 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1461 		{ /* to be set */ },
1462 		{ },
1463 	};
1464 
1465 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1466 	if (!t)
1467 		goto out;
1468 
1469 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1470 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1471 		t->devinet_vars[i].extra1 = p;
1472 		t->devinet_vars[i].extra2 = net;
1473 	}
1474 
1475 	/*
1476 	 * Make a copy of dev_name, because '.procname' is regarded as const
1477 	 * by sysctl and we wouldn't want anyone to change it under our feet
1478 	 * (see SIOCSIFNAME).
1479 	 */
1480 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1481 	if (!t->dev_name)
1482 		goto free;
1483 
1484 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1485 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1486 
1487 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1488 			t->devinet_vars);
1489 	if (!t->sysctl_header)
1490 		goto free_procname;
1491 
1492 	p->sysctl = t;
1493 	return 0;
1494 
1495 free_procname:
1496 	kfree(t->dev_name);
1497 free:
1498 	kfree(t);
1499 out:
1500 	return -ENOBUFS;
1501 }
1502 
1503 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1504 {
1505 	struct devinet_sysctl_table *t = cnf->sysctl;
1506 
1507 	if (t == NULL)
1508 		return;
1509 
1510 	cnf->sysctl = NULL;
1511 	unregister_sysctl_table(t->sysctl_header);
1512 	kfree(t->dev_name);
1513 	kfree(t);
1514 }
1515 
1516 static void devinet_sysctl_register(struct in_device *idev)
1517 {
1518 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1519 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1520 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1521 			idev->dev->ifindex, &idev->cnf);
1522 }
1523 
1524 static void devinet_sysctl_unregister(struct in_device *idev)
1525 {
1526 	__devinet_sysctl_unregister(&idev->cnf);
1527 	neigh_sysctl_unregister(idev->arp_parms);
1528 }
1529 
1530 static struct ctl_table ctl_forward_entry[] = {
1531 	{
1532 		.ctl_name	= NET_IPV4_FORWARD,
1533 		.procname	= "ip_forward",
1534 		.data		= &ipv4_devconf.data[
1535 					NET_IPV4_CONF_FORWARDING - 1],
1536 		.maxlen		= sizeof(int),
1537 		.mode		= 0644,
1538 		.proc_handler	= devinet_sysctl_forward,
1539 		.strategy	= devinet_conf_sysctl,
1540 		.extra1		= &ipv4_devconf,
1541 		.extra2		= &init_net,
1542 	},
1543 	{ },
1544 };
1545 
1546 static __net_initdata struct ctl_path net_ipv4_path[] = {
1547 	{ .procname = "net", .ctl_name = CTL_NET, },
1548 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1549 	{ },
1550 };
1551 #endif
1552 
1553 static __net_init int devinet_init_net(struct net *net)
1554 {
1555 	int err;
1556 	struct ipv4_devconf *all, *dflt;
1557 #ifdef CONFIG_SYSCTL
1558 	struct ctl_table *tbl = ctl_forward_entry;
1559 	struct ctl_table_header *forw_hdr;
1560 #endif
1561 
1562 	err = -ENOMEM;
1563 	all = &ipv4_devconf;
1564 	dflt = &ipv4_devconf_dflt;
1565 
1566 	if (net != &init_net) {
1567 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1568 		if (all == NULL)
1569 			goto err_alloc_all;
1570 
1571 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1572 		if (dflt == NULL)
1573 			goto err_alloc_dflt;
1574 
1575 #ifdef CONFIG_SYSCTL
1576 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1577 		if (tbl == NULL)
1578 			goto err_alloc_ctl;
1579 
1580 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1581 		tbl[0].extra1 = all;
1582 		tbl[0].extra2 = net;
1583 #endif
1584 	}
1585 
1586 #ifdef CONFIG_SYSCTL
1587 	err = __devinet_sysctl_register(net, "all",
1588 			NET_PROTO_CONF_ALL, all);
1589 	if (err < 0)
1590 		goto err_reg_all;
1591 
1592 	err = __devinet_sysctl_register(net, "default",
1593 			NET_PROTO_CONF_DEFAULT, dflt);
1594 	if (err < 0)
1595 		goto err_reg_dflt;
1596 
1597 	err = -ENOMEM;
1598 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1599 	if (forw_hdr == NULL)
1600 		goto err_reg_ctl;
1601 	net->ipv4.forw_hdr = forw_hdr;
1602 #endif
1603 
1604 	net->ipv4.devconf_all = all;
1605 	net->ipv4.devconf_dflt = dflt;
1606 	return 0;
1607 
1608 #ifdef CONFIG_SYSCTL
1609 err_reg_ctl:
1610 	__devinet_sysctl_unregister(dflt);
1611 err_reg_dflt:
1612 	__devinet_sysctl_unregister(all);
1613 err_reg_all:
1614 	if (tbl != ctl_forward_entry)
1615 		kfree(tbl);
1616 err_alloc_ctl:
1617 #endif
1618 	if (dflt != &ipv4_devconf_dflt)
1619 		kfree(dflt);
1620 err_alloc_dflt:
1621 	if (all != &ipv4_devconf)
1622 		kfree(all);
1623 err_alloc_all:
1624 	return err;
1625 }
1626 
1627 static __net_exit void devinet_exit_net(struct net *net)
1628 {
1629 #ifdef CONFIG_SYSCTL
1630 	struct ctl_table *tbl;
1631 
1632 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1633 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1634 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1635 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1636 	kfree(tbl);
1637 #endif
1638 	kfree(net->ipv4.devconf_dflt);
1639 	kfree(net->ipv4.devconf_all);
1640 }
1641 
1642 static __net_initdata struct pernet_operations devinet_ops = {
1643 	.init = devinet_init_net,
1644 	.exit = devinet_exit_net,
1645 };
1646 
1647 void __init devinet_init(void)
1648 {
1649 	register_pernet_subsys(&devinet_ops);
1650 
1651 	register_gifconf(PF_INET, inet_gifconf);
1652 	register_netdevice_notifier(&ip_netdev_notifier);
1653 
1654 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1655 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1656 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1657 }
1658 
/* Symbols exported from this file. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1664