xref: /openbmc/linux/net/ipv4/devinet.c (revision 732a675a)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 static struct ipv4_devconf ipv4_devconf = {
68 	.data = {
69 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73 	},
74 };
75 
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 	.data = {
78 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83 	},
84 };
85 
/*
 * Expands to IPV4_DEVCONF() applied to the devconf_dflt block of
 * namespace @net.  @net is parenthesized so that an arbitrary pointer
 * expression (not just a plain identifier) binds correctly to "->".
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88 
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 	[IFA_LOCAL]     	= { .type = NLA_U32 },
91 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95 
96 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97 
98 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
99 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100 			 int destroy);
101 #ifdef CONFIG_SYSCTL
102 static void devinet_sysctl_register(struct in_device *idev);
103 static void devinet_sysctl_unregister(struct in_device *idev);
104 #else
105 static inline void devinet_sysctl_register(struct in_device *idev)
106 {
107 }
108 static inline void devinet_sysctl_unregister(struct in_device *idev)
109 {
110 }
111 #endif
112 
113 /* Locks all the inet devices. */
114 
115 static struct in_ifaddr *inet_alloc_ifa(void)
116 {
117 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
118 
119 	if (ifa) {
120 		INIT_RCU_HEAD(&ifa->rcu_head);
121 	}
122 
123 	return ifa;
124 }
125 
126 static void inet_rcu_free_ifa(struct rcu_head *head)
127 {
128 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
129 	if (ifa->ifa_dev)
130 		in_dev_put(ifa->ifa_dev);
131 	kfree(ifa);
132 }
133 
134 static inline void inet_free_ifa(struct in_ifaddr *ifa)
135 {
136 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
137 }
138 
139 void in_dev_finish_destroy(struct in_device *idev)
140 {
141 	struct net_device *dev = idev->dev;
142 
143 	BUG_TRAP(!idev->ifa_list);
144 	BUG_TRAP(!idev->mc_list);
145 #ifdef NET_REFCNT_DEBUG
146 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
147 	       idev, dev ? dev->name : "NIL");
148 #endif
149 	dev_put(dev);
150 	if (!idev->dead)
151 		printk("Freeing alive in_device %p\n", idev);
152 	else {
153 		kfree(idev);
154 	}
155 }
156 
157 static struct in_device *inetdev_init(struct net_device *dev)
158 {
159 	struct in_device *in_dev;
160 
161 	ASSERT_RTNL();
162 
163 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
164 	if (!in_dev)
165 		goto out;
166 	INIT_RCU_HEAD(&in_dev->rcu_head);
167 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
168 			sizeof(in_dev->cnf));
169 	in_dev->cnf.sysctl = NULL;
170 	in_dev->dev = dev;
171 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
172 		goto out_kfree;
173 	/* Reference in_dev->dev */
174 	dev_hold(dev);
175 	/* Account for reference dev->ip_ptr (below) */
176 	in_dev_hold(in_dev);
177 
178 	devinet_sysctl_register(in_dev);
179 	ip_mc_init_dev(in_dev);
180 	if (dev->flags & IFF_UP)
181 		ip_mc_up(in_dev);
182 
183 	/* we can receive as soon as ip_ptr is set -- do this last */
184 	rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186 	return in_dev;
187 out_kfree:
188 	kfree(in_dev);
189 	in_dev = NULL;
190 	goto out;
191 }
192 
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
196 	in_dev_put(idev);
197 }
198 
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201 	struct in_ifaddr *ifa;
202 	struct net_device *dev;
203 
204 	ASSERT_RTNL();
205 
206 	dev = in_dev->dev;
207 
208 	in_dev->dead = 1;
209 
210 	ip_mc_destroy_dev(in_dev);
211 
212 	while ((ifa = in_dev->ifa_list) != NULL) {
213 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 		inet_free_ifa(ifa);
215 	}
216 
217 	dev->ip_ptr = NULL;
218 
219 	devinet_sysctl_unregister(in_dev);
220 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 	arp_ifdown(dev);
222 
223 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225 
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228 	rcu_read_lock();
229 	for_primary_ifa(in_dev) {
230 		if (inet_ifa_match(a, ifa)) {
231 			if (!b || inet_ifa_match(b, ifa)) {
232 				rcu_read_unlock();
233 				return 1;
234 			}
235 		}
236 	} endfor_ifa(in_dev);
237 	rcu_read_unlock();
238 	return 0;
239 }
240 
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242 			 int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244 	struct in_ifaddr *promote = NULL;
245 	struct in_ifaddr *ifa, *ifa1 = *ifap;
246 	struct in_ifaddr *last_prim = in_dev->ifa_list;
247 	struct in_ifaddr *prev_prom = NULL;
248 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249 
250 	ASSERT_RTNL();
251 
252 	/* 1. Deleting primary ifaddr forces deletion all secondaries
253 	 * unless alias promotion is set
254 	 **/
255 
256 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258 
259 		while ((ifa = *ifap1) != NULL) {
260 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261 			    ifa1->ifa_scope <= ifa->ifa_scope)
262 				last_prim = ifa;
263 
264 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265 			    ifa1->ifa_mask != ifa->ifa_mask ||
266 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
267 				ifap1 = &ifa->ifa_next;
268 				prev_prom = ifa;
269 				continue;
270 			}
271 
272 			if (!do_promote) {
273 				*ifap1 = ifa->ifa_next;
274 
275 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276 				blocking_notifier_call_chain(&inetaddr_chain,
277 						NETDEV_DOWN, ifa);
278 				inet_free_ifa(ifa);
279 			} else {
280 				promote = ifa;
281 				break;
282 			}
283 		}
284 	}
285 
286 	/* 2. Unlink it */
287 
288 	*ifap = ifa1->ifa_next;
289 
290 	/* 3. Announce address deletion */
291 
292 	/* Send message first, then call notifier.
293 	   At first sight, FIB update triggered by notifier
294 	   will refer to already deleted ifaddr, that could confuse
295 	   netlink listeners. It is not true: look, gated sees
296 	   that route deleted and if it still thinks that ifaddr
297 	   is valid, it will try to restore deleted routes... Grr.
298 	   So that, this order is correct.
299 	 */
300 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302 
303 	if (promote) {
304 
305 		if (prev_prom) {
306 			prev_prom->ifa_next = promote->ifa_next;
307 			promote->ifa_next = last_prim->ifa_next;
308 			last_prim->ifa_next = promote;
309 		}
310 
311 		promote->ifa_flags &= ~IFA_F_SECONDARY;
312 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313 		blocking_notifier_call_chain(&inetaddr_chain,
314 				NETDEV_UP, promote);
315 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316 			if (ifa1->ifa_mask != ifa->ifa_mask ||
317 			    !inet_ifa_match(ifa1->ifa_address, ifa))
318 					continue;
319 			fib_add_ifaddr(ifa);
320 		}
321 
322 	}
323 	if (destroy)
324 		inet_free_ifa(ifa1);
325 }
326 
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 			 int destroy)
329 {
330 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332 
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 			     u32 pid)
335 {
336 	struct in_device *in_dev = ifa->ifa_dev;
337 	struct in_ifaddr *ifa1, **ifap, **last_primary;
338 
339 	ASSERT_RTNL();
340 
341 	if (!ifa->ifa_local) {
342 		inet_free_ifa(ifa);
343 		return 0;
344 	}
345 
346 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
347 	last_primary = &in_dev->ifa_list;
348 
349 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350 	     ifap = &ifa1->ifa_next) {
351 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352 		    ifa->ifa_scope <= ifa1->ifa_scope)
353 			last_primary = &ifa1->ifa_next;
354 		if (ifa1->ifa_mask == ifa->ifa_mask &&
355 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
356 			if (ifa1->ifa_local == ifa->ifa_local) {
357 				inet_free_ifa(ifa);
358 				return -EEXIST;
359 			}
360 			if (ifa1->ifa_scope != ifa->ifa_scope) {
361 				inet_free_ifa(ifa);
362 				return -EINVAL;
363 			}
364 			ifa->ifa_flags |= IFA_F_SECONDARY;
365 		}
366 	}
367 
368 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369 		net_srandom(ifa->ifa_local);
370 		ifap = last_primary;
371 	}
372 
373 	ifa->ifa_next = *ifap;
374 	*ifap = ifa;
375 
376 	/* Send message first, then call notifier.
377 	   Notifier will trigger FIB update, so that
378 	   listeners of netlink will know about new ifaddr */
379 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381 
382 	return 0;
383 }
384 
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387 	return __inet_insert_ifa(ifa, NULL, 0);
388 }
389 
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
393 
394 	ASSERT_RTNL();
395 
396 	if (!in_dev) {
397 		inet_free_ifa(ifa);
398 		return -ENOBUFS;
399 	}
400 	ipv4_devconf_setall(in_dev);
401 	if (ifa->ifa_dev != in_dev) {
402 		BUG_TRAP(!ifa->ifa_dev);
403 		in_dev_hold(in_dev);
404 		ifa->ifa_dev = in_dev;
405 	}
406 	if (ipv4_is_loopback(ifa->ifa_local))
407 		ifa->ifa_scope = RT_SCOPE_HOST;
408 	return inet_insert_ifa(ifa);
409 }
410 
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413 	struct net_device *dev;
414 	struct in_device *in_dev = NULL;
415 	read_lock(&dev_base_lock);
416 	dev = __dev_get_by_index(net, ifindex);
417 	if (dev)
418 		in_dev = in_dev_get(dev);
419 	read_unlock(&dev_base_lock);
420 	return in_dev;
421 }
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	__in_dev_put(in_dev);
460 
461 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462 	     ifap = &ifa->ifa_next) {
463 		if (tb[IFA_LOCAL] &&
464 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 			continue;
466 
467 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 			continue;
469 
470 		if (tb[IFA_ADDRESS] &&
471 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 			continue;
474 
475 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476 		return 0;
477 	}
478 
479 	err = -EADDRNOTAVAIL;
480 errout:
481 	return err;
482 }
483 
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486 	struct nlattr *tb[IFA_MAX+1];
487 	struct in_ifaddr *ifa;
488 	struct ifaddrmsg *ifm;
489 	struct net_device *dev;
490 	struct in_device *in_dev;
491 	int err;
492 
493 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494 	if (err < 0)
495 		goto errout;
496 
497 	ifm = nlmsg_data(nlh);
498 	err = -EINVAL;
499 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 		goto errout;
501 
502 	dev = __dev_get_by_index(net, ifm->ifa_index);
503 	err = -ENODEV;
504 	if (dev == NULL)
505 		goto errout;
506 
507 	in_dev = __in_dev_get_rtnl(dev);
508 	err = -ENOBUFS;
509 	if (in_dev == NULL)
510 		goto errout;
511 
512 	ifa = inet_alloc_ifa();
513 	if (ifa == NULL)
514 		/*
515 		 * A potential indev allocation can be left alive, it stays
516 		 * assigned to its device and is destroy with it.
517 		 */
518 		goto errout;
519 
520 	ipv4_devconf_setall(in_dev);
521 	in_dev_hold(in_dev);
522 
523 	if (tb[IFA_ADDRESS] == NULL)
524 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525 
526 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528 	ifa->ifa_flags = ifm->ifa_flags;
529 	ifa->ifa_scope = ifm->ifa_scope;
530 	ifa->ifa_dev = in_dev;
531 
532 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534 
535 	if (tb[IFA_BROADCAST])
536 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 
538 	if (tb[IFA_LABEL])
539 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540 	else
541 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542 
543 	return ifa;
544 
545 errout:
546 	return ERR_PTR(err);
547 }
548 
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551 	struct net *net = sock_net(skb->sk);
552 	struct in_ifaddr *ifa;
553 
554 	ASSERT_RTNL();
555 
556 	ifa = rtm_to_ifaddr(net, nlh);
557 	if (IS_ERR(ifa))
558 		return PTR_ERR(ifa);
559 
560 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562 
563 /*
564  *	Determine a default network mask, based on the IP address.
565  */
566 
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569 	int rc = -1;	/* Something else, probably a multicast. */
570 
571 	if (ipv4_is_zeronet(addr))
572 		rc = 0;
573 	else {
574 		__u32 haddr = ntohl(addr);
575 
576 		if (IN_CLASSA(haddr))
577 			rc = 8;
578 		else if (IN_CLASSB(haddr))
579 			rc = 16;
580 		else if (IN_CLASSC(haddr))
581 			rc = 24;
582 	}
583 
584 	return rc;
585 }
586 
587 
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590 	struct ifreq ifr;
591 	struct sockaddr_in sin_orig;
592 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593 	struct in_device *in_dev;
594 	struct in_ifaddr **ifap = NULL;
595 	struct in_ifaddr *ifa = NULL;
596 	struct net_device *dev;
597 	char *colon;
598 	int ret = -EFAULT;
599 	int tryaddrmatch = 0;
600 
601 	/*
602 	 *	Fetch the caller's info block into kernel space
603 	 */
604 
605 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606 		goto out;
607 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
608 
609 	/* save original address for comparison */
610 	memcpy(&sin_orig, sin, sizeof(*sin));
611 
612 	colon = strchr(ifr.ifr_name, ':');
613 	if (colon)
614 		*colon = 0;
615 
616 #ifdef CONFIG_KMOD
617 	dev_load(net, ifr.ifr_name);
618 #endif
619 
620 	switch (cmd) {
621 	case SIOCGIFADDR:	/* Get interface address */
622 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
623 	case SIOCGIFDSTADDR:	/* Get the destination address */
624 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
625 		/* Note that these ioctls will not sleep,
626 		   so that we do not impose a lock.
627 		   One day we will be forced to put shlock here (I mean SMP)
628 		 */
629 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
630 		memset(sin, 0, sizeof(*sin));
631 		sin->sin_family = AF_INET;
632 		break;
633 
634 	case SIOCSIFFLAGS:
635 		ret = -EACCES;
636 		if (!capable(CAP_NET_ADMIN))
637 			goto out;
638 		break;
639 	case SIOCSIFADDR:	/* Set interface address (and family) */
640 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
641 	case SIOCSIFDSTADDR:	/* Set the destination address */
642 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
643 		ret = -EACCES;
644 		if (!capable(CAP_NET_ADMIN))
645 			goto out;
646 		ret = -EINVAL;
647 		if (sin->sin_family != AF_INET)
648 			goto out;
649 		break;
650 	default:
651 		ret = -EINVAL;
652 		goto out;
653 	}
654 
655 	rtnl_lock();
656 
657 	ret = -ENODEV;
658 	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
659 		goto done;
660 
661 	if (colon)
662 		*colon = ':';
663 
664 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
665 		if (tryaddrmatch) {
666 			/* Matthias Andree */
667 			/* compare label and address (4.4BSD style) */
668 			/* note: we only do this for a limited set of ioctls
669 			   and only if the original address family was AF_INET.
670 			   This is checked above. */
671 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
672 			     ifap = &ifa->ifa_next) {
673 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
674 				    sin_orig.sin_addr.s_addr ==
675 							ifa->ifa_address) {
676 					break; /* found */
677 				}
678 			}
679 		}
680 		/* we didn't get a match, maybe the application is
681 		   4.3BSD-style and passed in junk so we fall back to
682 		   comparing just the label */
683 		if (!ifa) {
684 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685 			     ifap = &ifa->ifa_next)
686 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687 					break;
688 		}
689 	}
690 
691 	ret = -EADDRNOTAVAIL;
692 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693 		goto done;
694 
695 	switch (cmd) {
696 	case SIOCGIFADDR:	/* Get interface address */
697 		sin->sin_addr.s_addr = ifa->ifa_local;
698 		goto rarok;
699 
700 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
701 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
702 		goto rarok;
703 
704 	case SIOCGIFDSTADDR:	/* Get the destination address */
705 		sin->sin_addr.s_addr = ifa->ifa_address;
706 		goto rarok;
707 
708 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
709 		sin->sin_addr.s_addr = ifa->ifa_mask;
710 		goto rarok;
711 
712 	case SIOCSIFFLAGS:
713 		if (colon) {
714 			ret = -EADDRNOTAVAIL;
715 			if (!ifa)
716 				break;
717 			ret = 0;
718 			if (!(ifr.ifr_flags & IFF_UP))
719 				inet_del_ifa(in_dev, ifap, 1);
720 			break;
721 		}
722 		ret = dev_change_flags(dev, ifr.ifr_flags);
723 		break;
724 
725 	case SIOCSIFADDR:	/* Set interface address (and family) */
726 		ret = -EINVAL;
727 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728 			break;
729 
730 		if (!ifa) {
731 			ret = -ENOBUFS;
732 			if ((ifa = inet_alloc_ifa()) == NULL)
733 				break;
734 			if (colon)
735 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736 			else
737 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738 		} else {
739 			ret = 0;
740 			if (ifa->ifa_local == sin->sin_addr.s_addr)
741 				break;
742 			inet_del_ifa(in_dev, ifap, 0);
743 			ifa->ifa_broadcast = 0;
744 			ifa->ifa_scope = 0;
745 		}
746 
747 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748 
749 		if (!(dev->flags & IFF_POINTOPOINT)) {
750 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
751 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
752 			if ((dev->flags & IFF_BROADCAST) &&
753 			    ifa->ifa_prefixlen < 31)
754 				ifa->ifa_broadcast = ifa->ifa_address |
755 						     ~ifa->ifa_mask;
756 		} else {
757 			ifa->ifa_prefixlen = 32;
758 			ifa->ifa_mask = inet_make_mask(32);
759 		}
760 		ret = inet_set_ifa(dev, ifa);
761 		break;
762 
763 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
764 		ret = 0;
765 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
766 			inet_del_ifa(in_dev, ifap, 0);
767 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
768 			inet_insert_ifa(ifa);
769 		}
770 		break;
771 
772 	case SIOCSIFDSTADDR:	/* Set the destination address */
773 		ret = 0;
774 		if (ifa->ifa_address == sin->sin_addr.s_addr)
775 			break;
776 		ret = -EINVAL;
777 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778 			break;
779 		ret = 0;
780 		inet_del_ifa(in_dev, ifap, 0);
781 		ifa->ifa_address = sin->sin_addr.s_addr;
782 		inet_insert_ifa(ifa);
783 		break;
784 
785 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
786 
787 		/*
788 		 *	The mask we set must be legal.
789 		 */
790 		ret = -EINVAL;
791 		if (bad_mask(sin->sin_addr.s_addr, 0))
792 			break;
793 		ret = 0;
794 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
795 			__be32 old_mask = ifa->ifa_mask;
796 			inet_del_ifa(in_dev, ifap, 0);
797 			ifa->ifa_mask = sin->sin_addr.s_addr;
798 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799 
800 			/* See if current broadcast address matches
801 			 * with current netmask, then recalculate
802 			 * the broadcast address. Otherwise it's a
803 			 * funny address, so don't touch it since
804 			 * the user seems to know what (s)he's doing...
805 			 */
806 			if ((dev->flags & IFF_BROADCAST) &&
807 			    (ifa->ifa_prefixlen < 31) &&
808 			    (ifa->ifa_broadcast ==
809 			     (ifa->ifa_local|~old_mask))) {
810 				ifa->ifa_broadcast = (ifa->ifa_local |
811 						      ~sin->sin_addr.s_addr);
812 			}
813 			inet_insert_ifa(ifa);
814 		}
815 		break;
816 	}
817 done:
818 	rtnl_unlock();
819 out:
820 	return ret;
821 rarok:
822 	rtnl_unlock();
823 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
824 	goto out;
825 }
826 
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
830 	struct in_ifaddr *ifa;
831 	struct ifreq ifr;
832 	int done = 0;
833 
834 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835 		goto out;
836 
837 	for (; ifa; ifa = ifa->ifa_next) {
838 		if (!buf) {
839 			done += sizeof(ifr);
840 			continue;
841 		}
842 		if (len < (int) sizeof(ifr))
843 			break;
844 		memset(&ifr, 0, sizeof(struct ifreq));
845 		if (ifa->ifa_label)
846 			strcpy(ifr.ifr_name, ifa->ifa_label);
847 		else
848 			strcpy(ifr.ifr_name, dev->name);
849 
850 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852 								ifa->ifa_local;
853 
854 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855 			done = -EFAULT;
856 			break;
857 		}
858 		buf  += sizeof(struct ifreq);
859 		len  -= sizeof(struct ifreq);
860 		done += sizeof(struct ifreq);
861 	}
862 out:
863 	return done;
864 }
865 
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868 	__be32 addr = 0;
869 	struct in_device *in_dev;
870 	struct net *net = dev_net(dev);
871 
872 	rcu_read_lock();
873 	in_dev = __in_dev_get_rcu(dev);
874 	if (!in_dev)
875 		goto no_in_dev;
876 
877 	for_primary_ifa(in_dev) {
878 		if (ifa->ifa_scope > scope)
879 			continue;
880 		if (!dst || inet_ifa_match(dst, ifa)) {
881 			addr = ifa->ifa_local;
882 			break;
883 		}
884 		if (!addr)
885 			addr = ifa->ifa_local;
886 	} endfor_ifa(in_dev);
887 no_in_dev:
888 	rcu_read_unlock();
889 
890 	if (addr)
891 		goto out;
892 
893 	/* Not loopback addresses on loopback should be preferred
894 	   in this case. It is importnat that lo is the first interface
895 	   in dev_base list.
896 	 */
897 	read_lock(&dev_base_lock);
898 	rcu_read_lock();
899 	for_each_netdev(net, dev) {
900 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901 			continue;
902 
903 		for_primary_ifa(in_dev) {
904 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
905 			    ifa->ifa_scope <= scope) {
906 				addr = ifa->ifa_local;
907 				goto out_unlock_both;
908 			}
909 		} endfor_ifa(in_dev);
910 	}
911 out_unlock_both:
912 	read_unlock(&dev_base_lock);
913 	rcu_read_unlock();
914 out:
915 	return addr;
916 }
917 
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919 			      __be32 local, int scope)
920 {
921 	int same = 0;
922 	__be32 addr = 0;
923 
924 	for_ifa(in_dev) {
925 		if (!addr &&
926 		    (local == ifa->ifa_local || !local) &&
927 		    ifa->ifa_scope <= scope) {
928 			addr = ifa->ifa_local;
929 			if (same)
930 				break;
931 		}
932 		if (!same) {
933 			same = (!local || inet_ifa_match(local, ifa)) &&
934 				(!dst || inet_ifa_match(dst, ifa));
935 			if (same && addr) {
936 				if (local || !dst)
937 					break;
938 				/* Is the selected addr into dst subnet? */
939 				if (inet_ifa_match(addr, ifa))
940 					break;
941 				/* No, then can we use new local src? */
942 				if (ifa->ifa_scope <= scope) {
943 					addr = ifa->ifa_local;
944 					break;
945 				}
946 				/* search for large dst subnet for addr */
947 				same = 0;
948 			}
949 		}
950 	} endfor_ifa(in_dev);
951 
952 	return same? addr : 0;
953 }
954 
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963 			 __be32 dst, __be32 local, int scope)
964 {
965 	__be32 addr = 0;
966 	struct net_device *dev;
967 	struct net *net;
968 
969 	if (scope != RT_SCOPE_LINK)
970 		return confirm_addr_indev(in_dev, dst, local, scope);
971 
972 	net = dev_net(in_dev->dev);
973 	read_lock(&dev_base_lock);
974 	rcu_read_lock();
975 	for_each_netdev(net, dev) {
976 		if ((in_dev = __in_dev_get_rcu(dev))) {
977 			addr = confirm_addr_indev(in_dev, dst, local, scope);
978 			if (addr)
979 				break;
980 		}
981 	}
982 	rcu_read_unlock();
983 	read_unlock(&dev_base_lock);
984 
985 	return addr;
986 }
987 
988 /*
989  *	Device notifier
990  */
991 
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996 
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001 
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007 	struct in_ifaddr *ifa;
1008 	int named = 0;
1009 
1010 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011 		char old[IFNAMSIZ], *dot;
1012 
1013 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015 		if (named++ == 0)
1016 			continue;
1017 		dot = strchr(old, ':');
1018 		if (dot == NULL) {
1019 			sprintf(old, ":%d", named);
1020 			dot = old;
1021 		}
1022 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023 			strcat(ifa->ifa_label, dot);
1024 		} else {
1025 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026 		}
1027 	}
1028 }
1029 
1030 /* Called only under RTNL semaphore */
1031 
1032 static int inetdev_event(struct notifier_block *this, unsigned long event,
1033 			 void *ptr)
1034 {
1035 	struct net_device *dev = ptr;
1036 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1037 
1038 	ASSERT_RTNL();
1039 
1040 	if (!in_dev) {
1041 		if (event == NETDEV_REGISTER) {
1042 			in_dev = inetdev_init(dev);
1043 			if (!in_dev)
1044 				return notifier_from_errno(-ENOMEM);
1045 			if (dev->flags & IFF_LOOPBACK) {
1046 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1047 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1048 			}
1049 		}
1050 		goto out;
1051 	}
1052 
1053 	switch (event) {
1054 	case NETDEV_REGISTER:
1055 		printk(KERN_DEBUG "inetdev_event: bug\n");
1056 		dev->ip_ptr = NULL;
1057 		break;
1058 	case NETDEV_UP:
1059 		if (dev->mtu < 68)
1060 			break;
1061 		if (dev->flags & IFF_LOOPBACK) {
1062 			struct in_ifaddr *ifa;
1063 			if ((ifa = inet_alloc_ifa()) != NULL) {
1064 				ifa->ifa_local =
1065 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1066 				ifa->ifa_prefixlen = 8;
1067 				ifa->ifa_mask = inet_make_mask(8);
1068 				in_dev_hold(in_dev);
1069 				ifa->ifa_dev = in_dev;
1070 				ifa->ifa_scope = RT_SCOPE_HOST;
1071 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1072 				inet_insert_ifa(ifa);
1073 			}
1074 		}
1075 		ip_mc_up(in_dev);
1076 		break;
1077 	case NETDEV_DOWN:
1078 		ip_mc_down(in_dev);
1079 		break;
1080 	case NETDEV_CHANGEMTU:
1081 		if (dev->mtu >= 68)
1082 			break;
1083 		/* MTU falled under 68, disable IP */
1084 	case NETDEV_UNREGISTER:
1085 		inetdev_destroy(in_dev);
1086 		break;
1087 	case NETDEV_CHANGENAME:
1088 		/* Do not notify about label change, this event is
1089 		 * not interesting to applications using netlink.
1090 		 */
1091 		inetdev_changename(dev, in_dev);
1092 
1093 		devinet_sysctl_unregister(in_dev);
1094 		devinet_sysctl_register(in_dev);
1095 		break;
1096 	}
1097 out:
1098 	return NOTIFY_DONE;
1099 }
1100 
1101 static struct notifier_block ip_netdev_notifier = {
1102 	.notifier_call =inetdev_event,
1103 };
1104 
1105 static inline size_t inet_nlmsg_size(void)
1106 {
1107 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1108 	       + nla_total_size(4) /* IFA_ADDRESS */
1109 	       + nla_total_size(4) /* IFA_LOCAL */
1110 	       + nla_total_size(4) /* IFA_BROADCAST */
1111 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1112 }
1113 
1114 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1115 			    u32 pid, u32 seq, int event, unsigned int flags)
1116 {
1117 	struct ifaddrmsg *ifm;
1118 	struct nlmsghdr  *nlh;
1119 
1120 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1121 	if (nlh == NULL)
1122 		return -EMSGSIZE;
1123 
1124 	ifm = nlmsg_data(nlh);
1125 	ifm->ifa_family = AF_INET;
1126 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1127 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1128 	ifm->ifa_scope = ifa->ifa_scope;
1129 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1130 
1131 	if (ifa->ifa_address)
1132 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1133 
1134 	if (ifa->ifa_local)
1135 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1136 
1137 	if (ifa->ifa_broadcast)
1138 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1139 
1140 	if (ifa->ifa_label[0])
1141 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1142 
1143 	return nlmsg_end(skb, nlh);
1144 
1145 nla_put_failure:
1146 	nlmsg_cancel(skb, nlh);
1147 	return -EMSGSIZE;
1148 }
1149 
1150 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1151 {
1152 	struct net *net = sock_net(skb->sk);
1153 	int idx, ip_idx;
1154 	struct net_device *dev;
1155 	struct in_device *in_dev;
1156 	struct in_ifaddr *ifa;
1157 	int s_ip_idx, s_idx = cb->args[0];
1158 
1159 	s_ip_idx = ip_idx = cb->args[1];
1160 	idx = 0;
1161 	for_each_netdev(net, dev) {
1162 		if (idx < s_idx)
1163 			goto cont;
1164 		if (idx > s_idx)
1165 			s_ip_idx = 0;
1166 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1167 			goto cont;
1168 
1169 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1170 		     ifa = ifa->ifa_next, ip_idx++) {
1171 			if (ip_idx < s_ip_idx)
1172 				continue;
1173 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1174 					     cb->nlh->nlmsg_seq,
1175 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1176 				goto done;
1177 		}
1178 cont:
1179 		idx++;
1180 	}
1181 
1182 done:
1183 	cb->args[0] = idx;
1184 	cb->args[1] = ip_idx;
1185 
1186 	return skb->len;
1187 }
1188 
1189 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1190 		      u32 pid)
1191 {
1192 	struct sk_buff *skb;
1193 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1194 	int err = -ENOBUFS;
1195 	struct net *net;
1196 
1197 	net = dev_net(ifa->ifa_dev->dev);
1198 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1199 	if (skb == NULL)
1200 		goto errout;
1201 
1202 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1203 	if (err < 0) {
1204 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1205 		WARN_ON(err == -EMSGSIZE);
1206 		kfree_skb(skb);
1207 		goto errout;
1208 	}
1209 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1210 errout:
1211 	if (err < 0)
1212 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1213 }
1214 
1215 #ifdef CONFIG_SYSCTL
1216 
1217 static void devinet_copy_dflt_conf(struct net *net, int i)
1218 {
1219 	struct net_device *dev;
1220 
1221 	read_lock(&dev_base_lock);
1222 	for_each_netdev(net, dev) {
1223 		struct in_device *in_dev;
1224 		rcu_read_lock();
1225 		in_dev = __in_dev_get_rcu(dev);
1226 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1227 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1228 		rcu_read_unlock();
1229 	}
1230 	read_unlock(&dev_base_lock);
1231 }
1232 
1233 static void inet_forward_change(struct net *net)
1234 {
1235 	struct net_device *dev;
1236 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1237 
1238 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1239 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1240 
1241 	read_lock(&dev_base_lock);
1242 	for_each_netdev(net, dev) {
1243 		struct in_device *in_dev;
1244 		rcu_read_lock();
1245 		in_dev = __in_dev_get_rcu(dev);
1246 		if (in_dev)
1247 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1248 		rcu_read_unlock();
1249 	}
1250 	read_unlock(&dev_base_lock);
1251 
1252 	rt_cache_flush(0);
1253 }
1254 
1255 static int devinet_conf_proc(ctl_table *ctl, int write,
1256 			     struct file* filp, void __user *buffer,
1257 			     size_t *lenp, loff_t *ppos)
1258 {
1259 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1260 
1261 	if (write) {
1262 		struct ipv4_devconf *cnf = ctl->extra1;
1263 		struct net *net = ctl->extra2;
1264 		int i = (int *)ctl->data - cnf->data;
1265 
1266 		set_bit(i, cnf->state);
1267 
1268 		if (cnf == net->ipv4.devconf_dflt)
1269 			devinet_copy_dflt_conf(net, i);
1270 	}
1271 
1272 	return ret;
1273 }
1274 
1275 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1276 			       void __user *oldval, size_t __user *oldlenp,
1277 			       void __user *newval, size_t newlen)
1278 {
1279 	struct ipv4_devconf *cnf;
1280 	struct net *net;
1281 	int *valp = table->data;
1282 	int new;
1283 	int i;
1284 
1285 	if (!newval || !newlen)
1286 		return 0;
1287 
1288 	if (newlen != sizeof(int))
1289 		return -EINVAL;
1290 
1291 	if (get_user(new, (int __user *)newval))
1292 		return -EFAULT;
1293 
1294 	if (new == *valp)
1295 		return 0;
1296 
1297 	if (oldval && oldlenp) {
1298 		size_t len;
1299 
1300 		if (get_user(len, oldlenp))
1301 			return -EFAULT;
1302 
1303 		if (len) {
1304 			if (len > table->maxlen)
1305 				len = table->maxlen;
1306 			if (copy_to_user(oldval, valp, len))
1307 				return -EFAULT;
1308 			if (put_user(len, oldlenp))
1309 				return -EFAULT;
1310 		}
1311 	}
1312 
1313 	*valp = new;
1314 
1315 	cnf = table->extra1;
1316 	net = table->extra2;
1317 	i = (int *)table->data - cnf->data;
1318 
1319 	set_bit(i, cnf->state);
1320 
1321 	if (cnf == net->ipv4.devconf_dflt)
1322 		devinet_copy_dflt_conf(net, i);
1323 
1324 	return 1;
1325 }
1326 
1327 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1328 				  struct file* filp, void __user *buffer,
1329 				  size_t *lenp, loff_t *ppos)
1330 {
1331 	int *valp = ctl->data;
1332 	int val = *valp;
1333 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1334 
1335 	if (write && *valp != val) {
1336 		struct net *net = ctl->extra2;
1337 
1338 		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1339 			inet_forward_change(net);
1340 		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1341 			rt_cache_flush(0);
1342 	}
1343 
1344 	return ret;
1345 }
1346 
1347 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1348 			 struct file* filp, void __user *buffer,
1349 			 size_t *lenp, loff_t *ppos)
1350 {
1351 	int *valp = ctl->data;
1352 	int val = *valp;
1353 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 
1355 	if (write && *valp != val)
1356 		rt_cache_flush(0);
1357 
1358 	return ret;
1359 }
1360 
1361 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1362 				  void __user *oldval, size_t __user *oldlenp,
1363 				  void __user *newval, size_t newlen)
1364 {
1365 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1366 				      newval, newlen);
1367 
1368 	if (ret == 1)
1369 		rt_cache_flush(0);
1370 
1371 	return ret;
1372 }
1373 
1374 
/*
 * Build one ctl_table initializer for the devconf attribute @attr.
 *
 * @name is the procname, @mval the file mode, @proc the proc handler and
 * @sysctl the strategy routine.  The data pointer refers to the slot
 * NET_IPV4_CONF_<attr> - 1 of the global ipv4_devconf, and extra1 points
 * at ipv4_devconf itself; __devinet_sysctl_register() rebases both onto
 * the devconf it is registering.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-only (0444) entry; same handlers as the read-write variant. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-write (0644) entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Read-write entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1402 
/*
 * Template for a per-devconf sysctl table.
 *
 * __devinet_sysctl_register() duplicates this object for every devconf it
 * registers and fills in sysctl_header and dev_name on the copy; the
 * entries below all refer to the global ipv4_devconf until rebased there.
 * Array slots not initialized here stay zeroed.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;	/* set on registration */
	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
	char *dev_name;				/* private copy of the dir name */
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own proc handler for the side effects */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward,
					     devinet_conf_sysctl),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),

		/* changing any of these flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1439 
1440 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1441 		int ctl_name, struct ipv4_devconf *p)
1442 {
1443 	int i;
1444 	struct devinet_sysctl_table *t;
1445 
1446 #define DEVINET_CTL_PATH_DEV	3
1447 
1448 	struct ctl_path devinet_ctl_path[] = {
1449 		{ .procname = "net", .ctl_name = CTL_NET, },
1450 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1451 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1452 		{ /* to be set */ },
1453 		{ },
1454 	};
1455 
1456 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1457 	if (!t)
1458 		goto out;
1459 
1460 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1461 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1462 		t->devinet_vars[i].extra1 = p;
1463 		t->devinet_vars[i].extra2 = net;
1464 	}
1465 
1466 	/*
1467 	 * Make a copy of dev_name, because '.procname' is regarded as const
1468 	 * by sysctl and we wouldn't want anyone to change it under our feet
1469 	 * (see SIOCSIFNAME).
1470 	 */
1471 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1472 	if (!t->dev_name)
1473 		goto free;
1474 
1475 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1476 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1477 
1478 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1479 			t->devinet_vars);
1480 	if (!t->sysctl_header)
1481 		goto free_procname;
1482 
1483 	p->sysctl = t;
1484 	return 0;
1485 
1486 free_procname:
1487 	kfree(t->dev_name);
1488 free:
1489 	kfree(t);
1490 out:
1491 	return -ENOBUFS;
1492 }
1493 
1494 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1495 {
1496 	struct devinet_sysctl_table *t = cnf->sysctl;
1497 
1498 	if (t == NULL)
1499 		return;
1500 
1501 	cnf->sysctl = NULL;
1502 	unregister_sysctl_table(t->sysctl_header);
1503 	kfree(t->dev_name);
1504 	kfree(t);
1505 }
1506 
1507 static void devinet_sysctl_register(struct in_device *idev)
1508 {
1509 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1510 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1511 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1512 			idev->dev->ifindex, &idev->cnf);
1513 }
1514 
/*
 * Remove the sysctls of @idev, in the reverse order of
 * devinet_sysctl_register(): the devconf directory first, then the
 * neighbour sysctls of its arp_parms.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
1520 
/*
 * The "ip_forward" sysctl, registered under net_ipv4_path by
 * devinet_init_net().  It is backed by the FORWARDING slot of the global
 * ipv4_devconf and uses the forwarding proc handler.  This static table
 * serves init_net; devinet_init_net() duplicates and re-targets it for
 * every other namespace.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.ctl_name	= NET_IPV4_FORWARD,
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					NET_IPV4_CONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.strategy	= devinet_conf_sysctl,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* terminator */
};
1536 
/* sysctl path "net/ipv4" under which the ip_forward entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", .ctl_name = CTL_NET, },
	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
	{ },	/* terminator */
};
1542 #endif
1543 
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table).  Every other
 * namespace gets its own duplicates of them.  With CONFIG_SYSCTL the
 * conf/all and conf/default directories and the ip_forward entry are
 * registered for the namespace.
 *
 * Returns 0 on success or a negative errno; on failure everything that
 * was allocated or registered here is released again.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (net != &init_net) {
		/* Private copies, seeded from the current global values. */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace. */
		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all",
			NET_PROTO_CONF_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
			NET_PROTO_CONF_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwinding, in reverse order of setup.  The static objects
	 * used by init_net must never be passed to kfree(), hence the
	 * pointer comparisons below.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1617 
1618 static __net_exit void devinet_exit_net(struct net *net)
1619 {
1620 #ifdef CONFIG_SYSCTL
1621 	struct ctl_table *tbl;
1622 
1623 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1624 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1625 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1626 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1627 	kfree(tbl);
1628 #endif
1629 	kfree(net->ipv4.devconf_dflt);
1630 	kfree(net->ipv4.devconf_all);
1631 }
1632 
/* Per-namespace init/exit hooks; registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1637 
/*
 * Boot-time setup of the IPv4 device layer: registers the per-namespace
 * operations, the PF_INET gifconf handler, the netdevice notifier and
 * the rtnetlink handlers for address messages.  None of the return
 * values are checked here.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* NEWADDR/DELADDR get a handler in the third argument only; */
	/* GETADDR gets inet_dump_ifaddr in the fourth (dump) argument only. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1649 
/* Symbols from this file exported via EXPORT_SYMBOL. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1655