xref: /openbmc/linux/net/ipv4/devinet.c (revision b2315372eac9cd9f622c32a93e323cf6f0f03462)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 
66 struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
86 
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *in_dev,
102 				    struct ipv4_devconf *p);
103 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
104 #endif
105 
106 /* Locks all the inet devices. */
107 
108 static struct in_ifaddr *inet_alloc_ifa(void)
109 {
110 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
111 
112 	if (ifa) {
113 		INIT_RCU_HEAD(&ifa->rcu_head);
114 	}
115 
116 	return ifa;
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	BUG_TRAP(!idev->ifa_list);
137 	BUG_TRAP(!idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		printk("Freeing alive in_device %p\n", idev);
145 	else {
146 		kfree(idev);
147 	}
148 }
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	INIT_RCU_HEAD(&in_dev->rcu_head);
160 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164 		goto out_kfree;
165 	/* Reference in_dev->dev */
166 	dev_hold(dev);
167 #ifdef CONFIG_SYSCTL
168 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
170 #endif
171 
172 	/* Account for reference dev->ip_ptr (below) */
173 	in_dev_hold(in_dev);
174 
175 #ifdef CONFIG_SYSCTL
176 	devinet_sysctl_register(in_dev, &in_dev->cnf);
177 #endif
178 	ip_mc_init_dev(in_dev);
179 	if (dev->flags & IFF_UP)
180 		ip_mc_up(in_dev);
181 
182 	/* we can receive as soon as ip_ptr is set -- do this last */
183 	rcu_assign_pointer(dev->ip_ptr, in_dev);
184 out:
185 	return in_dev;
186 out_kfree:
187 	kfree(in_dev);
188 	in_dev = NULL;
189 	goto out;
190 }
191 
192 static void in_dev_rcu_put(struct rcu_head *head)
193 {
194 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
195 	in_dev_put(idev);
196 }
197 
198 static void inetdev_destroy(struct in_device *in_dev)
199 {
200 	struct in_ifaddr *ifa;
201 	struct net_device *dev;
202 
203 	ASSERT_RTNL();
204 
205 	dev = in_dev->dev;
206 	if (dev == &loopback_dev)
207 		return;
208 
209 	in_dev->dead = 1;
210 
211 	ip_mc_destroy_dev(in_dev);
212 
213 	while ((ifa = in_dev->ifa_list) != NULL) {
214 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 		inet_free_ifa(ifa);
216 	}
217 
218 #ifdef CONFIG_SYSCTL
219 	devinet_sysctl_unregister(&in_dev->cnf);
220 #endif
221 
222 	dev->ip_ptr = NULL;
223 
224 #ifdef CONFIG_SYSCTL
225 	neigh_sysctl_unregister(in_dev->arp_parms);
226 #endif
227 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
228 	arp_ifdown(dev);
229 
230 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
231 }
232 
233 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
234 {
235 	rcu_read_lock();
236 	for_primary_ifa(in_dev) {
237 		if (inet_ifa_match(a, ifa)) {
238 			if (!b || inet_ifa_match(b, ifa)) {
239 				rcu_read_unlock();
240 				return 1;
241 			}
242 		}
243 	} endfor_ifa(in_dev);
244 	rcu_read_unlock();
245 	return 0;
246 }
247 
248 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
249 			 int destroy, struct nlmsghdr *nlh, u32 pid)
250 {
251 	struct in_ifaddr *promote = NULL;
252 	struct in_ifaddr *ifa, *ifa1 = *ifap;
253 	struct in_ifaddr *last_prim = in_dev->ifa_list;
254 	struct in_ifaddr *prev_prom = NULL;
255 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
256 
257 	ASSERT_RTNL();
258 
259 	/* 1. Deleting primary ifaddr forces deletion all secondaries
260 	 * unless alias promotion is set
261 	 **/
262 
263 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
264 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
265 
266 		while ((ifa = *ifap1) != NULL) {
267 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
268 			    ifa1->ifa_scope <= ifa->ifa_scope)
269 				last_prim = ifa;
270 
271 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
272 			    ifa1->ifa_mask != ifa->ifa_mask ||
273 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
274 				ifap1 = &ifa->ifa_next;
275 				prev_prom = ifa;
276 				continue;
277 			}
278 
279 			if (!do_promote) {
280 				*ifap1 = ifa->ifa_next;
281 
282 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
283 				blocking_notifier_call_chain(&inetaddr_chain,
284 						NETDEV_DOWN, ifa);
285 				inet_free_ifa(ifa);
286 			} else {
287 				promote = ifa;
288 				break;
289 			}
290 		}
291 	}
292 
293 	/* 2. Unlink it */
294 
295 	*ifap = ifa1->ifa_next;
296 
297 	/* 3. Announce address deletion */
298 
299 	/* Send message first, then call notifier.
300 	   At first sight, FIB update triggered by notifier
301 	   will refer to already deleted ifaddr, that could confuse
302 	   netlink listeners. It is not true: look, gated sees
303 	   that route deleted and if it still thinks that ifaddr
304 	   is valid, it will try to restore deleted routes... Grr.
305 	   So that, this order is correct.
306 	 */
307 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
308 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
309 
310 	if (promote) {
311 
312 		if (prev_prom) {
313 			prev_prom->ifa_next = promote->ifa_next;
314 			promote->ifa_next = last_prim->ifa_next;
315 			last_prim->ifa_next = promote;
316 		}
317 
318 		promote->ifa_flags &= ~IFA_F_SECONDARY;
319 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
320 		blocking_notifier_call_chain(&inetaddr_chain,
321 				NETDEV_UP, promote);
322 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
323 			if (ifa1->ifa_mask != ifa->ifa_mask ||
324 			    !inet_ifa_match(ifa1->ifa_address, ifa))
325 					continue;
326 			fib_add_ifaddr(ifa);
327 		}
328 
329 	}
330 	if (destroy)
331 		inet_free_ifa(ifa1);
332 }
333 
334 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335 			 int destroy)
336 {
337 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 }
339 
340 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341 			     u32 pid)
342 {
343 	struct in_device *in_dev = ifa->ifa_dev;
344 	struct in_ifaddr *ifa1, **ifap, **last_primary;
345 
346 	ASSERT_RTNL();
347 
348 	if (!ifa->ifa_local) {
349 		inet_free_ifa(ifa);
350 		return 0;
351 	}
352 
353 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
354 	last_primary = &in_dev->ifa_list;
355 
356 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
357 	     ifap = &ifa1->ifa_next) {
358 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
359 		    ifa->ifa_scope <= ifa1->ifa_scope)
360 			last_primary = &ifa1->ifa_next;
361 		if (ifa1->ifa_mask == ifa->ifa_mask &&
362 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
363 			if (ifa1->ifa_local == ifa->ifa_local) {
364 				inet_free_ifa(ifa);
365 				return -EEXIST;
366 			}
367 			if (ifa1->ifa_scope != ifa->ifa_scope) {
368 				inet_free_ifa(ifa);
369 				return -EINVAL;
370 			}
371 			ifa->ifa_flags |= IFA_F_SECONDARY;
372 		}
373 	}
374 
375 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
376 		net_srandom(ifa->ifa_local);
377 		ifap = last_primary;
378 	}
379 
380 	ifa->ifa_next = *ifap;
381 	*ifap = ifa;
382 
383 	/* Send message first, then call notifier.
384 	   Notifier will trigger FIB update, so that
385 	   listeners of netlink will know about new ifaddr */
386 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
387 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
388 
389 	return 0;
390 }
391 
392 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 {
394 	return __inet_insert_ifa(ifa, NULL, 0);
395 }
396 
397 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 {
399 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
400 
401 	ASSERT_RTNL();
402 
403 	if (!in_dev) {
404 		inet_free_ifa(ifa);
405 		return -ENOBUFS;
406 	}
407 	ipv4_devconf_setall(in_dev);
408 	if (ifa->ifa_dev != in_dev) {
409 		BUG_TRAP(!ifa->ifa_dev);
410 		in_dev_hold(in_dev);
411 		ifa->ifa_dev = in_dev;
412 	}
413 	if (LOOPBACK(ifa->ifa_local))
414 		ifa->ifa_scope = RT_SCOPE_HOST;
415 	return inet_insert_ifa(ifa);
416 }
417 
418 struct in_device *inetdev_by_index(int ifindex)
419 {
420 	struct net_device *dev;
421 	struct in_device *in_dev = NULL;
422 	read_lock(&dev_base_lock);
423 	dev = __dev_get_by_index(ifindex);
424 	if (dev)
425 		in_dev = in_dev_get(dev);
426 	read_unlock(&dev_base_lock);
427 	return in_dev;
428 }
429 
430 /* Called only from RTNL semaphored context. No locks. */
431 
432 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
433 				    __be32 mask)
434 {
435 	ASSERT_RTNL();
436 
437 	for_primary_ifa(in_dev) {
438 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
439 			return ifa;
440 	} endfor_ifa(in_dev);
441 	return NULL;
442 }
443 
444 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
445 {
446 	struct nlattr *tb[IFA_MAX+1];
447 	struct in_device *in_dev;
448 	struct ifaddrmsg *ifm;
449 	struct in_ifaddr *ifa, **ifap;
450 	int err = -EINVAL;
451 
452 	ASSERT_RTNL();
453 
454 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
455 	if (err < 0)
456 		goto errout;
457 
458 	ifm = nlmsg_data(nlh);
459 	in_dev = inetdev_by_index(ifm->ifa_index);
460 	if (in_dev == NULL) {
461 		err = -ENODEV;
462 		goto errout;
463 	}
464 
465 	__in_dev_put(in_dev);
466 
467 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
468 	     ifap = &ifa->ifa_next) {
469 		if (tb[IFA_LOCAL] &&
470 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
471 			continue;
472 
473 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
474 			continue;
475 
476 		if (tb[IFA_ADDRESS] &&
477 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
479 			continue;
480 
481 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
482 		return 0;
483 	}
484 
485 	err = -EADDRNOTAVAIL;
486 errout:
487 	return err;
488 }
489 
490 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
491 {
492 	struct nlattr *tb[IFA_MAX+1];
493 	struct in_ifaddr *ifa;
494 	struct ifaddrmsg *ifm;
495 	struct net_device *dev;
496 	struct in_device *in_dev;
497 	int err = -EINVAL;
498 
499 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500 	if (err < 0)
501 		goto errout;
502 
503 	ifm = nlmsg_data(nlh);
504 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
505 		err = -EINVAL;
506 		goto errout;
507 	}
508 
509 	dev = __dev_get_by_index(ifm->ifa_index);
510 	if (dev == NULL) {
511 		err = -ENODEV;
512 		goto errout;
513 	}
514 
515 	in_dev = __in_dev_get_rtnl(dev);
516 	if (in_dev == NULL) {
517 		err = -ENOBUFS;
518 		goto errout;
519 	}
520 
521 	ipv4_devconf_setall(in_dev);
522 
523 	ifa = inet_alloc_ifa();
524 	if (ifa == NULL) {
525 		/*
526 		 * A potential indev allocation can be left alive, it stays
527 		 * assigned to its device and is destroy with it.
528 		 */
529 		err = -ENOBUFS;
530 		goto errout;
531 	}
532 
533 	in_dev_hold(in_dev);
534 
535 	if (tb[IFA_ADDRESS] == NULL)
536 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
537 
538 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
539 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
540 	ifa->ifa_flags = ifm->ifa_flags;
541 	ifa->ifa_scope = ifm->ifa_scope;
542 	ifa->ifa_dev = in_dev;
543 
544 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
545 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
546 
547 	if (tb[IFA_BROADCAST])
548 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
549 
550 	if (tb[IFA_ANYCAST])
551 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
552 
553 	if (tb[IFA_LABEL])
554 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
555 	else
556 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
557 
558 	return ifa;
559 
560 errout:
561 	return ERR_PTR(err);
562 }
563 
564 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
565 {
566 	struct in_ifaddr *ifa;
567 
568 	ASSERT_RTNL();
569 
570 	ifa = rtm_to_ifaddr(nlh);
571 	if (IS_ERR(ifa))
572 		return PTR_ERR(ifa);
573 
574 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
575 }
576 
577 /*
578  *	Determine a default network mask, based on the IP address.
579  */
580 
581 static __inline__ int inet_abc_len(__be32 addr)
582 {
583 	int rc = -1;	/* Something else, probably a multicast. */
584 
585 	if (ZERONET(addr))
586 		rc = 0;
587 	else {
588 		__u32 haddr = ntohl(addr);
589 
590 		if (IN_CLASSA(haddr))
591 			rc = 8;
592 		else if (IN_CLASSB(haddr))
593 			rc = 16;
594 		else if (IN_CLASSC(haddr))
595 			rc = 24;
596 	}
597 
598 	return rc;
599 }
600 
601 
602 int devinet_ioctl(unsigned int cmd, void __user *arg)
603 {
604 	struct ifreq ifr;
605 	struct sockaddr_in sin_orig;
606 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
607 	struct in_device *in_dev;
608 	struct in_ifaddr **ifap = NULL;
609 	struct in_ifaddr *ifa = NULL;
610 	struct net_device *dev;
611 	char *colon;
612 	int ret = -EFAULT;
613 	int tryaddrmatch = 0;
614 
615 	/*
616 	 *	Fetch the caller's info block into kernel space
617 	 */
618 
619 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
620 		goto out;
621 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
622 
623 	/* save original address for comparison */
624 	memcpy(&sin_orig, sin, sizeof(*sin));
625 
626 	colon = strchr(ifr.ifr_name, ':');
627 	if (colon)
628 		*colon = 0;
629 
630 #ifdef CONFIG_KMOD
631 	dev_load(ifr.ifr_name);
632 #endif
633 
634 	switch (cmd) {
635 	case SIOCGIFADDR:	/* Get interface address */
636 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
637 	case SIOCGIFDSTADDR:	/* Get the destination address */
638 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
639 		/* Note that these ioctls will not sleep,
640 		   so that we do not impose a lock.
641 		   One day we will be forced to put shlock here (I mean SMP)
642 		 */
643 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
644 		memset(sin, 0, sizeof(*sin));
645 		sin->sin_family = AF_INET;
646 		break;
647 
648 	case SIOCSIFFLAGS:
649 		ret = -EACCES;
650 		if (!capable(CAP_NET_ADMIN))
651 			goto out;
652 		break;
653 	case SIOCSIFADDR:	/* Set interface address (and family) */
654 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
655 	case SIOCSIFDSTADDR:	/* Set the destination address */
656 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
657 		ret = -EACCES;
658 		if (!capable(CAP_NET_ADMIN))
659 			goto out;
660 		ret = -EINVAL;
661 		if (sin->sin_family != AF_INET)
662 			goto out;
663 		break;
664 	default:
665 		ret = -EINVAL;
666 		goto out;
667 	}
668 
669 	rtnl_lock();
670 
671 	ret = -ENODEV;
672 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
673 		goto done;
674 
675 	if (colon)
676 		*colon = ':';
677 
678 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
679 		if (tryaddrmatch) {
680 			/* Matthias Andree */
681 			/* compare label and address (4.4BSD style) */
682 			/* note: we only do this for a limited set of ioctls
683 			   and only if the original address family was AF_INET.
684 			   This is checked above. */
685 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
686 			     ifap = &ifa->ifa_next) {
687 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
688 				    sin_orig.sin_addr.s_addr ==
689 							ifa->ifa_address) {
690 					break; /* found */
691 				}
692 			}
693 		}
694 		/* we didn't get a match, maybe the application is
695 		   4.3BSD-style and passed in junk so we fall back to
696 		   comparing just the label */
697 		if (!ifa) {
698 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
699 			     ifap = &ifa->ifa_next)
700 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
701 					break;
702 		}
703 	}
704 
705 	ret = -EADDRNOTAVAIL;
706 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
707 		goto done;
708 
709 	switch (cmd) {
710 	case SIOCGIFADDR:	/* Get interface address */
711 		sin->sin_addr.s_addr = ifa->ifa_local;
712 		goto rarok;
713 
714 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
715 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
716 		goto rarok;
717 
718 	case SIOCGIFDSTADDR:	/* Get the destination address */
719 		sin->sin_addr.s_addr = ifa->ifa_address;
720 		goto rarok;
721 
722 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
723 		sin->sin_addr.s_addr = ifa->ifa_mask;
724 		goto rarok;
725 
726 	case SIOCSIFFLAGS:
727 		if (colon) {
728 			ret = -EADDRNOTAVAIL;
729 			if (!ifa)
730 				break;
731 			ret = 0;
732 			if (!(ifr.ifr_flags & IFF_UP))
733 				inet_del_ifa(in_dev, ifap, 1);
734 			break;
735 		}
736 		ret = dev_change_flags(dev, ifr.ifr_flags);
737 		break;
738 
739 	case SIOCSIFADDR:	/* Set interface address (and family) */
740 		ret = -EINVAL;
741 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
742 			break;
743 
744 		if (!ifa) {
745 			ret = -ENOBUFS;
746 			if ((ifa = inet_alloc_ifa()) == NULL)
747 				break;
748 			if (colon)
749 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
750 			else
751 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
752 		} else {
753 			ret = 0;
754 			if (ifa->ifa_local == sin->sin_addr.s_addr)
755 				break;
756 			inet_del_ifa(in_dev, ifap, 0);
757 			ifa->ifa_broadcast = 0;
758 			ifa->ifa_anycast = 0;
759 		}
760 
761 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
762 
763 		if (!(dev->flags & IFF_POINTOPOINT)) {
764 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
765 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
766 			if ((dev->flags & IFF_BROADCAST) &&
767 			    ifa->ifa_prefixlen < 31)
768 				ifa->ifa_broadcast = ifa->ifa_address |
769 						     ~ifa->ifa_mask;
770 		} else {
771 			ifa->ifa_prefixlen = 32;
772 			ifa->ifa_mask = inet_make_mask(32);
773 		}
774 		ret = inet_set_ifa(dev, ifa);
775 		break;
776 
777 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
778 		ret = 0;
779 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
780 			inet_del_ifa(in_dev, ifap, 0);
781 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
782 			inet_insert_ifa(ifa);
783 		}
784 		break;
785 
786 	case SIOCSIFDSTADDR:	/* Set the destination address */
787 		ret = 0;
788 		if (ifa->ifa_address == sin->sin_addr.s_addr)
789 			break;
790 		ret = -EINVAL;
791 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
792 			break;
793 		ret = 0;
794 		inet_del_ifa(in_dev, ifap, 0);
795 		ifa->ifa_address = sin->sin_addr.s_addr;
796 		inet_insert_ifa(ifa);
797 		break;
798 
799 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
800 
801 		/*
802 		 *	The mask we set must be legal.
803 		 */
804 		ret = -EINVAL;
805 		if (bad_mask(sin->sin_addr.s_addr, 0))
806 			break;
807 		ret = 0;
808 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
809 			__be32 old_mask = ifa->ifa_mask;
810 			inet_del_ifa(in_dev, ifap, 0);
811 			ifa->ifa_mask = sin->sin_addr.s_addr;
812 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
813 
814 			/* See if current broadcast address matches
815 			 * with current netmask, then recalculate
816 			 * the broadcast address. Otherwise it's a
817 			 * funny address, so don't touch it since
818 			 * the user seems to know what (s)he's doing...
819 			 */
820 			if ((dev->flags & IFF_BROADCAST) &&
821 			    (ifa->ifa_prefixlen < 31) &&
822 			    (ifa->ifa_broadcast ==
823 			     (ifa->ifa_local|~old_mask))) {
824 				ifa->ifa_broadcast = (ifa->ifa_local |
825 						      ~sin->sin_addr.s_addr);
826 			}
827 			inet_insert_ifa(ifa);
828 		}
829 		break;
830 	}
831 done:
832 	rtnl_unlock();
833 out:
834 	return ret;
835 rarok:
836 	rtnl_unlock();
837 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
838 	goto out;
839 }
840 
841 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
842 {
843 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
844 	struct in_ifaddr *ifa;
845 	struct ifreq ifr;
846 	int done = 0;
847 
848 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
849 		goto out;
850 
851 	for (; ifa; ifa = ifa->ifa_next) {
852 		if (!buf) {
853 			done += sizeof(ifr);
854 			continue;
855 		}
856 		if (len < (int) sizeof(ifr))
857 			break;
858 		memset(&ifr, 0, sizeof(struct ifreq));
859 		if (ifa->ifa_label)
860 			strcpy(ifr.ifr_name, ifa->ifa_label);
861 		else
862 			strcpy(ifr.ifr_name, dev->name);
863 
864 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
865 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
866 								ifa->ifa_local;
867 
868 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
869 			done = -EFAULT;
870 			break;
871 		}
872 		buf  += sizeof(struct ifreq);
873 		len  -= sizeof(struct ifreq);
874 		done += sizeof(struct ifreq);
875 	}
876 out:
877 	return done;
878 }
879 
880 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
881 {
882 	__be32 addr = 0;
883 	struct in_device *in_dev;
884 
885 	rcu_read_lock();
886 	in_dev = __in_dev_get_rcu(dev);
887 	if (!in_dev)
888 		goto no_in_dev;
889 
890 	for_primary_ifa(in_dev) {
891 		if (ifa->ifa_scope > scope)
892 			continue;
893 		if (!dst || inet_ifa_match(dst, ifa)) {
894 			addr = ifa->ifa_local;
895 			break;
896 		}
897 		if (!addr)
898 			addr = ifa->ifa_local;
899 	} endfor_ifa(in_dev);
900 no_in_dev:
901 	rcu_read_unlock();
902 
903 	if (addr)
904 		goto out;
905 
906 	/* Not loopback addresses on loopback should be preferred
907 	   in this case. It is importnat that lo is the first interface
908 	   in dev_base list.
909 	 */
910 	read_lock(&dev_base_lock);
911 	rcu_read_lock();
912 	for_each_netdev(dev) {
913 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
914 			continue;
915 
916 		for_primary_ifa(in_dev) {
917 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
918 			    ifa->ifa_scope <= scope) {
919 				addr = ifa->ifa_local;
920 				goto out_unlock_both;
921 			}
922 		} endfor_ifa(in_dev);
923 	}
924 out_unlock_both:
925 	read_unlock(&dev_base_lock);
926 	rcu_read_unlock();
927 out:
928 	return addr;
929 }
930 
931 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
932 			      __be32 local, int scope)
933 {
934 	int same = 0;
935 	__be32 addr = 0;
936 
937 	for_ifa(in_dev) {
938 		if (!addr &&
939 		    (local == ifa->ifa_local || !local) &&
940 		    ifa->ifa_scope <= scope) {
941 			addr = ifa->ifa_local;
942 			if (same)
943 				break;
944 		}
945 		if (!same) {
946 			same = (!local || inet_ifa_match(local, ifa)) &&
947 				(!dst || inet_ifa_match(dst, ifa));
948 			if (same && addr) {
949 				if (local || !dst)
950 					break;
951 				/* Is the selected addr into dst subnet? */
952 				if (inet_ifa_match(addr, ifa))
953 					break;
954 				/* No, then can we use new local src? */
955 				if (ifa->ifa_scope <= scope) {
956 					addr = ifa->ifa_local;
957 					break;
958 				}
959 				/* search for large dst subnet for addr */
960 				same = 0;
961 			}
962 		}
963 	} endfor_ifa(in_dev);
964 
965 	return same? addr : 0;
966 }
967 
968 /*
969  * Confirm that local IP address exists using wildcards:
970  * - dev: only on this interface, 0=any interface
971  * - dst: only in the same subnet as dst, 0=any dst
972  * - local: address, 0=autoselect the local address
973  * - scope: maximum allowed scope value for the local address
974  */
975 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
976 {
977 	__be32 addr = 0;
978 	struct in_device *in_dev;
979 
980 	if (dev) {
981 		rcu_read_lock();
982 		if ((in_dev = __in_dev_get_rcu(dev)))
983 			addr = confirm_addr_indev(in_dev, dst, local, scope);
984 		rcu_read_unlock();
985 
986 		return addr;
987 	}
988 
989 	read_lock(&dev_base_lock);
990 	rcu_read_lock();
991 	for_each_netdev(dev) {
992 		if ((in_dev = __in_dev_get_rcu(dev))) {
993 			addr = confirm_addr_indev(in_dev, dst, local, scope);
994 			if (addr)
995 				break;
996 		}
997 	}
998 	rcu_read_unlock();
999 	read_unlock(&dev_base_lock);
1000 
1001 	return addr;
1002 }
1003 
1004 /*
1005  *	Device notifier
1006  */
1007 
1008 int register_inetaddr_notifier(struct notifier_block *nb)
1009 {
1010 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1011 }
1012 
1013 int unregister_inetaddr_notifier(struct notifier_block *nb)
1014 {
1015 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1016 }
1017 
1018 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1019  * alias numbering and to create unique labels if possible.
1020 */
1021 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1022 {
1023 	struct in_ifaddr *ifa;
1024 	int named = 0;
1025 
1026 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1027 		char old[IFNAMSIZ], *dot;
1028 
1029 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1030 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 		if (named++ == 0)
1032 			continue;
1033 		dot = strchr(ifa->ifa_label, ':');
1034 		if (dot == NULL) {
1035 			sprintf(old, ":%d", named);
1036 			dot = old;
1037 		}
1038 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1039 			strcat(ifa->ifa_label, dot);
1040 		} else {
1041 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1042 		}
1043 	}
1044 }
1045 
1046 /* Called only under RTNL semaphore */
1047 
1048 static int inetdev_event(struct notifier_block *this, unsigned long event,
1049 			 void *ptr)
1050 {
1051 	struct net_device *dev = ptr;
1052 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1053 
1054 	ASSERT_RTNL();
1055 
1056 	if (!in_dev) {
1057 		if (event == NETDEV_REGISTER) {
1058 			in_dev = inetdev_init(dev);
1059 			if (!in_dev)
1060 				return notifier_from_errno(-ENOMEM);
1061 			if (dev == &loopback_dev) {
1062 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1063 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1064 			}
1065 		}
1066 		goto out;
1067 	}
1068 
1069 	switch (event) {
1070 	case NETDEV_REGISTER:
1071 		printk(KERN_DEBUG "inetdev_event: bug\n");
1072 		dev->ip_ptr = NULL;
1073 		break;
1074 	case NETDEV_UP:
1075 		if (dev->mtu < 68)
1076 			break;
1077 		if (dev == &loopback_dev) {
1078 			struct in_ifaddr *ifa;
1079 			if ((ifa = inet_alloc_ifa()) != NULL) {
1080 				ifa->ifa_local =
1081 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1082 				ifa->ifa_prefixlen = 8;
1083 				ifa->ifa_mask = inet_make_mask(8);
1084 				in_dev_hold(in_dev);
1085 				ifa->ifa_dev = in_dev;
1086 				ifa->ifa_scope = RT_SCOPE_HOST;
1087 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1088 				inet_insert_ifa(ifa);
1089 			}
1090 		}
1091 		ip_mc_up(in_dev);
1092 		break;
1093 	case NETDEV_DOWN:
1094 		ip_mc_down(in_dev);
1095 		break;
1096 	case NETDEV_CHANGEMTU:
1097 		if (dev->mtu >= 68)
1098 			break;
1099 		/* MTU falled under 68, disable IP */
1100 	case NETDEV_UNREGISTER:
1101 		inetdev_destroy(in_dev);
1102 		break;
1103 	case NETDEV_CHANGENAME:
1104 		/* Do not notify about label change, this event is
1105 		 * not interesting to applications using netlink.
1106 		 */
1107 		inetdev_changename(dev, in_dev);
1108 
1109 #ifdef CONFIG_SYSCTL
1110 		devinet_sysctl_unregister(&in_dev->cnf);
1111 		neigh_sysctl_unregister(in_dev->arp_parms);
1112 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1113 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1114 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1115 #endif
1116 		break;
1117 	}
1118 out:
1119 	return NOTIFY_DONE;
1120 }
1121 
1122 static struct notifier_block ip_netdev_notifier = {
1123 	.notifier_call =inetdev_event,
1124 };
1125 
1126 static inline size_t inet_nlmsg_size(void)
1127 {
1128 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1129 	       + nla_total_size(4) /* IFA_ADDRESS */
1130 	       + nla_total_size(4) /* IFA_LOCAL */
1131 	       + nla_total_size(4) /* IFA_BROADCAST */
1132 	       + nla_total_size(4) /* IFA_ANYCAST */
1133 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1134 }
1135 
1136 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1137 			    u32 pid, u32 seq, int event, unsigned int flags)
1138 {
1139 	struct ifaddrmsg *ifm;
1140 	struct nlmsghdr  *nlh;
1141 
1142 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1143 	if (nlh == NULL)
1144 		return -EMSGSIZE;
1145 
1146 	ifm = nlmsg_data(nlh);
1147 	ifm->ifa_family = AF_INET;
1148 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1149 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1150 	ifm->ifa_scope = ifa->ifa_scope;
1151 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1152 
1153 	if (ifa->ifa_address)
1154 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1155 
1156 	if (ifa->ifa_local)
1157 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1158 
1159 	if (ifa->ifa_broadcast)
1160 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1161 
1162 	if (ifa->ifa_anycast)
1163 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1164 
1165 	if (ifa->ifa_label[0])
1166 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1167 
1168 	return nlmsg_end(skb, nlh);
1169 
1170 nla_put_failure:
1171 	nlmsg_cancel(skb, nlh);
1172 	return -EMSGSIZE;
1173 }
1174 
1175 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1176 {
1177 	int idx, ip_idx;
1178 	struct net_device *dev;
1179 	struct in_device *in_dev;
1180 	struct in_ifaddr *ifa;
1181 	int s_ip_idx, s_idx = cb->args[0];
1182 
1183 	s_ip_idx = ip_idx = cb->args[1];
1184 	idx = 0;
1185 	for_each_netdev(dev) {
1186 		if (idx < s_idx)
1187 			goto cont;
1188 		if (idx > s_idx)
1189 			s_ip_idx = 0;
1190 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1191 			goto cont;
1192 
1193 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1194 		     ifa = ifa->ifa_next, ip_idx++) {
1195 			if (ip_idx < s_ip_idx)
1196 				continue;
1197 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1198 					     cb->nlh->nlmsg_seq,
1199 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1200 				goto done;
1201 		}
1202 cont:
1203 		idx++;
1204 	}
1205 
1206 done:
1207 	cb->args[0] = idx;
1208 	cb->args[1] = ip_idx;
1209 
1210 	return skb->len;
1211 }
1212 
1213 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1214 		      u32 pid)
1215 {
1216 	struct sk_buff *skb;
1217 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1218 	int err = -ENOBUFS;
1219 
1220 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1221 	if (skb == NULL)
1222 		goto errout;
1223 
1224 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1225 	if (err < 0) {
1226 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1227 		WARN_ON(err == -EMSGSIZE);
1228 		kfree_skb(skb);
1229 		goto errout;
1230 	}
1231 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1232 errout:
1233 	if (err < 0)
1234 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1235 }
1236 
1237 #ifdef CONFIG_SYSCTL
1238 
1239 static void devinet_copy_dflt_conf(int i)
1240 {
1241 	struct net_device *dev;
1242 
1243 	read_lock(&dev_base_lock);
1244 	for_each_netdev(dev) {
1245 		struct in_device *in_dev;
1246 		rcu_read_lock();
1247 		in_dev = __in_dev_get_rcu(dev);
1248 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1249 			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1250 		rcu_read_unlock();
1251 	}
1252 	read_unlock(&dev_base_lock);
1253 }
1254 
1255 static int devinet_conf_proc(ctl_table *ctl, int write,
1256 			     struct file* filp, void __user *buffer,
1257 			     size_t *lenp, loff_t *ppos)
1258 {
1259 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1260 
1261 	if (write) {
1262 		struct ipv4_devconf *cnf = ctl->extra1;
1263 		int i = (int *)ctl->data - cnf->data;
1264 
1265 		set_bit(i, cnf->state);
1266 
1267 		if (cnf == &ipv4_devconf_dflt)
1268 			devinet_copy_dflt_conf(i);
1269 	}
1270 
1271 	return ret;
1272 }
1273 
1274 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1275 			       void __user *oldval, size_t __user *oldlenp,
1276 			       void __user *newval, size_t newlen)
1277 {
1278 	struct ipv4_devconf *cnf;
1279 	int *valp = table->data;
1280 	int new;
1281 	int i;
1282 
1283 	if (!newval || !newlen)
1284 		return 0;
1285 
1286 	if (newlen != sizeof(int))
1287 		return -EINVAL;
1288 
1289 	if (get_user(new, (int __user *)newval))
1290 		return -EFAULT;
1291 
1292 	if (new == *valp)
1293 		return 0;
1294 
1295 	if (oldval && oldlenp) {
1296 		size_t len;
1297 
1298 		if (get_user(len, oldlenp))
1299 			return -EFAULT;
1300 
1301 		if (len) {
1302 			if (len > table->maxlen)
1303 				len = table->maxlen;
1304 			if (copy_to_user(oldval, valp, len))
1305 				return -EFAULT;
1306 			if (put_user(len, oldlenp))
1307 				return -EFAULT;
1308 		}
1309 	}
1310 
1311 	*valp = new;
1312 
1313 	cnf = table->extra1;
1314 	i = (int *)table->data - cnf->data;
1315 
1316 	set_bit(i, cnf->state);
1317 
1318 	if (cnf == &ipv4_devconf_dflt)
1319 		devinet_copy_dflt_conf(i);
1320 
1321 	return 1;
1322 }
1323 
1324 void inet_forward_change(void)
1325 {
1326 	struct net_device *dev;
1327 	int on = IPV4_DEVCONF_ALL(FORWARDING);
1328 
1329 	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1330 	IPV4_DEVCONF_DFLT(FORWARDING) = on;
1331 
1332 	read_lock(&dev_base_lock);
1333 	for_each_netdev(dev) {
1334 		struct in_device *in_dev;
1335 		rcu_read_lock();
1336 		in_dev = __in_dev_get_rcu(dev);
1337 		if (in_dev)
1338 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1339 		rcu_read_unlock();
1340 	}
1341 	read_unlock(&dev_base_lock);
1342 
1343 	rt_cache_flush(0);
1344 }
1345 
1346 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347 				  struct file* filp, void __user *buffer,
1348 				  size_t *lenp, loff_t *ppos)
1349 {
1350 	int *valp = ctl->data;
1351 	int val = *valp;
1352 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1353 
1354 	if (write && *valp != val) {
1355 		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1356 			inet_forward_change();
1357 		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1358 			rt_cache_flush(0);
1359 	}
1360 
1361 	return ret;
1362 }
1363 
1364 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1365 			 struct file* filp, void __user *buffer,
1366 			 size_t *lenp, loff_t *ppos)
1367 {
1368 	int *valp = ctl->data;
1369 	int val = *valp;
1370 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1371 
1372 	if (write && *valp != val)
1373 		rt_cache_flush(0);
1374 
1375 	return ret;
1376 }
1377 
1378 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1379 				  void __user *oldval, size_t __user *oldlenp,
1380 				  void __user *newval, size_t newlen)
1381 {
1382 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1383 				      newval, newlen);
1384 
1385 	if (ret == 1)
1386 		rt_cache_flush(0);
1387 
1388 	return ret;
1389 }
1390 
1391 
/*
 * Initializer for one ctl_table entry describing NET_IPV4_CONF_<attr>.
 *
 *   attr   - suffix of the NET_IPV4_CONF_* constant
 *   name   - procname string
 *   mval   - file mode
 *   proc   - proc_handler
 *   sysctl - strategy routine
 *
 * .data points into ipv4_devconf.data[] at index NET_IPV4_CONF_<attr> - 1
 * and .extra1 points at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry using the generic devinet_conf_* handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devinet_conf_* handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc handler and strategy. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/*
 * Mode 0644 entry whose handlers are ipv4_doint_and_flush() and
 * ipv4_doint_and_flush_strategy().
 */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1419 
/*
 * sysctl tree net/ipv4/conf/<dev>/<variable>.
 *
 * This static instance describes the "all" directory backed by
 * ipv4_devconf.  devinet_sysctl_register() duplicates it with kmemdup()
 * and then rewrites the .data/.extra1 fields, the directory name and
 * ctl_name, and the .child links of the copy.
 *
 * NOTE(review): devinet_vars is sized __NET_IPV4_CONF_MAX and
 * devinet_sysctl_register() skips its last slot; presumably that slot
 * stays zeroed as the table terminator -- confirm against the
 * NET_IPV4_CONF_* enum.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;	/* result of register_sysctl_table() */
	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];	/* the variables */
	ctl_table		devinet_dev[2];		/* "all" / device directory */
	ctl_table		devinet_conf_dir[2];	/* "conf" */
	ctl_table		devinet_proto_dir[2];	/* "ipv4" */
	ctl_table		devinet_root_dir[2];	/* "net" */
} devinet_sysctl = {
	.devinet_vars = {
		/* "forwarding" has its own proc handler. */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward,
					     devinet_conf_sysctl),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),

		/* Changing any of these calls rt_cache_flush(0). */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
	.devinet_dev = {
		{
			.ctl_name	= NET_PROTO_CONF_ALL,
			.procname	= "all",
			.mode		= 0555,
			.child		= devinet_sysctl.devinet_vars,
		},
	},
	.devinet_conf_dir = {
		{
			.ctl_name	= NET_IPV4_CONF,
			.procname	= "conf",
			.mode		= 0555,
			.child		= devinet_sysctl.devinet_dev,
		},
	},
	.devinet_proto_dir = {
		{
			.ctl_name	= NET_IPV4,
			.procname	= "ipv4",
			.mode		= 0555,
			.child 		= devinet_sysctl.devinet_conf_dir,
		},
	},
	.devinet_root_dir = {
		{
			.ctl_name	= CTL_NET,
			.procname 	= "net",
			.mode		= 0555,
			.child		= devinet_sysctl.devinet_proto_dir,
		},
	},
};
1491 
1492 static void devinet_sysctl_register(struct in_device *in_dev,
1493 				    struct ipv4_devconf *p)
1494 {
1495 	int i;
1496 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1497 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1498 						 GFP_KERNEL);
1499 	char *dev_name = NULL;
1500 
1501 	if (!t)
1502 		return;
1503 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1504 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1505 		t->devinet_vars[i].extra1 = p;
1506 	}
1507 
1508 	if (dev) {
1509 		dev_name = dev->name;
1510 		t->devinet_dev[0].ctl_name = dev->ifindex;
1511 	} else {
1512 		dev_name = "default";
1513 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1514 	}
1515 
1516 	/*
1517 	 * Make a copy of dev_name, because '.procname' is regarded as const
1518 	 * by sysctl and we wouldn't want anyone to change it under our feet
1519 	 * (see SIOCSIFNAME).
1520 	 */
1521 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1522 	if (!dev_name)
1523 	    goto free;
1524 
1525 	t->devinet_dev[0].procname    = dev_name;
1526 	t->devinet_dev[0].child	      = t->devinet_vars;
1527 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1528 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1529 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1530 
1531 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1532 	if (!t->sysctl_header)
1533 	    goto free_procname;
1534 
1535 	p->sysctl = t;
1536 	return;
1537 
1538 	/* error path */
1539  free_procname:
1540 	kfree(dev_name);
1541  free:
1542 	kfree(t);
1543 	return;
1544 }
1545 
1546 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1547 {
1548 	if (p->sysctl) {
1549 		struct devinet_sysctl_table *t = p->sysctl;
1550 		p->sysctl = NULL;
1551 		unregister_sysctl_table(t->sysctl_header);
1552 		kfree(t->devinet_dev[0].procname);
1553 		kfree(t);
1554 	}
1555 }
1556 #endif
1557 
/*
 * Boot-time initialisation of IPv4 device support: registers the PF_INET
 * gifconf handler, the netdevice notifier, and the rtnetlink handlers
 * for RTM_NEWADDR, RTM_DELADDR and RTM_GETADDR (dump only).  With
 * CONFIG_SYSCTL it also registers the static "all" sysctl tree and a
 * "default" tree bound to ipv4_devconf_dflt.
 */
void __init devinet_init(void)
{
	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
#ifdef CONFIG_SYSCTL
	/* The template itself serves as the "all" directory. */
	devinet_sysctl.sysctl_header =
		register_sysctl_table(devinet_sysctl.devinet_root_dir);
	/* NULL in_dev selects the "default" directory. */
	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
#endif
}
1572 
/* Symbols from this file exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1578