xref: /openbmc/linux/net/ipv4/devinet.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 
/*
 * Non-static IPv4 device configuration block.  Slots in .data are indexed
 * by (NET_IPV4_CONF_* - 1); the four knobs listed here start out set to 1
 * and every slot not listed is zero-initialised.
 */
66 struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
/*
 * Template configuration: inetdev_init() copies this block into the cnf
 * member of every in_device it creates.  Compared with ipv4_devconf it
 * additionally sets ACCEPT_SOURCE_ROUTE to 1.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
/* Shorthand: apply IPV4_DEVCONF() to the default template above. */
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
86 
/*
 * Attribute policy handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr(): the four address attributes are 32-bit values and the
 * label is a string limited to IFNAMSIZ - 1 bytes.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
/* Forward declaration: address-change announcer, defined later in this file. */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or removed below.
 */
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: used by inetdev_destroy() before its definition. */
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
/* Per-device sysctl (un)registration helpers; only built with CONFIG_SYSCTL. */
101 static void devinet_sysctl_register(struct in_device *in_dev,
102 				    struct ipv4_devconf *p);
103 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
104 #endif
105 
106 /* Locks all the inet devices. */
107 
108 static struct in_ifaddr *inet_alloc_ifa(void)
109 {
110 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
111 
112 	if (ifa) {
113 		INIT_RCU_HEAD(&ifa->rcu_head);
114 	}
115 
116 	return ifa;
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
/*
 * Release an in_ifaddr: the actual free is deferred to inet_rcu_free_ifa()
 * through call_rcu().
 */
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	BUG_TRAP(!idev->ifa_list);
137 	BUG_TRAP(!idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		printk("Freeing alive in_device %p\n", idev);
145 	else {
146 		kfree(idev);
147 	}
148 }
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	INIT_RCU_HEAD(&in_dev->rcu_head);
160 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164 		goto out_kfree;
165 	/* Reference in_dev->dev */
166 	dev_hold(dev);
167 #ifdef CONFIG_SYSCTL
168 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
170 #endif
171 
172 	/* Account for reference dev->ip_ptr (below) */
173 	in_dev_hold(in_dev);
174 
175 #ifdef CONFIG_SYSCTL
176 	devinet_sysctl_register(in_dev, &in_dev->cnf);
177 #endif
178 	ip_mc_init_dev(in_dev);
179 	if (dev->flags & IFF_UP)
180 		ip_mc_up(in_dev);
181 
182 	/* we can receive as soon as ip_ptr is set -- do this last */
183 	rcu_assign_pointer(dev->ip_ptr, in_dev);
184 out:
185 	return in_dev;
186 out_kfree:
187 	kfree(in_dev);
188 	in_dev = NULL;
189 	goto out;
190 }
191 
/*
 * RCU callback queued by inetdev_destroy(): drops one reference on the
 * in_device that embeds @head.
 */
192 static void in_dev_rcu_put(struct rcu_head *head)
193 {
194 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
195 	in_dev_put(idev);
196 }
197 
198 static void inetdev_destroy(struct in_device *in_dev)
199 {
200 	struct in_ifaddr *ifa;
201 	struct net_device *dev;
202 
203 	ASSERT_RTNL();
204 
205 	dev = in_dev->dev;
206 
207 	in_dev->dead = 1;
208 
209 	ip_mc_destroy_dev(in_dev);
210 
211 	while ((ifa = in_dev->ifa_list) != NULL) {
212 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
213 		inet_free_ifa(ifa);
214 	}
215 
216 #ifdef CONFIG_SYSCTL
217 	devinet_sysctl_unregister(&in_dev->cnf);
218 #endif
219 
220 	dev->ip_ptr = NULL;
221 
222 #ifdef CONFIG_SYSCTL
223 	neigh_sysctl_unregister(in_dev->arp_parms);
224 #endif
225 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
226 	arp_ifdown(dev);
227 
228 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
229 }
230 
231 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
232 {
233 	rcu_read_lock();
234 	for_primary_ifa(in_dev) {
235 		if (inet_ifa_match(a, ifa)) {
236 			if (!b || inet_ifa_match(b, ifa)) {
237 				rcu_read_unlock();
238 				return 1;
239 			}
240 		}
241 	} endfor_ifa(in_dev);
242 	rcu_read_unlock();
243 	return 0;
244 }
245 
/*
 * Unlink the address that *ifap points to from in_dev's list and announce
 * the removal (netlink RTM_DELADDR, then the inetaddr notifier chain with
 * NETDEV_DOWN).
 *
 *   ifap    - link slot (list head or a predecessor's ifa_next) that
 *             currently points at the address to remove
 *   destroy - non-zero: hand the removed address to inet_free_ifa();
 *             zero: the caller keeps ownership of it
 *   nlh/pid - passed through unchanged to rtmsg_ifa()
 *
 * If the removed address is a primary, its secondaries (same mask, same
 * subnet) are handled according to IN_DEV_PROMOTE_SECONDARIES(): either all
 * of them are removed as well, or the first one found is promoted to
 * primary.
 */
246 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
247 			 int destroy, struct nlmsghdr *nlh, u32 pid)
248 {
249 	struct in_ifaddr *promote = NULL;
250 	struct in_ifaddr *ifa, *ifa1 = *ifap;
251 	struct in_ifaddr *last_prim = in_dev->ifa_list;
252 	struct in_ifaddr *prev_prom = NULL;
253 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
254 
255 	ASSERT_RTNL();
256 
257 	/* 1. Deleting primary ifaddr forces deletion all secondaries
258 	 * unless alias promotion is set
259 	 **/
260 
261 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
262 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
263 
264 		while ((ifa = *ifap1) != NULL) {
			/* Remember the last later primary whose scope value is
			 * not smaller than ifa1's; a promoted address is
			 * re-linked right after it below. */
265 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
266 			    ifa1->ifa_scope <= ifa->ifa_scope)
267 				last_prim = ifa;
268 
			/* Skip entries that are not secondaries of ifa1;
			 * prev_prom ends up as the entry just before the
			 * promotion candidate (NULL if it directly follows
			 * ifa1). */
269 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
270 			    ifa1->ifa_mask != ifa->ifa_mask ||
271 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
272 				ifap1 = &ifa->ifa_next;
273 				prev_prom = ifa;
274 				continue;
275 			}
276 
277 			if (!do_promote) {
				/* No promotion: unlink, announce and free the
				 * secondary; ifap1 stays put so the loop
				 * re-examines the new successor. */
278 				*ifap1 = ifa->ifa_next;
279 
280 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
281 				blocking_notifier_call_chain(&inetaddr_chain,
282 						NETDEV_DOWN, ifa);
283 				inet_free_ifa(ifa);
284 			} else {
285 				promote = ifa;
286 				break;
287 			}
288 		}
289 	}
290 
291 	/* 2. Unlink it */
292 
293 	*ifap = ifa1->ifa_next;
294 
295 	/* 3. Announce address deletion */
296 
297 	/* Send message first, then call notifier.
298 	   At first sight, FIB update triggered by notifier
299 	   will refer to already deleted ifaddr, that could confuse
300 	   netlink listeners. It is not true: look, gated sees
301 	   that route deleted and if it still thinks that ifaddr
302 	   is valid, it will try to restore deleted routes... Grr.
303 	   So that, this order is correct.
304 	 */
305 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
306 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
307 
308 	if (promote) {
309 
		/* Move the promoted entry from its current position to
		 * directly after last_prim. */
310 		if (prev_prom) {
311 			prev_prom->ifa_next = promote->ifa_next;
312 			promote->ifa_next = last_prim->ifa_next;
313 			last_prim->ifa_next = promote;
314 		}
315 
316 		promote->ifa_flags &= ~IFA_F_SECONDARY;
317 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
318 		blocking_notifier_call_chain(&inetaddr_chain,
319 				NETDEV_UP, promote);
		/* Re-add FIB entries for the remaining addresses of the same
		 * subnet.
		 * NOTE(review): when prev_prom was set, promote->ifa_next has
		 * just been rewritten by the re-link above, so this walk
		 * starts at last_prim's old successor rather than at
		 * promote's old successor - confirm this is intended. */
320 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
321 			if (ifa1->ifa_mask != ifa->ifa_mask ||
322 			    !inet_ifa_match(ifa1->ifa_address, ifa))
323 					continue;
324 			fib_add_ifaddr(ifa);
325 		}
326 
327 	}
328 	if (destroy)
329 		inet_free_ifa(ifa1);
330 }
331 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0.
 */
332 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 			 int destroy)
334 {
335 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
336 }
337 
338 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
339 			     u32 pid)
340 {
341 	struct in_device *in_dev = ifa->ifa_dev;
342 	struct in_ifaddr *ifa1, **ifap, **last_primary;
343 
344 	ASSERT_RTNL();
345 
346 	if (!ifa->ifa_local) {
347 		inet_free_ifa(ifa);
348 		return 0;
349 	}
350 
351 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
352 	last_primary = &in_dev->ifa_list;
353 
354 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
355 	     ifap = &ifa1->ifa_next) {
356 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
357 		    ifa->ifa_scope <= ifa1->ifa_scope)
358 			last_primary = &ifa1->ifa_next;
359 		if (ifa1->ifa_mask == ifa->ifa_mask &&
360 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
361 			if (ifa1->ifa_local == ifa->ifa_local) {
362 				inet_free_ifa(ifa);
363 				return -EEXIST;
364 			}
365 			if (ifa1->ifa_scope != ifa->ifa_scope) {
366 				inet_free_ifa(ifa);
367 				return -EINVAL;
368 			}
369 			ifa->ifa_flags |= IFA_F_SECONDARY;
370 		}
371 	}
372 
373 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
374 		net_srandom(ifa->ifa_local);
375 		ifap = last_primary;
376 	}
377 
378 	ifa->ifa_next = *ifap;
379 	*ifap = ifa;
380 
381 	/* Send message first, then call notifier.
382 	   Notifier will trigger FIB update, so that
383 	   listeners of netlink will know about new ifaddr */
384 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
385 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
386 
387 	return 0;
388 }
389 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0, and returns its result.
 */
390 static int inet_insert_ifa(struct in_ifaddr *ifa)
391 {
392 	return __inet_insert_ifa(ifa, NULL, 0);
393 }
394 
395 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
396 {
397 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
398 
399 	ASSERT_RTNL();
400 
401 	if (!in_dev) {
402 		inet_free_ifa(ifa);
403 		return -ENOBUFS;
404 	}
405 	ipv4_devconf_setall(in_dev);
406 	if (ifa->ifa_dev != in_dev) {
407 		BUG_TRAP(!ifa->ifa_dev);
408 		in_dev_hold(in_dev);
409 		ifa->ifa_dev = in_dev;
410 	}
411 	if (LOOPBACK(ifa->ifa_local))
412 		ifa->ifa_scope = RT_SCOPE_HOST;
413 	return inet_insert_ifa(ifa);
414 }
415 
416 struct in_device *inetdev_by_index(int ifindex)
417 {
418 	struct net_device *dev;
419 	struct in_device *in_dev = NULL;
420 	read_lock(&dev_base_lock);
421 	dev = __dev_get_by_index(&init_net, ifindex);
422 	if (dev)
423 		in_dev = in_dev_get(dev);
424 	read_unlock(&dev_base_lock);
425 	return in_dev;
426 }
427 
428 /* Called only from RTNL semaphored context. No locks. */
429 
430 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
431 				    __be32 mask)
432 {
433 	ASSERT_RTNL();
434 
435 	for_primary_ifa(in_dev) {
436 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
437 			return ifa;
438 	} endfor_ifa(in_dev);
439 	return NULL;
440 }
441 
442 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
443 {
444 	struct nlattr *tb[IFA_MAX+1];
445 	struct in_device *in_dev;
446 	struct ifaddrmsg *ifm;
447 	struct in_ifaddr *ifa, **ifap;
448 	int err = -EINVAL;
449 
450 	ASSERT_RTNL();
451 
452 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 	if (err < 0)
454 		goto errout;
455 
456 	ifm = nlmsg_data(nlh);
457 	in_dev = inetdev_by_index(ifm->ifa_index);
458 	if (in_dev == NULL) {
459 		err = -ENODEV;
460 		goto errout;
461 	}
462 
463 	__in_dev_put(in_dev);
464 
465 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466 	     ifap = &ifa->ifa_next) {
467 		if (tb[IFA_LOCAL] &&
468 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469 			continue;
470 
471 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472 			continue;
473 
474 		if (tb[IFA_ADDRESS] &&
475 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477 			continue;
478 
479 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 		return 0;
481 	}
482 
483 	err = -EADDRNOTAVAIL;
484 errout:
485 	return err;
486 }
487 
488 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
489 {
490 	struct nlattr *tb[IFA_MAX+1];
491 	struct in_ifaddr *ifa;
492 	struct ifaddrmsg *ifm;
493 	struct net_device *dev;
494 	struct in_device *in_dev;
495 	int err = -EINVAL;
496 
497 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 	if (err < 0)
499 		goto errout;
500 
501 	ifm = nlmsg_data(nlh);
502 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
503 		err = -EINVAL;
504 		goto errout;
505 	}
506 
507 	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
508 	if (dev == NULL) {
509 		err = -ENODEV;
510 		goto errout;
511 	}
512 
513 	in_dev = __in_dev_get_rtnl(dev);
514 	if (in_dev == NULL) {
515 		err = -ENOBUFS;
516 		goto errout;
517 	}
518 
519 	ipv4_devconf_setall(in_dev);
520 
521 	ifa = inet_alloc_ifa();
522 	if (ifa == NULL) {
523 		/*
524 		 * A potential indev allocation can be left alive, it stays
525 		 * assigned to its device and is destroy with it.
526 		 */
527 		err = -ENOBUFS;
528 		goto errout;
529 	}
530 
531 	in_dev_hold(in_dev);
532 
533 	if (tb[IFA_ADDRESS] == NULL)
534 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
535 
536 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
537 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
538 	ifa->ifa_flags = ifm->ifa_flags;
539 	ifa->ifa_scope = ifm->ifa_scope;
540 	ifa->ifa_dev = in_dev;
541 
542 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
543 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
544 
545 	if (tb[IFA_BROADCAST])
546 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
547 
548 	if (tb[IFA_ANYCAST])
549 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
550 
551 	if (tb[IFA_LABEL])
552 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
553 	else
554 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
555 
556 	return ifa;
557 
558 errout:
559 	return ERR_PTR(err);
560 }
561 
/*
 * RTM_NEWADDR handler: converts the request with rtm_to_ifaddr() and inserts
 * the resulting address.  Returns the conversion error if that step fails,
 * otherwise the result of __inet_insert_ifa().
 */
562 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
563 {
564 	struct in_ifaddr *ifa;
565 
566 	ASSERT_RTNL();
567 
568 	ifa = rtm_to_ifaddr(nlh);
569 	if (IS_ERR(ifa))
570 		return PTR_ERR(ifa);
571 
572 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
573 }
574 
575 /*
576  *	Determine a default network mask, based on the IP address.
577  */
578 
579 static __inline__ int inet_abc_len(__be32 addr)
580 {
581 	int rc = -1;	/* Something else, probably a multicast. */
582 
583 	if (ZERONET(addr))
584 		rc = 0;
585 	else {
586 		__u32 haddr = ntohl(addr);
587 
588 		if (IN_CLASSA(haddr))
589 			rc = 8;
590 		else if (IN_CLASSB(haddr))
591 			rc = 16;
592 		else if (IN_CLASSC(haddr))
593 			rc = 24;
594 	}
595 
596 	return rc;
597 }
598 
599 
/*
 * Handle the IPv4 interface-address ioctls: SIOCGIF{ADDR,BRDADDR,DSTADDR,
 * NETMASK}, SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS.
 *
 *   cmd - ioctl number; any other value yields -EINVAL
 *   arg - user pointer to a struct ifreq; read on entry and, for the "get"
 *         commands, written back with the requested address
 *
 * The interface name may carry an alias suffix ("eth0:1"): the part before
 * the colon selects the device, the full string is compared against the
 * address labels.  The "set" commands and SIOCSIFFLAGS require
 * CAP_NET_ADMIN.  All list manipulation happens under the RTNL lock.
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever the helpers return).
 */
600 int devinet_ioctl(unsigned int cmd, void __user *arg)
601 {
602 	struct ifreq ifr;
603 	struct sockaddr_in sin_orig;
604 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
605 	struct in_device *in_dev;
606 	struct in_ifaddr **ifap = NULL;
607 	struct in_ifaddr *ifa = NULL;
608 	struct net_device *dev;
609 	char *colon;
610 	int ret = -EFAULT;
611 	int tryaddrmatch = 0;
612 
613 	/*
614 	 *	Fetch the caller's info block into kernel space
615 	 */
616 
617 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
618 		goto out;
	/* Force termination: the name comes from user space. */
619 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
620 
621 	/* save original address for comparison */
622 	memcpy(&sin_orig, sin, sizeof(*sin));
623 
	/* Temporarily cut off an alias suffix so that the device lookups
	 * below see the bare device name; it is restored after the lookup. */
624 	colon = strchr(ifr.ifr_name, ':');
625 	if (colon)
626 		*colon = 0;
627 
628 #ifdef CONFIG_KMOD
629 	dev_load(&init_net, ifr.ifr_name);
630 #endif
631 
	/* First pass over cmd: permission and argument checks only. */
632 	switch (cmd) {
633 	case SIOCGIFADDR:	/* Get interface address */
634 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
635 	case SIOCGIFDSTADDR:	/* Get the destination address */
636 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
637 		/* Note that these ioctls will not sleep,
638 		   so that we do not impose a lock.
639 		   One day we will be forced to put shlock here (I mean SMP)
640 		 */
641 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
642 		memset(sin, 0, sizeof(*sin));
643 		sin->sin_family = AF_INET;
644 		break;
645 
646 	case SIOCSIFFLAGS:
647 		ret = -EACCES;
648 		if (!capable(CAP_NET_ADMIN))
649 			goto out;
650 		break;
651 	case SIOCSIFADDR:	/* Set interface address (and family) */
652 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
653 	case SIOCSIFDSTADDR:	/* Set the destination address */
654 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
655 		ret = -EACCES;
656 		if (!capable(CAP_NET_ADMIN))
657 			goto out;
658 		ret = -EINVAL;
659 		if (sin->sin_family != AF_INET)
660 			goto out;
661 		break;
662 	default:
663 		ret = -EINVAL;
664 		goto out;
665 	}
666 
667 	rtnl_lock();
668 
669 	ret = -ENODEV;
670 	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
671 		goto done;
672 
	/* Put the alias suffix back: labels are compared with the full name. */
673 	if (colon)
674 		*colon = ':';
675 
676 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
677 		if (tryaddrmatch) {
678 			/* Matthias Andree */
679 			/* compare label and address (4.4BSD style) */
680 			/* note: we only do this for a limited set of ioctls
681 			   and only if the original address family was AF_INET.
682 			   This is checked above. */
683 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
684 			     ifap = &ifa->ifa_next) {
685 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
686 				    sin_orig.sin_addr.s_addr ==
687 							ifa->ifa_address) {
688 					break; /* found */
689 				}
690 			}
691 		}
692 		/* we didn't get a match, maybe the application is
693 		   4.3BSD-style and passed in junk so we fall back to
694 		   comparing just the label */
695 		if (!ifa) {
696 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
697 			     ifap = &ifa->ifa_next)
698 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
699 					break;
700 		}
701 	}
702 
	/* Only SIOCSIFADDR (may create an address) and SIOCSIFFLAGS can
	 * proceed without an existing address. */
703 	ret = -EADDRNOTAVAIL;
704 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
705 		goto done;
706 
	/* Second pass over cmd: perform the operation. */
707 	switch (cmd) {
708 	case SIOCGIFADDR:	/* Get interface address */
709 		sin->sin_addr.s_addr = ifa->ifa_local;
710 		goto rarok;
711 
712 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
713 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
714 		goto rarok;
715 
716 	case SIOCGIFDSTADDR:	/* Get the destination address */
717 		sin->sin_addr.s_addr = ifa->ifa_address;
718 		goto rarok;
719 
720 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
721 		sin->sin_addr.s_addr = ifa->ifa_mask;
722 		goto rarok;
723 
724 	case SIOCSIFFLAGS:
		/* With an alias name, clearing IFF_UP deletes that alias
		 * address; without one the flags go to the device itself. */
725 		if (colon) {
726 			ret = -EADDRNOTAVAIL;
727 			if (!ifa)
728 				break;
729 			ret = 0;
730 			if (!(ifr.ifr_flags & IFF_UP))
731 				inet_del_ifa(in_dev, ifap, 1);
732 			break;
733 		}
734 		ret = dev_change_flags(dev, ifr.ifr_flags);
735 		break;
736 
737 	case SIOCSIFADDR:	/* Set interface address (and family) */
738 		ret = -EINVAL;
739 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
740 			break;
741 
742 		if (!ifa) {
743 			ret = -ENOBUFS;
744 			if ((ifa = inet_alloc_ifa()) == NULL)
745 				break;
746 			if (colon)
747 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
748 			else
749 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
750 		} else {
751 			ret = 0;
752 			if (ifa->ifa_local == sin->sin_addr.s_addr)
753 				break;
			/* Unlink without freeing: the same structure is
			 * updated and re-inserted below. */
754 			inet_del_ifa(in_dev, ifap, 0);
755 			ifa->ifa_broadcast = 0;
756 			ifa->ifa_anycast = 0;
757 		}
758 
759 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
760 
		/* Non-point-to-point: classful prefix and, on broadcast
		 * devices with a prefix shorter than 31, a derived broadcast
		 * address.  Point-to-point: /32. */
761 		if (!(dev->flags & IFF_POINTOPOINT)) {
762 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
763 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
764 			if ((dev->flags & IFF_BROADCAST) &&
765 			    ifa->ifa_prefixlen < 31)
766 				ifa->ifa_broadcast = ifa->ifa_address |
767 						     ~ifa->ifa_mask;
768 		} else {
769 			ifa->ifa_prefixlen = 32;
770 			ifa->ifa_mask = inet_make_mask(32);
771 		}
772 		ret = inet_set_ifa(dev, ifa);
773 		break;
774 
775 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
776 		ret = 0;
777 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
778 			inet_del_ifa(in_dev, ifap, 0);
779 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
780 			inet_insert_ifa(ifa);
781 		}
782 		break;
783 
784 	case SIOCSIFDSTADDR:	/* Set the destination address */
785 		ret = 0;
786 		if (ifa->ifa_address == sin->sin_addr.s_addr)
787 			break;
788 		ret = -EINVAL;
789 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
790 			break;
791 		ret = 0;
792 		inet_del_ifa(in_dev, ifap, 0);
793 		ifa->ifa_address = sin->sin_addr.s_addr;
794 		inet_insert_ifa(ifa);
795 		break;
796 
797 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
798 
799 		/*
800 		 *	The mask we set must be legal.
801 		 */
802 		ret = -EINVAL;
803 		if (bad_mask(sin->sin_addr.s_addr, 0))
804 			break;
805 		ret = 0;
806 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
807 			__be32 old_mask = ifa->ifa_mask;
808 			inet_del_ifa(in_dev, ifap, 0);
809 			ifa->ifa_mask = sin->sin_addr.s_addr;
810 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
811 
812 			/* See if current broadcast address matches
813 			 * with current netmask, then recalculate
814 			 * the broadcast address. Otherwise it's a
815 			 * funny address, so don't touch it since
816 			 * the user seems to know what (s)he's doing...
817 			 */
818 			if ((dev->flags & IFF_BROADCAST) &&
819 			    (ifa->ifa_prefixlen < 31) &&
820 			    (ifa->ifa_broadcast ==
821 			     (ifa->ifa_local|~old_mask))) {
822 				ifa->ifa_broadcast = (ifa->ifa_local |
823 						      ~sin->sin_addr.s_addr);
824 			}
825 			inet_insert_ifa(ifa);
826 		}
827 		break;
828 	}
829 done:
830 	rtnl_unlock();
831 out:
832 	return ret;
	/* Exit path of the "get" commands: drop RTNL, then copy the filled-in
	 * ifreq back to user space. */
833 rarok:
834 	rtnl_unlock();
835 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
836 	goto out;
837 }
838 
839 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
840 {
841 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
842 	struct in_ifaddr *ifa;
843 	struct ifreq ifr;
844 	int done = 0;
845 
846 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
847 		goto out;
848 
849 	for (; ifa; ifa = ifa->ifa_next) {
850 		if (!buf) {
851 			done += sizeof(ifr);
852 			continue;
853 		}
854 		if (len < (int) sizeof(ifr))
855 			break;
856 		memset(&ifr, 0, sizeof(struct ifreq));
857 		if (ifa->ifa_label)
858 			strcpy(ifr.ifr_name, ifa->ifa_label);
859 		else
860 			strcpy(ifr.ifr_name, dev->name);
861 
862 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
863 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
864 								ifa->ifa_local;
865 
866 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
867 			done = -EFAULT;
868 			break;
869 		}
870 		buf  += sizeof(struct ifreq);
871 		len  -= sizeof(struct ifreq);
872 		done += sizeof(struct ifreq);
873 	}
874 out:
875 	return done;
876 }
877 
/*
 * Pick a local source address.
 *
 *   dev   - device to look at first
 *   dst   - destination; a primary address whose subnet contains it is
 *           preferred (0 = take the first primary that passes the scope
 *           test)
 *   scope - addresses with a scope value larger than this are skipped
 *
 * If @dev yields nothing, every device in init_net is scanned for the first
 * primary address that is not link scope and whose scope value is <= @scope.
 * Returns the chosen address, or 0 if none was found.
 */
878 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
879 {
880 	__be32 addr = 0;
881 	struct in_device *in_dev;
882 
883 	rcu_read_lock();
884 	in_dev = __in_dev_get_rcu(dev);
885 	if (!in_dev)
886 		goto no_in_dev;
887 
888 	for_primary_ifa(in_dev) {
889 		if (ifa->ifa_scope > scope)
890 			continue;
891 		if (!dst || inet_ifa_match(dst, ifa)) {
892 			addr = ifa->ifa_local;
893 			break;
894 		}
		/* Remember the first in-scope address as a fallback in case
		 * no subnet matches dst. */
895 		if (!addr)
896 			addr = ifa->ifa_local;
897 	} endfor_ifa(in_dev);
898 no_in_dev:
899 	rcu_read_unlock();
900 
901 	if (addr)
902 		goto out;
903 
904 	/* Non-loopback addresses configured on the loopback device should
905 	   be preferred in this case. It is important that lo is the first
906 	   interface in the dev_base list.
907 	 */
908 	read_lock(&dev_base_lock);
909 	rcu_read_lock();
	/* Note: the dev parameter is reused as the iteration cursor here. */
910 	for_each_netdev(&init_net, dev) {
911 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
912 			continue;
913 
914 		for_primary_ifa(in_dev) {
915 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
916 			    ifa->ifa_scope <= scope) {
917 				addr = ifa->ifa_local;
918 				goto out_unlock_both;
919 			}
920 		} endfor_ifa(in_dev);
921 	}
922 out_unlock_both:
923 	read_unlock(&dev_base_lock);
924 	rcu_read_unlock();
925 out:
926 	return addr;
927 }
928 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks all addresses of @in_dev tracking two things:
 *   addr - the first local address that equals @local (or any address when
 *          @local is 0) and whose scope value is <= @scope
 *   same - whether some address's subnet contains @local and @dst (a zero
 *          value for either acts as a wildcard)
 *
 * Returns addr when a subnet match was found, otherwise 0.
 */
929 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
930 			      __be32 local, int scope)
931 {
932 	int same = 0;
933 	__be32 addr = 0;
934 
935 	for_ifa(in_dev) {
		/* Candidate source address: first one passing the local and
		 * scope tests. */
936 		if (!addr &&
937 		    (local == ifa->ifa_local || !local) &&
938 		    ifa->ifa_scope <= scope) {
939 			addr = ifa->ifa_local;
940 			if (same)
941 				break;
942 		}
943 		if (!same) {
944 			same = (!local || inet_ifa_match(local, ifa)) &&
945 				(!dst || inet_ifa_match(dst, ifa));
946 			if (same && addr) {
947 				if (local || !dst)
948 					break;
949 				/* Is the selected addr into dst subnet? */
950 				if (inet_ifa_match(addr, ifa))
951 					break;
952 				/* No, then can we use new local src? */
953 				if (ifa->ifa_scope <= scope) {
954 					addr = ifa->ifa_local;
955 					break;
956 				}
957 				/* search for large dst subnet for addr */
958 				same = 0;
959 			}
960 		}
961 	} endfor_ifa(in_dev);
962 
963 	return same? addr : 0;
964 }
965 
966 /*
967  * Confirm that local IP address exists using wildcards:
968  * - dev: only on this interface, 0=any interface
969  * - dst: only in the same subnet as dst, 0=any dst
970  * - local: address, 0=autoselect the local address
971  * - scope: maximum allowed scope value for the local address
972  */
973 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
974 {
975 	__be32 addr = 0;
976 	struct in_device *in_dev;
977 
978 	if (dev) {
979 		rcu_read_lock();
980 		if ((in_dev = __in_dev_get_rcu(dev)))
981 			addr = confirm_addr_indev(in_dev, dst, local, scope);
982 		rcu_read_unlock();
983 
984 		return addr;
985 	}
986 
987 	read_lock(&dev_base_lock);
988 	rcu_read_lock();
989 	for_each_netdev(&init_net, dev) {
990 		if ((in_dev = __in_dev_get_rcu(dev))) {
991 			addr = confirm_addr_indev(in_dev, dst, local, scope);
992 			if (addr)
993 				break;
994 		}
995 	}
996 	rcu_read_unlock();
997 	read_unlock(&dev_base_lock);
998 
999 	return addr;
1000 }
1001 
1002 /*
1003  *	Device notifier
1004  */
1005 
/*
 * Add @nb to the inetaddr notifier chain, which this file invokes with
 * NETDEV_UP / NETDEV_DOWN and the affected in_ifaddr.  Returns the result
 * of blocking_notifier_chain_register().
 */
1006 int register_inetaddr_notifier(struct notifier_block *nb)
1007 {
1008 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1009 }
1010 
/*
 * Remove @nb from the inetaddr notifier chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
1011 int unregister_inetaddr_notifier(struct notifier_block *nb)
1012 {
1013 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1014 }
1015 
1016 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1017  * alias numbering and to create unique labels if possible.
1018 */
1019 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1020 {
1021 	struct in_ifaddr *ifa;
1022 	int named = 0;
1023 
1024 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1025 		char old[IFNAMSIZ], *dot;
1026 
1027 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1028 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029 		if (named++ == 0)
1030 			continue;
1031 		dot = strchr(ifa->ifa_label, ':');
1032 		if (dot == NULL) {
1033 			sprintf(old, ":%d", named);
1034 			dot = old;
1035 		}
1036 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1037 			strcat(ifa->ifa_label, dot);
1038 		} else {
1039 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1040 		}
1041 	}
1042 }
1043 
1044 /* Called only under RTNL semaphore */
1045 
/*
 * Netdevice notifier callback (registered through ip_netdev_notifier).
 *
 *   event - NETDEV_* event code
 *   ptr   - the struct net_device the event refers to
 *
 * Devices outside init_net are ignored.  For a device without IPv4 state,
 * NETDEV_REGISTER creates it; every other event is a no-op.  Returns
 * NOTIFY_DONE, except when creating the in_device fails, in which case the
 * -ENOMEM error is returned via notifier_from_errno().
 */
1046 static int inetdev_event(struct notifier_block *this, unsigned long event,
1047 			 void *ptr)
1048 {
1049 	struct net_device *dev = ptr;
1050 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1051 
1052 	if (dev->nd_net != &init_net)
1053 		return NOTIFY_DONE;
1054 
1055 	ASSERT_RTNL();
1056 
1057 	if (!in_dev) {
1058 		if (event == NETDEV_REGISTER) {
1059 			in_dev = inetdev_init(dev);
1060 			if (!in_dev)
1061 				return notifier_from_errno(-ENOMEM);
			/* Loopback devices get the NOXFRM and NOPOLICY
			 * settings switched on. */
1062 			if (dev->flags & IFF_LOOPBACK) {
1063 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1064 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1065 			}
1066 		}
1067 		goto out;
1068 	}
1069 
1070 	switch (event) {
1071 	case NETDEV_REGISTER:
		/* Registration of a device that already has IPv4 state is
		 * unexpected: log it and clear the pointer. */
1072 		printk(KERN_DEBUG "inetdev_event: bug\n");
1073 		dev->ip_ptr = NULL;
1074 		break;
1075 	case NETDEV_UP:
1076 		if (dev->mtu < 68)
1077 			break;
		/* Loopback devices automatically get INADDR_LOOPBACK/8 with
		 * host scope when they come up. */
1078 		if (dev->flags & IFF_LOOPBACK) {
1079 			struct in_ifaddr *ifa;
1080 			if ((ifa = inet_alloc_ifa()) != NULL) {
1081 				ifa->ifa_local =
1082 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083 				ifa->ifa_prefixlen = 8;
1084 				ifa->ifa_mask = inet_make_mask(8);
1085 				in_dev_hold(in_dev);
1086 				ifa->ifa_dev = in_dev;
1087 				ifa->ifa_scope = RT_SCOPE_HOST;
1088 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089 				inet_insert_ifa(ifa);
1090 			}
1091 		}
1092 		ip_mc_up(in_dev);
1093 		break;
1094 	case NETDEV_DOWN:
1095 		ip_mc_down(in_dev);
1096 		break;
1097 	case NETDEV_CHANGEMTU:
1098 		if (dev->mtu >= 68)
1099 			break;
1100 		/* MTU fell below 68: disable IP (intentional fall through) */
1101 	case NETDEV_UNREGISTER:
1102 		inetdev_destroy(in_dev);
1103 		break;
1104 	case NETDEV_CHANGENAME:
1105 		/* Do not notify about label change, this event is
1106 		 * not interesting to applications using netlink.
1107 		 */
1108 		inetdev_changename(dev, in_dev);
1109 
		/* Drop and re-create the sysctl registrations after the
		 * device rename. */
1110 #ifdef CONFIG_SYSCTL
1111 		devinet_sysctl_unregister(&in_dev->cnf);
1112 		neigh_sysctl_unregister(in_dev->arp_parms);
1113 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1114 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1115 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1116 #endif
1117 		break;
1118 	}
1119 out:
1120 	return NOTIFY_DONE;
1121 }
1122 
/* Notifier block whose callback is inetdev_event(). */
1123 static struct notifier_block ip_netdev_notifier = {
1124 	.notifier_call =inetdev_event,
1125 };
1126 
1127 static inline size_t inet_nlmsg_size(void)
1128 {
1129 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1130 	       + nla_total_size(4) /* IFA_ADDRESS */
1131 	       + nla_total_size(4) /* IFA_LOCAL */
1132 	       + nla_total_size(4) /* IFA_BROADCAST */
1133 	       + nla_total_size(4) /* IFA_ANYCAST */
1134 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1135 }
1136 
/*
 * Append one address message describing @ifa to @skb.
 *
 *   pid, seq, event, flags - passed to nlmsg_put() for the message header
 *
 * Attributes are only emitted for non-zero fields (and a non-empty label).
 * IFA_F_PERMANENT is always OR-ed into the reported flags.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the header
 * or an attribute does not fit; in the attribute case the partially built
 * message is cancelled.
 */
1137 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1138 			    u32 pid, u32 seq, int event, unsigned int flags)
1139 {
1140 	struct ifaddrmsg *ifm;
1141 	struct nlmsghdr  *nlh;
1142 
1143 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1144 	if (nlh == NULL)
1145 		return -EMSGSIZE;
1146 
1147 	ifm = nlmsg_data(nlh);
1148 	ifm->ifa_family = AF_INET;
1149 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1150 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1151 	ifm->ifa_scope = ifa->ifa_scope;
1152 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1153 
	/* NOTE(review): the NLA_PUT_* macros presumably jump to
	 * nla_put_failure when the attribute does not fit - the label is
	 * otherwise unreferenced; confirm against the macro definitions. */
1154 	if (ifa->ifa_address)
1155 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1156 
1157 	if (ifa->ifa_local)
1158 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1159 
1160 	if (ifa->ifa_broadcast)
1161 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1162 
1163 	if (ifa->ifa_anycast)
1164 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1165 
1166 	if (ifa->ifa_label[0])
1167 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168 
1169 	return nlmsg_end(skb, nlh);
1170 
1171 nla_put_failure:
1172 	nlmsg_cancel(skb, nlh);
1173 	return -EMSGSIZE;
1174 }
1175 
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177 {
1178 	int idx, ip_idx;
1179 	struct net_device *dev;
1180 	struct in_device *in_dev;
1181 	struct in_ifaddr *ifa;
1182 	int s_ip_idx, s_idx = cb->args[0];
1183 
1184 	s_ip_idx = ip_idx = cb->args[1];
1185 	idx = 0;
1186 	for_each_netdev(&init_net, dev) {
1187 		if (idx < s_idx)
1188 			goto cont;
1189 		if (idx > s_idx)
1190 			s_ip_idx = 0;
1191 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1192 			goto cont;
1193 
1194 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 		     ifa = ifa->ifa_next, ip_idx++) {
1196 			if (ip_idx < s_ip_idx)
1197 				continue;
1198 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 					     cb->nlh->nlmsg_seq,
1200 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1201 				goto done;
1202 		}
1203 cont:
1204 		idx++;
1205 	}
1206 
1207 done:
1208 	cb->args[0] = idx;
1209 	cb->args[1] = ip_idx;
1210 
1211 	return skb->len;
1212 }
1213 
1214 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1215 		      u32 pid)
1216 {
1217 	struct sk_buff *skb;
1218 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219 	int err = -ENOBUFS;
1220 
1221 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222 	if (skb == NULL)
1223 		goto errout;
1224 
1225 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226 	if (err < 0) {
1227 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228 		WARN_ON(err == -EMSGSIZE);
1229 		kfree_skb(skb);
1230 		goto errout;
1231 	}
1232 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233 errout:
1234 	if (err < 0)
1235 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1236 }
1237 
1238 #ifdef CONFIG_SYSCTL
1239 
1240 static void devinet_copy_dflt_conf(int i)
1241 {
1242 	struct net_device *dev;
1243 
1244 	read_lock(&dev_base_lock);
1245 	for_each_netdev(&init_net, dev) {
1246 		struct in_device *in_dev;
1247 		rcu_read_lock();
1248 		in_dev = __in_dev_get_rcu(dev);
1249 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1250 			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1251 		rcu_read_unlock();
1252 	}
1253 	read_unlock(&dev_base_lock);
1254 }
1255 
1256 static int devinet_conf_proc(ctl_table *ctl, int write,
1257 			     struct file* filp, void __user *buffer,
1258 			     size_t *lenp, loff_t *ppos)
1259 {
1260 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1261 
1262 	if (write) {
1263 		struct ipv4_devconf *cnf = ctl->extra1;
1264 		int i = (int *)ctl->data - cnf->data;
1265 
1266 		set_bit(i, cnf->state);
1267 
1268 		if (cnf == &ipv4_devconf_dflt)
1269 			devinet_copy_dflt_conf(i);
1270 	}
1271 
1272 	return ret;
1273 }
1274 
1275 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1276 			       void __user *oldval, size_t __user *oldlenp,
1277 			       void __user *newval, size_t newlen)
1278 {
1279 	struct ipv4_devconf *cnf;
1280 	int *valp = table->data;
1281 	int new;
1282 	int i;
1283 
1284 	if (!newval || !newlen)
1285 		return 0;
1286 
1287 	if (newlen != sizeof(int))
1288 		return -EINVAL;
1289 
1290 	if (get_user(new, (int __user *)newval))
1291 		return -EFAULT;
1292 
1293 	if (new == *valp)
1294 		return 0;
1295 
1296 	if (oldval && oldlenp) {
1297 		size_t len;
1298 
1299 		if (get_user(len, oldlenp))
1300 			return -EFAULT;
1301 
1302 		if (len) {
1303 			if (len > table->maxlen)
1304 				len = table->maxlen;
1305 			if (copy_to_user(oldval, valp, len))
1306 				return -EFAULT;
1307 			if (put_user(len, oldlenp))
1308 				return -EFAULT;
1309 		}
1310 	}
1311 
1312 	*valp = new;
1313 
1314 	cnf = table->extra1;
1315 	i = (int *)table->data - cnf->data;
1316 
1317 	set_bit(i, cnf->state);
1318 
1319 	if (cnf == &ipv4_devconf_dflt)
1320 		devinet_copy_dflt_conf(i);
1321 
1322 	return 1;
1323 }
1324 
1325 void inet_forward_change(void)
1326 {
1327 	struct net_device *dev;
1328 	int on = IPV4_DEVCONF_ALL(FORWARDING);
1329 
1330 	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1331 	IPV4_DEVCONF_DFLT(FORWARDING) = on;
1332 
1333 	read_lock(&dev_base_lock);
1334 	for_each_netdev(&init_net, dev) {
1335 		struct in_device *in_dev;
1336 		rcu_read_lock();
1337 		in_dev = __in_dev_get_rcu(dev);
1338 		if (in_dev)
1339 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1340 		rcu_read_unlock();
1341 	}
1342 	read_unlock(&dev_base_lock);
1343 
1344 	rt_cache_flush(0);
1345 }
1346 
1347 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1348 				  struct file* filp, void __user *buffer,
1349 				  size_t *lenp, loff_t *ppos)
1350 {
1351 	int *valp = ctl->data;
1352 	int val = *valp;
1353 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 
1355 	if (write && *valp != val) {
1356 		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1357 			inet_forward_change();
1358 		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1359 			rt_cache_flush(0);
1360 	}
1361 
1362 	return ret;
1363 }
1364 
1365 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1366 			 struct file* filp, void __user *buffer,
1367 			 size_t *lenp, loff_t *ppos)
1368 {
1369 	int *valp = ctl->data;
1370 	int val = *valp;
1371 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1372 
1373 	if (write && *valp != val)
1374 		rt_cache_flush(0);
1375 
1376 	return ret;
1377 }
1378 
1379 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1380 				  void __user *oldval, size_t __user *oldlenp,
1381 				  void __user *newval, size_t newlen)
1382 {
1383 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1384 				      newval, newlen);
1385 
1386 	if (ret == 1)
1387 		rt_cache_flush(0);
1388 
1389 	return ret;
1390 }
1391 
1392 
/*
 * Build one ctl_table initializer for the devconf variable
 * NET_IPV4_CONF_<attr>.  .data points at the variable's slot inside the
 * global ipv4_devconf (slot index is the ctl number minus one) and .extra1
 * at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry with caller-supplied proc handler and strategy routine. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler, strat)

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, devinet_conf_proc, \
				     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry whose handlers also flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1420 
1421 static struct devinet_sysctl_table {
1422 	struct ctl_table_header *sysctl_header;
1423 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1424 	ctl_table		devinet_dev[2];
1425 	ctl_table		devinet_conf_dir[2];
1426 	ctl_table		devinet_proto_dir[2];
1427 	ctl_table		devinet_root_dir[2];
1428 } devinet_sysctl = {
1429 	.devinet_vars = {
1430 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1431 					     devinet_sysctl_forward,
1432 					     devinet_conf_sysctl),
1433 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1434 
1435 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1436 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1437 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1438 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1439 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1440 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1441 					"accept_source_route"),
1442 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1443 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1444 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1445 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1446 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1447 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1449 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1450 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1451 
1452 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1453 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1454 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1455 					      "force_igmp_version"),
1456 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1457 					      "promote_secondaries"),
1458 	},
1459 	.devinet_dev = {
1460 		{
1461 			.ctl_name	= NET_PROTO_CONF_ALL,
1462 			.procname	= "all",
1463 			.mode		= 0555,
1464 			.child		= devinet_sysctl.devinet_vars,
1465 		},
1466 	},
1467 	.devinet_conf_dir = {
1468 		{
1469 			.ctl_name	= NET_IPV4_CONF,
1470 			.procname	= "conf",
1471 			.mode		= 0555,
1472 			.child		= devinet_sysctl.devinet_dev,
1473 		},
1474 	},
1475 	.devinet_proto_dir = {
1476 		{
1477 			.ctl_name	= NET_IPV4,
1478 			.procname	= "ipv4",
1479 			.mode		= 0555,
1480 			.child 		= devinet_sysctl.devinet_conf_dir,
1481 		},
1482 	},
1483 	.devinet_root_dir = {
1484 		{
1485 			.ctl_name	= CTL_NET,
1486 			.procname 	= "net",
1487 			.mode		= 0555,
1488 			.child		= devinet_sysctl.devinet_proto_dir,
1489 		},
1490 	},
1491 };
1492 
1493 static void devinet_sysctl_register(struct in_device *in_dev,
1494 				    struct ipv4_devconf *p)
1495 {
1496 	int i;
1497 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1498 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1499 						 GFP_KERNEL);
1500 	char *dev_name = NULL;
1501 
1502 	if (!t)
1503 		return;
1504 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1505 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1506 		t->devinet_vars[i].extra1 = p;
1507 	}
1508 
1509 	if (dev) {
1510 		dev_name = dev->name;
1511 		t->devinet_dev[0].ctl_name = dev->ifindex;
1512 	} else {
1513 		dev_name = "default";
1514 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1515 	}
1516 
1517 	/*
1518 	 * Make a copy of dev_name, because '.procname' is regarded as const
1519 	 * by sysctl and we wouldn't want anyone to change it under our feet
1520 	 * (see SIOCSIFNAME).
1521 	 */
1522 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1523 	if (!dev_name)
1524 	    goto free;
1525 
1526 	t->devinet_dev[0].procname    = dev_name;
1527 	t->devinet_dev[0].child	      = t->devinet_vars;
1528 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1529 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1530 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1531 
1532 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1533 	if (!t->sysctl_header)
1534 	    goto free_procname;
1535 
1536 	p->sysctl = t;
1537 	return;
1538 
1539 	/* error path */
1540  free_procname:
1541 	kfree(dev_name);
1542  free:
1543 	kfree(t);
1544 	return;
1545 }
1546 
1547 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1548 {
1549 	if (p->sysctl) {
1550 		struct devinet_sysctl_table *t = p->sysctl;
1551 		p->sysctl = NULL;
1552 		unregister_sysctl_table(t->sysctl_header);
1553 		kfree(t->devinet_dev[0].procname);
1554 		kfree(t);
1555 	}
1556 }
1557 #endif
1558 
1559 void __init devinet_init(void)
1560 {
1561 	register_gifconf(PF_INET, inet_gifconf);
1562 	register_netdevice_notifier(&ip_netdev_notifier);
1563 
1564 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1565 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1566 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1567 #ifdef CONFIG_SYSCTL
1568 	devinet_sysctl.sysctl_header =
1569 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
1570 	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1571 #endif
1572 }
1573 
/* Symbols exported from this file. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);

/* inetaddr notifier chain registration */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1579