xref: /openbmc/linux/net/ipv4/devinet.c (revision b454cc66)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
40 #include <linux/mm.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
43 #include <linux/in.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_addr.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/notifier.h>
55 #include <linux/inetdevice.h>
56 #include <linux/igmp.h>
57 #ifdef CONFIG_SYSCTL
58 #include <linux/sysctl.h>
59 #endif
60 #include <linux/kmod.h>
61 
62 #include <net/arp.h>
63 #include <net/ip.h>
64 #include <net/route.h>
65 #include <net/ip_fib.h>
66 #include <net/netlink.h>
67 
68 struct ipv4_devconf ipv4_devconf = {
69 	.accept_redirects = 1,
70 	.send_redirects =  1,
71 	.secure_redirects = 1,
72 	.shared_media =	  1,
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.accept_redirects =  1,
77 	.send_redirects =    1,
78 	.secure_redirects =  1,
79 	.shared_media =	     1,
80 	.accept_source_route = 1,
81 };
82 
83 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
84 	[IFA_LOCAL]     	= { .type = NLA_U32 },
85 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
86 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
87 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
88 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
89 };
90 
91 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
92 
93 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
94 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
95 			 int destroy);
96 #ifdef CONFIG_SYSCTL
97 static void devinet_sysctl_register(struct in_device *in_dev,
98 				    struct ipv4_devconf *p);
99 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
100 #endif
101 
102 /* Locks all the inet devices. */
103 
104 static struct in_ifaddr *inet_alloc_ifa(void)
105 {
106 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
107 
108 	if (ifa) {
109 		INIT_RCU_HEAD(&ifa->rcu_head);
110 	}
111 
112 	return ifa;
113 }
114 
115 static void inet_rcu_free_ifa(struct rcu_head *head)
116 {
117 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
118 	if (ifa->ifa_dev)
119 		in_dev_put(ifa->ifa_dev);
120 	kfree(ifa);
121 }
122 
123 static inline void inet_free_ifa(struct in_ifaddr *ifa)
124 {
125 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
126 }
127 
128 void in_dev_finish_destroy(struct in_device *idev)
129 {
130 	struct net_device *dev = idev->dev;
131 
132 	BUG_TRAP(!idev->ifa_list);
133 	BUG_TRAP(!idev->mc_list);
134 #ifdef NET_REFCNT_DEBUG
135 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
136 	       idev, dev ? dev->name : "NIL");
137 #endif
138 	dev_put(dev);
139 	if (!idev->dead)
140 		printk("Freeing alive in_device %p\n", idev);
141 	else {
142 		kfree(idev);
143 	}
144 }
145 
146 struct in_device *inetdev_init(struct net_device *dev)
147 {
148 	struct in_device *in_dev;
149 
150 	ASSERT_RTNL();
151 
152 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
153 	if (!in_dev)
154 		goto out;
155 	INIT_RCU_HEAD(&in_dev->rcu_head);
156 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
157 	in_dev->cnf.sysctl = NULL;
158 	in_dev->dev = dev;
159 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
160 		goto out_kfree;
161 	/* Reference in_dev->dev */
162 	dev_hold(dev);
163 #ifdef CONFIG_SYSCTL
164 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
165 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
166 #endif
167 
168 	/* Account for reference dev->ip_ptr (below) */
169 	in_dev_hold(in_dev);
170 
171 #ifdef CONFIG_SYSCTL
172 	devinet_sysctl_register(in_dev, &in_dev->cnf);
173 #endif
174 	ip_mc_init_dev(in_dev);
175 	if (dev->flags & IFF_UP)
176 		ip_mc_up(in_dev);
177 
178 	/* we can receive as soon as ip_ptr is set -- do this last */
179 	rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181 	return in_dev;
182 out_kfree:
183 	kfree(in_dev);
184 	in_dev = NULL;
185 	goto out;
186 }
187 
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191 	in_dev_put(idev);
192 }
193 
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196 	struct in_ifaddr *ifa;
197 	struct net_device *dev;
198 
199 	ASSERT_RTNL();
200 
201 	dev = in_dev->dev;
202 	if (dev == &loopback_dev)
203 		return;
204 
205 	in_dev->dead = 1;
206 
207 	ip_mc_destroy_dev(in_dev);
208 
209 	while ((ifa = in_dev->ifa_list) != NULL) {
210 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
211 		inet_free_ifa(ifa);
212 	}
213 
214 #ifdef CONFIG_SYSCTL
215 	devinet_sysctl_unregister(&in_dev->cnf);
216 #endif
217 
218 	dev->ip_ptr = NULL;
219 
220 #ifdef CONFIG_SYSCTL
221 	neigh_sysctl_unregister(in_dev->arp_parms);
222 #endif
223 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
224 	arp_ifdown(dev);
225 
226 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
227 }
228 
229 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
230 {
231 	rcu_read_lock();
232 	for_primary_ifa(in_dev) {
233 		if (inet_ifa_match(a, ifa)) {
234 			if (!b || inet_ifa_match(b, ifa)) {
235 				rcu_read_unlock();
236 				return 1;
237 			}
238 		}
239 	} endfor_ifa(in_dev);
240 	rcu_read_unlock();
241 	return 0;
242 }
243 
/*
 * Remove the address that *ifap points at from in_dev's address list.
 * Must be called with RTNL held.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (list head or some ifa_next) that currently points at the
 *           address to remove
 * @destroy: when non-zero the removed address is freed here and, if the
 *           list became empty, inetdev_destroy() is called; when zero the
 *           address is only unlinked and stays owned by the caller
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is removed, the secondaries with the same mask
 * inside its subnet are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary.
 */
244 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
245 			 int destroy, struct nlmsghdr *nlh, u32 pid)
246 {
247 	struct in_ifaddr *promote = NULL;
248 	struct in_ifaddr *ifa, *ifa1 = *ifap;
249 	struct in_ifaddr *last_prim = in_dev->ifa_list;
250 	struct in_ifaddr *prev_prom = NULL;
251 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
252 
253 	ASSERT_RTNL();
254 
255 	/* 1. Deleting primary ifaddr forces deletion all secondaries
256 	 * unless alias promotion is set
257 	 **/
258 
259 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
260 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
261 
262 		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope value is not
			 * below ifa1's; a promoted address is re-linked
			 * right after it.
			 */
263 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
264 			    ifa1->ifa_scope <= ifa->ifa_scope)
265 				last_prim = ifa;
266 
			/* Skip everything that is not a secondary of ifa1
			 * (a primary, a different mask, or outside ifa1's
			 * subnet), remembering it as a possible predecessor
			 * of the promotion candidate.
			 */
267 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
268 			    ifa1->ifa_mask != ifa->ifa_mask ||
269 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
270 				ifap1 = &ifa->ifa_next;
271 				prev_prom = ifa;
272 				continue;
273 			}
274 
275 			if (!do_promote) {
				/* No promotion: unlink, announce and free this
				 * secondary; ifap1 stays put so the loop sees
				 * the successor next.
				 */
276 				*ifap1 = ifa->ifa_next;
277 
278 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
279 				blocking_notifier_call_chain(&inetaddr_chain,
280 						NETDEV_DOWN, ifa);
281 				inet_free_ifa(ifa);
282 			} else {
				/* First matching secondary becomes the new primary. */
283 				promote = ifa;
284 				break;
285 			}
286 		}
287 	}
288 
289 	/* 2. Unlink it */
290 
291 	*ifap = ifa1->ifa_next;
292 
293 	/* 3. Announce address deletion */
294 
295 	/* Send message first, then call notifier.
296 	   At first sight, FIB update triggered by notifier
297 	   will refer to already deleted ifaddr, that could confuse
298 	   netlink listeners. It is not true: look, gated sees
299 	   that route deleted and if it still thinks that ifaddr
300 	   is valid, it will try to restore deleted routes... Grr.
301 	   So that, this order is correct.
302 	 */
303 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
304 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
305 
306 	if (promote) {
307 
		/* prev_prom is only set when other entries sat between ifa1
		 * and the candidate; in that case move the candidate to just
		 * behind last_prim.  Otherwise it already follows ifa1's old
		 * position and stays where it is.
		 */
308 		if (prev_prom) {
309 			prev_prom->ifa_next = promote->ifa_next;
310 			promote->ifa_next = last_prim->ifa_next;
311 			last_prim->ifa_next = promote;
312 		}
313 
314 		promote->ifa_flags &= ~IFA_F_SECONDARY;
315 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
316 		blocking_notifier_call_chain(&inetaddr_chain,
317 				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for the entries after the promoted
		 * address that share ifa1's mask and subnet.
		 */
318 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
319 			if (ifa1->ifa_mask != ifa->ifa_mask ||
320 			    !inet_ifa_match(ifa1->ifa_address, ifa))
321 					continue;
322 			fib_add_ifaddr(ifa);
323 		}
324 
325 	}
326 	if (destroy) {
327 		inet_free_ifa(ifa1);
328 
329 		if (!in_dev->ifa_list)
330 			inetdev_destroy(in_dev);
331 	}
332 }
333 
334 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335 			 int destroy)
336 {
337 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 }
339 
340 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341 			     u32 pid)
342 {
343 	struct in_device *in_dev = ifa->ifa_dev;
344 	struct in_ifaddr *ifa1, **ifap, **last_primary;
345 
346 	ASSERT_RTNL();
347 
348 	if (!ifa->ifa_local) {
349 		inet_free_ifa(ifa);
350 		return 0;
351 	}
352 
353 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
354 	last_primary = &in_dev->ifa_list;
355 
356 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
357 	     ifap = &ifa1->ifa_next) {
358 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
359 		    ifa->ifa_scope <= ifa1->ifa_scope)
360 			last_primary = &ifa1->ifa_next;
361 		if (ifa1->ifa_mask == ifa->ifa_mask &&
362 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
363 			if (ifa1->ifa_local == ifa->ifa_local) {
364 				inet_free_ifa(ifa);
365 				return -EEXIST;
366 			}
367 			if (ifa1->ifa_scope != ifa->ifa_scope) {
368 				inet_free_ifa(ifa);
369 				return -EINVAL;
370 			}
371 			ifa->ifa_flags |= IFA_F_SECONDARY;
372 		}
373 	}
374 
375 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
376 		net_srandom(ifa->ifa_local);
377 		ifap = last_primary;
378 	}
379 
380 	ifa->ifa_next = *ifap;
381 	*ifap = ifa;
382 
383 	/* Send message first, then call notifier.
384 	   Notifier will trigger FIB update, so that
385 	   listeners of netlink will know about new ifaddr */
386 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
387 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
388 
389 	return 0;
390 }
391 
392 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 {
394 	return __inet_insert_ifa(ifa, NULL, 0);
395 }
396 
397 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 {
399 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
400 
401 	ASSERT_RTNL();
402 
403 	if (!in_dev) {
404 		in_dev = inetdev_init(dev);
405 		if (!in_dev) {
406 			inet_free_ifa(ifa);
407 			return -ENOBUFS;
408 		}
409 	}
410 	if (ifa->ifa_dev != in_dev) {
411 		BUG_TRAP(!ifa->ifa_dev);
412 		in_dev_hold(in_dev);
413 		ifa->ifa_dev = in_dev;
414 	}
415 	if (LOOPBACK(ifa->ifa_local))
416 		ifa->ifa_scope = RT_SCOPE_HOST;
417 	return inet_insert_ifa(ifa);
418 }
419 
420 struct in_device *inetdev_by_index(int ifindex)
421 {
422 	struct net_device *dev;
423 	struct in_device *in_dev = NULL;
424 	read_lock(&dev_base_lock);
425 	dev = __dev_get_by_index(ifindex);
426 	if (dev)
427 		in_dev = in_dev_get(dev);
428 	read_unlock(&dev_base_lock);
429 	return in_dev;
430 }
431 
432 /* Called only from RTNL semaphored context. No locks. */
433 
434 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
435 				    __be32 mask)
436 {
437 	ASSERT_RTNL();
438 
439 	for_primary_ifa(in_dev) {
440 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
441 			return ifa;
442 	} endfor_ifa(in_dev);
443 	return NULL;
444 }
445 
446 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
447 {
448 	struct nlattr *tb[IFA_MAX+1];
449 	struct in_device *in_dev;
450 	struct ifaddrmsg *ifm;
451 	struct in_ifaddr *ifa, **ifap;
452 	int err = -EINVAL;
453 
454 	ASSERT_RTNL();
455 
456 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
457 	if (err < 0)
458 		goto errout;
459 
460 	ifm = nlmsg_data(nlh);
461 	in_dev = inetdev_by_index(ifm->ifa_index);
462 	if (in_dev == NULL) {
463 		err = -ENODEV;
464 		goto errout;
465 	}
466 
467 	__in_dev_put(in_dev);
468 
469 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
470 	     ifap = &ifa->ifa_next) {
471 		if (tb[IFA_LOCAL] &&
472 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
473 			continue;
474 
475 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
476 			continue;
477 
478 		if (tb[IFA_ADDRESS] &&
479 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
480 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
481 			continue;
482 
483 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
484 		return 0;
485 	}
486 
487 	err = -EADDRNOTAVAIL;
488 errout:
489 	return err;
490 }
491 
492 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
493 {
494 	struct nlattr *tb[IFA_MAX+1];
495 	struct in_ifaddr *ifa;
496 	struct ifaddrmsg *ifm;
497 	struct net_device *dev;
498 	struct in_device *in_dev;
499 	int err = -EINVAL;
500 
501 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
502 	if (err < 0)
503 		goto errout;
504 
505 	ifm = nlmsg_data(nlh);
506 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
507 		goto errout;
508 
509 	dev = __dev_get_by_index(ifm->ifa_index);
510 	if (dev == NULL) {
511 		err = -ENODEV;
512 		goto errout;
513 	}
514 
515 	in_dev = __in_dev_get_rtnl(dev);
516 	if (in_dev == NULL) {
517 		in_dev = inetdev_init(dev);
518 		if (in_dev == NULL) {
519 			err = -ENOBUFS;
520 			goto errout;
521 		}
522 	}
523 
524 	ifa = inet_alloc_ifa();
525 	if (ifa == NULL) {
526 		/*
527 		 * A potential indev allocation can be left alive, it stays
528 		 * assigned to its device and is destroy with it.
529 		 */
530 		err = -ENOBUFS;
531 		goto errout;
532 	}
533 
534 	in_dev_hold(in_dev);
535 
536 	if (tb[IFA_ADDRESS] == NULL)
537 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
538 
539 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
540 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
541 	ifa->ifa_flags = ifm->ifa_flags;
542 	ifa->ifa_scope = ifm->ifa_scope;
543 	ifa->ifa_dev = in_dev;
544 
545 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
546 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
547 
548 	if (tb[IFA_BROADCAST])
549 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
550 
551 	if (tb[IFA_ANYCAST])
552 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
553 
554 	if (tb[IFA_LABEL])
555 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
556 	else
557 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
558 
559 	return ifa;
560 
561 errout:
562 	return ERR_PTR(err);
563 }
564 
565 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
566 {
567 	struct in_ifaddr *ifa;
568 
569 	ASSERT_RTNL();
570 
571 	ifa = rtm_to_ifaddr(nlh);
572 	if (IS_ERR(ifa))
573 		return PTR_ERR(ifa);
574 
575 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576 }
577 
578 /*
579  *	Determine a default network mask, based on the IP address.
580  */
581 
582 static __inline__ int inet_abc_len(__be32 addr)
583 {
584 	int rc = -1;	/* Something else, probably a multicast. */
585 
586   	if (ZERONET(addr))
587   		rc = 0;
588 	else {
589 		__u32 haddr = ntohl(addr);
590 
591 		if (IN_CLASSA(haddr))
592 			rc = 8;
593 		else if (IN_CLASSB(haddr))
594 			rc = 16;
595 		else if (IN_CLASSC(haddr))
596 			rc = 24;
597 	}
598 
599   	return rc;
600 }
601 
602 
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value yields -EINVAL.
 * @arg: user pointer to a struct ifreq.  It is copied in on entry and, for
 *       the four "get" commands, copied back with the answer in ifr_addr.
 *
 * The device is looked up by ifr_name with a ":alias" suffix cut off; the
 * full name (suffix restored) is then compared with ifa_label to choose
 * the address to operate on.  The "set" commands require CAP_NET_ADMIN
 * and, except for SIOCSIFFLAGS, an AF_INET sockaddr.  Lookup and command
 * execution happen under rtnl_lock().
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EACCES, -EINVAL, -ENODEV (no such device), -EADDRNOTAVAIL (no matching
 * address), -ENOBUFS, or the result of dev_change_flags()/inet_set_ifa().
 */
603 int devinet_ioctl(unsigned int cmd, void __user *arg)
604 {
605 	struct ifreq ifr;
606 	struct sockaddr_in sin_orig;
607 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
608 	struct in_device *in_dev;
609 	struct in_ifaddr **ifap = NULL;
610 	struct in_ifaddr *ifa = NULL;
611 	struct net_device *dev;
612 	char *colon;
613 	int ret = -EFAULT;
614 	int tryaddrmatch = 0;
615 
616 	/*
617 	 *	Fetch the caller's info block into kernel space
618 	 */
619 
620 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
621 		goto out;
	/* Force termination; the name is used with string functions below. */
622 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
623 
624 	/* save original address for comparison */
625 	memcpy(&sin_orig, sin, sizeof(*sin));
626 
	/* Temporarily cut off an alias suffix so the name matches the device;
	 * the ':' is put back after the device lookup.
	 */
627 	colon = strchr(ifr.ifr_name, ':');
628 	if (colon)
629 		*colon = 0;
630 
631 #ifdef CONFIG_KMOD
632 	dev_load(ifr.ifr_name);
633 #endif
634 
	/* First pass over cmd: permission and argument checks only. */
635 	switch(cmd) {
636 	case SIOCGIFADDR:	/* Get interface address */
637 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
638 	case SIOCGIFDSTADDR:	/* Get the destination address */
639 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
640 		/* Note that these ioctls will not sleep,
641 		   so that we do not impose a lock.
642 		   One day we will be forced to put shlock here (I mean SMP)
643 		 */
644 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Clear the reply area; the caller's address was saved in sin_orig. */
645 		memset(sin, 0, sizeof(*sin));
646 		sin->sin_family = AF_INET;
647 		break;
648 
649 	case SIOCSIFFLAGS:
650 		ret = -EACCES;
651 		if (!capable(CAP_NET_ADMIN))
652 			goto out;
653 		break;
654 	case SIOCSIFADDR:	/* Set interface address (and family) */
655 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
656 	case SIOCSIFDSTADDR:	/* Set the destination address */
657 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
658 		ret = -EACCES;
659 		if (!capable(CAP_NET_ADMIN))
660 			goto out;
661 		ret = -EINVAL;
662 		if (sin->sin_family != AF_INET)
663 			goto out;
664 		break;
665 	default:
666 		ret = -EINVAL;
667 		goto out;
668 	}
669 
670 	rtnl_lock();
671 
672 	ret = -ENODEV;
673 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
674 		goto done;
675 
676 	if (colon)
677 		*colon = ':';
678 
	/* Locate the address; on a hit, ifap is the link pointing at ifa,
	 * which is what inet_del_ifa() needs later.
	 */
679 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
680 		if (tryaddrmatch) {
681 			/* Matthias Andree */
682 			/* compare label and address (4.4BSD style) */
683 			/* note: we only do this for a limited set of ioctls
684 			   and only if the original address family was AF_INET.
685 			   This is checked above. */
686 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
687 			     ifap = &ifa->ifa_next) {
688 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
689 				    sin_orig.sin_addr.s_addr ==
690 							ifa->ifa_address) {
691 					break; /* found */
692 				}
693 			}
694 		}
695 		/* we didn't get a match, maybe the application is
696 		   4.3BSD-style and passed in junk so we fall back to
697 		   comparing just the label */
698 		if (!ifa) {
699 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
700 			     ifap = &ifa->ifa_next)
701 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
702 					break;
703 		}
704 	}
705 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address. */
706 	ret = -EADDRNOTAVAIL;
707 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
708 		goto done;
709 
	/* Second pass over cmd: do the work. */
710 	switch(cmd) {
711 	case SIOCGIFADDR:	/* Get interface address */
712 		sin->sin_addr.s_addr = ifa->ifa_local;
713 		goto rarok;
714 
715 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
716 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
717 		goto rarok;
718 
719 	case SIOCGIFDSTADDR:	/* Get the destination address */
720 		sin->sin_addr.s_addr = ifa->ifa_address;
721 		goto rarok;
722 
723 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
724 		sin->sin_addr.s_addr = ifa->ifa_mask;
725 		goto rarok;
726 
727 	case SIOCSIFFLAGS:
		/* With an alias name only that address is affected: clearing
		 * IFF_UP deletes it.  Without one the flags go to the device.
		 */
728 		if (colon) {
729 			ret = -EADDRNOTAVAIL;
730 			if (!ifa)
731 				break;
732 			ret = 0;
733 			if (!(ifr.ifr_flags & IFF_UP))
734 				inet_del_ifa(in_dev, ifap, 1);
735 			break;
736 		}
737 		ret = dev_change_flags(dev, ifr.ifr_flags);
738 		break;
739 
740 	case SIOCSIFADDR:	/* Set interface address (and family) */
741 		ret = -EINVAL;
742 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
743 			break;
744 
745 		if (!ifa) {
			/* No address under this label yet: create one. */
746 			ret = -ENOBUFS;
747 			if ((ifa = inet_alloc_ifa()) == NULL)
748 				break;
749 			if (colon)
750 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
751 			else
752 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
753 		} else {
			/* Existing address: unlink it (without freeing) so it
			 * can be modified and re-inserted below.
			 */
754 			ret = 0;
755 			if (ifa->ifa_local == sin->sin_addr.s_addr)
756 				break;
757 			inet_del_ifa(in_dev, ifap, 0);
758 			ifa->ifa_broadcast = 0;
759 			ifa->ifa_anycast = 0;
760 		}
761 
762 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
763 
		/* Non point-to-point devices get the class-based prefix and,
		 * on broadcast devices with a prefix shorter than 31, the
		 * matching broadcast address; point-to-point gets /32.
		 */
764 		if (!(dev->flags & IFF_POINTOPOINT)) {
765 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
766 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
767 			if ((dev->flags & IFF_BROADCAST) &&
768 			    ifa->ifa_prefixlen < 31)
769 				ifa->ifa_broadcast = ifa->ifa_address |
770 						     ~ifa->ifa_mask;
771 		} else {
772 			ifa->ifa_prefixlen = 32;
773 			ifa->ifa_mask = inet_make_mask(32);
774 		}
775 		ret = inet_set_ifa(dev, ifa);
776 		break;
777 
778 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
779 		ret = 0;
780 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
781 			inet_del_ifa(in_dev, ifap, 0);
782 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			/* NOTE(review): the result of inet_insert_ifa() is not
			 * checked here; ret stays 0 regardless.
			 */
783 			inet_insert_ifa(ifa);
784 		}
785 		break;
786 
787 	case SIOCSIFDSTADDR:	/* Set the destination address */
788 		ret = 0;
789 		if (ifa->ifa_address == sin->sin_addr.s_addr)
790 			break;
791 		ret = -EINVAL;
792 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
793 			break;
794 		ret = 0;
795 		inet_del_ifa(in_dev, ifap, 0);
796 		ifa->ifa_address = sin->sin_addr.s_addr;
797 		inet_insert_ifa(ifa);
798 		break;
799 
800 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
801 
802 		/*
803 		 *	The mask we set must be legal.
804 		 */
805 		ret = -EINVAL;
806 		if (bad_mask(sin->sin_addr.s_addr, 0))
807 			break;
808 		ret = 0;
809 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
810 			__be32 old_mask = ifa->ifa_mask;
811 			inet_del_ifa(in_dev, ifap, 0);
812 			ifa->ifa_mask = sin->sin_addr.s_addr;
813 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
814 
815 			/* See if current broadcast address matches
816 			 * with current netmask, then recalculate
817 			 * the broadcast address. Otherwise it's a
818 			 * funny address, so don't touch it since
819 			 * the user seems to know what (s)he's doing...
820 			 */
821 			if ((dev->flags & IFF_BROADCAST) &&
822 			    (ifa->ifa_prefixlen < 31) &&
823 			    (ifa->ifa_broadcast ==
824 			     (ifa->ifa_local|~old_mask))) {
825 				ifa->ifa_broadcast = (ifa->ifa_local |
826 						      ~sin->sin_addr.s_addr);
827 			}
828 			inet_insert_ifa(ifa);
829 		}
830 		break;
831 	}
832 done:
833 	rtnl_unlock();
834 out:
835 	return ret;
	/* Exit path of the "get" commands: unlock, then copy the reply out. */
836 rarok:
837 	rtnl_unlock();
838 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
839 	goto out;
840 }
841 
842 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843 {
844 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
845 	struct in_ifaddr *ifa;
846 	struct ifreq ifr;
847 	int done = 0;
848 
849 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850 		goto out;
851 
852 	for (; ifa; ifa = ifa->ifa_next) {
853 		if (!buf) {
854 			done += sizeof(ifr);
855 			continue;
856 		}
857 		if (len < (int) sizeof(ifr))
858 			break;
859 		memset(&ifr, 0, sizeof(struct ifreq));
860 		if (ifa->ifa_label)
861 			strcpy(ifr.ifr_name, ifa->ifa_label);
862 		else
863 			strcpy(ifr.ifr_name, dev->name);
864 
865 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
866 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867 								ifa->ifa_local;
868 
869 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870 			done = -EFAULT;
871 			break;
872 		}
873 		buf  += sizeof(struct ifreq);
874 		len  -= sizeof(struct ifreq);
875 		done += sizeof(struct ifreq);
876 	}
877 out:
878 	return done;
879 }
880 
881 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882 {
883 	__be32 addr = 0;
884 	struct in_device *in_dev;
885 
886 	rcu_read_lock();
887 	in_dev = __in_dev_get_rcu(dev);
888 	if (!in_dev)
889 		goto no_in_dev;
890 
891 	for_primary_ifa(in_dev) {
892 		if (ifa->ifa_scope > scope)
893 			continue;
894 		if (!dst || inet_ifa_match(dst, ifa)) {
895 			addr = ifa->ifa_local;
896 			break;
897 		}
898 		if (!addr)
899 			addr = ifa->ifa_local;
900 	} endfor_ifa(in_dev);
901 no_in_dev:
902 	rcu_read_unlock();
903 
904 	if (addr)
905 		goto out;
906 
907 	/* Not loopback addresses on loopback should be preferred
908 	   in this case. It is importnat that lo is the first interface
909 	   in dev_base list.
910 	 */
911 	read_lock(&dev_base_lock);
912 	rcu_read_lock();
913 	for (dev = dev_base; dev; dev = dev->next) {
914 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915 			continue;
916 
917 		for_primary_ifa(in_dev) {
918 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
919 			    ifa->ifa_scope <= scope) {
920 				addr = ifa->ifa_local;
921 				goto out_unlock_both;
922 			}
923 		} endfor_ifa(in_dev);
924 	}
925 out_unlock_both:
926 	read_unlock(&dev_base_lock);
927 	rcu_read_unlock();
928 out:
929 	return addr;
930 }
931 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the address list of @in_dev (for_ifa; presumably every address,
 * primary and secondary - confirm against the macro) and tracks:
 *   addr - the first ifa_local with ifa_scope <= @scope that equals @local
 *          (any address qualifies when @local is 0)
 *   same - set once an entry's subnet matches @local (if non-zero) and
 *          @dst (if non-zero)
 *
 * Returns addr if 'same' is set when the walk ends, otherwise 0.
 */
932 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
933 			      __be32 local, int scope)
934 {
935 	int same = 0;
936 	__be32 addr = 0;
937 
938 	for_ifa(in_dev) {
		/* Pick a candidate address if none has been chosen yet. */
939 		if (!addr &&
940 		    (local == ifa->ifa_local || !local) &&
941 		    ifa->ifa_scope <= scope) {
942 			addr = ifa->ifa_local;
			/* Subnet already confirmed by an earlier entry: done. */
943 			if (same)
944 				break;
945 		}
946 		if (!same) {
947 			same = (!local || inet_ifa_match(local, ifa)) &&
948 				(!dst || inet_ifa_match(dst, ifa));
949 			if (same && addr) {
				/* Only the local==0, dst!=0 case needs the
				 * extra checks below.
				 */
950 				if (local || !dst)
951 					break;
952 				/* Is the selected addr into dst subnet? */
953 				if (inet_ifa_match(addr, ifa))
954 					break;
955 				/* No, then can we use new local src? */
956 				if (ifa->ifa_scope <= scope) {
957 					addr = ifa->ifa_local;
958 					break;
959 				}
960 				/* search for large dst subnet for addr */
961 				same = 0;
962 			}
963 		}
964 	} endfor_ifa(in_dev);
965 
966 	return same? addr : 0;
967 }
968 
969 /*
970  * Confirm that local IP address exists using wildcards:
971  * - dev: only on this interface, 0=any interface
972  * - dst: only in the same subnet as dst, 0=any dst
973  * - local: address, 0=autoselect the local address
974  * - scope: maximum allowed scope value for the local address
975  */
976 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
977 {
978 	__be32 addr = 0;
979 	struct in_device *in_dev;
980 
981 	if (dev) {
982 		rcu_read_lock();
983 		if ((in_dev = __in_dev_get_rcu(dev)))
984 			addr = confirm_addr_indev(in_dev, dst, local, scope);
985 		rcu_read_unlock();
986 
987 		return addr;
988 	}
989 
990 	read_lock(&dev_base_lock);
991 	rcu_read_lock();
992 	for (dev = dev_base; dev; dev = dev->next) {
993 		if ((in_dev = __in_dev_get_rcu(dev))) {
994 			addr = confirm_addr_indev(in_dev, dst, local, scope);
995 			if (addr)
996 				break;
997 		}
998 	}
999 	rcu_read_unlock();
1000 	read_unlock(&dev_base_lock);
1001 
1002 	return addr;
1003 }
1004 
1005 /*
1006  *	Device notifier
1007  */
1008 
1009 int register_inetaddr_notifier(struct notifier_block *nb)
1010 {
1011 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1012 }
1013 
1014 int unregister_inetaddr_notifier(struct notifier_block *nb)
1015 {
1016 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1017 }
1018 
1019 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1020  * alias numbering and to create unique labels if possible.
1021 */
1022 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1023 {
1024 	struct in_ifaddr *ifa;
1025 	int named = 0;
1026 
1027 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1028 		char old[IFNAMSIZ], *dot;
1029 
1030 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1031 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1032 		if (named++ == 0)
1033 			continue;
1034 		dot = strchr(ifa->ifa_label, ':');
1035 		if (dot == NULL) {
1036 			sprintf(old, ":%d", named);
1037 			dot = old;
1038 		}
1039 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1040 			strcat(ifa->ifa_label, dot);
1041 		} else {
1042 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1043 		}
1044 	}
1045 }
1046 
1047 /* Called only under RTNL semaphore */
1048 
1049 static int inetdev_event(struct notifier_block *this, unsigned long event,
1050 			 void *ptr)
1051 {
1052 	struct net_device *dev = ptr;
1053 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1054 
1055 	ASSERT_RTNL();
1056 
1057 	if (!in_dev) {
1058 		if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1059 			in_dev = inetdev_init(dev);
1060 			if (!in_dev)
1061 				panic("devinet: Failed to create loopback\n");
1062 			in_dev->cnf.no_xfrm = 1;
1063 			in_dev->cnf.no_policy = 1;
1064 		}
1065 		goto out;
1066 	}
1067 
1068 	switch (event) {
1069 	case NETDEV_REGISTER:
1070 		printk(KERN_DEBUG "inetdev_event: bug\n");
1071 		dev->ip_ptr = NULL;
1072 		break;
1073 	case NETDEV_UP:
1074 		if (dev->mtu < 68)
1075 			break;
1076 		if (dev == &loopback_dev) {
1077 			struct in_ifaddr *ifa;
1078 			if ((ifa = inet_alloc_ifa()) != NULL) {
1079 				ifa->ifa_local =
1080 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1081 				ifa->ifa_prefixlen = 8;
1082 				ifa->ifa_mask = inet_make_mask(8);
1083 				in_dev_hold(in_dev);
1084 				ifa->ifa_dev = in_dev;
1085 				ifa->ifa_scope = RT_SCOPE_HOST;
1086 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1087 				inet_insert_ifa(ifa);
1088 			}
1089 		}
1090 		ip_mc_up(in_dev);
1091 		break;
1092 	case NETDEV_DOWN:
1093 		ip_mc_down(in_dev);
1094 		break;
1095 	case NETDEV_CHANGEMTU:
1096 		if (dev->mtu >= 68)
1097 			break;
1098 		/* MTU falled under 68, disable IP */
1099 	case NETDEV_UNREGISTER:
1100 		inetdev_destroy(in_dev);
1101 		break;
1102 	case NETDEV_CHANGENAME:
1103 		/* Do not notify about label change, this event is
1104 		 * not interesting to applications using netlink.
1105 		 */
1106 		inetdev_changename(dev, in_dev);
1107 
1108 #ifdef CONFIG_SYSCTL
1109 		devinet_sysctl_unregister(&in_dev->cnf);
1110 		neigh_sysctl_unregister(in_dev->arp_parms);
1111 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1112 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1113 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1114 #endif
1115 		break;
1116 	}
1117 out:
1118 	return NOTIFY_DONE;
1119 }
1120 
1121 static struct notifier_block ip_netdev_notifier = {
1122 	.notifier_call =inetdev_event,
1123 };
1124 
1125 static inline size_t inet_nlmsg_size(void)
1126 {
1127 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1128 	       + nla_total_size(4) /* IFA_ADDRESS */
1129 	       + nla_total_size(4) /* IFA_LOCAL */
1130 	       + nla_total_size(4) /* IFA_BROADCAST */
1131 	       + nla_total_size(4) /* IFA_ANYCAST */
1132 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1133 }
1134 
1135 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1136 			    u32 pid, u32 seq, int event, unsigned int flags)
1137 {
1138 	struct ifaddrmsg *ifm;
1139 	struct nlmsghdr  *nlh;
1140 
1141 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1142 	if (nlh == NULL)
1143 		return -ENOBUFS;
1144 
1145 	ifm = nlmsg_data(nlh);
1146 	ifm->ifa_family = AF_INET;
1147 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1148 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1149 	ifm->ifa_scope = ifa->ifa_scope;
1150 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1151 
1152 	if (ifa->ifa_address)
1153 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1154 
1155 	if (ifa->ifa_local)
1156 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1157 
1158 	if (ifa->ifa_broadcast)
1159 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1160 
1161 	if (ifa->ifa_anycast)
1162 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1163 
1164 	if (ifa->ifa_label[0])
1165 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166 
1167 	return nlmsg_end(skb, nlh);
1168 
1169 nla_put_failure:
1170 	return nlmsg_cancel(skb, nlh);
1171 }
1172 
1173 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1174 {
1175 	int idx, ip_idx;
1176 	struct net_device *dev;
1177 	struct in_device *in_dev;
1178 	struct in_ifaddr *ifa;
1179 	int s_ip_idx, s_idx = cb->args[0];
1180 
1181 	s_ip_idx = ip_idx = cb->args[1];
1182 	read_lock(&dev_base_lock);
1183 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1184 		if (idx < s_idx)
1185 			continue;
1186 		if (idx > s_idx)
1187 			s_ip_idx = 0;
1188 		rcu_read_lock();
1189 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1190 			rcu_read_unlock();
1191 			continue;
1192 		}
1193 
1194 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 		     ifa = ifa->ifa_next, ip_idx++) {
1196 			if (ip_idx < s_ip_idx)
1197 				continue;
1198 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 					     cb->nlh->nlmsg_seq,
1200 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1201 				rcu_read_unlock();
1202 				goto done;
1203 			}
1204 		}
1205 		rcu_read_unlock();
1206 	}
1207 
1208 done:
1209 	read_unlock(&dev_base_lock);
1210 	cb->args[0] = idx;
1211 	cb->args[1] = ip_idx;
1212 
1213 	return skb->len;
1214 }
1215 
1216 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217 		      u32 pid)
1218 {
1219 	struct sk_buff *skb;
1220 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1221 	int err = -ENOBUFS;
1222 
1223 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1224 	if (skb == NULL)
1225 		goto errout;
1226 
1227 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1228 	/* failure implies BUG in inet_nlmsg_size() */
1229 	BUG_ON(err < 0);
1230 
1231 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1232 errout:
1233 	if (err < 0)
1234 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1235 }
1236 
1237 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1238 	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
1239 	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
1240 	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
1241 	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
1242 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
1243 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
1244 				      .dumpit	= inet_dump_fib,	},
1245 #ifdef CONFIG_IP_MULTIPLE_TABLES
1246 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
1247 #endif
1248 };
1249 
1250 #ifdef CONFIG_SYSCTL
1251 
1252 void inet_forward_change(void)
1253 {
1254 	struct net_device *dev;
1255 	int on = ipv4_devconf.forwarding;
1256 
1257 	ipv4_devconf.accept_redirects = !on;
1258 	ipv4_devconf_dflt.forwarding = on;
1259 
1260 	read_lock(&dev_base_lock);
1261 	for (dev = dev_base; dev; dev = dev->next) {
1262 		struct in_device *in_dev;
1263 		rcu_read_lock();
1264 		in_dev = __in_dev_get_rcu(dev);
1265 		if (in_dev)
1266 			in_dev->cnf.forwarding = on;
1267 		rcu_read_unlock();
1268 	}
1269 	read_unlock(&dev_base_lock);
1270 
1271 	rt_cache_flush(0);
1272 }
1273 
1274 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1275 				  struct file* filp, void __user *buffer,
1276 				  size_t *lenp, loff_t *ppos)
1277 {
1278 	int *valp = ctl->data;
1279 	int val = *valp;
1280 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1281 
1282 	if (write && *valp != val) {
1283 		if (valp == &ipv4_devconf.forwarding)
1284 			inet_forward_change();
1285 		else if (valp != &ipv4_devconf_dflt.forwarding)
1286 			rt_cache_flush(0);
1287 	}
1288 
1289 	return ret;
1290 }
1291 
1292 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1293 			 struct file* filp, void __user *buffer,
1294 			 size_t *lenp, loff_t *ppos)
1295 {
1296 	int *valp = ctl->data;
1297 	int val = *valp;
1298 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1299 
1300 	if (write && *valp != val)
1301 		rt_cache_flush(0);
1302 
1303 	return ret;
1304 }
1305 
1306 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1307 				  void __user *oldval, size_t __user *oldlenp,
1308 				  void __user *newval, size_t newlen)
1309 {
1310 	int *valp = table->data;
1311 	int new;
1312 
1313 	if (!newval || !newlen)
1314 		return 0;
1315 
1316 	if (newlen != sizeof(int))
1317 		return -EINVAL;
1318 
1319 	if (get_user(new, (int __user *)newval))
1320 		return -EFAULT;
1321 
1322 	if (new == *valp)
1323 		return 0;
1324 
1325 	if (oldval && oldlenp) {
1326 		size_t len;
1327 
1328 		if (get_user(len, oldlenp))
1329 			return -EFAULT;
1330 
1331 		if (len) {
1332 			if (len > table->maxlen)
1333 				len = table->maxlen;
1334 			if (copy_to_user(oldval, valp, len))
1335 				return -EFAULT;
1336 			if (put_user(len, oldlenp))
1337 				return -EFAULT;
1338 		}
1339 	}
1340 
1341 	*valp = new;
1342 	rt_cache_flush(0);
1343 	return 1;
1344 }
1345 
1346 
1347 static struct devinet_sysctl_table {
1348 	struct ctl_table_header *sysctl_header;
1349 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1350 	ctl_table		devinet_dev[2];
1351 	ctl_table		devinet_conf_dir[2];
1352 	ctl_table		devinet_proto_dir[2];
1353 	ctl_table		devinet_root_dir[2];
1354 } devinet_sysctl = {
1355 	.devinet_vars = {
1356 		{
1357 			.ctl_name	= NET_IPV4_CONF_FORWARDING,
1358 			.procname	= "forwarding",
1359 			.data		= &ipv4_devconf.forwarding,
1360 			.maxlen		= sizeof(int),
1361 			.mode		= 0644,
1362 			.proc_handler	= &devinet_sysctl_forward,
1363 		},
1364 		{
1365 			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
1366 			.procname	= "mc_forwarding",
1367 			.data		= &ipv4_devconf.mc_forwarding,
1368 			.maxlen		= sizeof(int),
1369 			.mode		= 0444,
1370 			.proc_handler	= &proc_dointvec,
1371 		},
1372 		{
1373 			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
1374 			.procname	= "accept_redirects",
1375 			.data		= &ipv4_devconf.accept_redirects,
1376 			.maxlen		= sizeof(int),
1377 			.mode		= 0644,
1378 			.proc_handler	= &proc_dointvec,
1379 		},
1380 		{
1381 			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
1382 			.procname	= "secure_redirects",
1383 			.data		= &ipv4_devconf.secure_redirects,
1384 			.maxlen		= sizeof(int),
1385 			.mode		= 0644,
1386 			.proc_handler	= &proc_dointvec,
1387 		},
1388 		{
1389 			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
1390 			.procname	= "shared_media",
1391 			.data		= &ipv4_devconf.shared_media,
1392 			.maxlen		= sizeof(int),
1393 			.mode		= 0644,
1394 			.proc_handler	= &proc_dointvec,
1395 		},
1396 		{
1397 			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
1398 			.procname	= "rp_filter",
1399 			.data		= &ipv4_devconf.rp_filter,
1400 			.maxlen		= sizeof(int),
1401 			.mode		= 0644,
1402 			.proc_handler	= &proc_dointvec,
1403 		},
1404 		{
1405 			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
1406 			.procname	= "send_redirects",
1407 			.data		= &ipv4_devconf.send_redirects,
1408 			.maxlen		= sizeof(int),
1409 			.mode		= 0644,
1410 			.proc_handler	= &proc_dointvec,
1411 		},
1412 		{
1413 			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1414 			.procname	= "accept_source_route",
1415 			.data		= &ipv4_devconf.accept_source_route,
1416 			.maxlen		= sizeof(int),
1417 			.mode		= 0644,
1418 			.proc_handler	= &proc_dointvec,
1419 		},
1420 		{
1421 			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
1422 			.procname	= "proxy_arp",
1423 			.data		= &ipv4_devconf.proxy_arp,
1424 			.maxlen		= sizeof(int),
1425 			.mode		= 0644,
1426 			.proc_handler	= &proc_dointvec,
1427 		},
1428 		{
1429 			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
1430 			.procname	= "medium_id",
1431 			.data		= &ipv4_devconf.medium_id,
1432 			.maxlen		= sizeof(int),
1433 			.mode		= 0644,
1434 			.proc_handler	= &proc_dointvec,
1435 		},
1436 		{
1437 			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
1438 			.procname	= "bootp_relay",
1439 			.data		= &ipv4_devconf.bootp_relay,
1440 			.maxlen		= sizeof(int),
1441 			.mode		= 0644,
1442 			.proc_handler	= &proc_dointvec,
1443 		},
1444 		{
1445 			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
1446 			.procname	= "log_martians",
1447 			.data		= &ipv4_devconf.log_martians,
1448 			.maxlen		= sizeof(int),
1449 			.mode		= 0644,
1450 			.proc_handler	= &proc_dointvec,
1451 		},
1452 		{
1453 			.ctl_name	= NET_IPV4_CONF_TAG,
1454 			.procname	= "tag",
1455 			.data		= &ipv4_devconf.tag,
1456 			.maxlen		= sizeof(int),
1457 			.mode		= 0644,
1458 			.proc_handler	= &proc_dointvec,
1459 		},
1460 		{
1461 			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
1462 			.procname	= "arp_filter",
1463 			.data		= &ipv4_devconf.arp_filter,
1464 			.maxlen		= sizeof(int),
1465 			.mode		= 0644,
1466 			.proc_handler	= &proc_dointvec,
1467 		},
1468 		{
1469 			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
1470 			.procname	= "arp_announce",
1471 			.data		= &ipv4_devconf.arp_announce,
1472 			.maxlen		= sizeof(int),
1473 			.mode		= 0644,
1474 			.proc_handler	= &proc_dointvec,
1475 		},
1476 		{
1477 			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
1478 			.procname	= "arp_ignore",
1479 			.data		= &ipv4_devconf.arp_ignore,
1480 			.maxlen		= sizeof(int),
1481 			.mode		= 0644,
1482 			.proc_handler	= &proc_dointvec,
1483 		},
1484 		{
1485 			.ctl_name	= NET_IPV4_CONF_ARP_ACCEPT,
1486 			.procname	= "arp_accept",
1487 			.data		= &ipv4_devconf.arp_accept,
1488 			.maxlen		= sizeof(int),
1489 			.mode		= 0644,
1490 			.proc_handler	= &proc_dointvec,
1491 		},
1492 		{
1493 			.ctl_name	= NET_IPV4_CONF_NOXFRM,
1494 			.procname	= "disable_xfrm",
1495 			.data		= &ipv4_devconf.no_xfrm,
1496 			.maxlen		= sizeof(int),
1497 			.mode		= 0644,
1498 			.proc_handler	= &ipv4_doint_and_flush,
1499 			.strategy	= &ipv4_doint_and_flush_strategy,
1500 		},
1501 		{
1502 			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
1503 			.procname	= "disable_policy",
1504 			.data		= &ipv4_devconf.no_policy,
1505 			.maxlen		= sizeof(int),
1506 			.mode		= 0644,
1507 			.proc_handler	= &ipv4_doint_and_flush,
1508 			.strategy	= &ipv4_doint_and_flush_strategy,
1509 		},
1510 		{
1511 			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
1512 			.procname	= "force_igmp_version",
1513 			.data		= &ipv4_devconf.force_igmp_version,
1514 			.maxlen		= sizeof(int),
1515 			.mode		= 0644,
1516 			.proc_handler	= &ipv4_doint_and_flush,
1517 			.strategy	= &ipv4_doint_and_flush_strategy,
1518 		},
1519 		{
1520 			.ctl_name	= NET_IPV4_CONF_PROMOTE_SECONDARIES,
1521 			.procname	= "promote_secondaries",
1522 			.data		= &ipv4_devconf.promote_secondaries,
1523 			.maxlen		= sizeof(int),
1524 			.mode		= 0644,
1525 			.proc_handler	= &ipv4_doint_and_flush,
1526 			.strategy	= &ipv4_doint_and_flush_strategy,
1527 		},
1528 	},
1529 	.devinet_dev = {
1530 		{
1531 			.ctl_name	= NET_PROTO_CONF_ALL,
1532 			.procname	= "all",
1533 			.mode		= 0555,
1534 			.child		= devinet_sysctl.devinet_vars,
1535 		},
1536 	},
1537 	.devinet_conf_dir = {
1538 	        {
1539 			.ctl_name	= NET_IPV4_CONF,
1540 			.procname	= "conf",
1541 			.mode		= 0555,
1542 			.child		= devinet_sysctl.devinet_dev,
1543 		},
1544 	},
1545 	.devinet_proto_dir = {
1546 		{
1547 			.ctl_name	= NET_IPV4,
1548 			.procname	= "ipv4",
1549 			.mode		= 0555,
1550 			.child 		= devinet_sysctl.devinet_conf_dir,
1551 		},
1552 	},
1553 	.devinet_root_dir = {
1554 		{
1555 			.ctl_name	= CTL_NET,
1556 			.procname 	= "net",
1557 			.mode		= 0555,
1558 			.child		= devinet_sysctl.devinet_proto_dir,
1559 		},
1560 	},
1561 };
1562 
1563 static void devinet_sysctl_register(struct in_device *in_dev,
1564 				    struct ipv4_devconf *p)
1565 {
1566 	int i;
1567 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1568 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1569 						 GFP_KERNEL);
1570 	char *dev_name = NULL;
1571 
1572 	if (!t)
1573 		return;
1574 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1575 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1576 		t->devinet_vars[i].de = NULL;
1577 	}
1578 
1579 	if (dev) {
1580 		dev_name = dev->name;
1581 		t->devinet_dev[0].ctl_name = dev->ifindex;
1582 	} else {
1583 		dev_name = "default";
1584 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1585 	}
1586 
1587 	/*
1588 	 * Make a copy of dev_name, because '.procname' is regarded as const
1589 	 * by sysctl and we wouldn't want anyone to change it under our feet
1590 	 * (see SIOCSIFNAME).
1591 	 */
1592 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1593 	if (!dev_name)
1594 	    goto free;
1595 
1596 	t->devinet_dev[0].procname    = dev_name;
1597 	t->devinet_dev[0].child	      = t->devinet_vars;
1598 	t->devinet_dev[0].de	      = NULL;
1599 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1600 	t->devinet_conf_dir[0].de     = NULL;
1601 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1602 	t->devinet_proto_dir[0].de    = NULL;
1603 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1604 	t->devinet_root_dir[0].de     = NULL;
1605 
1606 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1607 	if (!t->sysctl_header)
1608 	    goto free_procname;
1609 
1610 	p->sysctl = t;
1611 	return;
1612 
1613 	/* error path */
1614  free_procname:
1615 	kfree(dev_name);
1616  free:
1617 	kfree(t);
1618 	return;
1619 }
1620 
1621 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1622 {
1623 	if (p->sysctl) {
1624 		struct devinet_sysctl_table *t = p->sysctl;
1625 		p->sysctl = NULL;
1626 		unregister_sysctl_table(t->sysctl_header);
1627 		kfree(t->devinet_dev[0].procname);
1628 		kfree(t);
1629 	}
1630 }
1631 #endif
1632 
/*
 * Boot-time initialisation of IPv4 device support: hooks this file's
 * handlers into the gifconf, netdevice-notifier and rtnetlink tables and,
 * with CONFIG_SYSCTL, registers the sysctl trees.
 */
void __init devinet_init(void)
{
	/* inet_gifconf becomes the gifconf handler for PF_INET. */
	register_gifconf(PF_INET, inet_gifconf);
	/* inetdev_event() is called for network-device events from here on. */
	register_netdevice_notifier(&ip_netdev_notifier);
	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
#ifdef CONFIG_SYSCTL
	/* The template itself is registered directly as the "all" tree ... */
	devinet_sysctl.sysctl_header =
		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
	/* ... and a copy bound to ipv4_devconf_dflt becomes "default". */
	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
#endif
}
1644 
/* Symbols from this file exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1650