xref: /openbmc/linux/net/ipv4/devinet.c (revision e868d61272caa648214046a096e5a6bfc068dc8c)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 
66 struct ipv4_devconf ipv4_devconf = {
67 	.accept_redirects = 1,
68 	.send_redirects =  1,
69 	.secure_redirects = 1,
70 	.shared_media =	  1,
71 };
72 
73 static struct ipv4_devconf ipv4_devconf_dflt = {
74 	.accept_redirects =  1,
75 	.send_redirects =    1,
76 	.secure_redirects =  1,
77 	.shared_media =	     1,
78 	.accept_source_route = 1,
79 };
80 
81 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
82 	[IFA_LOCAL]     	= { .type = NLA_U32 },
83 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
84 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
85 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
86 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
87 };
88 
89 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
90 
91 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
92 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
93 			 int destroy);
94 #ifdef CONFIG_SYSCTL
95 static void devinet_sysctl_register(struct in_device *in_dev,
96 				    struct ipv4_devconf *p);
97 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
98 #endif
99 
100 /* Locks all the inet devices. */
101 
102 static struct in_ifaddr *inet_alloc_ifa(void)
103 {
104 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
105 
106 	if (ifa) {
107 		INIT_RCU_HEAD(&ifa->rcu_head);
108 	}
109 
110 	return ifa;
111 }
112 
113 static void inet_rcu_free_ifa(struct rcu_head *head)
114 {
115 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
116 	if (ifa->ifa_dev)
117 		in_dev_put(ifa->ifa_dev);
118 	kfree(ifa);
119 }
120 
121 static inline void inet_free_ifa(struct in_ifaddr *ifa)
122 {
123 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
124 }
125 
126 void in_dev_finish_destroy(struct in_device *idev)
127 {
128 	struct net_device *dev = idev->dev;
129 
130 	BUG_TRAP(!idev->ifa_list);
131 	BUG_TRAP(!idev->mc_list);
132 #ifdef NET_REFCNT_DEBUG
133 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
134 	       idev, dev ? dev->name : "NIL");
135 #endif
136 	dev_put(dev);
137 	if (!idev->dead)
138 		printk("Freeing alive in_device %p\n", idev);
139 	else {
140 		kfree(idev);
141 	}
142 }
143 
144 struct in_device *inetdev_init(struct net_device *dev)
145 {
146 	struct in_device *in_dev;
147 
148 	ASSERT_RTNL();
149 
150 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
151 	if (!in_dev)
152 		goto out;
153 	INIT_RCU_HEAD(&in_dev->rcu_head);
154 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
155 	in_dev->cnf.sysctl = NULL;
156 	in_dev->dev = dev;
157 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
158 		goto out_kfree;
159 	/* Reference in_dev->dev */
160 	dev_hold(dev);
161 #ifdef CONFIG_SYSCTL
162 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
163 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
164 #endif
165 
166 	/* Account for reference dev->ip_ptr (below) */
167 	in_dev_hold(in_dev);
168 
169 #ifdef CONFIG_SYSCTL
170 	devinet_sysctl_register(in_dev, &in_dev->cnf);
171 #endif
172 	ip_mc_init_dev(in_dev);
173 	if (dev->flags & IFF_UP)
174 		ip_mc_up(in_dev);
175 
176 	/* we can receive as soon as ip_ptr is set -- do this last */
177 	rcu_assign_pointer(dev->ip_ptr, in_dev);
178 out:
179 	return in_dev;
180 out_kfree:
181 	kfree(in_dev);
182 	in_dev = NULL;
183 	goto out;
184 }
185 
186 static void in_dev_rcu_put(struct rcu_head *head)
187 {
188 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
189 	in_dev_put(idev);
190 }
191 
192 static void inetdev_destroy(struct in_device *in_dev)
193 {
194 	struct in_ifaddr *ifa;
195 	struct net_device *dev;
196 
197 	ASSERT_RTNL();
198 
199 	dev = in_dev->dev;
200 	if (dev == &loopback_dev)
201 		return;
202 
203 	in_dev->dead = 1;
204 
205 	ip_mc_destroy_dev(in_dev);
206 
207 	while ((ifa = in_dev->ifa_list) != NULL) {
208 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 		inet_free_ifa(ifa);
210 	}
211 
212 #ifdef CONFIG_SYSCTL
213 	devinet_sysctl_unregister(&in_dev->cnf);
214 #endif
215 
216 	dev->ip_ptr = NULL;
217 
218 #ifdef CONFIG_SYSCTL
219 	neigh_sysctl_unregister(in_dev->arp_parms);
220 #endif
221 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222 	arp_ifdown(dev);
223 
224 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226 
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229 	rcu_read_lock();
230 	for_primary_ifa(in_dev) {
231 		if (inet_ifa_match(a, ifa)) {
232 			if (!b || inet_ifa_match(b, ifa)) {
233 				rcu_read_unlock();
234 				return 1;
235 			}
236 		}
237 	} endfor_ifa(in_dev);
238 	rcu_read_unlock();
239 	return 0;
240 }
241 
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243 			 int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245 	struct in_ifaddr *promote = NULL;
246 	struct in_ifaddr *ifa, *ifa1 = *ifap;
247 	struct in_ifaddr *last_prim = in_dev->ifa_list;
248 	struct in_ifaddr *prev_prom = NULL;
249 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250 
251 	ASSERT_RTNL();
252 
253 	/* 1. Deleting primary ifaddr forces deletion all secondaries
254 	 * unless alias promotion is set
255 	 **/
256 
257 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259 
260 		while ((ifa = *ifap1) != NULL) {
261 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262 			    ifa1->ifa_scope <= ifa->ifa_scope)
263 				last_prim = ifa;
264 
265 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266 			    ifa1->ifa_mask != ifa->ifa_mask ||
267 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
268 				ifap1 = &ifa->ifa_next;
269 				prev_prom = ifa;
270 				continue;
271 			}
272 
273 			if (!do_promote) {
274 				*ifap1 = ifa->ifa_next;
275 
276 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277 				blocking_notifier_call_chain(&inetaddr_chain,
278 						NETDEV_DOWN, ifa);
279 				inet_free_ifa(ifa);
280 			} else {
281 				promote = ifa;
282 				break;
283 			}
284 		}
285 	}
286 
287 	/* 2. Unlink it */
288 
289 	*ifap = ifa1->ifa_next;
290 
291 	/* 3. Announce address deletion */
292 
293 	/* Send message first, then call notifier.
294 	   At first sight, FIB update triggered by notifier
295 	   will refer to already deleted ifaddr, that could confuse
296 	   netlink listeners. It is not true: look, gated sees
297 	   that route deleted and if it still thinks that ifaddr
298 	   is valid, it will try to restore deleted routes... Grr.
299 	   So that, this order is correct.
300 	 */
301 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303 
304 	if (promote) {
305 
306 		if (prev_prom) {
307 			prev_prom->ifa_next = promote->ifa_next;
308 			promote->ifa_next = last_prim->ifa_next;
309 			last_prim->ifa_next = promote;
310 		}
311 
312 		promote->ifa_flags &= ~IFA_F_SECONDARY;
313 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314 		blocking_notifier_call_chain(&inetaddr_chain,
315 				NETDEV_UP, promote);
316 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317 			if (ifa1->ifa_mask != ifa->ifa_mask ||
318 			    !inet_ifa_match(ifa1->ifa_address, ifa))
319 					continue;
320 			fib_add_ifaddr(ifa);
321 		}
322 
323 	}
324 	if (destroy) {
325 		inet_free_ifa(ifa1);
326 
327 		if (!in_dev->ifa_list)
328 			inetdev_destroy(in_dev);
329 	}
330 }
331 
332 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 			 int destroy)
334 {
335 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
336 }
337 
338 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
339 			     u32 pid)
340 {
341 	struct in_device *in_dev = ifa->ifa_dev;
342 	struct in_ifaddr *ifa1, **ifap, **last_primary;
343 
344 	ASSERT_RTNL();
345 
346 	if (!ifa->ifa_local) {
347 		inet_free_ifa(ifa);
348 		return 0;
349 	}
350 
351 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
352 	last_primary = &in_dev->ifa_list;
353 
354 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
355 	     ifap = &ifa1->ifa_next) {
356 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
357 		    ifa->ifa_scope <= ifa1->ifa_scope)
358 			last_primary = &ifa1->ifa_next;
359 		if (ifa1->ifa_mask == ifa->ifa_mask &&
360 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
361 			if (ifa1->ifa_local == ifa->ifa_local) {
362 				inet_free_ifa(ifa);
363 				return -EEXIST;
364 			}
365 			if (ifa1->ifa_scope != ifa->ifa_scope) {
366 				inet_free_ifa(ifa);
367 				return -EINVAL;
368 			}
369 			ifa->ifa_flags |= IFA_F_SECONDARY;
370 		}
371 	}
372 
373 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
374 		net_srandom(ifa->ifa_local);
375 		ifap = last_primary;
376 	}
377 
378 	ifa->ifa_next = *ifap;
379 	*ifap = ifa;
380 
381 	/* Send message first, then call notifier.
382 	   Notifier will trigger FIB update, so that
383 	   listeners of netlink will know about new ifaddr */
384 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
385 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
386 
387 	return 0;
388 }
389 
390 static int inet_insert_ifa(struct in_ifaddr *ifa)
391 {
392 	return __inet_insert_ifa(ifa, NULL, 0);
393 }
394 
395 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
396 {
397 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
398 
399 	ASSERT_RTNL();
400 
401 	if (!in_dev) {
402 		in_dev = inetdev_init(dev);
403 		if (!in_dev) {
404 			inet_free_ifa(ifa);
405 			return -ENOBUFS;
406 		}
407 	}
408 	if (ifa->ifa_dev != in_dev) {
409 		BUG_TRAP(!ifa->ifa_dev);
410 		in_dev_hold(in_dev);
411 		ifa->ifa_dev = in_dev;
412 	}
413 	if (LOOPBACK(ifa->ifa_local))
414 		ifa->ifa_scope = RT_SCOPE_HOST;
415 	return inet_insert_ifa(ifa);
416 }
417 
418 struct in_device *inetdev_by_index(int ifindex)
419 {
420 	struct net_device *dev;
421 	struct in_device *in_dev = NULL;
422 	read_lock(&dev_base_lock);
423 	dev = __dev_get_by_index(ifindex);
424 	if (dev)
425 		in_dev = in_dev_get(dev);
426 	read_unlock(&dev_base_lock);
427 	return in_dev;
428 }
429 
430 /* Called only from RTNL semaphored context. No locks. */
431 
432 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
433 				    __be32 mask)
434 {
435 	ASSERT_RTNL();
436 
437 	for_primary_ifa(in_dev) {
438 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
439 			return ifa;
440 	} endfor_ifa(in_dev);
441 	return NULL;
442 }
443 
444 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
445 {
446 	struct nlattr *tb[IFA_MAX+1];
447 	struct in_device *in_dev;
448 	struct ifaddrmsg *ifm;
449 	struct in_ifaddr *ifa, **ifap;
450 	int err = -EINVAL;
451 
452 	ASSERT_RTNL();
453 
454 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
455 	if (err < 0)
456 		goto errout;
457 
458 	ifm = nlmsg_data(nlh);
459 	in_dev = inetdev_by_index(ifm->ifa_index);
460 	if (in_dev == NULL) {
461 		err = -ENODEV;
462 		goto errout;
463 	}
464 
465 	__in_dev_put(in_dev);
466 
467 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
468 	     ifap = &ifa->ifa_next) {
469 		if (tb[IFA_LOCAL] &&
470 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
471 			continue;
472 
473 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
474 			continue;
475 
476 		if (tb[IFA_ADDRESS] &&
477 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
479 			continue;
480 
481 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
482 		return 0;
483 	}
484 
485 	err = -EADDRNOTAVAIL;
486 errout:
487 	return err;
488 }
489 
490 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
491 {
492 	struct nlattr *tb[IFA_MAX+1];
493 	struct in_ifaddr *ifa;
494 	struct ifaddrmsg *ifm;
495 	struct net_device *dev;
496 	struct in_device *in_dev;
497 	int err = -EINVAL;
498 
499 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500 	if (err < 0)
501 		goto errout;
502 
503 	ifm = nlmsg_data(nlh);
504 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
505 		err = -EINVAL;
506 		goto errout;
507 	}
508 
509 	dev = __dev_get_by_index(ifm->ifa_index);
510 	if (dev == NULL) {
511 		err = -ENODEV;
512 		goto errout;
513 	}
514 
515 	in_dev = __in_dev_get_rtnl(dev);
516 	if (in_dev == NULL) {
517 		in_dev = inetdev_init(dev);
518 		if (in_dev == NULL) {
519 			err = -ENOBUFS;
520 			goto errout;
521 		}
522 	}
523 
524 	ifa = inet_alloc_ifa();
525 	if (ifa == NULL) {
526 		/*
527 		 * A potential indev allocation can be left alive, it stays
528 		 * assigned to its device and is destroy with it.
529 		 */
530 		err = -ENOBUFS;
531 		goto errout;
532 	}
533 
534 	in_dev_hold(in_dev);
535 
536 	if (tb[IFA_ADDRESS] == NULL)
537 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
538 
539 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
540 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
541 	ifa->ifa_flags = ifm->ifa_flags;
542 	ifa->ifa_scope = ifm->ifa_scope;
543 	ifa->ifa_dev = in_dev;
544 
545 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
546 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
547 
548 	if (tb[IFA_BROADCAST])
549 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
550 
551 	if (tb[IFA_ANYCAST])
552 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
553 
554 	if (tb[IFA_LABEL])
555 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
556 	else
557 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
558 
559 	return ifa;
560 
561 errout:
562 	return ERR_PTR(err);
563 }
564 
565 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
566 {
567 	struct in_ifaddr *ifa;
568 
569 	ASSERT_RTNL();
570 
571 	ifa = rtm_to_ifaddr(nlh);
572 	if (IS_ERR(ifa))
573 		return PTR_ERR(ifa);
574 
575 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576 }
577 
578 /*
579  *	Determine a default network mask, based on the IP address.
580  */
581 
582 static __inline__ int inet_abc_len(__be32 addr)
583 {
584 	int rc = -1;	/* Something else, probably a multicast. */
585 
586 	if (ZERONET(addr))
587 		rc = 0;
588 	else {
589 		__u32 haddr = ntohl(addr);
590 
591 		if (IN_CLASSA(haddr))
592 			rc = 8;
593 		else if (IN_CLASSB(haddr))
594 			rc = 16;
595 		else if (IN_CLASSC(haddr))
596 			rc = 24;
597 	}
598 
599 	return rc;
600 }
601 
602 
603 int devinet_ioctl(unsigned int cmd, void __user *arg)
604 {
605 	struct ifreq ifr;
606 	struct sockaddr_in sin_orig;
607 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
608 	struct in_device *in_dev;
609 	struct in_ifaddr **ifap = NULL;
610 	struct in_ifaddr *ifa = NULL;
611 	struct net_device *dev;
612 	char *colon;
613 	int ret = -EFAULT;
614 	int tryaddrmatch = 0;
615 
616 	/*
617 	 *	Fetch the caller's info block into kernel space
618 	 */
619 
620 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
621 		goto out;
622 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
623 
624 	/* save original address for comparison */
625 	memcpy(&sin_orig, sin, sizeof(*sin));
626 
627 	colon = strchr(ifr.ifr_name, ':');
628 	if (colon)
629 		*colon = 0;
630 
631 #ifdef CONFIG_KMOD
632 	dev_load(ifr.ifr_name);
633 #endif
634 
635 	switch (cmd) {
636 	case SIOCGIFADDR:	/* Get interface address */
637 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
638 	case SIOCGIFDSTADDR:	/* Get the destination address */
639 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
640 		/* Note that these ioctls will not sleep,
641 		   so that we do not impose a lock.
642 		   One day we will be forced to put shlock here (I mean SMP)
643 		 */
644 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
645 		memset(sin, 0, sizeof(*sin));
646 		sin->sin_family = AF_INET;
647 		break;
648 
649 	case SIOCSIFFLAGS:
650 		ret = -EACCES;
651 		if (!capable(CAP_NET_ADMIN))
652 			goto out;
653 		break;
654 	case SIOCSIFADDR:	/* Set interface address (and family) */
655 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
656 	case SIOCSIFDSTADDR:	/* Set the destination address */
657 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
658 		ret = -EACCES;
659 		if (!capable(CAP_NET_ADMIN))
660 			goto out;
661 		ret = -EINVAL;
662 		if (sin->sin_family != AF_INET)
663 			goto out;
664 		break;
665 	default:
666 		ret = -EINVAL;
667 		goto out;
668 	}
669 
670 	rtnl_lock();
671 
672 	ret = -ENODEV;
673 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
674 		goto done;
675 
676 	if (colon)
677 		*colon = ':';
678 
679 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
680 		if (tryaddrmatch) {
681 			/* Matthias Andree */
682 			/* compare label and address (4.4BSD style) */
683 			/* note: we only do this for a limited set of ioctls
684 			   and only if the original address family was AF_INET.
685 			   This is checked above. */
686 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
687 			     ifap = &ifa->ifa_next) {
688 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
689 				    sin_orig.sin_addr.s_addr ==
690 							ifa->ifa_address) {
691 					break; /* found */
692 				}
693 			}
694 		}
695 		/* we didn't get a match, maybe the application is
696 		   4.3BSD-style and passed in junk so we fall back to
697 		   comparing just the label */
698 		if (!ifa) {
699 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
700 			     ifap = &ifa->ifa_next)
701 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
702 					break;
703 		}
704 	}
705 
706 	ret = -EADDRNOTAVAIL;
707 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
708 		goto done;
709 
710 	switch (cmd) {
711 	case SIOCGIFADDR:	/* Get interface address */
712 		sin->sin_addr.s_addr = ifa->ifa_local;
713 		goto rarok;
714 
715 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
716 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
717 		goto rarok;
718 
719 	case SIOCGIFDSTADDR:	/* Get the destination address */
720 		sin->sin_addr.s_addr = ifa->ifa_address;
721 		goto rarok;
722 
723 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
724 		sin->sin_addr.s_addr = ifa->ifa_mask;
725 		goto rarok;
726 
727 	case SIOCSIFFLAGS:
728 		if (colon) {
729 			ret = -EADDRNOTAVAIL;
730 			if (!ifa)
731 				break;
732 			ret = 0;
733 			if (!(ifr.ifr_flags & IFF_UP))
734 				inet_del_ifa(in_dev, ifap, 1);
735 			break;
736 		}
737 		ret = dev_change_flags(dev, ifr.ifr_flags);
738 		break;
739 
740 	case SIOCSIFADDR:	/* Set interface address (and family) */
741 		ret = -EINVAL;
742 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
743 			break;
744 
745 		if (!ifa) {
746 			ret = -ENOBUFS;
747 			if ((ifa = inet_alloc_ifa()) == NULL)
748 				break;
749 			if (colon)
750 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
751 			else
752 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
753 		} else {
754 			ret = 0;
755 			if (ifa->ifa_local == sin->sin_addr.s_addr)
756 				break;
757 			inet_del_ifa(in_dev, ifap, 0);
758 			ifa->ifa_broadcast = 0;
759 			ifa->ifa_anycast = 0;
760 		}
761 
762 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
763 
764 		if (!(dev->flags & IFF_POINTOPOINT)) {
765 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
766 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
767 			if ((dev->flags & IFF_BROADCAST) &&
768 			    ifa->ifa_prefixlen < 31)
769 				ifa->ifa_broadcast = ifa->ifa_address |
770 						     ~ifa->ifa_mask;
771 		} else {
772 			ifa->ifa_prefixlen = 32;
773 			ifa->ifa_mask = inet_make_mask(32);
774 		}
775 		ret = inet_set_ifa(dev, ifa);
776 		break;
777 
778 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
779 		ret = 0;
780 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
781 			inet_del_ifa(in_dev, ifap, 0);
782 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
783 			inet_insert_ifa(ifa);
784 		}
785 		break;
786 
787 	case SIOCSIFDSTADDR:	/* Set the destination address */
788 		ret = 0;
789 		if (ifa->ifa_address == sin->sin_addr.s_addr)
790 			break;
791 		ret = -EINVAL;
792 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
793 			break;
794 		ret = 0;
795 		inet_del_ifa(in_dev, ifap, 0);
796 		ifa->ifa_address = sin->sin_addr.s_addr;
797 		inet_insert_ifa(ifa);
798 		break;
799 
800 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
801 
802 		/*
803 		 *	The mask we set must be legal.
804 		 */
805 		ret = -EINVAL;
806 		if (bad_mask(sin->sin_addr.s_addr, 0))
807 			break;
808 		ret = 0;
809 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
810 			__be32 old_mask = ifa->ifa_mask;
811 			inet_del_ifa(in_dev, ifap, 0);
812 			ifa->ifa_mask = sin->sin_addr.s_addr;
813 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
814 
815 			/* See if current broadcast address matches
816 			 * with current netmask, then recalculate
817 			 * the broadcast address. Otherwise it's a
818 			 * funny address, so don't touch it since
819 			 * the user seems to know what (s)he's doing...
820 			 */
821 			if ((dev->flags & IFF_BROADCAST) &&
822 			    (ifa->ifa_prefixlen < 31) &&
823 			    (ifa->ifa_broadcast ==
824 			     (ifa->ifa_local|~old_mask))) {
825 				ifa->ifa_broadcast = (ifa->ifa_local |
826 						      ~sin->sin_addr.s_addr);
827 			}
828 			inet_insert_ifa(ifa);
829 		}
830 		break;
831 	}
832 done:
833 	rtnl_unlock();
834 out:
835 	return ret;
836 rarok:
837 	rtnl_unlock();
838 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
839 	goto out;
840 }
841 
842 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843 {
844 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
845 	struct in_ifaddr *ifa;
846 	struct ifreq ifr;
847 	int done = 0;
848 
849 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850 		goto out;
851 
852 	for (; ifa; ifa = ifa->ifa_next) {
853 		if (!buf) {
854 			done += sizeof(ifr);
855 			continue;
856 		}
857 		if (len < (int) sizeof(ifr))
858 			break;
859 		memset(&ifr, 0, sizeof(struct ifreq));
860 		if (ifa->ifa_label)
861 			strcpy(ifr.ifr_name, ifa->ifa_label);
862 		else
863 			strcpy(ifr.ifr_name, dev->name);
864 
865 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
866 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867 								ifa->ifa_local;
868 
869 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870 			done = -EFAULT;
871 			break;
872 		}
873 		buf  += sizeof(struct ifreq);
874 		len  -= sizeof(struct ifreq);
875 		done += sizeof(struct ifreq);
876 	}
877 out:
878 	return done;
879 }
880 
881 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882 {
883 	__be32 addr = 0;
884 	struct in_device *in_dev;
885 
886 	rcu_read_lock();
887 	in_dev = __in_dev_get_rcu(dev);
888 	if (!in_dev)
889 		goto no_in_dev;
890 
891 	for_primary_ifa(in_dev) {
892 		if (ifa->ifa_scope > scope)
893 			continue;
894 		if (!dst || inet_ifa_match(dst, ifa)) {
895 			addr = ifa->ifa_local;
896 			break;
897 		}
898 		if (!addr)
899 			addr = ifa->ifa_local;
900 	} endfor_ifa(in_dev);
901 no_in_dev:
902 	rcu_read_unlock();
903 
904 	if (addr)
905 		goto out;
906 
907 	/* Not loopback addresses on loopback should be preferred
908 	   in this case. It is importnat that lo is the first interface
909 	   in dev_base list.
910 	 */
911 	read_lock(&dev_base_lock);
912 	rcu_read_lock();
913 	for_each_netdev(dev) {
914 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915 			continue;
916 
917 		for_primary_ifa(in_dev) {
918 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
919 			    ifa->ifa_scope <= scope) {
920 				addr = ifa->ifa_local;
921 				goto out_unlock_both;
922 			}
923 		} endfor_ifa(in_dev);
924 	}
925 out_unlock_both:
926 	read_unlock(&dev_base_lock);
927 	rcu_read_unlock();
928 out:
929 	return addr;
930 }
931 
932 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
933 			      __be32 local, int scope)
934 {
935 	int same = 0;
936 	__be32 addr = 0;
937 
938 	for_ifa(in_dev) {
939 		if (!addr &&
940 		    (local == ifa->ifa_local || !local) &&
941 		    ifa->ifa_scope <= scope) {
942 			addr = ifa->ifa_local;
943 			if (same)
944 				break;
945 		}
946 		if (!same) {
947 			same = (!local || inet_ifa_match(local, ifa)) &&
948 				(!dst || inet_ifa_match(dst, ifa));
949 			if (same && addr) {
950 				if (local || !dst)
951 					break;
952 				/* Is the selected addr into dst subnet? */
953 				if (inet_ifa_match(addr, ifa))
954 					break;
955 				/* No, then can we use new local src? */
956 				if (ifa->ifa_scope <= scope) {
957 					addr = ifa->ifa_local;
958 					break;
959 				}
960 				/* search for large dst subnet for addr */
961 				same = 0;
962 			}
963 		}
964 	} endfor_ifa(in_dev);
965 
966 	return same? addr : 0;
967 }
968 
969 /*
970  * Confirm that local IP address exists using wildcards:
971  * - dev: only on this interface, 0=any interface
972  * - dst: only in the same subnet as dst, 0=any dst
973  * - local: address, 0=autoselect the local address
974  * - scope: maximum allowed scope value for the local address
975  */
976 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
977 {
978 	__be32 addr = 0;
979 	struct in_device *in_dev;
980 
981 	if (dev) {
982 		rcu_read_lock();
983 		if ((in_dev = __in_dev_get_rcu(dev)))
984 			addr = confirm_addr_indev(in_dev, dst, local, scope);
985 		rcu_read_unlock();
986 
987 		return addr;
988 	}
989 
990 	read_lock(&dev_base_lock);
991 	rcu_read_lock();
992 	for_each_netdev(dev) {
993 		if ((in_dev = __in_dev_get_rcu(dev))) {
994 			addr = confirm_addr_indev(in_dev, dst, local, scope);
995 			if (addr)
996 				break;
997 		}
998 	}
999 	rcu_read_unlock();
1000 	read_unlock(&dev_base_lock);
1001 
1002 	return addr;
1003 }
1004 
1005 /*
1006  *	Device notifier
1007  */
1008 
1009 int register_inetaddr_notifier(struct notifier_block *nb)
1010 {
1011 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1012 }
1013 
1014 int unregister_inetaddr_notifier(struct notifier_block *nb)
1015 {
1016 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1017 }
1018 
1019 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1020  * alias numbering and to create unique labels if possible.
1021 */
1022 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1023 {
1024 	struct in_ifaddr *ifa;
1025 	int named = 0;
1026 
1027 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1028 		char old[IFNAMSIZ], *dot;
1029 
1030 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1031 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1032 		if (named++ == 0)
1033 			continue;
1034 		dot = strchr(ifa->ifa_label, ':');
1035 		if (dot == NULL) {
1036 			sprintf(old, ":%d", named);
1037 			dot = old;
1038 		}
1039 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1040 			strcat(ifa->ifa_label, dot);
1041 		} else {
1042 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1043 		}
1044 	}
1045 }
1046 
1047 /* Called only under RTNL semaphore */
1048 
1049 static int inetdev_event(struct notifier_block *this, unsigned long event,
1050 			 void *ptr)
1051 {
1052 	struct net_device *dev = ptr;
1053 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1054 
1055 	ASSERT_RTNL();
1056 
1057 	if (!in_dev) {
1058 		if (event == NETDEV_REGISTER) {
1059 			in_dev = inetdev_init(dev);
1060 			if (!in_dev)
1061 				panic("devinet: Failed to create loopback\n");
1062 			if (dev == &loopback_dev) {
1063 				in_dev->cnf.no_xfrm = 1;
1064 				in_dev->cnf.no_policy = 1;
1065 			}
1066 		}
1067 		goto out;
1068 	}
1069 
1070 	switch (event) {
1071 	case NETDEV_REGISTER:
1072 		printk(KERN_DEBUG "inetdev_event: bug\n");
1073 		dev->ip_ptr = NULL;
1074 		break;
1075 	case NETDEV_UP:
1076 		if (dev->mtu < 68)
1077 			break;
1078 		if (dev == &loopback_dev) {
1079 			struct in_ifaddr *ifa;
1080 			if ((ifa = inet_alloc_ifa()) != NULL) {
1081 				ifa->ifa_local =
1082 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083 				ifa->ifa_prefixlen = 8;
1084 				ifa->ifa_mask = inet_make_mask(8);
1085 				in_dev_hold(in_dev);
1086 				ifa->ifa_dev = in_dev;
1087 				ifa->ifa_scope = RT_SCOPE_HOST;
1088 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089 				inet_insert_ifa(ifa);
1090 			}
1091 		}
1092 		ip_mc_up(in_dev);
1093 		break;
1094 	case NETDEV_DOWN:
1095 		ip_mc_down(in_dev);
1096 		break;
1097 	case NETDEV_CHANGEMTU:
1098 		if (dev->mtu >= 68)
1099 			break;
1100 		/* MTU falled under 68, disable IP */
1101 	case NETDEV_UNREGISTER:
1102 		inetdev_destroy(in_dev);
1103 		break;
1104 	case NETDEV_CHANGENAME:
1105 		/* Do not notify about label change, this event is
1106 		 * not interesting to applications using netlink.
1107 		 */
1108 		inetdev_changename(dev, in_dev);
1109 
1110 #ifdef CONFIG_SYSCTL
1111 		devinet_sysctl_unregister(&in_dev->cnf);
1112 		neigh_sysctl_unregister(in_dev->arp_parms);
1113 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1114 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1115 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1116 #endif
1117 		break;
1118 	}
1119 out:
1120 	return NOTIFY_DONE;
1121 }
1122 
1123 static struct notifier_block ip_netdev_notifier = {
1124 	.notifier_call =inetdev_event,
1125 };
1126 
1127 static inline size_t inet_nlmsg_size(void)
1128 {
1129 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1130 	       + nla_total_size(4) /* IFA_ADDRESS */
1131 	       + nla_total_size(4) /* IFA_LOCAL */
1132 	       + nla_total_size(4) /* IFA_BROADCAST */
1133 	       + nla_total_size(4) /* IFA_ANYCAST */
1134 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1135 }
1136 
1137 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1138 			    u32 pid, u32 seq, int event, unsigned int flags)
1139 {
1140 	struct ifaddrmsg *ifm;
1141 	struct nlmsghdr  *nlh;
1142 
1143 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1144 	if (nlh == NULL)
1145 		return -EMSGSIZE;
1146 
1147 	ifm = nlmsg_data(nlh);
1148 	ifm->ifa_family = AF_INET;
1149 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1150 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1151 	ifm->ifa_scope = ifa->ifa_scope;
1152 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1153 
1154 	if (ifa->ifa_address)
1155 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1156 
1157 	if (ifa->ifa_local)
1158 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1159 
1160 	if (ifa->ifa_broadcast)
1161 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1162 
1163 	if (ifa->ifa_anycast)
1164 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1165 
1166 	if (ifa->ifa_label[0])
1167 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168 
1169 	return nlmsg_end(skb, nlh);
1170 
1171 nla_put_failure:
1172 	nlmsg_cancel(skb, nlh);
1173 	return -EMSGSIZE;
1174 }
1175 
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177 {
1178 	int idx, ip_idx;
1179 	struct net_device *dev;
1180 	struct in_device *in_dev;
1181 	struct in_ifaddr *ifa;
1182 	int s_ip_idx, s_idx = cb->args[0];
1183 
1184 	s_ip_idx = ip_idx = cb->args[1];
1185 	idx = 0;
1186 	for_each_netdev(dev) {
1187 		if (idx < s_idx)
1188 			goto cont;
1189 		if (idx > s_idx)
1190 			s_ip_idx = 0;
1191 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1192 			goto cont;
1193 
1194 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 		     ifa = ifa->ifa_next, ip_idx++) {
1196 			if (ip_idx < s_ip_idx)
1197 				goto cont;
1198 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 					     cb->nlh->nlmsg_seq,
1200 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1201 				goto done;
1202 		}
1203 cont:
1204 		idx++;
1205 	}
1206 
1207 done:
1208 	cb->args[0] = idx;
1209 	cb->args[1] = ip_idx;
1210 
1211 	return skb->len;
1212 }
1213 
1214 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1215 		      u32 pid)
1216 {
1217 	struct sk_buff *skb;
1218 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219 	int err = -ENOBUFS;
1220 
1221 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222 	if (skb == NULL)
1223 		goto errout;
1224 
1225 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226 	if (err < 0) {
1227 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228 		WARN_ON(err == -EMSGSIZE);
1229 		kfree_skb(skb);
1230 		goto errout;
1231 	}
1232 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233 errout:
1234 	if (err < 0)
1235 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1236 }
1237 
1238 #ifdef CONFIG_SYSCTL
1239 
1240 void inet_forward_change(void)
1241 {
1242 	struct net_device *dev;
1243 	int on = ipv4_devconf.forwarding;
1244 
1245 	ipv4_devconf.accept_redirects = !on;
1246 	ipv4_devconf_dflt.forwarding = on;
1247 
1248 	read_lock(&dev_base_lock);
1249 	for_each_netdev(dev) {
1250 		struct in_device *in_dev;
1251 		rcu_read_lock();
1252 		in_dev = __in_dev_get_rcu(dev);
1253 		if (in_dev)
1254 			in_dev->cnf.forwarding = on;
1255 		rcu_read_unlock();
1256 	}
1257 	read_unlock(&dev_base_lock);
1258 
1259 	rt_cache_flush(0);
1260 }
1261 
1262 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1263 				  struct file* filp, void __user *buffer,
1264 				  size_t *lenp, loff_t *ppos)
1265 {
1266 	int *valp = ctl->data;
1267 	int val = *valp;
1268 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1269 
1270 	if (write && *valp != val) {
1271 		if (valp == &ipv4_devconf.forwarding)
1272 			inet_forward_change();
1273 		else if (valp != &ipv4_devconf_dflt.forwarding)
1274 			rt_cache_flush(0);
1275 	}
1276 
1277 	return ret;
1278 }
1279 
1280 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1281 			 struct file* filp, void __user *buffer,
1282 			 size_t *lenp, loff_t *ppos)
1283 {
1284 	int *valp = ctl->data;
1285 	int val = *valp;
1286 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1287 
1288 	if (write && *valp != val)
1289 		rt_cache_flush(0);
1290 
1291 	return ret;
1292 }
1293 
1294 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1295 				  void __user *oldval, size_t __user *oldlenp,
1296 				  void __user *newval, size_t newlen)
1297 {
1298 	int *valp = table->data;
1299 	int new;
1300 
1301 	if (!newval || !newlen)
1302 		return 0;
1303 
1304 	if (newlen != sizeof(int))
1305 		return -EINVAL;
1306 
1307 	if (get_user(new, (int __user *)newval))
1308 		return -EFAULT;
1309 
1310 	if (new == *valp)
1311 		return 0;
1312 
1313 	if (oldval && oldlenp) {
1314 		size_t len;
1315 
1316 		if (get_user(len, oldlenp))
1317 			return -EFAULT;
1318 
1319 		if (len) {
1320 			if (len > table->maxlen)
1321 				len = table->maxlen;
1322 			if (copy_to_user(oldval, valp, len))
1323 				return -EFAULT;
1324 			if (put_user(len, oldlenp))
1325 				return -EFAULT;
1326 		}
1327 	}
1328 
1329 	*valp = new;
1330 	rt_cache_flush(0);
1331 	return 1;
1332 }
1333 
1334 
1335 static struct devinet_sysctl_table {
1336 	struct ctl_table_header *sysctl_header;
1337 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1338 	ctl_table		devinet_dev[2];
1339 	ctl_table		devinet_conf_dir[2];
1340 	ctl_table		devinet_proto_dir[2];
1341 	ctl_table		devinet_root_dir[2];
1342 } devinet_sysctl = {
1343 	.devinet_vars = {
1344 		{
1345 			.ctl_name	= NET_IPV4_CONF_FORWARDING,
1346 			.procname	= "forwarding",
1347 			.data		= &ipv4_devconf.forwarding,
1348 			.maxlen		= sizeof(int),
1349 			.mode		= 0644,
1350 			.proc_handler	= &devinet_sysctl_forward,
1351 		},
1352 		{
1353 			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
1354 			.procname	= "mc_forwarding",
1355 			.data		= &ipv4_devconf.mc_forwarding,
1356 			.maxlen		= sizeof(int),
1357 			.mode		= 0444,
1358 			.proc_handler	= &proc_dointvec,
1359 		},
1360 		{
1361 			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
1362 			.procname	= "accept_redirects",
1363 			.data		= &ipv4_devconf.accept_redirects,
1364 			.maxlen		= sizeof(int),
1365 			.mode		= 0644,
1366 			.proc_handler	= &proc_dointvec,
1367 		},
1368 		{
1369 			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
1370 			.procname	= "secure_redirects",
1371 			.data		= &ipv4_devconf.secure_redirects,
1372 			.maxlen		= sizeof(int),
1373 			.mode		= 0644,
1374 			.proc_handler	= &proc_dointvec,
1375 		},
1376 		{
1377 			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
1378 			.procname	= "shared_media",
1379 			.data		= &ipv4_devconf.shared_media,
1380 			.maxlen		= sizeof(int),
1381 			.mode		= 0644,
1382 			.proc_handler	= &proc_dointvec,
1383 		},
1384 		{
1385 			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
1386 			.procname	= "rp_filter",
1387 			.data		= &ipv4_devconf.rp_filter,
1388 			.maxlen		= sizeof(int),
1389 			.mode		= 0644,
1390 			.proc_handler	= &proc_dointvec,
1391 		},
1392 		{
1393 			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
1394 			.procname	= "send_redirects",
1395 			.data		= &ipv4_devconf.send_redirects,
1396 			.maxlen		= sizeof(int),
1397 			.mode		= 0644,
1398 			.proc_handler	= &proc_dointvec,
1399 		},
1400 		{
1401 			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1402 			.procname	= "accept_source_route",
1403 			.data		= &ipv4_devconf.accept_source_route,
1404 			.maxlen		= sizeof(int),
1405 			.mode		= 0644,
1406 			.proc_handler	= &proc_dointvec,
1407 		},
1408 		{
1409 			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
1410 			.procname	= "proxy_arp",
1411 			.data		= &ipv4_devconf.proxy_arp,
1412 			.maxlen		= sizeof(int),
1413 			.mode		= 0644,
1414 			.proc_handler	= &proc_dointvec,
1415 		},
1416 		{
1417 			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
1418 			.procname	= "medium_id",
1419 			.data		= &ipv4_devconf.medium_id,
1420 			.maxlen		= sizeof(int),
1421 			.mode		= 0644,
1422 			.proc_handler	= &proc_dointvec,
1423 		},
1424 		{
1425 			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
1426 			.procname	= "bootp_relay",
1427 			.data		= &ipv4_devconf.bootp_relay,
1428 			.maxlen		= sizeof(int),
1429 			.mode		= 0644,
1430 			.proc_handler	= &proc_dointvec,
1431 		},
1432 		{
1433 			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
1434 			.procname	= "log_martians",
1435 			.data		= &ipv4_devconf.log_martians,
1436 			.maxlen		= sizeof(int),
1437 			.mode		= 0644,
1438 			.proc_handler	= &proc_dointvec,
1439 		},
1440 		{
1441 			.ctl_name	= NET_IPV4_CONF_TAG,
1442 			.procname	= "tag",
1443 			.data		= &ipv4_devconf.tag,
1444 			.maxlen		= sizeof(int),
1445 			.mode		= 0644,
1446 			.proc_handler	= &proc_dointvec,
1447 		},
1448 		{
1449 			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
1450 			.procname	= "arp_filter",
1451 			.data		= &ipv4_devconf.arp_filter,
1452 			.maxlen		= sizeof(int),
1453 			.mode		= 0644,
1454 			.proc_handler	= &proc_dointvec,
1455 		},
1456 		{
1457 			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
1458 			.procname	= "arp_announce",
1459 			.data		= &ipv4_devconf.arp_announce,
1460 			.maxlen		= sizeof(int),
1461 			.mode		= 0644,
1462 			.proc_handler	= &proc_dointvec,
1463 		},
1464 		{
1465 			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
1466 			.procname	= "arp_ignore",
1467 			.data		= &ipv4_devconf.arp_ignore,
1468 			.maxlen		= sizeof(int),
1469 			.mode		= 0644,
1470 			.proc_handler	= &proc_dointvec,
1471 		},
1472 		{
1473 			.ctl_name	= NET_IPV4_CONF_ARP_ACCEPT,
1474 			.procname	= "arp_accept",
1475 			.data		= &ipv4_devconf.arp_accept,
1476 			.maxlen		= sizeof(int),
1477 			.mode		= 0644,
1478 			.proc_handler	= &proc_dointvec,
1479 		},
1480 		{
1481 			.ctl_name	= NET_IPV4_CONF_NOXFRM,
1482 			.procname	= "disable_xfrm",
1483 			.data		= &ipv4_devconf.no_xfrm,
1484 			.maxlen		= sizeof(int),
1485 			.mode		= 0644,
1486 			.proc_handler	= &ipv4_doint_and_flush,
1487 			.strategy	= &ipv4_doint_and_flush_strategy,
1488 		},
1489 		{
1490 			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
1491 			.procname	= "disable_policy",
1492 			.data		= &ipv4_devconf.no_policy,
1493 			.maxlen		= sizeof(int),
1494 			.mode		= 0644,
1495 			.proc_handler	= &ipv4_doint_and_flush,
1496 			.strategy	= &ipv4_doint_and_flush_strategy,
1497 		},
1498 		{
1499 			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
1500 			.procname	= "force_igmp_version",
1501 			.data		= &ipv4_devconf.force_igmp_version,
1502 			.maxlen		= sizeof(int),
1503 			.mode		= 0644,
1504 			.proc_handler	= &ipv4_doint_and_flush,
1505 			.strategy	= &ipv4_doint_and_flush_strategy,
1506 		},
1507 		{
1508 			.ctl_name	= NET_IPV4_CONF_PROMOTE_SECONDARIES,
1509 			.procname	= "promote_secondaries",
1510 			.data		= &ipv4_devconf.promote_secondaries,
1511 			.maxlen		= sizeof(int),
1512 			.mode		= 0644,
1513 			.proc_handler	= &ipv4_doint_and_flush,
1514 			.strategy	= &ipv4_doint_and_flush_strategy,
1515 		},
1516 	},
1517 	.devinet_dev = {
1518 		{
1519 			.ctl_name	= NET_PROTO_CONF_ALL,
1520 			.procname	= "all",
1521 			.mode		= 0555,
1522 			.child		= devinet_sysctl.devinet_vars,
1523 		},
1524 	},
1525 	.devinet_conf_dir = {
1526 		{
1527 			.ctl_name	= NET_IPV4_CONF,
1528 			.procname	= "conf",
1529 			.mode		= 0555,
1530 			.child		= devinet_sysctl.devinet_dev,
1531 		},
1532 	},
1533 	.devinet_proto_dir = {
1534 		{
1535 			.ctl_name	= NET_IPV4,
1536 			.procname	= "ipv4",
1537 			.mode		= 0555,
1538 			.child 		= devinet_sysctl.devinet_conf_dir,
1539 		},
1540 	},
1541 	.devinet_root_dir = {
1542 		{
1543 			.ctl_name	= CTL_NET,
1544 			.procname 	= "net",
1545 			.mode		= 0555,
1546 			.child		= devinet_sysctl.devinet_proto_dir,
1547 		},
1548 	},
1549 };
1550 
1551 static void devinet_sysctl_register(struct in_device *in_dev,
1552 				    struct ipv4_devconf *p)
1553 {
1554 	int i;
1555 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1556 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1557 						 GFP_KERNEL);
1558 	char *dev_name = NULL;
1559 
1560 	if (!t)
1561 		return;
1562 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1563 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1564 	}
1565 
1566 	if (dev) {
1567 		dev_name = dev->name;
1568 		t->devinet_dev[0].ctl_name = dev->ifindex;
1569 	} else {
1570 		dev_name = "default";
1571 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1572 	}
1573 
1574 	/*
1575 	 * Make a copy of dev_name, because '.procname' is regarded as const
1576 	 * by sysctl and we wouldn't want anyone to change it under our feet
1577 	 * (see SIOCSIFNAME).
1578 	 */
1579 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1580 	if (!dev_name)
1581 	    goto free;
1582 
1583 	t->devinet_dev[0].procname    = dev_name;
1584 	t->devinet_dev[0].child	      = t->devinet_vars;
1585 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1586 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1587 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1588 
1589 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1590 	if (!t->sysctl_header)
1591 	    goto free_procname;
1592 
1593 	p->sysctl = t;
1594 	return;
1595 
1596 	/* error path */
1597  free_procname:
1598 	kfree(dev_name);
1599  free:
1600 	kfree(t);
1601 	return;
1602 }
1603 
1604 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1605 {
1606 	if (p->sysctl) {
1607 		struct devinet_sysctl_table *t = p->sysctl;
1608 		p->sysctl = NULL;
1609 		unregister_sysctl_table(t->sysctl_header);
1610 		kfree(t->devinet_dev[0].procname);
1611 		kfree(t);
1612 	}
1613 }
1614 #endif
1615 
1616 void __init devinet_init(void)
1617 {
1618 	register_gifconf(PF_INET, inet_gifconf);
1619 	register_netdevice_notifier(&ip_netdev_notifier);
1620 
1621 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1622 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1623 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1624 #ifdef CONFIG_SYSCTL
1625 	devinet_sysctl.sysctl_header =
1626 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
1627 	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1628 #endif
1629 }
1630 
/* Symbols from this file made available to modules via EXPORT_SYMBOL. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1636