xref: /openbmc/linux/net/ipv4/devinet.c (revision 545e4006)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57 
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 
65 static struct ipv4_devconf ipv4_devconf = {
66 	.data = {
67 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71 	},
72 };
73 
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 	.data = {
76 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81 	},
82 };
83 
/*
 * Look up attribute "attr" in the devconf_dflt table that hangs off
 * net->ipv4. The "net" argument is parenthesised so that any pointer
 * expression (not just a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86 
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93 
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95 
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98 			 int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110 
111 /* Locks all the inet devices. */
112 
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116 
117 	if (ifa) {
118 		INIT_RCU_HEAD(&ifa->rcu_head);
119 	}
120 
121 	return ifa;
122 }
123 
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127 	if (ifa->ifa_dev)
128 		in_dev_put(ifa->ifa_dev);
129 	kfree(ifa);
130 }
131 
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136 
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139 	struct net_device *dev = idev->dev;
140 
141 	BUG_TRAP(!idev->ifa_list);
142 	BUG_TRAP(!idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145 	       idev, dev ? dev->name : "NIL");
146 #endif
147 	dev_put(dev);
148 	if (!idev->dead)
149 		printk("Freeing alive in_device %p\n", idev);
150 	else {
151 		kfree(idev);
152 	}
153 }
154 
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157 	struct in_device *in_dev;
158 
159 	ASSERT_RTNL();
160 
161 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162 	if (!in_dev)
163 		goto out;
164 	INIT_RCU_HEAD(&in_dev->rcu_head);
165 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166 			sizeof(in_dev->cnf));
167 	in_dev->cnf.sysctl = NULL;
168 	in_dev->dev = dev;
169 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170 		goto out_kfree;
171 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172 		dev_disable_lro(dev);
173 	/* Reference in_dev->dev */
174 	dev_hold(dev);
175 	/* Account for reference dev->ip_ptr (below) */
176 	in_dev_hold(in_dev);
177 
178 	devinet_sysctl_register(in_dev);
179 	ip_mc_init_dev(in_dev);
180 	if (dev->flags & IFF_UP)
181 		ip_mc_up(in_dev);
182 
183 	/* we can receive as soon as ip_ptr is set -- do this last */
184 	rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186 	return in_dev;
187 out_kfree:
188 	kfree(in_dev);
189 	in_dev = NULL;
190 	goto out;
191 }
192 
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
196 	in_dev_put(idev);
197 }
198 
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201 	struct in_ifaddr *ifa;
202 	struct net_device *dev;
203 
204 	ASSERT_RTNL();
205 
206 	dev = in_dev->dev;
207 
208 	in_dev->dead = 1;
209 
210 	ip_mc_destroy_dev(in_dev);
211 
212 	while ((ifa = in_dev->ifa_list) != NULL) {
213 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 		inet_free_ifa(ifa);
215 	}
216 
217 	dev->ip_ptr = NULL;
218 
219 	devinet_sysctl_unregister(in_dev);
220 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 	arp_ifdown(dev);
222 
223 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225 
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228 	rcu_read_lock();
229 	for_primary_ifa(in_dev) {
230 		if (inet_ifa_match(a, ifa)) {
231 			if (!b || inet_ifa_match(b, ifa)) {
232 				rcu_read_unlock();
233 				return 1;
234 			}
235 		}
236 	} endfor_ifa(in_dev);
237 	rcu_read_unlock();
238 	return 0;
239 }
240 
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242 			 int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244 	struct in_ifaddr *promote = NULL;
245 	struct in_ifaddr *ifa, *ifa1 = *ifap;
246 	struct in_ifaddr *last_prim = in_dev->ifa_list;
247 	struct in_ifaddr *prev_prom = NULL;
248 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249 
250 	ASSERT_RTNL();
251 
252 	/* 1. Deleting primary ifaddr forces deletion all secondaries
253 	 * unless alias promotion is set
254 	 **/
255 
256 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258 
259 		while ((ifa = *ifap1) != NULL) {
260 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261 			    ifa1->ifa_scope <= ifa->ifa_scope)
262 				last_prim = ifa;
263 
264 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265 			    ifa1->ifa_mask != ifa->ifa_mask ||
266 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
267 				ifap1 = &ifa->ifa_next;
268 				prev_prom = ifa;
269 				continue;
270 			}
271 
272 			if (!do_promote) {
273 				*ifap1 = ifa->ifa_next;
274 
275 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276 				blocking_notifier_call_chain(&inetaddr_chain,
277 						NETDEV_DOWN, ifa);
278 				inet_free_ifa(ifa);
279 			} else {
280 				promote = ifa;
281 				break;
282 			}
283 		}
284 	}
285 
286 	/* 2. Unlink it */
287 
288 	*ifap = ifa1->ifa_next;
289 
290 	/* 3. Announce address deletion */
291 
292 	/* Send message first, then call notifier.
293 	   At first sight, FIB update triggered by notifier
294 	   will refer to already deleted ifaddr, that could confuse
295 	   netlink listeners. It is not true: look, gated sees
296 	   that route deleted and if it still thinks that ifaddr
297 	   is valid, it will try to restore deleted routes... Grr.
298 	   So that, this order is correct.
299 	 */
300 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302 
303 	if (promote) {
304 
305 		if (prev_prom) {
306 			prev_prom->ifa_next = promote->ifa_next;
307 			promote->ifa_next = last_prim->ifa_next;
308 			last_prim->ifa_next = promote;
309 		}
310 
311 		promote->ifa_flags &= ~IFA_F_SECONDARY;
312 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313 		blocking_notifier_call_chain(&inetaddr_chain,
314 				NETDEV_UP, promote);
315 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316 			if (ifa1->ifa_mask != ifa->ifa_mask ||
317 			    !inet_ifa_match(ifa1->ifa_address, ifa))
318 					continue;
319 			fib_add_ifaddr(ifa);
320 		}
321 
322 	}
323 	if (destroy)
324 		inet_free_ifa(ifa1);
325 }
326 
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 			 int destroy)
329 {
330 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332 
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 			     u32 pid)
335 {
336 	struct in_device *in_dev = ifa->ifa_dev;
337 	struct in_ifaddr *ifa1, **ifap, **last_primary;
338 
339 	ASSERT_RTNL();
340 
341 	if (!ifa->ifa_local) {
342 		inet_free_ifa(ifa);
343 		return 0;
344 	}
345 
346 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
347 	last_primary = &in_dev->ifa_list;
348 
349 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350 	     ifap = &ifa1->ifa_next) {
351 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352 		    ifa->ifa_scope <= ifa1->ifa_scope)
353 			last_primary = &ifa1->ifa_next;
354 		if (ifa1->ifa_mask == ifa->ifa_mask &&
355 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
356 			if (ifa1->ifa_local == ifa->ifa_local) {
357 				inet_free_ifa(ifa);
358 				return -EEXIST;
359 			}
360 			if (ifa1->ifa_scope != ifa->ifa_scope) {
361 				inet_free_ifa(ifa);
362 				return -EINVAL;
363 			}
364 			ifa->ifa_flags |= IFA_F_SECONDARY;
365 		}
366 	}
367 
368 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369 		net_srandom(ifa->ifa_local);
370 		ifap = last_primary;
371 	}
372 
373 	ifa->ifa_next = *ifap;
374 	*ifap = ifa;
375 
376 	/* Send message first, then call notifier.
377 	   Notifier will trigger FIB update, so that
378 	   listeners of netlink will know about new ifaddr */
379 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381 
382 	return 0;
383 }
384 
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387 	return __inet_insert_ifa(ifa, NULL, 0);
388 }
389 
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
393 
394 	ASSERT_RTNL();
395 
396 	if (!in_dev) {
397 		inet_free_ifa(ifa);
398 		return -ENOBUFS;
399 	}
400 	ipv4_devconf_setall(in_dev);
401 	if (ifa->ifa_dev != in_dev) {
402 		BUG_TRAP(!ifa->ifa_dev);
403 		in_dev_hold(in_dev);
404 		ifa->ifa_dev = in_dev;
405 	}
406 	if (ipv4_is_loopback(ifa->ifa_local))
407 		ifa->ifa_scope = RT_SCOPE_HOST;
408 	return inet_insert_ifa(ifa);
409 }
410 
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413 	struct net_device *dev;
414 	struct in_device *in_dev = NULL;
415 	read_lock(&dev_base_lock);
416 	dev = __dev_get_by_index(net, ifindex);
417 	if (dev)
418 		in_dev = in_dev_get(dev);
419 	read_unlock(&dev_base_lock);
420 	return in_dev;
421 }
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	__in_dev_put(in_dev);
460 
461 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462 	     ifap = &ifa->ifa_next) {
463 		if (tb[IFA_LOCAL] &&
464 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 			continue;
466 
467 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 			continue;
469 
470 		if (tb[IFA_ADDRESS] &&
471 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 			continue;
474 
475 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476 		return 0;
477 	}
478 
479 	err = -EADDRNOTAVAIL;
480 errout:
481 	return err;
482 }
483 
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486 	struct nlattr *tb[IFA_MAX+1];
487 	struct in_ifaddr *ifa;
488 	struct ifaddrmsg *ifm;
489 	struct net_device *dev;
490 	struct in_device *in_dev;
491 	int err;
492 
493 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494 	if (err < 0)
495 		goto errout;
496 
497 	ifm = nlmsg_data(nlh);
498 	err = -EINVAL;
499 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 		goto errout;
501 
502 	dev = __dev_get_by_index(net, ifm->ifa_index);
503 	err = -ENODEV;
504 	if (dev == NULL)
505 		goto errout;
506 
507 	in_dev = __in_dev_get_rtnl(dev);
508 	err = -ENOBUFS;
509 	if (in_dev == NULL)
510 		goto errout;
511 
512 	ifa = inet_alloc_ifa();
513 	if (ifa == NULL)
514 		/*
515 		 * A potential indev allocation can be left alive, it stays
516 		 * assigned to its device and is destroy with it.
517 		 */
518 		goto errout;
519 
520 	ipv4_devconf_setall(in_dev);
521 	in_dev_hold(in_dev);
522 
523 	if (tb[IFA_ADDRESS] == NULL)
524 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525 
526 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528 	ifa->ifa_flags = ifm->ifa_flags;
529 	ifa->ifa_scope = ifm->ifa_scope;
530 	ifa->ifa_dev = in_dev;
531 
532 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534 
535 	if (tb[IFA_BROADCAST])
536 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 
538 	if (tb[IFA_LABEL])
539 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540 	else
541 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542 
543 	return ifa;
544 
545 errout:
546 	return ERR_PTR(err);
547 }
548 
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551 	struct net *net = sock_net(skb->sk);
552 	struct in_ifaddr *ifa;
553 
554 	ASSERT_RTNL();
555 
556 	ifa = rtm_to_ifaddr(net, nlh);
557 	if (IS_ERR(ifa))
558 		return PTR_ERR(ifa);
559 
560 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562 
563 /*
564  *	Determine a default network mask, based on the IP address.
565  */
566 
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569 	int rc = -1;	/* Something else, probably a multicast. */
570 
571 	if (ipv4_is_zeronet(addr))
572 		rc = 0;
573 	else {
574 		__u32 haddr = ntohl(addr);
575 
576 		if (IN_CLASSA(haddr))
577 			rc = 8;
578 		else if (IN_CLASSB(haddr))
579 			rc = 16;
580 		else if (IN_CLASSC(haddr))
581 			rc = 24;
582 	}
583 
584 	return rc;
585 }
586 
587 
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590 	struct ifreq ifr;
591 	struct sockaddr_in sin_orig;
592 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593 	struct in_device *in_dev;
594 	struct in_ifaddr **ifap = NULL;
595 	struct in_ifaddr *ifa = NULL;
596 	struct net_device *dev;
597 	char *colon;
598 	int ret = -EFAULT;
599 	int tryaddrmatch = 0;
600 
601 	/*
602 	 *	Fetch the caller's info block into kernel space
603 	 */
604 
605 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606 		goto out;
607 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
608 
609 	/* save original address for comparison */
610 	memcpy(&sin_orig, sin, sizeof(*sin));
611 
612 	colon = strchr(ifr.ifr_name, ':');
613 	if (colon)
614 		*colon = 0;
615 
616 #ifdef CONFIG_KMOD
617 	dev_load(net, ifr.ifr_name);
618 #endif
619 
620 	switch (cmd) {
621 	case SIOCGIFADDR:	/* Get interface address */
622 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
623 	case SIOCGIFDSTADDR:	/* Get the destination address */
624 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
625 		/* Note that these ioctls will not sleep,
626 		   so that we do not impose a lock.
627 		   One day we will be forced to put shlock here (I mean SMP)
628 		 */
629 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
630 		memset(sin, 0, sizeof(*sin));
631 		sin->sin_family = AF_INET;
632 		break;
633 
634 	case SIOCSIFFLAGS:
635 		ret = -EACCES;
636 		if (!capable(CAP_NET_ADMIN))
637 			goto out;
638 		break;
639 	case SIOCSIFADDR:	/* Set interface address (and family) */
640 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
641 	case SIOCSIFDSTADDR:	/* Set the destination address */
642 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
643 		ret = -EACCES;
644 		if (!capable(CAP_NET_ADMIN))
645 			goto out;
646 		ret = -EINVAL;
647 		if (sin->sin_family != AF_INET)
648 			goto out;
649 		break;
650 	default:
651 		ret = -EINVAL;
652 		goto out;
653 	}
654 
655 	rtnl_lock();
656 
657 	ret = -ENODEV;
658 	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
659 		goto done;
660 
661 	if (colon)
662 		*colon = ':';
663 
664 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
665 		if (tryaddrmatch) {
666 			/* Matthias Andree */
667 			/* compare label and address (4.4BSD style) */
668 			/* note: we only do this for a limited set of ioctls
669 			   and only if the original address family was AF_INET.
670 			   This is checked above. */
671 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
672 			     ifap = &ifa->ifa_next) {
673 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
674 				    sin_orig.sin_addr.s_addr ==
675 							ifa->ifa_address) {
676 					break; /* found */
677 				}
678 			}
679 		}
680 		/* we didn't get a match, maybe the application is
681 		   4.3BSD-style and passed in junk so we fall back to
682 		   comparing just the label */
683 		if (!ifa) {
684 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685 			     ifap = &ifa->ifa_next)
686 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687 					break;
688 		}
689 	}
690 
691 	ret = -EADDRNOTAVAIL;
692 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693 		goto done;
694 
695 	switch (cmd) {
696 	case SIOCGIFADDR:	/* Get interface address */
697 		sin->sin_addr.s_addr = ifa->ifa_local;
698 		goto rarok;
699 
700 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
701 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
702 		goto rarok;
703 
704 	case SIOCGIFDSTADDR:	/* Get the destination address */
705 		sin->sin_addr.s_addr = ifa->ifa_address;
706 		goto rarok;
707 
708 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
709 		sin->sin_addr.s_addr = ifa->ifa_mask;
710 		goto rarok;
711 
712 	case SIOCSIFFLAGS:
713 		if (colon) {
714 			ret = -EADDRNOTAVAIL;
715 			if (!ifa)
716 				break;
717 			ret = 0;
718 			if (!(ifr.ifr_flags & IFF_UP))
719 				inet_del_ifa(in_dev, ifap, 1);
720 			break;
721 		}
722 		ret = dev_change_flags(dev, ifr.ifr_flags);
723 		break;
724 
725 	case SIOCSIFADDR:	/* Set interface address (and family) */
726 		ret = -EINVAL;
727 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728 			break;
729 
730 		if (!ifa) {
731 			ret = -ENOBUFS;
732 			if ((ifa = inet_alloc_ifa()) == NULL)
733 				break;
734 			if (colon)
735 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736 			else
737 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738 		} else {
739 			ret = 0;
740 			if (ifa->ifa_local == sin->sin_addr.s_addr)
741 				break;
742 			inet_del_ifa(in_dev, ifap, 0);
743 			ifa->ifa_broadcast = 0;
744 			ifa->ifa_scope = 0;
745 		}
746 
747 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748 
749 		if (!(dev->flags & IFF_POINTOPOINT)) {
750 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
751 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
752 			if ((dev->flags & IFF_BROADCAST) &&
753 			    ifa->ifa_prefixlen < 31)
754 				ifa->ifa_broadcast = ifa->ifa_address |
755 						     ~ifa->ifa_mask;
756 		} else {
757 			ifa->ifa_prefixlen = 32;
758 			ifa->ifa_mask = inet_make_mask(32);
759 		}
760 		ret = inet_set_ifa(dev, ifa);
761 		break;
762 
763 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
764 		ret = 0;
765 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
766 			inet_del_ifa(in_dev, ifap, 0);
767 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
768 			inet_insert_ifa(ifa);
769 		}
770 		break;
771 
772 	case SIOCSIFDSTADDR:	/* Set the destination address */
773 		ret = 0;
774 		if (ifa->ifa_address == sin->sin_addr.s_addr)
775 			break;
776 		ret = -EINVAL;
777 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778 			break;
779 		ret = 0;
780 		inet_del_ifa(in_dev, ifap, 0);
781 		ifa->ifa_address = sin->sin_addr.s_addr;
782 		inet_insert_ifa(ifa);
783 		break;
784 
785 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
786 
787 		/*
788 		 *	The mask we set must be legal.
789 		 */
790 		ret = -EINVAL;
791 		if (bad_mask(sin->sin_addr.s_addr, 0))
792 			break;
793 		ret = 0;
794 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
795 			__be32 old_mask = ifa->ifa_mask;
796 			inet_del_ifa(in_dev, ifap, 0);
797 			ifa->ifa_mask = sin->sin_addr.s_addr;
798 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799 
800 			/* See if current broadcast address matches
801 			 * with current netmask, then recalculate
802 			 * the broadcast address. Otherwise it's a
803 			 * funny address, so don't touch it since
804 			 * the user seems to know what (s)he's doing...
805 			 */
806 			if ((dev->flags & IFF_BROADCAST) &&
807 			    (ifa->ifa_prefixlen < 31) &&
808 			    (ifa->ifa_broadcast ==
809 			     (ifa->ifa_local|~old_mask))) {
810 				ifa->ifa_broadcast = (ifa->ifa_local |
811 						      ~sin->sin_addr.s_addr);
812 			}
813 			inet_insert_ifa(ifa);
814 		}
815 		break;
816 	}
817 done:
818 	rtnl_unlock();
819 out:
820 	return ret;
821 rarok:
822 	rtnl_unlock();
823 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
824 	goto out;
825 }
826 
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
830 	struct in_ifaddr *ifa;
831 	struct ifreq ifr;
832 	int done = 0;
833 
834 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835 		goto out;
836 
837 	for (; ifa; ifa = ifa->ifa_next) {
838 		if (!buf) {
839 			done += sizeof(ifr);
840 			continue;
841 		}
842 		if (len < (int) sizeof(ifr))
843 			break;
844 		memset(&ifr, 0, sizeof(struct ifreq));
845 		if (ifa->ifa_label)
846 			strcpy(ifr.ifr_name, ifa->ifa_label);
847 		else
848 			strcpy(ifr.ifr_name, dev->name);
849 
850 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852 								ifa->ifa_local;
853 
854 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855 			done = -EFAULT;
856 			break;
857 		}
858 		buf  += sizeof(struct ifreq);
859 		len  -= sizeof(struct ifreq);
860 		done += sizeof(struct ifreq);
861 	}
862 out:
863 	return done;
864 }
865 
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868 	__be32 addr = 0;
869 	struct in_device *in_dev;
870 	struct net *net = dev_net(dev);
871 
872 	rcu_read_lock();
873 	in_dev = __in_dev_get_rcu(dev);
874 	if (!in_dev)
875 		goto no_in_dev;
876 
877 	for_primary_ifa(in_dev) {
878 		if (ifa->ifa_scope > scope)
879 			continue;
880 		if (!dst || inet_ifa_match(dst, ifa)) {
881 			addr = ifa->ifa_local;
882 			break;
883 		}
884 		if (!addr)
885 			addr = ifa->ifa_local;
886 	} endfor_ifa(in_dev);
887 no_in_dev:
888 	rcu_read_unlock();
889 
890 	if (addr)
891 		goto out;
892 
893 	/* Not loopback addresses on loopback should be preferred
894 	   in this case. It is importnat that lo is the first interface
895 	   in dev_base list.
896 	 */
897 	read_lock(&dev_base_lock);
898 	rcu_read_lock();
899 	for_each_netdev(net, dev) {
900 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901 			continue;
902 
903 		for_primary_ifa(in_dev) {
904 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
905 			    ifa->ifa_scope <= scope) {
906 				addr = ifa->ifa_local;
907 				goto out_unlock_both;
908 			}
909 		} endfor_ifa(in_dev);
910 	}
911 out_unlock_both:
912 	read_unlock(&dev_base_lock);
913 	rcu_read_unlock();
914 out:
915 	return addr;
916 }
917 
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919 			      __be32 local, int scope)
920 {
921 	int same = 0;
922 	__be32 addr = 0;
923 
924 	for_ifa(in_dev) {
925 		if (!addr &&
926 		    (local == ifa->ifa_local || !local) &&
927 		    ifa->ifa_scope <= scope) {
928 			addr = ifa->ifa_local;
929 			if (same)
930 				break;
931 		}
932 		if (!same) {
933 			same = (!local || inet_ifa_match(local, ifa)) &&
934 				(!dst || inet_ifa_match(dst, ifa));
935 			if (same && addr) {
936 				if (local || !dst)
937 					break;
938 				/* Is the selected addr into dst subnet? */
939 				if (inet_ifa_match(addr, ifa))
940 					break;
941 				/* No, then can we use new local src? */
942 				if (ifa->ifa_scope <= scope) {
943 					addr = ifa->ifa_local;
944 					break;
945 				}
946 				/* search for large dst subnet for addr */
947 				same = 0;
948 			}
949 		}
950 	} endfor_ifa(in_dev);
951 
952 	return same? addr : 0;
953 }
954 
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963 			 __be32 dst, __be32 local, int scope)
964 {
965 	__be32 addr = 0;
966 	struct net_device *dev;
967 	struct net *net;
968 
969 	if (scope != RT_SCOPE_LINK)
970 		return confirm_addr_indev(in_dev, dst, local, scope);
971 
972 	net = dev_net(in_dev->dev);
973 	read_lock(&dev_base_lock);
974 	rcu_read_lock();
975 	for_each_netdev(net, dev) {
976 		if ((in_dev = __in_dev_get_rcu(dev))) {
977 			addr = confirm_addr_indev(in_dev, dst, local, scope);
978 			if (addr)
979 				break;
980 		}
981 	}
982 	rcu_read_unlock();
983 	read_unlock(&dev_base_lock);
984 
985 	return addr;
986 }
987 
988 /*
989  *	Device notifier
990  */
991 
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996 
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001 
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007 	struct in_ifaddr *ifa;
1008 	int named = 0;
1009 
1010 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011 		char old[IFNAMSIZ], *dot;
1012 
1013 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015 		if (named++ == 0)
1016 			goto skip;
1017 		dot = strchr(old, ':');
1018 		if (dot == NULL) {
1019 			sprintf(old, ":%d", named);
1020 			dot = old;
1021 		}
1022 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023 			strcat(ifa->ifa_label, dot);
1024 		} else {
1025 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026 		}
1027 skip:
1028 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1029 	}
1030 }
1031 
1032 /* Called only under RTNL semaphore */
1033 
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035 			 void *ptr)
1036 {
1037 	struct net_device *dev = ptr;
1038 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039 
1040 	ASSERT_RTNL();
1041 
1042 	if (!in_dev) {
1043 		if (event == NETDEV_REGISTER) {
1044 			in_dev = inetdev_init(dev);
1045 			if (!in_dev)
1046 				return notifier_from_errno(-ENOMEM);
1047 			if (dev->flags & IFF_LOOPBACK) {
1048 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050 			}
1051 		}
1052 		goto out;
1053 	}
1054 
1055 	switch (event) {
1056 	case NETDEV_REGISTER:
1057 		printk(KERN_DEBUG "inetdev_event: bug\n");
1058 		dev->ip_ptr = NULL;
1059 		break;
1060 	case NETDEV_UP:
1061 		if (dev->mtu < 68)
1062 			break;
1063 		if (dev->flags & IFF_LOOPBACK) {
1064 			struct in_ifaddr *ifa;
1065 			if ((ifa = inet_alloc_ifa()) != NULL) {
1066 				ifa->ifa_local =
1067 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1068 				ifa->ifa_prefixlen = 8;
1069 				ifa->ifa_mask = inet_make_mask(8);
1070 				in_dev_hold(in_dev);
1071 				ifa->ifa_dev = in_dev;
1072 				ifa->ifa_scope = RT_SCOPE_HOST;
1073 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1074 				inet_insert_ifa(ifa);
1075 			}
1076 		}
1077 		ip_mc_up(in_dev);
1078 		break;
1079 	case NETDEV_DOWN:
1080 		ip_mc_down(in_dev);
1081 		break;
1082 	case NETDEV_CHANGEMTU:
1083 		if (dev->mtu >= 68)
1084 			break;
1085 		/* MTU falled under 68, disable IP */
1086 	case NETDEV_UNREGISTER:
1087 		inetdev_destroy(in_dev);
1088 		break;
1089 	case NETDEV_CHANGENAME:
1090 		/* Do not notify about label change, this event is
1091 		 * not interesting to applications using netlink.
1092 		 */
1093 		inetdev_changename(dev, in_dev);
1094 
1095 		devinet_sysctl_unregister(in_dev);
1096 		devinet_sysctl_register(in_dev);
1097 		break;
1098 	}
1099 out:
1100 	return NOTIFY_DONE;
1101 }
1102 
1103 static struct notifier_block ip_netdev_notifier = {
1104 	.notifier_call =inetdev_event,
1105 };
1106 
1107 static inline size_t inet_nlmsg_size(void)
1108 {
1109 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1110 	       + nla_total_size(4) /* IFA_ADDRESS */
1111 	       + nla_total_size(4) /* IFA_LOCAL */
1112 	       + nla_total_size(4) /* IFA_BROADCAST */
1113 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1114 }
1115 
1116 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1117 			    u32 pid, u32 seq, int event, unsigned int flags)
1118 {
1119 	struct ifaddrmsg *ifm;
1120 	struct nlmsghdr  *nlh;
1121 
1122 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1123 	if (nlh == NULL)
1124 		return -EMSGSIZE;
1125 
1126 	ifm = nlmsg_data(nlh);
1127 	ifm->ifa_family = AF_INET;
1128 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1129 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1130 	ifm->ifa_scope = ifa->ifa_scope;
1131 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1132 
1133 	if (ifa->ifa_address)
1134 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1135 
1136 	if (ifa->ifa_local)
1137 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1138 
1139 	if (ifa->ifa_broadcast)
1140 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1141 
1142 	if (ifa->ifa_label[0])
1143 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1144 
1145 	return nlmsg_end(skb, nlh);
1146 
1147 nla_put_failure:
1148 	nlmsg_cancel(skb, nlh);
1149 	return -EMSGSIZE;
1150 }
1151 
1152 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1153 {
1154 	struct net *net = sock_net(skb->sk);
1155 	int idx, ip_idx;
1156 	struct net_device *dev;
1157 	struct in_device *in_dev;
1158 	struct in_ifaddr *ifa;
1159 	int s_ip_idx, s_idx = cb->args[0];
1160 
1161 	s_ip_idx = ip_idx = cb->args[1];
1162 	idx = 0;
1163 	for_each_netdev(net, dev) {
1164 		if (idx < s_idx)
1165 			goto cont;
1166 		if (idx > s_idx)
1167 			s_ip_idx = 0;
1168 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1169 			goto cont;
1170 
1171 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1172 		     ifa = ifa->ifa_next, ip_idx++) {
1173 			if (ip_idx < s_ip_idx)
1174 				continue;
1175 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1176 					     cb->nlh->nlmsg_seq,
1177 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1178 				goto done;
1179 		}
1180 cont:
1181 		idx++;
1182 	}
1183 
1184 done:
1185 	cb->args[0] = idx;
1186 	cb->args[1] = ip_idx;
1187 
1188 	return skb->len;
1189 }
1190 
1191 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1192 		      u32 pid)
1193 {
1194 	struct sk_buff *skb;
1195 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1196 	int err = -ENOBUFS;
1197 	struct net *net;
1198 
1199 	net = dev_net(ifa->ifa_dev->dev);
1200 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1201 	if (skb == NULL)
1202 		goto errout;
1203 
1204 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1205 	if (err < 0) {
1206 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1207 		WARN_ON(err == -EMSGSIZE);
1208 		kfree_skb(skb);
1209 		goto errout;
1210 	}
1211 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1212 errout:
1213 	if (err < 0)
1214 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1215 }
1216 
1217 #ifdef CONFIG_SYSCTL
1218 
1219 static void devinet_copy_dflt_conf(struct net *net, int i)
1220 {
1221 	struct net_device *dev;
1222 
1223 	read_lock(&dev_base_lock);
1224 	for_each_netdev(net, dev) {
1225 		struct in_device *in_dev;
1226 		rcu_read_lock();
1227 		in_dev = __in_dev_get_rcu(dev);
1228 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1229 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1230 		rcu_read_unlock();
1231 	}
1232 	read_unlock(&dev_base_lock);
1233 }
1234 
1235 static void inet_forward_change(struct net *net)
1236 {
1237 	struct net_device *dev;
1238 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1239 
1240 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1241 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1242 
1243 	read_lock(&dev_base_lock);
1244 	for_each_netdev(net, dev) {
1245 		struct in_device *in_dev;
1246 		if (on)
1247 			dev_disable_lro(dev);
1248 		rcu_read_lock();
1249 		in_dev = __in_dev_get_rcu(dev);
1250 		if (in_dev)
1251 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1252 		rcu_read_unlock();
1253 	}
1254 	read_unlock(&dev_base_lock);
1255 }
1256 
1257 static int devinet_conf_proc(ctl_table *ctl, int write,
1258 			     struct file* filp, void __user *buffer,
1259 			     size_t *lenp, loff_t *ppos)
1260 {
1261 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1262 
1263 	if (write) {
1264 		struct ipv4_devconf *cnf = ctl->extra1;
1265 		struct net *net = ctl->extra2;
1266 		int i = (int *)ctl->data - cnf->data;
1267 
1268 		set_bit(i, cnf->state);
1269 
1270 		if (cnf == net->ipv4.devconf_dflt)
1271 			devinet_copy_dflt_conf(net, i);
1272 	}
1273 
1274 	return ret;
1275 }
1276 
1277 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1278 			       void __user *oldval, size_t __user *oldlenp,
1279 			       void __user *newval, size_t newlen)
1280 {
1281 	struct ipv4_devconf *cnf;
1282 	struct net *net;
1283 	int *valp = table->data;
1284 	int new;
1285 	int i;
1286 
1287 	if (!newval || !newlen)
1288 		return 0;
1289 
1290 	if (newlen != sizeof(int))
1291 		return -EINVAL;
1292 
1293 	if (get_user(new, (int __user *)newval))
1294 		return -EFAULT;
1295 
1296 	if (new == *valp)
1297 		return 0;
1298 
1299 	if (oldval && oldlenp) {
1300 		size_t len;
1301 
1302 		if (get_user(len, oldlenp))
1303 			return -EFAULT;
1304 
1305 		if (len) {
1306 			if (len > table->maxlen)
1307 				len = table->maxlen;
1308 			if (copy_to_user(oldval, valp, len))
1309 				return -EFAULT;
1310 			if (put_user(len, oldlenp))
1311 				return -EFAULT;
1312 		}
1313 	}
1314 
1315 	*valp = new;
1316 
1317 	cnf = table->extra1;
1318 	net = table->extra2;
1319 	i = (int *)table->data - cnf->data;
1320 
1321 	set_bit(i, cnf->state);
1322 
1323 	if (cnf == net->ipv4.devconf_dflt)
1324 		devinet_copy_dflt_conf(net, i);
1325 
1326 	return 1;
1327 }
1328 
1329 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1330 				  struct file* filp, void __user *buffer,
1331 				  size_t *lenp, loff_t *ppos)
1332 {
1333 	int *valp = ctl->data;
1334 	int val = *valp;
1335 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1336 
1337 	if (write && *valp != val) {
1338 		struct net *net = ctl->extra2;
1339 
1340 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1341 			rtnl_lock();
1342 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1343 				inet_forward_change(net);
1344 			} else if (*valp) {
1345 				struct ipv4_devconf *cnf = ctl->extra1;
1346 				struct in_device *idev =
1347 					container_of(cnf, struct in_device, cnf);
1348 				dev_disable_lro(idev->dev);
1349 			}
1350 			rtnl_unlock();
1351 			rt_cache_flush(net, 0);
1352 		}
1353 	}
1354 
1355 	return ret;
1356 }
1357 
1358 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1359 			 struct file* filp, void __user *buffer,
1360 			 size_t *lenp, loff_t *ppos)
1361 {
1362 	int *valp = ctl->data;
1363 	int val = *valp;
1364 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1365 	struct net *net = ctl->extra2;
1366 
1367 	if (write && *valp != val)
1368 		rt_cache_flush(net, 0);
1369 
1370 	return ret;
1371 }
1372 
1373 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1374 				  void __user *oldval, size_t __user *oldlenp,
1375 				  void __user *newval, size_t newlen)
1376 {
1377 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1378 				      newval, newlen);
1379 	struct net *net = table->extra2;
1380 
1381 	if (ret == 1)
1382 		rt_cache_flush(net, 0);
1383 
1384 	return ret;
1385 }
1386 
1387 
/*
 * Initializer for one ctl_table entry describing devconf attribute @attr.
 * .data and .extra1 refer to the global ipv4_devconf; the data slot for
 * NET_IPV4_CONF_<attr> is at index NET_IPV4_CONF_<attr> - 1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl)		\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= name,					\
		.data		= ipv4_devconf.data +			\
				  NET_IPV4_CONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.strategy	= sysctl,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc and strategy handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl)		\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1415 
1416 static struct devinet_sysctl_table {
1417 	struct ctl_table_header *sysctl_header;
1418 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1419 	char *dev_name;
1420 } devinet_sysctl = {
1421 	.devinet_vars = {
1422 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1423 					     devinet_sysctl_forward,
1424 					     devinet_conf_sysctl),
1425 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1426 
1427 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1428 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1429 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1430 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1431 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1432 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1433 					"accept_source_route"),
1434 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1435 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1436 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1437 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1438 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1439 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1440 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1441 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1442 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1443 
1444 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1445 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1446 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1447 					      "force_igmp_version"),
1448 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1449 					      "promote_secondaries"),
1450 	},
1451 };
1452 
1453 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1454 		int ctl_name, struct ipv4_devconf *p)
1455 {
1456 	int i;
1457 	struct devinet_sysctl_table *t;
1458 
1459 #define DEVINET_CTL_PATH_DEV	3
1460 
1461 	struct ctl_path devinet_ctl_path[] = {
1462 		{ .procname = "net", .ctl_name = CTL_NET, },
1463 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1464 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1465 		{ /* to be set */ },
1466 		{ },
1467 	};
1468 
1469 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1470 	if (!t)
1471 		goto out;
1472 
1473 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1474 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1475 		t->devinet_vars[i].extra1 = p;
1476 		t->devinet_vars[i].extra2 = net;
1477 	}
1478 
1479 	/*
1480 	 * Make a copy of dev_name, because '.procname' is regarded as const
1481 	 * by sysctl and we wouldn't want anyone to change it under our feet
1482 	 * (see SIOCSIFNAME).
1483 	 */
1484 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1485 	if (!t->dev_name)
1486 		goto free;
1487 
1488 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1489 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1490 
1491 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1492 			t->devinet_vars);
1493 	if (!t->sysctl_header)
1494 		goto free_procname;
1495 
1496 	p->sysctl = t;
1497 	return 0;
1498 
1499 free_procname:
1500 	kfree(t->dev_name);
1501 free:
1502 	kfree(t);
1503 out:
1504 	return -ENOBUFS;
1505 }
1506 
1507 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1508 {
1509 	struct devinet_sysctl_table *t = cnf->sysctl;
1510 
1511 	if (t == NULL)
1512 		return;
1513 
1514 	cnf->sysctl = NULL;
1515 	unregister_sysctl_table(t->sysctl_header);
1516 	kfree(t->dev_name);
1517 	kfree(t);
1518 }
1519 
1520 static void devinet_sysctl_register(struct in_device *idev)
1521 {
1522 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1523 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1524 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1525 			idev->dev->ifindex, &idev->cnf);
1526 }
1527 
1528 static void devinet_sysctl_unregister(struct in_device *idev)
1529 {
1530 	__devinet_sysctl_unregister(&idev->cnf);
1531 	neigh_sysctl_unregister(idev->arp_parms);
1532 }
1533 
1534 static struct ctl_table ctl_forward_entry[] = {
1535 	{
1536 		.ctl_name	= NET_IPV4_FORWARD,
1537 		.procname	= "ip_forward",
1538 		.data		= &ipv4_devconf.data[
1539 					NET_IPV4_CONF_FORWARDING - 1],
1540 		.maxlen		= sizeof(int),
1541 		.mode		= 0644,
1542 		.proc_handler	= devinet_sysctl_forward,
1543 		.strategy	= devinet_conf_sysctl,
1544 		.extra1		= &ipv4_devconf,
1545 		.extra2		= &init_net,
1546 	},
1547 	{ },
1548 };
1549 
1550 static __net_initdata struct ctl_path net_ipv4_path[] = {
1551 	{ .procname = "net", .ctl_name = CTL_NET, },
1552 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1553 	{ },
1554 };
1555 #endif
1556 
1557 static __net_init int devinet_init_net(struct net *net)
1558 {
1559 	int err;
1560 	struct ipv4_devconf *all, *dflt;
1561 #ifdef CONFIG_SYSCTL
1562 	struct ctl_table *tbl = ctl_forward_entry;
1563 	struct ctl_table_header *forw_hdr;
1564 #endif
1565 
1566 	err = -ENOMEM;
1567 	all = &ipv4_devconf;
1568 	dflt = &ipv4_devconf_dflt;
1569 
1570 	if (net != &init_net) {
1571 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1572 		if (all == NULL)
1573 			goto err_alloc_all;
1574 
1575 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1576 		if (dflt == NULL)
1577 			goto err_alloc_dflt;
1578 
1579 #ifdef CONFIG_SYSCTL
1580 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1581 		if (tbl == NULL)
1582 			goto err_alloc_ctl;
1583 
1584 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1585 		tbl[0].extra1 = all;
1586 		tbl[0].extra2 = net;
1587 #endif
1588 	}
1589 
1590 #ifdef CONFIG_SYSCTL
1591 	err = __devinet_sysctl_register(net, "all",
1592 			NET_PROTO_CONF_ALL, all);
1593 	if (err < 0)
1594 		goto err_reg_all;
1595 
1596 	err = __devinet_sysctl_register(net, "default",
1597 			NET_PROTO_CONF_DEFAULT, dflt);
1598 	if (err < 0)
1599 		goto err_reg_dflt;
1600 
1601 	err = -ENOMEM;
1602 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1603 	if (forw_hdr == NULL)
1604 		goto err_reg_ctl;
1605 	net->ipv4.forw_hdr = forw_hdr;
1606 #endif
1607 
1608 	net->ipv4.devconf_all = all;
1609 	net->ipv4.devconf_dflt = dflt;
1610 	return 0;
1611 
1612 #ifdef CONFIG_SYSCTL
1613 err_reg_ctl:
1614 	__devinet_sysctl_unregister(dflt);
1615 err_reg_dflt:
1616 	__devinet_sysctl_unregister(all);
1617 err_reg_all:
1618 	if (tbl != ctl_forward_entry)
1619 		kfree(tbl);
1620 err_alloc_ctl:
1621 #endif
1622 	if (dflt != &ipv4_devconf_dflt)
1623 		kfree(dflt);
1624 err_alloc_dflt:
1625 	if (all != &ipv4_devconf)
1626 		kfree(all);
1627 err_alloc_all:
1628 	return err;
1629 }
1630 
1631 static __net_exit void devinet_exit_net(struct net *net)
1632 {
1633 #ifdef CONFIG_SYSCTL
1634 	struct ctl_table *tbl;
1635 
1636 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1637 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1638 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1639 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1640 	kfree(tbl);
1641 #endif
1642 	kfree(net->ipv4.devconf_dflt);
1643 	kfree(net->ipv4.devconf_all);
1644 }
1645 
1646 static __net_initdata struct pernet_operations devinet_ops = {
1647 	.init = devinet_init_net,
1648 	.exit = devinet_exit_net,
1649 };
1650 
1651 void __init devinet_init(void)
1652 {
1653 	register_pernet_subsys(&devinet_ops);
1654 
1655 	register_gifconf(PF_INET, inet_gifconf);
1656 	register_netdevice_notifier(&ip_netdev_notifier);
1657 
1658 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1659 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1660 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1661 }
1662 
/* Symbols exported from this file. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1668