xref: /openbmc/linux/net/ipv4/devinet.c (revision a09d2831)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57 
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 
/*
 * IPv4 device configuration block with the ACCEPT_REDIRECTS,
 * SEND_REDIRECTS, SECURE_REDIRECTS and SHARED_MEDIA entries set to 1.
 * The .data array is indexed by the NET_IPV4_CONF_* constant minus one;
 * every entry not listed here is zero-initialized.
 * NOTE(review): the users of this object are outside this section -
 * presumably it backs the "all" settings; confirm against them.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
	},
};
73 
/*
 * Same initial values as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE set to 1.
 * NOTE(review): presumably the template behind the per-namespace
 * ipv4.devconf_dflt that inetdev_init() copies into each new in_device;
 * the assignment is not visible in this section - confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
83 
/*
 * Access configuration entry @attr in the default devconf block of
 * namespace @net. @net is expanded without parentheses, so pass a plain
 * pointer expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
86 
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr(): the three address attributes are 32-bit values and
 * the label is a string of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
93 
/* Defined later in the file; used by the address list helpers below. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected ifaddr. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks compile to nothing. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
110 
111 /* Locks all the inet devices. */
112 
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116 }
117 
118 static void inet_rcu_free_ifa(struct rcu_head *head)
119 {
120 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121 	if (ifa->ifa_dev)
122 		in_dev_put(ifa->ifa_dev);
123 	kfree(ifa);
124 }
125 
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
127 {
128 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129 }
130 
131 void in_dev_finish_destroy(struct in_device *idev)
132 {
133 	struct net_device *dev = idev->dev;
134 
135 	WARN_ON(idev->ifa_list);
136 	WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139 	       idev, dev ? dev->name : "NIL");
140 #endif
141 	dev_put(dev);
142 	if (!idev->dead)
143 		pr_err("Freeing alive in_device %p\n", idev);
144 	else
145 		kfree(idev);
146 }
147 EXPORT_SYMBOL(in_dev_finish_destroy);
148 
149 static struct in_device *inetdev_init(struct net_device *dev)
150 {
151 	struct in_device *in_dev;
152 
153 	ASSERT_RTNL();
154 
155 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156 	if (!in_dev)
157 		goto out;
158 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159 			sizeof(in_dev->cnf));
160 	in_dev->cnf.sysctl = NULL;
161 	in_dev->dev = dev;
162 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
163 	if (!in_dev->arp_parms)
164 		goto out_kfree;
165 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
166 		dev_disable_lro(dev);
167 	/* Reference in_dev->dev */
168 	dev_hold(dev);
169 	/* Account for reference dev->ip_ptr (below) */
170 	in_dev_hold(in_dev);
171 
172 	devinet_sysctl_register(in_dev);
173 	ip_mc_init_dev(in_dev);
174 	if (dev->flags & IFF_UP)
175 		ip_mc_up(in_dev);
176 
177 	/* we can receive as soon as ip_ptr is set -- do this last */
178 	rcu_assign_pointer(dev->ip_ptr, in_dev);
179 out:
180 	return in_dev;
181 out_kfree:
182 	kfree(in_dev);
183 	in_dev = NULL;
184 	goto out;
185 }
186 
187 static void in_dev_rcu_put(struct rcu_head *head)
188 {
189 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
190 	in_dev_put(idev);
191 }
192 
193 static void inetdev_destroy(struct in_device *in_dev)
194 {
195 	struct in_ifaddr *ifa;
196 	struct net_device *dev;
197 
198 	ASSERT_RTNL();
199 
200 	dev = in_dev->dev;
201 
202 	in_dev->dead = 1;
203 
204 	ip_mc_destroy_dev(in_dev);
205 
206 	while ((ifa = in_dev->ifa_list) != NULL) {
207 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
208 		inet_free_ifa(ifa);
209 	}
210 
211 	dev->ip_ptr = NULL;
212 
213 	devinet_sysctl_unregister(in_dev);
214 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
215 	arp_ifdown(dev);
216 
217 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
218 }
219 
220 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
221 {
222 	rcu_read_lock();
223 	for_primary_ifa(in_dev) {
224 		if (inet_ifa_match(a, ifa)) {
225 			if (!b || inet_ifa_match(b, ifa)) {
226 				rcu_read_unlock();
227 				return 1;
228 			}
229 		}
230 	} endfor_ifa(in_dev);
231 	rcu_read_unlock();
232 	return 0;
233 }
234 
/*
 * Remove the address *@ifap from @in_dev's address list. RTNL must be held.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (list head or a previous entry's ifa_next) that points
 *           at the address to delete
 * @destroy: when non-zero the deleted address is freed here, otherwise
 *           the caller keeps it (e.g. to modify and re-insert it)
 * @nlh/@pid: netlink request context forwarded to rtmsg_ifa()
 *
 * When the deleted address is a primary one, its secondaries (same mask,
 * same subnet) are either deleted as well or, if secondary promotion is
 * enabled on the device, the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* last primary whose scope is >= ifa1's; promote is re-linked after it */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* last entry skipped before the promotion candidate, NULL if none */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Move the promoted entry right behind last_prim. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add FIB entries for the remaining addresses of the subnet. */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
320 
321 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322 			 int destroy)
323 {
324 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
325 }
326 
327 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
328 			     u32 pid)
329 {
330 	struct in_device *in_dev = ifa->ifa_dev;
331 	struct in_ifaddr *ifa1, **ifap, **last_primary;
332 
333 	ASSERT_RTNL();
334 
335 	if (!ifa->ifa_local) {
336 		inet_free_ifa(ifa);
337 		return 0;
338 	}
339 
340 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
341 	last_primary = &in_dev->ifa_list;
342 
343 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
344 	     ifap = &ifa1->ifa_next) {
345 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
346 		    ifa->ifa_scope <= ifa1->ifa_scope)
347 			last_primary = &ifa1->ifa_next;
348 		if (ifa1->ifa_mask == ifa->ifa_mask &&
349 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
350 			if (ifa1->ifa_local == ifa->ifa_local) {
351 				inet_free_ifa(ifa);
352 				return -EEXIST;
353 			}
354 			if (ifa1->ifa_scope != ifa->ifa_scope) {
355 				inet_free_ifa(ifa);
356 				return -EINVAL;
357 			}
358 			ifa->ifa_flags |= IFA_F_SECONDARY;
359 		}
360 	}
361 
362 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
363 		net_srandom(ifa->ifa_local);
364 		ifap = last_primary;
365 	}
366 
367 	ifa->ifa_next = *ifap;
368 	*ifap = ifa;
369 
370 	/* Send message first, then call notifier.
371 	   Notifier will trigger FIB update, so that
372 	   listeners of netlink will know about new ifaddr */
373 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
374 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
375 
376 	return 0;
377 }
378 
379 static int inet_insert_ifa(struct in_ifaddr *ifa)
380 {
381 	return __inet_insert_ifa(ifa, NULL, 0);
382 }
383 
384 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
385 {
386 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
387 
388 	ASSERT_RTNL();
389 
390 	if (!in_dev) {
391 		inet_free_ifa(ifa);
392 		return -ENOBUFS;
393 	}
394 	ipv4_devconf_setall(in_dev);
395 	if (ifa->ifa_dev != in_dev) {
396 		WARN_ON(ifa->ifa_dev);
397 		in_dev_hold(in_dev);
398 		ifa->ifa_dev = in_dev;
399 	}
400 	if (ipv4_is_loopback(ifa->ifa_local))
401 		ifa->ifa_scope = RT_SCOPE_HOST;
402 	return inet_insert_ifa(ifa);
403 }
404 
405 struct in_device *inetdev_by_index(struct net *net, int ifindex)
406 {
407 	struct net_device *dev;
408 	struct in_device *in_dev = NULL;
409 
410 	rcu_read_lock();
411 	dev = dev_get_by_index_rcu(net, ifindex);
412 	if (dev)
413 		in_dev = in_dev_get(dev);
414 	rcu_read_unlock();
415 	return in_dev;
416 }
417 EXPORT_SYMBOL(inetdev_by_index);
418 
419 /* Called only from RTNL semaphored context. No locks. */
420 
421 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
422 				    __be32 mask)
423 {
424 	ASSERT_RTNL();
425 
426 	for_primary_ifa(in_dev) {
427 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
428 			return ifa;
429 	} endfor_ifa(in_dev);
430 	return NULL;
431 }
432 
433 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
434 {
435 	struct net *net = sock_net(skb->sk);
436 	struct nlattr *tb[IFA_MAX+1];
437 	struct in_device *in_dev;
438 	struct ifaddrmsg *ifm;
439 	struct in_ifaddr *ifa, **ifap;
440 	int err = -EINVAL;
441 
442 	ASSERT_RTNL();
443 
444 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
445 	if (err < 0)
446 		goto errout;
447 
448 	ifm = nlmsg_data(nlh);
449 	in_dev = inetdev_by_index(net, ifm->ifa_index);
450 	if (in_dev == NULL) {
451 		err = -ENODEV;
452 		goto errout;
453 	}
454 
455 	__in_dev_put(in_dev);
456 
457 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
458 	     ifap = &ifa->ifa_next) {
459 		if (tb[IFA_LOCAL] &&
460 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
461 			continue;
462 
463 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
464 			continue;
465 
466 		if (tb[IFA_ADDRESS] &&
467 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
468 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
469 			continue;
470 
471 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
472 		return 0;
473 	}
474 
475 	err = -EADDRNOTAVAIL;
476 errout:
477 	return err;
478 }
479 
480 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
481 {
482 	struct nlattr *tb[IFA_MAX+1];
483 	struct in_ifaddr *ifa;
484 	struct ifaddrmsg *ifm;
485 	struct net_device *dev;
486 	struct in_device *in_dev;
487 	int err;
488 
489 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
490 	if (err < 0)
491 		goto errout;
492 
493 	ifm = nlmsg_data(nlh);
494 	err = -EINVAL;
495 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
496 		goto errout;
497 
498 	dev = __dev_get_by_index(net, ifm->ifa_index);
499 	err = -ENODEV;
500 	if (dev == NULL)
501 		goto errout;
502 
503 	in_dev = __in_dev_get_rtnl(dev);
504 	err = -ENOBUFS;
505 	if (in_dev == NULL)
506 		goto errout;
507 
508 	ifa = inet_alloc_ifa();
509 	if (ifa == NULL)
510 		/*
511 		 * A potential indev allocation can be left alive, it stays
512 		 * assigned to its device and is destroy with it.
513 		 */
514 		goto errout;
515 
516 	ipv4_devconf_setall(in_dev);
517 	in_dev_hold(in_dev);
518 
519 	if (tb[IFA_ADDRESS] == NULL)
520 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
521 
522 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
523 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
524 	ifa->ifa_flags = ifm->ifa_flags;
525 	ifa->ifa_scope = ifm->ifa_scope;
526 	ifa->ifa_dev = in_dev;
527 
528 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
529 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
530 
531 	if (tb[IFA_BROADCAST])
532 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
533 
534 	if (tb[IFA_LABEL])
535 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
536 	else
537 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
538 
539 	return ifa;
540 
541 errout:
542 	return ERR_PTR(err);
543 }
544 
545 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
546 {
547 	struct net *net = sock_net(skb->sk);
548 	struct in_ifaddr *ifa;
549 
550 	ASSERT_RTNL();
551 
552 	ifa = rtm_to_ifaddr(net, nlh);
553 	if (IS_ERR(ifa))
554 		return PTR_ERR(ifa);
555 
556 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
557 }
558 
559 /*
560  *	Determine a default network mask, based on the IP address.
561  */
562 
563 static inline int inet_abc_len(__be32 addr)
564 {
565 	int rc = -1;	/* Something else, probably a multicast. */
566 
567 	if (ipv4_is_zeronet(addr))
568 		rc = 0;
569 	else {
570 		__u32 haddr = ntohl(addr);
571 
572 		if (IN_CLASSA(haddr))
573 			rc = 8;
574 		else if (IN_CLASSB(haddr))
575 			rc = 16;
576 		else if (IN_CLASSC(haddr))
577 			rc = 24;
578 	}
579 
580 	return rc;
581 }
582 
583 
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: network namespace used for the device lookup
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 *       SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR,
 *       SIOCSIFDSTADDR, SIOCSIFNETMASK; anything else yields -EINVAL
 * @arg: user pointer to a struct ifreq; for the "get" commands the
 *       result is written back through it
 *
 * The interface name may carry an alias suffix ("eth0:1"): the part
 * before the colon selects the device, the full string is compared
 * against the address labels.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EACCES (set command without CAP_NET_ADMIN), -EINVAL (bad command,
 * family, address or mask), -ENODEV (no such device), -EADDRNOTAVAIL
 * (no matching address), -ENOBUFS (allocation failure / no in_device),
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination: the name comes from user space. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off the alias suffix so the device can be found. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Clear the reply area; only family and address are filled in. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full "name:alias" string for the label comparisons. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias, clearing IFF_UP deletes just that address. */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; it is re-inserted below. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask, broadcast derived from it. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop the lock, then copy the reply to user space. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
823 
824 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
825 {
826 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
827 	struct in_ifaddr *ifa;
828 	struct ifreq ifr;
829 	int done = 0;
830 
831 	if (!in_dev)
832 		goto out;
833 
834 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
835 		if (!buf) {
836 			done += sizeof(ifr);
837 			continue;
838 		}
839 		if (len < (int) sizeof(ifr))
840 			break;
841 		memset(&ifr, 0, sizeof(struct ifreq));
842 		if (ifa->ifa_label)
843 			strcpy(ifr.ifr_name, ifa->ifa_label);
844 		else
845 			strcpy(ifr.ifr_name, dev->name);
846 
847 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
848 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
849 								ifa->ifa_local;
850 
851 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
852 			done = -EFAULT;
853 			break;
854 		}
855 		buf  += sizeof(struct ifreq);
856 		len  -= sizeof(struct ifreq);
857 		done += sizeof(struct ifreq);
858 	}
859 out:
860 	return done;
861 }
862 
863 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
864 {
865 	__be32 addr = 0;
866 	struct in_device *in_dev;
867 	struct net *net = dev_net(dev);
868 
869 	rcu_read_lock();
870 	in_dev = __in_dev_get_rcu(dev);
871 	if (!in_dev)
872 		goto no_in_dev;
873 
874 	for_primary_ifa(in_dev) {
875 		if (ifa->ifa_scope > scope)
876 			continue;
877 		if (!dst || inet_ifa_match(dst, ifa)) {
878 			addr = ifa->ifa_local;
879 			break;
880 		}
881 		if (!addr)
882 			addr = ifa->ifa_local;
883 	} endfor_ifa(in_dev);
884 
885 	if (addr)
886 		goto out_unlock;
887 no_in_dev:
888 
889 	/* Not loopback addresses on loopback should be preferred
890 	   in this case. It is importnat that lo is the first interface
891 	   in dev_base list.
892 	 */
893 	for_each_netdev_rcu(net, dev) {
894 		in_dev = __in_dev_get_rcu(dev);
895 		if (!in_dev)
896 			continue;
897 
898 		for_primary_ifa(in_dev) {
899 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
900 			    ifa->ifa_scope <= scope) {
901 				addr = ifa->ifa_local;
902 				goto out_unlock;
903 			}
904 		} endfor_ifa(in_dev);
905 	}
906 out_unlock:
907 	rcu_read_unlock();
908 	return addr;
909 }
910 EXPORT_SYMBOL(inet_select_addr);
911 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks all addresses of @in_dev tracking two things:
 *  - addr: the first address within @scope that equals @local (any
 *    address within scope when @local is zero);
 *  - same: whether an address was seen whose subnet contains both
 *    @local and @dst (a zero value matches anything).
 *
 * Returns addr when a subnet match was found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet match already seen earlier: done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
948 
949 /*
950  * Confirm that local IP address exists using wildcards:
951  * - in_dev: only on this interface, 0=any interface
952  * - dst: only in the same subnet as dst, 0=any dst
953  * - local: address, 0=autoselect the local address
954  * - scope: maximum allowed scope value for the local address
955  */
956 __be32 inet_confirm_addr(struct in_device *in_dev,
957 			 __be32 dst, __be32 local, int scope)
958 {
959 	__be32 addr = 0;
960 	struct net_device *dev;
961 	struct net *net;
962 
963 	if (scope != RT_SCOPE_LINK)
964 		return confirm_addr_indev(in_dev, dst, local, scope);
965 
966 	net = dev_net(in_dev->dev);
967 	rcu_read_lock();
968 	for_each_netdev_rcu(net, dev) {
969 		in_dev = __in_dev_get_rcu(dev);
970 		if (in_dev) {
971 			addr = confirm_addr_indev(in_dev, dst, local, scope);
972 			if (addr)
973 				break;
974 		}
975 	}
976 	rcu_read_unlock();
977 
978 	return addr;
979 }
980 
981 /*
982  *	Device notifier
983  */
984 
985 int register_inetaddr_notifier(struct notifier_block *nb)
986 {
987 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
988 }
989 EXPORT_SYMBOL(register_inetaddr_notifier);
990 
991 int unregister_inetaddr_notifier(struct notifier_block *nb)
992 {
993 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
994 }
995 EXPORT_SYMBOL(unregister_inetaddr_notifier);
996 
997 /* Rename ifa_labels for a device name change. Make some effort to preserve
998  * existing alias numbering and to create unique labels if possible.
999 */
1000 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1001 {
1002 	struct in_ifaddr *ifa;
1003 	int named = 0;
1004 
1005 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1006 		char old[IFNAMSIZ], *dot;
1007 
1008 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1009 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1010 		if (named++ == 0)
1011 			goto skip;
1012 		dot = strchr(old, ':');
1013 		if (dot == NULL) {
1014 			sprintf(old, ":%d", named);
1015 			dot = old;
1016 		}
1017 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1018 			strcat(ifa->ifa_label, dot);
1019 		else
1020 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1021 skip:
1022 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1023 	}
1024 }
1025 
/*
 * IPv4 can only run on a device whose MTU is at least 68 bytes, the
 * minimum datagram size of RFC 791.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1030 
/*
 * Netdevice notifier callback, registered through ip_netdev_notifier.
 * Called only under RTNL semaphore.
 *
 * @ptr is the net_device the event is about. Creates the in_device when
 * a device is registered (or regains a usable MTU) and reacts to the
 * remaining events on devices that already have one.
 *
 * Returns NOTIFY_DONE, or the notifier encoding of -ENOMEM when the
 * in_device cannot be created at registration.
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		/* No in_device: every other event is ignored. */
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* A device being registered should not have IPv4 state yet. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		dev->ip_ptr = NULL;
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Loopback devices automatically get 127.0.0.1/8. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* Send gratuitous ARP to notify of link change */
		if (IN_DEV_ARP_NOTIFY(in_dev)) {
			struct in_ifaddr *ifa = in_dev->ifa_list;

			if (ifa)
				arp_send(ARPOP_REQUEST, ETH_P_ARP,
					 ifa->ifa_address, dev,
					 ifa->ifa_address, NULL,
					 dev->dev_addr, NULL);
		}
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_BONDING_OLDTYPE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_BONDING_NEWTYPE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1124 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1128 
1129 static inline size_t inet_nlmsg_size(void)
1130 {
1131 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1132 	       + nla_total_size(4) /* IFA_ADDRESS */
1133 	       + nla_total_size(4) /* IFA_LOCAL */
1134 	       + nla_total_size(4) /* IFA_BROADCAST */
1135 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1136 }
1137 
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139 			    u32 pid, u32 seq, int event, unsigned int flags)
1140 {
1141 	struct ifaddrmsg *ifm;
1142 	struct nlmsghdr  *nlh;
1143 
1144 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1145 	if (nlh == NULL)
1146 		return -EMSGSIZE;
1147 
1148 	ifm = nlmsg_data(nlh);
1149 	ifm->ifa_family = AF_INET;
1150 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152 	ifm->ifa_scope = ifa->ifa_scope;
1153 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1154 
1155 	if (ifa->ifa_address)
1156 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1157 
1158 	if (ifa->ifa_local)
1159 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1160 
1161 	if (ifa->ifa_broadcast)
1162 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1163 
1164 	if (ifa->ifa_label[0])
1165 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166 
1167 	return nlmsg_end(skb, nlh);
1168 
1169 nla_put_failure:
1170 	nlmsg_cancel(skb, nlh);
1171 	return -EMSGSIZE;
1172 }
1173 
1174 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1175 {
1176 	struct net *net = sock_net(skb->sk);
1177 	int h, s_h;
1178 	int idx, s_idx;
1179 	int ip_idx, s_ip_idx;
1180 	struct net_device *dev;
1181 	struct in_device *in_dev;
1182 	struct in_ifaddr *ifa;
1183 	struct hlist_head *head;
1184 	struct hlist_node *node;
1185 
1186 	s_h = cb->args[0];
1187 	s_idx = idx = cb->args[1];
1188 	s_ip_idx = ip_idx = cb->args[2];
1189 
1190 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1191 		idx = 0;
1192 		head = &net->dev_index_head[h];
1193 		rcu_read_lock();
1194 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1195 			if (idx < s_idx)
1196 				goto cont;
1197 			if (idx > s_idx)
1198 				s_ip_idx = 0;
1199 			in_dev = __in_dev_get_rcu(dev);
1200 			if (!in_dev)
1201 				goto cont;
1202 
1203 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1204 			     ifa = ifa->ifa_next, ip_idx++) {
1205 				if (ip_idx < s_ip_idx)
1206 					continue;
1207 				if (inet_fill_ifaddr(skb, ifa,
1208 					     NETLINK_CB(cb->skb).pid,
1209 					     cb->nlh->nlmsg_seq,
1210 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1211 					rcu_read_unlock();
1212 					goto done;
1213 				}
1214 			}
1215 cont:
1216 			idx++;
1217 		}
1218 		rcu_read_unlock();
1219 	}
1220 
1221 done:
1222 	cb->args[0] = h;
1223 	cb->args[1] = idx;
1224 	cb->args[2] = ip_idx;
1225 
1226 	return skb->len;
1227 }
1228 
1229 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1230 		      u32 pid)
1231 {
1232 	struct sk_buff *skb;
1233 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1234 	int err = -ENOBUFS;
1235 	struct net *net;
1236 
1237 	net = dev_net(ifa->ifa_dev->dev);
1238 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1239 	if (skb == NULL)
1240 		goto errout;
1241 
1242 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1243 	if (err < 0) {
1244 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1245 		WARN_ON(err == -EMSGSIZE);
1246 		kfree_skb(skb);
1247 		goto errout;
1248 	}
1249 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1250 	return;
1251 errout:
1252 	if (err < 0)
1253 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1254 }
1255 
1256 #ifdef CONFIG_SYSCTL
1257 
1258 static void devinet_copy_dflt_conf(struct net *net, int i)
1259 {
1260 	struct net_device *dev;
1261 
1262 	rcu_read_lock();
1263 	for_each_netdev_rcu(net, dev) {
1264 		struct in_device *in_dev;
1265 
1266 		in_dev = __in_dev_get_rcu(dev);
1267 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1268 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1269 	}
1270 	rcu_read_unlock();
1271 }
1272 
1273 /* called with RTNL locked */
1274 static void inet_forward_change(struct net *net)
1275 {
1276 	struct net_device *dev;
1277 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1278 
1279 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1280 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1281 
1282 	for_each_netdev(net, dev) {
1283 		struct in_device *in_dev;
1284 		if (on)
1285 			dev_disable_lro(dev);
1286 		rcu_read_lock();
1287 		in_dev = __in_dev_get_rcu(dev);
1288 		if (in_dev)
1289 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1290 		rcu_read_unlock();
1291 	}
1292 }
1293 
1294 static int devinet_conf_proc(ctl_table *ctl, int write,
1295 			     void __user *buffer,
1296 			     size_t *lenp, loff_t *ppos)
1297 {
1298 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1299 
1300 	if (write) {
1301 		struct ipv4_devconf *cnf = ctl->extra1;
1302 		struct net *net = ctl->extra2;
1303 		int i = (int *)ctl->data - cnf->data;
1304 
1305 		set_bit(i, cnf->state);
1306 
1307 		if (cnf == net->ipv4.devconf_dflt)
1308 			devinet_copy_dflt_conf(net, i);
1309 	}
1310 
1311 	return ret;
1312 }
1313 
1314 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1315 				  void __user *buffer,
1316 				  size_t *lenp, loff_t *ppos)
1317 {
1318 	int *valp = ctl->data;
1319 	int val = *valp;
1320 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1321 
1322 	if (write && *valp != val) {
1323 		struct net *net = ctl->extra2;
1324 
1325 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1326 			if (!rtnl_trylock())
1327 				return restart_syscall();
1328 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1329 				inet_forward_change(net);
1330 			} else if (*valp) {
1331 				struct ipv4_devconf *cnf = ctl->extra1;
1332 				struct in_device *idev =
1333 					container_of(cnf, struct in_device, cnf);
1334 				dev_disable_lro(idev->dev);
1335 			}
1336 			rtnl_unlock();
1337 			rt_cache_flush(net, 0);
1338 		}
1339 	}
1340 
1341 	return ret;
1342 }
1343 
1344 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1345 			 void __user *buffer,
1346 			 size_t *lenp, loff_t *ppos)
1347 {
1348 	int *valp = ctl->data;
1349 	int val = *valp;
1350 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1351 	struct net *net = ctl->extra2;
1352 
1353 	if (write && *valp != val)
1354 		rt_cache_flush(net, 0);
1355 
1356 	return ret;
1357 }
1358 
/*
 * Initializer for one ctl_table entry of the devinet_sysctl template.
 *   attr - suffix of the NET_IPV4_CONF_* constant; the entry's .data
 *          points at ipv4_devconf.data[NET_IPV4_CONF_<attr> - 1]
 *   name - procname of the entry
 *   mval - file mode
 *   proc - proc_handler
 * .data and .extra1 refer to the static ipv4_devconf here;
 * __devinet_sysctl_register() rebases them for the devconf it is given.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1381 
/*
 * Template for the sysctl table of one devconf block.
 * __devinet_sysctl_register() kmemdup()s this template and adjusts the
 * copy; the template itself is not registered there.
 */
static struct devinet_sysctl_table {
	/* handle returned by register_net_sysctl_table() */
	struct ctl_table_header *sysctl_header;
	/* the entries below; the registration loop leaves the last slot alone */
	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
	/* kstrdup()ed copy of the name used as the directory's procname */
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),

		/* writes that change these values also flush the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1420 
1421 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1422 					struct ipv4_devconf *p)
1423 {
1424 	int i;
1425 	struct devinet_sysctl_table *t;
1426 
1427 #define DEVINET_CTL_PATH_DEV	3
1428 
1429 	struct ctl_path devinet_ctl_path[] = {
1430 		{ .procname = "net",  },
1431 		{ .procname = "ipv4", },
1432 		{ .procname = "conf", },
1433 		{ /* to be set */ },
1434 		{ },
1435 	};
1436 
1437 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1438 	if (!t)
1439 		goto out;
1440 
1441 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1442 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1443 		t->devinet_vars[i].extra1 = p;
1444 		t->devinet_vars[i].extra2 = net;
1445 	}
1446 
1447 	/*
1448 	 * Make a copy of dev_name, because '.procname' is regarded as const
1449 	 * by sysctl and we wouldn't want anyone to change it under our feet
1450 	 * (see SIOCSIFNAME).
1451 	 */
1452 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1453 	if (!t->dev_name)
1454 		goto free;
1455 
1456 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1457 
1458 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1459 			t->devinet_vars);
1460 	if (!t->sysctl_header)
1461 		goto free_procname;
1462 
1463 	p->sysctl = t;
1464 	return 0;
1465 
1466 free_procname:
1467 	kfree(t->dev_name);
1468 free:
1469 	kfree(t);
1470 out:
1471 	return -ENOBUFS;
1472 }
1473 
1474 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1475 {
1476 	struct devinet_sysctl_table *t = cnf->sysctl;
1477 
1478 	if (t == NULL)
1479 		return;
1480 
1481 	cnf->sysctl = NULL;
1482 	unregister_sysctl_table(t->sysctl_header);
1483 	kfree(t->dev_name);
1484 	kfree(t);
1485 }
1486 
1487 static void devinet_sysctl_register(struct in_device *idev)
1488 {
1489 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1490 			NET_IPV4_NEIGH, "ipv4", NULL);
1491 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1492 					&idev->cnf);
1493 }
1494 
1495 static void devinet_sysctl_unregister(struct in_device *idev)
1496 {
1497 	__devinet_sysctl_unregister(&idev->cnf);
1498 	neigh_sysctl_unregister(idev->arp_parms);
1499 }
1500 
/*
 * Table holding the single "ip_forward" entry, registered under the
 * net_ipv4_path path by devinet_init_net().  It points at the
 * FORWARDING slot of the static ipv4_devconf and at init_net;
 * devinet_init_net() duplicates and re-points it for other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					NET_IPV4_CONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1514 
/* sysctl path ("net" / "ipv4") under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1520 #endif
1521 
/*
 * Per-namespace init: set up the "all" and "default" devconf blocks of
 * @net and, with CONFIG_SYSCTL, register their sysctl tables plus the
 * ip_forward entry.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry directly; any other namespace gets kmemdup()ed
 * copies, with the ip_forward entry re-pointed at the copied "all"
 * block and at @net.
 *
 * Returns 0 on success or a negative errno; on failure everything set
 * up so far is undone in reverse order by the labels at the end.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	/* every allocation failure below reports -ENOMEM */
	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* re-point the copied ip_forward entry at this namespace */
		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* error unwinding; the static init_net objects are never freed */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1593 
1594 static __net_exit void devinet_exit_net(struct net *net)
1595 {
1596 #ifdef CONFIG_SYSCTL
1597 	struct ctl_table *tbl;
1598 
1599 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1600 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1601 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1602 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1603 	kfree(tbl);
1604 #endif
1605 	kfree(net->ipv4.devconf_dflt);
1606 	kfree(net->ipv4.devconf_all);
1607 }
1608 
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1613 
/*
 * Boot-time initialisation of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR and
 * RTM_GETADDR (the latter as a dump only).  None of the registration
 * results are checked.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1625 
1626