xref: /openbmc/linux/net/ipv4/devinet.c (revision 384740dc)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57 
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 
/*
 * Statically initialised struct ipv4_devconf.  The table is indexed by
 * (NET_IPV4_CONF_* - 1); accept/send/secure redirects and shared media
 * start at 1, every entry not listed here is zero.
 * NOTE(review): presumably the system-wide ("all") settings -- its users
 * lie outside this excerpt, confirm.
 */
65 static struct ipv4_devconf ipv4_devconf = {
66 	.data = {
67 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71 	},
72 };
73 
/*
 * Second statically initialised struct ipv4_devconf.  It carries the same
 * non-zero entries as ipv4_devconf plus ACCEPT_SOURCE_ROUTE = 1.
 * NOTE(review): presumably the template reached via net->ipv4.devconf_dflt
 * that inetdev_init() copies into each new in_device -- the assignment of
 * that pointer is not visible here, confirm.
 */
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 	.data = {
76 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81 	},
82 };
83 
/*
 * Access configuration attribute @attr in the default devconf template of
 * the network namespace @net (an expression of type struct net *).
 * @net is parenthesised so that compound expressions such as "p + 1"
 * expand to the intended dereference.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86 
/*
 * Netlink attribute policy passed to the nlmsg_parse() calls in this file:
 * IFA_LOCAL, IFA_ADDRESS and IFA_BROADCAST are 32-bit values, IFA_LABEL is
 * a string limited to IFNAMSIZ - 1 bytes.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93 
/* Forward declaration: netlink announcement of an address event (defined later). */
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95 
/* Blocking notifier chain fired with NETDEV_UP / NETDEV_DOWN for addresses. */
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98 			 int destroy);
/*
 * Per-device sysctl registration: real implementations exist only with
 * CONFIG_SYSCTL; otherwise the calls compile to empty inline stubs.
 */
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110 
111 /* Locks all the inet devices. */
112 
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116 
117 	if (ifa) {
118 		INIT_RCU_HEAD(&ifa->rcu_head);
119 	}
120 
121 	return ifa;
122 }
123 
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127 	if (ifa->ifa_dev)
128 		in_dev_put(ifa->ifa_dev);
129 	kfree(ifa);
130 }
131 
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136 
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139 	struct net_device *dev = idev->dev;
140 
141 	WARN_ON(idev->ifa_list);
142 	WARN_ON(idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145 	       idev, dev ? dev->name : "NIL");
146 #endif
147 	dev_put(dev);
148 	if (!idev->dead)
149 		printk("Freeing alive in_device %p\n", idev);
150 	else {
151 		kfree(idev);
152 	}
153 }
154 
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157 	struct in_device *in_dev;
158 
159 	ASSERT_RTNL();
160 
161 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162 	if (!in_dev)
163 		goto out;
164 	INIT_RCU_HEAD(&in_dev->rcu_head);
165 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166 			sizeof(in_dev->cnf));
167 	in_dev->cnf.sysctl = NULL;
168 	in_dev->dev = dev;
169 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170 		goto out_kfree;
171 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172 		dev_disable_lro(dev);
173 	/* Reference in_dev->dev */
174 	dev_hold(dev);
175 	/* Account for reference dev->ip_ptr (below) */
176 	in_dev_hold(in_dev);
177 
178 	devinet_sysctl_register(in_dev);
179 	ip_mc_init_dev(in_dev);
180 	if (dev->flags & IFF_UP)
181 		ip_mc_up(in_dev);
182 
183 	/* we can receive as soon as ip_ptr is set -- do this last */
184 	rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186 	return in_dev;
187 out_kfree:
188 	kfree(in_dev);
189 	in_dev = NULL;
190 	goto out;
191 }
192 
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
196 	in_dev_put(idev);
197 }
198 
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201 	struct in_ifaddr *ifa;
202 	struct net_device *dev;
203 
204 	ASSERT_RTNL();
205 
206 	dev = in_dev->dev;
207 
208 	in_dev->dead = 1;
209 
210 	ip_mc_destroy_dev(in_dev);
211 
212 	while ((ifa = in_dev->ifa_list) != NULL) {
213 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 		inet_free_ifa(ifa);
215 	}
216 
217 	dev->ip_ptr = NULL;
218 
219 	devinet_sysctl_unregister(in_dev);
220 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 	arp_ifdown(dev);
222 
223 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225 
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228 	rcu_read_lock();
229 	for_primary_ifa(in_dev) {
230 		if (inet_ifa_match(a, ifa)) {
231 			if (!b || inet_ifa_match(b, ifa)) {
232 				rcu_read_unlock();
233 				return 1;
234 			}
235 		}
236 	} endfor_ifa(in_dev);
237 	rcu_read_unlock();
238 	return 0;
239 }
240 
/*
 * Remove the address *@ifap from @in_dev's list and announce the removal.
 *
 * @in_dev:  device owning the list
 * @ifap:    link (list head or a predecessor's ifa_next) pointing at the
 *           address to remove
 * @destroy: non-zero -> the removed address is also freed via inet_free_ifa()
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * If the removed address is primary, the entries after it are scanned for
 * secondaries with the same mask whose subnet contains its address: without
 * promotion each is unlinked, announced (RTM_DELADDR + NETDEV_DOWN) and
 * freed; with promotion the first one found is kept and promoted afterwards.
 * Requires RTNL.
 */
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242 			 int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244 	struct in_ifaddr *promote = NULL;
245 	struct in_ifaddr *ifa, *ifa1 = *ifap;
246 	struct in_ifaddr *last_prim = in_dev->ifa_list;
247 	struct in_ifaddr *prev_prom = NULL;
248 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249 
250 	ASSERT_RTNL();
251 
252 	/* 1. Deleting primary ifaddr forces deletion all secondaries
253 	 * unless alias promotion is set
254 	 **/
255 
256 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258 
259 		while ((ifa = *ifap1) != NULL) {
			/* Track the latest primary whose scope value is >= ifa1's. */
260 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261 			    ifa1->ifa_scope <= ifa->ifa_scope)
262 				last_prim = ifa;
263 
			/* Not a secondary of ifa1's subnet: skip it, remembering it
			 * as the possible predecessor of the entry to promote. */
264 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265 			    ifa1->ifa_mask != ifa->ifa_mask ||
266 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
267 				ifap1 = &ifa->ifa_next;
268 				prev_prom = ifa;
269 				continue;
270 			}
271 
272 			if (!do_promote) {
273 				*ifap1 = ifa->ifa_next;
274 
275 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276 				blocking_notifier_call_chain(&inetaddr_chain,
277 						NETDEV_DOWN, ifa);
278 				inet_free_ifa(ifa);
279 			} else {
280 				promote = ifa;
281 				break;
282 			}
283 		}
284 	}
285 
286 	/* 2. Unlink it */
287 
288 	*ifap = ifa1->ifa_next;
289 
290 	/* 3. Announce address deletion */
291 
292 	/* Send message first, then call notifier.
293 	   At first sight, FIB update triggered by notifier
294 	   will refer to already deleted ifaddr, that could confuse
295 	   netlink listeners. It is not true: look, gated sees
296 	   that route deleted and if it still thinks that ifaddr
297 	   is valid, it will try to restore deleted routes... Grr.
298 	   So that, this order is correct.
299 	 */
300 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302 
303 	if (promote) {
304 
		/* Entries were skipped before the candidate: relink it directly
		 * after last_prim.  With no skipped entries it stays in place. */
305 		if (prev_prom) {
306 			prev_prom->ifa_next = promote->ifa_next;
307 			promote->ifa_next = last_prim->ifa_next;
308 			last_prim->ifa_next = promote;
309 		}
310 
311 		promote->ifa_flags &= ~IFA_F_SECONDARY;
312 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313 		blocking_notifier_call_chain(&inetaddr_chain,
314 				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for the entries after the promoted one
		 * that share ifa1's mask and subnet. */
315 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316 			if (ifa1->ifa_mask != ifa->ifa_mask ||
317 			    !inet_ifa_match(ifa1->ifa_address, ifa))
318 					continue;
319 			fib_add_ifaddr(ifa);
320 		}
321 
322 	}
323 	if (destroy)
324 		inet_free_ifa(ifa1);
325 }
326 
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 			 int destroy)
329 {
330 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332 
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 			     u32 pid)
335 {
336 	struct in_device *in_dev = ifa->ifa_dev;
337 	struct in_ifaddr *ifa1, **ifap, **last_primary;
338 
339 	ASSERT_RTNL();
340 
341 	if (!ifa->ifa_local) {
342 		inet_free_ifa(ifa);
343 		return 0;
344 	}
345 
346 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
347 	last_primary = &in_dev->ifa_list;
348 
349 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350 	     ifap = &ifa1->ifa_next) {
351 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352 		    ifa->ifa_scope <= ifa1->ifa_scope)
353 			last_primary = &ifa1->ifa_next;
354 		if (ifa1->ifa_mask == ifa->ifa_mask &&
355 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
356 			if (ifa1->ifa_local == ifa->ifa_local) {
357 				inet_free_ifa(ifa);
358 				return -EEXIST;
359 			}
360 			if (ifa1->ifa_scope != ifa->ifa_scope) {
361 				inet_free_ifa(ifa);
362 				return -EINVAL;
363 			}
364 			ifa->ifa_flags |= IFA_F_SECONDARY;
365 		}
366 	}
367 
368 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369 		net_srandom(ifa->ifa_local);
370 		ifap = last_primary;
371 	}
372 
373 	ifa->ifa_next = *ifap;
374 	*ifap = ifa;
375 
376 	/* Send message first, then call notifier.
377 	   Notifier will trigger FIB update, so that
378 	   listeners of netlink will know about new ifaddr */
379 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381 
382 	return 0;
383 }
384 
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387 	return __inet_insert_ifa(ifa, NULL, 0);
388 }
389 
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
393 
394 	ASSERT_RTNL();
395 
396 	if (!in_dev) {
397 		inet_free_ifa(ifa);
398 		return -ENOBUFS;
399 	}
400 	ipv4_devconf_setall(in_dev);
401 	if (ifa->ifa_dev != in_dev) {
402 		WARN_ON(ifa->ifa_dev);
403 		in_dev_hold(in_dev);
404 		ifa->ifa_dev = in_dev;
405 	}
406 	if (ipv4_is_loopback(ifa->ifa_local))
407 		ifa->ifa_scope = RT_SCOPE_HOST;
408 	return inet_insert_ifa(ifa);
409 }
410 
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413 	struct net_device *dev;
414 	struct in_device *in_dev = NULL;
415 	read_lock(&dev_base_lock);
416 	dev = __dev_get_by_index(net, ifindex);
417 	if (dev)
418 		in_dev = in_dev_get(dev);
419 	read_unlock(&dev_base_lock);
420 	return in_dev;
421 }
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	__in_dev_put(in_dev);
460 
461 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462 	     ifap = &ifa->ifa_next) {
463 		if (tb[IFA_LOCAL] &&
464 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 			continue;
466 
467 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 			continue;
469 
470 		if (tb[IFA_ADDRESS] &&
471 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 			continue;
474 
475 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476 		return 0;
477 	}
478 
479 	err = -EADDRNOTAVAIL;
480 errout:
481 	return err;
482 }
483 
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486 	struct nlattr *tb[IFA_MAX+1];
487 	struct in_ifaddr *ifa;
488 	struct ifaddrmsg *ifm;
489 	struct net_device *dev;
490 	struct in_device *in_dev;
491 	int err;
492 
493 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494 	if (err < 0)
495 		goto errout;
496 
497 	ifm = nlmsg_data(nlh);
498 	err = -EINVAL;
499 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 		goto errout;
501 
502 	dev = __dev_get_by_index(net, ifm->ifa_index);
503 	err = -ENODEV;
504 	if (dev == NULL)
505 		goto errout;
506 
507 	in_dev = __in_dev_get_rtnl(dev);
508 	err = -ENOBUFS;
509 	if (in_dev == NULL)
510 		goto errout;
511 
512 	ifa = inet_alloc_ifa();
513 	if (ifa == NULL)
514 		/*
515 		 * A potential indev allocation can be left alive, it stays
516 		 * assigned to its device and is destroy with it.
517 		 */
518 		goto errout;
519 
520 	ipv4_devconf_setall(in_dev);
521 	in_dev_hold(in_dev);
522 
523 	if (tb[IFA_ADDRESS] == NULL)
524 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525 
526 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528 	ifa->ifa_flags = ifm->ifa_flags;
529 	ifa->ifa_scope = ifm->ifa_scope;
530 	ifa->ifa_dev = in_dev;
531 
532 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534 
535 	if (tb[IFA_BROADCAST])
536 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 
538 	if (tb[IFA_LABEL])
539 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540 	else
541 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542 
543 	return ifa;
544 
545 errout:
546 	return ERR_PTR(err);
547 }
548 
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551 	struct net *net = sock_net(skb->sk);
552 	struct in_ifaddr *ifa;
553 
554 	ASSERT_RTNL();
555 
556 	ifa = rtm_to_ifaddr(net, nlh);
557 	if (IS_ERR(ifa))
558 		return PTR_ERR(ifa);
559 
560 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562 
563 /*
564  *	Determine a default network mask, based on the IP address.
565  */
566 
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569 	int rc = -1;	/* Something else, probably a multicast. */
570 
571 	if (ipv4_is_zeronet(addr))
572 		rc = 0;
573 	else {
574 		__u32 haddr = ntohl(addr);
575 
576 		if (IN_CLASSA(haddr))
577 			rc = 8;
578 		else if (IN_CLASSB(haddr))
579 			rc = 16;
580 		else if (IN_CLASSC(haddr))
581 			rc = 24;
582 	}
583 
584 	return rc;
585 }
586 
587 
/*
 * ioctl entry point for IPv4 interface address operations.
 *
 * Handles SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIFFLAGS and
 * SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK}; any other @cmd yields -EINVAL.
 * @arg points to a user-space struct ifreq.  The interface name may carry
 * an alias suffix ("name:N"): the part before ':' selects the device, the
 * full string is compared against address labels.
 *
 * The set commands and SIOCSIFFLAGS require CAP_NET_ADMIN.  Everything
 * after argument validation runs under rtnl_lock().
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy), -EACCES,
 * -EINVAL, -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS, or the result of dev_change_flags() / inet_set_ifa().
 */
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590 	struct ifreq ifr;
591 	struct sockaddr_in sin_orig;
592 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593 	struct in_device *in_dev;
594 	struct in_ifaddr **ifap = NULL;
595 	struct in_ifaddr *ifa = NULL;
596 	struct net_device *dev;
597 	char *colon;
598 	int ret = -EFAULT;
599 	int tryaddrmatch = 0;
600 
601 	/*
602 	 *	Fetch the caller's info block into kernel space
603 	 */
604 
605 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606 		goto out;
607 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
608 
609 	/* save original address for comparison */
610 	memcpy(&sin_orig, sin, sizeof(*sin));
611 
	/* Temporarily cut the name at ':' so the base device can be looked up. */
612 	colon = strchr(ifr.ifr_name, ':');
613 	if (colon)
614 		*colon = 0;
615 
616 #ifdef CONFIG_KMOD
617 	dev_load(net, ifr.ifr_name);
618 #endif
619 
620 	switch (cmd) {
621 	case SIOCGIFADDR:	/* Get interface address */
622 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
623 	case SIOCGIFDSTADDR:	/* Get the destination address */
624 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
625 		/* Note that these ioctls will not sleep,
626 		   so that we do not impose a lock.
627 		   One day we will be forced to put shlock here (I mean SMP)
628 		 */
629 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
630 		memset(sin, 0, sizeof(*sin));
631 		sin->sin_family = AF_INET;
632 		break;
633 
634 	case SIOCSIFFLAGS:
635 		ret = -EACCES;
636 		if (!capable(CAP_NET_ADMIN))
637 			goto out;
638 		break;
639 	case SIOCSIFADDR:	/* Set interface address (and family) */
640 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
641 	case SIOCSIFDSTADDR:	/* Set the destination address */
642 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
643 		ret = -EACCES;
644 		if (!capable(CAP_NET_ADMIN))
645 			goto out;
646 		ret = -EINVAL;
647 		if (sin->sin_family != AF_INET)
648 			goto out;
649 		break;
650 	default:
651 		ret = -EINVAL;
652 		goto out;
653 	}
654 
655 	rtnl_lock();
656 
657 	ret = -ENODEV;
658 	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
659 		goto done;
660 
	/* Restore the alias suffix: labels are compared with the full name. */
661 	if (colon)
662 		*colon = ':';
663 
664 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
665 		if (tryaddrmatch) {
666 			/* Matthias Andree */
667 			/* compare label and address (4.4BSD style) */
668 			/* note: we only do this for a limited set of ioctls
669 			   and only if the original address family was AF_INET.
670 			   This is checked above. */
671 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
672 			     ifap = &ifa->ifa_next) {
673 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
674 				    sin_orig.sin_addr.s_addr ==
675 							ifa->ifa_address) {
676 					break; /* found */
677 				}
678 			}
679 		}
680 		/* we didn't get a match, maybe the application is
681 		   4.3BSD-style and passed in junk so we fall back to
682 		   comparing just the label */
683 		if (!ifa) {
684 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685 			     ifap = &ifa->ifa_next)
686 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687 					break;
688 		}
689 	}
690 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without a match. */
691 	ret = -EADDRNOTAVAIL;
692 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693 		goto done;
694 
695 	switch (cmd) {
696 	case SIOCGIFADDR:	/* Get interface address */
697 		sin->sin_addr.s_addr = ifa->ifa_local;
698 		goto rarok;
699 
700 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
701 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
702 		goto rarok;
703 
704 	case SIOCGIFDSTADDR:	/* Get the destination address */
705 		sin->sin_addr.s_addr = ifa->ifa_address;
706 		goto rarok;
707 
708 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
709 		sin->sin_addr.s_addr = ifa->ifa_mask;
710 		goto rarok;
711 
712 	case SIOCSIFFLAGS:
		/* On an alias name, clearing IFF_UP deletes that alias address;
		 * otherwise the flags are applied to the device itself. */
713 		if (colon) {
714 			ret = -EADDRNOTAVAIL;
715 			if (!ifa)
716 				break;
717 			ret = 0;
718 			if (!(ifr.ifr_flags & IFF_UP))
719 				inet_del_ifa(in_dev, ifap, 1);
720 			break;
721 		}
722 		ret = dev_change_flags(dev, ifr.ifr_flags);
723 		break;
724 
725 	case SIOCSIFADDR:	/* Set interface address (and family) */
726 		ret = -EINVAL;
727 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728 			break;
729 
730 		if (!ifa) {
731 			ret = -ENOBUFS;
732 			if ((ifa = inet_alloc_ifa()) == NULL)
733 				break;
734 			if (colon)
735 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736 			else
737 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738 		} else {
739 			ret = 0;
740 			if (ifa->ifa_local == sin->sin_addr.s_addr)
741 				break;
			/* Unlink without freeing (destroy == 0); the same object
			 * is modified and re-inserted below. */
742 			inet_del_ifa(in_dev, ifap, 0);
743 			ifa->ifa_broadcast = 0;
744 			ifa->ifa_scope = 0;
745 		}
746 
747 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748 
749 		if (!(dev->flags & IFF_POINTOPOINT)) {
750 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
751 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
752 			if ((dev->flags & IFF_BROADCAST) &&
753 			    ifa->ifa_prefixlen < 31)
754 				ifa->ifa_broadcast = ifa->ifa_address |
755 						     ~ifa->ifa_mask;
756 		} else {
757 			ifa->ifa_prefixlen = 32;
758 			ifa->ifa_mask = inet_make_mask(32);
759 		}
760 		ret = inet_set_ifa(dev, ifa);
761 		break;
762 
763 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
764 		ret = 0;
765 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
766 			inet_del_ifa(in_dev, ifap, 0);
767 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
768 			inet_insert_ifa(ifa);
769 		}
770 		break;
771 
772 	case SIOCSIFDSTADDR:	/* Set the destination address */
773 		ret = 0;
774 		if (ifa->ifa_address == sin->sin_addr.s_addr)
775 			break;
776 		ret = -EINVAL;
777 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778 			break;
779 		ret = 0;
780 		inet_del_ifa(in_dev, ifap, 0);
781 		ifa->ifa_address = sin->sin_addr.s_addr;
782 		inet_insert_ifa(ifa);
783 		break;
784 
785 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
786 
787 		/*
788 		 *	The mask we set must be legal.
789 		 */
790 		ret = -EINVAL;
791 		if (bad_mask(sin->sin_addr.s_addr, 0))
792 			break;
793 		ret = 0;
794 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
795 			__be32 old_mask = ifa->ifa_mask;
796 			inet_del_ifa(in_dev, ifap, 0);
797 			ifa->ifa_mask = sin->sin_addr.s_addr;
798 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799 
800 			/* See if current broadcast address matches
801 			 * with current netmask, then recalculate
802 			 * the broadcast address. Otherwise it's a
803 			 * funny address, so don't touch it since
804 			 * the user seems to know what (s)he's doing...
805 			 */
806 			if ((dev->flags & IFF_BROADCAST) &&
807 			    (ifa->ifa_prefixlen < 31) &&
808 			    (ifa->ifa_broadcast ==
809 			     (ifa->ifa_local|~old_mask))) {
810 				ifa->ifa_broadcast = (ifa->ifa_local |
811 						      ~sin->sin_addr.s_addr);
812 			}
813 			inet_insert_ifa(ifa);
814 		}
815 		break;
816 	}
817 done:
818 	rtnl_unlock();
819 out:
820 	return ret;
	/* Exit path of the get commands: drop RTNL first, then copy the
	 * filled-in ifreq back to user space. */
821 rarok:
822 	rtnl_unlock();
823 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
824 	goto out;
825 }
826 
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
830 	struct in_ifaddr *ifa;
831 	struct ifreq ifr;
832 	int done = 0;
833 
834 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835 		goto out;
836 
837 	for (; ifa; ifa = ifa->ifa_next) {
838 		if (!buf) {
839 			done += sizeof(ifr);
840 			continue;
841 		}
842 		if (len < (int) sizeof(ifr))
843 			break;
844 		memset(&ifr, 0, sizeof(struct ifreq));
845 		if (ifa->ifa_label)
846 			strcpy(ifr.ifr_name, ifa->ifa_label);
847 		else
848 			strcpy(ifr.ifr_name, dev->name);
849 
850 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852 								ifa->ifa_local;
853 
854 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855 			done = -EFAULT;
856 			break;
857 		}
858 		buf  += sizeof(struct ifreq);
859 		len  -= sizeof(struct ifreq);
860 		done += sizeof(struct ifreq);
861 	}
862 out:
863 	return done;
864 }
865 
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868 	__be32 addr = 0;
869 	struct in_device *in_dev;
870 	struct net *net = dev_net(dev);
871 
872 	rcu_read_lock();
873 	in_dev = __in_dev_get_rcu(dev);
874 	if (!in_dev)
875 		goto no_in_dev;
876 
877 	for_primary_ifa(in_dev) {
878 		if (ifa->ifa_scope > scope)
879 			continue;
880 		if (!dst || inet_ifa_match(dst, ifa)) {
881 			addr = ifa->ifa_local;
882 			break;
883 		}
884 		if (!addr)
885 			addr = ifa->ifa_local;
886 	} endfor_ifa(in_dev);
887 no_in_dev:
888 	rcu_read_unlock();
889 
890 	if (addr)
891 		goto out;
892 
893 	/* Not loopback addresses on loopback should be preferred
894 	   in this case. It is importnat that lo is the first interface
895 	   in dev_base list.
896 	 */
897 	read_lock(&dev_base_lock);
898 	rcu_read_lock();
899 	for_each_netdev(net, dev) {
900 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901 			continue;
902 
903 		for_primary_ifa(in_dev) {
904 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
905 			    ifa->ifa_scope <= scope) {
906 				addr = ifa->ifa_local;
907 				goto out_unlock_both;
908 			}
909 		} endfor_ifa(in_dev);
910 	}
911 out_unlock_both:
912 	read_unlock(&dev_base_lock);
913 	rcu_read_unlock();
914 out:
915 	return addr;
916 }
917 
/*
 * Search the addresses of @in_dev for a local address compatible with the
 * given constraints.
 *
 * @dst:   0, or an address that must fall into the subnet of some entry
 * @local: 0, or the exact local address wanted
 * @scope: largest acceptable ifa_scope value for the returned address
 *
 * Two things are tracked while walking the list: "addr", the first entry
 * whose ifa_local equals @local (or any entry if @local is 0) within
 * @scope, and "same", whether some entry's subnet contains @local (if
 * given) and @dst (if given).  Returns addr when "same" ended up set,
 * otherwise 0.
 */
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919 			      __be32 local, int scope)
920 {
921 	int same = 0;
922 	__be32 addr = 0;
923 
924 	for_ifa(in_dev) {
925 		if (!addr &&
926 		    (local == ifa->ifa_local || !local) &&
927 		    ifa->ifa_scope <= scope) {
928 			addr = ifa->ifa_local;
			/* Subnet match was already seen earlier: both found. */
929 			if (same)
930 				break;
931 		}
932 		if (!same) {
933 			same = (!local || inet_ifa_match(local, ifa)) &&
934 				(!dst || inet_ifa_match(dst, ifa));
935 			if (same && addr) {
936 				if (local || !dst)
937 					break;
938 				/* Is the selected addr into dst subnet? */
939 				if (inet_ifa_match(addr, ifa))
940 					break;
941 				/* No, then can we use new local src? */
942 				if (ifa->ifa_scope <= scope) {
943 					addr = ifa->ifa_local;
944 					break;
945 				}
946 				/* search for large dst subnet for addr */
947 				same = 0;
948 			}
949 		}
950 	} endfor_ifa(in_dev);
951 
952 	return same? addr : 0;
953 }
954 
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963 			 __be32 dst, __be32 local, int scope)
964 {
965 	__be32 addr = 0;
966 	struct net_device *dev;
967 	struct net *net;
968 
969 	if (scope != RT_SCOPE_LINK)
970 		return confirm_addr_indev(in_dev, dst, local, scope);
971 
972 	net = dev_net(in_dev->dev);
973 	read_lock(&dev_base_lock);
974 	rcu_read_lock();
975 	for_each_netdev(net, dev) {
976 		if ((in_dev = __in_dev_get_rcu(dev))) {
977 			addr = confirm_addr_indev(in_dev, dst, local, scope);
978 			if (addr)
979 				break;
980 		}
981 	}
982 	rcu_read_unlock();
983 	read_unlock(&dev_base_lock);
984 
985 	return addr;
986 }
987 
988 /*
989  *	Device notifier
990  */
991 
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996 
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001 
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007 	struct in_ifaddr *ifa;
1008 	int named = 0;
1009 
1010 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011 		char old[IFNAMSIZ], *dot;
1012 
1013 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015 		if (named++ == 0)
1016 			goto skip;
1017 		dot = strchr(old, ':');
1018 		if (dot == NULL) {
1019 			sprintf(old, ":%d", named);
1020 			dot = old;
1021 		}
1022 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023 			strcat(ifa->ifa_label, dot);
1024 		} else {
1025 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026 		}
1027 skip:
1028 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1029 	}
1030 }
1031 
/* An MTU is usable for IPv4 here when it is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned int smallest_ok = 68;

	if (mtu < smallest_ok)
		return false;
	return true;
}
1036 
/*
 * Net-device notifier callback; @ptr is the struct net_device concerned.
 *
 * Without an in_device: NETDEV_REGISTER creates one (failure is reported
 * via notifier_from_errno(-ENOMEM); loopback devices additionally get
 * NOXFRM and NOPOLICY set), and NETDEV_CHANGEMTU creates one if the new
 * MTU is valid.  With an in_device the switch below reacts to the event.
 * Every other path returns NOTIFY_DONE.
 */
1037 /* Called only under RTNL semaphore */
1038 
1039 static int inetdev_event(struct notifier_block *this, unsigned long event,
1040 			 void *ptr)
1041 {
1042 	struct net_device *dev = ptr;
1043 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1044 
1045 	ASSERT_RTNL();
1046 
1047 	if (!in_dev) {
1048 		if (event == NETDEV_REGISTER) {
1049 			in_dev = inetdev_init(dev);
1050 			if (!in_dev)
1051 				return notifier_from_errno(-ENOMEM);
1052 			if (dev->flags & IFF_LOOPBACK) {
1053 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1054 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1055 			}
1056 		} else if (event == NETDEV_CHANGEMTU) {
1057 			/* Re-enabling IP */
1058 			if (inetdev_valid_mtu(dev->mtu))
1059 				in_dev = inetdev_init(dev);
1060 		}
1061 		goto out;
1062 	}
1063 
1064 	switch (event) {
1065 	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device is
		 * unexpected; log it and clear the pointer. */
1066 		printk(KERN_DEBUG "inetdev_event: bug\n");
1067 		dev->ip_ptr = NULL;
1068 		break;
1069 	case NETDEV_UP:
1070 		if (!inetdev_valid_mtu(dev->mtu))
1071 			break;
		/* A loopback device coming up gets INADDR_LOOPBACK/8 with
		 * host scope (skipped silently if the allocation fails). */
1072 		if (dev->flags & IFF_LOOPBACK) {
1073 			struct in_ifaddr *ifa;
1074 			if ((ifa = inet_alloc_ifa()) != NULL) {
1075 				ifa->ifa_local =
1076 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1077 				ifa->ifa_prefixlen = 8;
1078 				ifa->ifa_mask = inet_make_mask(8);
1079 				in_dev_hold(in_dev);
1080 				ifa->ifa_dev = in_dev;
1081 				ifa->ifa_scope = RT_SCOPE_HOST;
1082 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1083 				inet_insert_ifa(ifa);
1084 			}
1085 		}
1086 		ip_mc_up(in_dev);
1087 		break;
1088 	case NETDEV_DOWN:
1089 		ip_mc_down(in_dev);
1090 		break;
1091 	case NETDEV_CHANGEMTU:
1092 		if (inetdev_valid_mtu(dev->mtu))
1093 			break;
		/* Deliberate fall through into NETDEV_UNREGISTER. */
1094 		/* disable IP when MTU is not enough */
1095 	case NETDEV_UNREGISTER:
1096 		inetdev_destroy(in_dev);
1097 		break;
1098 	case NETDEV_CHANGENAME:
1099 		/* Do not notify about label change, this event is
1100 		 * not interesting to applications using netlink.
1101 		 */
		/* NOTE(review): inetdev_changename() does send RTM_NEWADDR for
		 * every address, which seems at odds with the comment above --
		 * confirm which is intended. */
1102 		inetdev_changename(dev, in_dev);
1103 
1104 		devinet_sysctl_unregister(in_dev);
1105 		devinet_sysctl_register(in_dev);
1106 		break;
1107 	}
1108 out:
1109 	return NOTIFY_DONE;
1110 }
1111 
/* Notifier block whose callback is inetdev_event(); where it is registered
 * is not visible in this excerpt. */
1112 static struct notifier_block ip_netdev_notifier = {
1113 	.notifier_call =inetdev_event,
1114 };
1115 
1116 static inline size_t inet_nlmsg_size(void)
1117 {
1118 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1119 	       + nla_total_size(4) /* IFA_ADDRESS */
1120 	       + nla_total_size(4) /* IFA_LOCAL */
1121 	       + nla_total_size(4) /* IFA_BROADCAST */
1122 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1123 }
1124 
/*
 * Append one address message describing @ifa to @skb.
 *
 * @pid, @seq, @event and @flags are handed to nlmsg_put() for the header.
 * IFA_F_PERMANENT is always OR'ed into the reported flags.  The address,
 * local, broadcast and label attributes are emitted only when non-zero /
 * non-empty.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit (the partial message is cancelled).
 * NOTE(review): the NLA_PUT_* macros presumably jump to nla_put_failure on
 * lack of space -- no explicit goto is visible here, confirm.
 */
1125 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1126 			    u32 pid, u32 seq, int event, unsigned int flags)
1127 {
1128 	struct ifaddrmsg *ifm;
1129 	struct nlmsghdr  *nlh;
1130 
1131 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1132 	if (nlh == NULL)
1133 		return -EMSGSIZE;
1134 
1135 	ifm = nlmsg_data(nlh);
1136 	ifm->ifa_family = AF_INET;
1137 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1138 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1139 	ifm->ifa_scope = ifa->ifa_scope;
1140 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1141 
1142 	if (ifa->ifa_address)
1143 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1144 
1145 	if (ifa->ifa_local)
1146 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1147 
1148 	if (ifa->ifa_broadcast)
1149 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1150 
1151 	if (ifa->ifa_label[0])
1152 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1153 
1154 	return nlmsg_end(skb, nlh);
1155 
1156 nla_put_failure:
1157 	nlmsg_cancel(skb, nlh);
1158 	return -EMSGSIZE;
1159 }
1160 
1161 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1162 {
1163 	struct net *net = sock_net(skb->sk);
1164 	int idx, ip_idx;
1165 	struct net_device *dev;
1166 	struct in_device *in_dev;
1167 	struct in_ifaddr *ifa;
1168 	int s_ip_idx, s_idx = cb->args[0];
1169 
1170 	s_ip_idx = ip_idx = cb->args[1];
1171 	idx = 0;
1172 	for_each_netdev(net, dev) {
1173 		if (idx < s_idx)
1174 			goto cont;
1175 		if (idx > s_idx)
1176 			s_ip_idx = 0;
1177 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1178 			goto cont;
1179 
1180 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1181 		     ifa = ifa->ifa_next, ip_idx++) {
1182 			if (ip_idx < s_ip_idx)
1183 				continue;
1184 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1185 					     cb->nlh->nlmsg_seq,
1186 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1187 				goto done;
1188 		}
1189 cont:
1190 		idx++;
1191 	}
1192 
1193 done:
1194 	cb->args[0] = idx;
1195 	cb->args[1] = ip_idx;
1196 
1197 	return skb->len;
1198 }
1199 
1200 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1201 		      u32 pid)
1202 {
1203 	struct sk_buff *skb;
1204 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1205 	int err = -ENOBUFS;
1206 	struct net *net;
1207 
1208 	net = dev_net(ifa->ifa_dev->dev);
1209 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1210 	if (skb == NULL)
1211 		goto errout;
1212 
1213 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1214 	if (err < 0) {
1215 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1216 		WARN_ON(err == -EMSGSIZE);
1217 		kfree_skb(skb);
1218 		goto errout;
1219 	}
1220 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1221 errout:
1222 	if (err < 0)
1223 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1224 }
1225 
1226 #ifdef CONFIG_SYSCTL
1227 
1228 static void devinet_copy_dflt_conf(struct net *net, int i)
1229 {
1230 	struct net_device *dev;
1231 
1232 	read_lock(&dev_base_lock);
1233 	for_each_netdev(net, dev) {
1234 		struct in_device *in_dev;
1235 		rcu_read_lock();
1236 		in_dev = __in_dev_get_rcu(dev);
1237 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1238 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1239 		rcu_read_unlock();
1240 	}
1241 	read_unlock(&dev_base_lock);
1242 }
1243 
1244 static void inet_forward_change(struct net *net)
1245 {
1246 	struct net_device *dev;
1247 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1248 
1249 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1250 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1251 
1252 	read_lock(&dev_base_lock);
1253 	for_each_netdev(net, dev) {
1254 		struct in_device *in_dev;
1255 		if (on)
1256 			dev_disable_lro(dev);
1257 		rcu_read_lock();
1258 		in_dev = __in_dev_get_rcu(dev);
1259 		if (in_dev)
1260 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1261 		rcu_read_unlock();
1262 	}
1263 	read_unlock(&dev_base_lock);
1264 }
1265 
1266 static int devinet_conf_proc(ctl_table *ctl, int write,
1267 			     struct file* filp, void __user *buffer,
1268 			     size_t *lenp, loff_t *ppos)
1269 {
1270 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1271 
1272 	if (write) {
1273 		struct ipv4_devconf *cnf = ctl->extra1;
1274 		struct net *net = ctl->extra2;
1275 		int i = (int *)ctl->data - cnf->data;
1276 
1277 		set_bit(i, cnf->state);
1278 
1279 		if (cnf == net->ipv4.devconf_dflt)
1280 			devinet_copy_dflt_conf(net, i);
1281 	}
1282 
1283 	return ret;
1284 }
1285 
1286 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1287 			       void __user *oldval, size_t __user *oldlenp,
1288 			       void __user *newval, size_t newlen)
1289 {
1290 	struct ipv4_devconf *cnf;
1291 	struct net *net;
1292 	int *valp = table->data;
1293 	int new;
1294 	int i;
1295 
1296 	if (!newval || !newlen)
1297 		return 0;
1298 
1299 	if (newlen != sizeof(int))
1300 		return -EINVAL;
1301 
1302 	if (get_user(new, (int __user *)newval))
1303 		return -EFAULT;
1304 
1305 	if (new == *valp)
1306 		return 0;
1307 
1308 	if (oldval && oldlenp) {
1309 		size_t len;
1310 
1311 		if (get_user(len, oldlenp))
1312 			return -EFAULT;
1313 
1314 		if (len) {
1315 			if (len > table->maxlen)
1316 				len = table->maxlen;
1317 			if (copy_to_user(oldval, valp, len))
1318 				return -EFAULT;
1319 			if (put_user(len, oldlenp))
1320 				return -EFAULT;
1321 		}
1322 	}
1323 
1324 	*valp = new;
1325 
1326 	cnf = table->extra1;
1327 	net = table->extra2;
1328 	i = (int *)table->data - cnf->data;
1329 
1330 	set_bit(i, cnf->state);
1331 
1332 	if (cnf == net->ipv4.devconf_dflt)
1333 		devinet_copy_dflt_conf(net, i);
1334 
1335 	return 1;
1336 }
1337 
1338 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1339 				  struct file* filp, void __user *buffer,
1340 				  size_t *lenp, loff_t *ppos)
1341 {
1342 	int *valp = ctl->data;
1343 	int val = *valp;
1344 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1345 
1346 	if (write && *valp != val) {
1347 		struct net *net = ctl->extra2;
1348 
1349 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1350 			rtnl_lock();
1351 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1352 				inet_forward_change(net);
1353 			} else if (*valp) {
1354 				struct ipv4_devconf *cnf = ctl->extra1;
1355 				struct in_device *idev =
1356 					container_of(cnf, struct in_device, cnf);
1357 				dev_disable_lro(idev->dev);
1358 			}
1359 			rtnl_unlock();
1360 			rt_cache_flush(net, 0);
1361 		}
1362 	}
1363 
1364 	return ret;
1365 }
1366 
1367 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1368 			 struct file* filp, void __user *buffer,
1369 			 size_t *lenp, loff_t *ppos)
1370 {
1371 	int *valp = ctl->data;
1372 	int val = *valp;
1373 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1374 	struct net *net = ctl->extra2;
1375 
1376 	if (write && *valp != val)
1377 		rt_cache_flush(net, 0);
1378 
1379 	return ret;
1380 }
1381 
1382 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1383 				  void __user *oldval, size_t __user *oldlenp,
1384 				  void __user *newval, size_t newlen)
1385 {
1386 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1387 				      newval, newlen);
1388 	struct net *net = table->extra2;
1389 
1390 	if (ret == 1)
1391 		rt_cache_flush(net, 0);
1392 
1393 	return ret;
1394 }
1395 
1396 
/*
 * Initialiser for one ctl_table entry of the devconf template table.
 *
 *   attr    suffix of the NET_IPV4_CONF_* constant; it is used as the
 *           binary ctl_name and, minus one, as the index into
 *           ipv4_devconf.data[]
 *   name    procname
 *   mval    file mode
 *   proc    proc handler
 *   sysctl  strategy routine
 *
 * .data and .extra1 refer to the static ipv4_devconf here.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl)		\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					NET_IPV4_CONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.strategy	= sysctl,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Plain integer entry, mode 0644, generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Same handlers as the RW variant, but mode 0444. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-chosen proc handler and strategy. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl)		\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry using the route-cache-flushing handlers. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1424 
1425 static struct devinet_sysctl_table {
1426 	struct ctl_table_header *sysctl_header;
1427 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1428 	char *dev_name;
1429 } devinet_sysctl = {
1430 	.devinet_vars = {
1431 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1432 					     devinet_sysctl_forward,
1433 					     devinet_conf_sysctl),
1434 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1435 
1436 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1437 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1438 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1439 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1440 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1441 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1442 					"accept_source_route"),
1443 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1444 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1445 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1446 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1447 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1449 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1450 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1451 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1452 
1453 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1454 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1455 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1456 					      "force_igmp_version"),
1457 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1458 					      "promote_secondaries"),
1459 	},
1460 };
1461 
1462 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1463 		int ctl_name, struct ipv4_devconf *p)
1464 {
1465 	int i;
1466 	struct devinet_sysctl_table *t;
1467 
1468 #define DEVINET_CTL_PATH_DEV	3
1469 
1470 	struct ctl_path devinet_ctl_path[] = {
1471 		{ .procname = "net", .ctl_name = CTL_NET, },
1472 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1473 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1474 		{ /* to be set */ },
1475 		{ },
1476 	};
1477 
1478 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1479 	if (!t)
1480 		goto out;
1481 
1482 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1483 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1484 		t->devinet_vars[i].extra1 = p;
1485 		t->devinet_vars[i].extra2 = net;
1486 	}
1487 
1488 	/*
1489 	 * Make a copy of dev_name, because '.procname' is regarded as const
1490 	 * by sysctl and we wouldn't want anyone to change it under our feet
1491 	 * (see SIOCSIFNAME).
1492 	 */
1493 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1494 	if (!t->dev_name)
1495 		goto free;
1496 
1497 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1498 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1499 
1500 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1501 			t->devinet_vars);
1502 	if (!t->sysctl_header)
1503 		goto free_procname;
1504 
1505 	p->sysctl = t;
1506 	return 0;
1507 
1508 free_procname:
1509 	kfree(t->dev_name);
1510 free:
1511 	kfree(t);
1512 out:
1513 	return -ENOBUFS;
1514 }
1515 
1516 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1517 {
1518 	struct devinet_sysctl_table *t = cnf->sysctl;
1519 
1520 	if (t == NULL)
1521 		return;
1522 
1523 	cnf->sysctl = NULL;
1524 	unregister_sysctl_table(t->sysctl_header);
1525 	kfree(t->dev_name);
1526 	kfree(t);
1527 }
1528 
1529 static void devinet_sysctl_register(struct in_device *idev)
1530 {
1531 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1532 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1533 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1534 			idev->dev->ifindex, &idev->cnf);
1535 }
1536 
1537 static void devinet_sysctl_unregister(struct in_device *idev)
1538 {
1539 	__devinet_sysctl_unregister(&idev->cnf);
1540 	neigh_sysctl_unregister(idev->arp_parms);
1541 }
1542 
1543 static struct ctl_table ctl_forward_entry[] = {
1544 	{
1545 		.ctl_name	= NET_IPV4_FORWARD,
1546 		.procname	= "ip_forward",
1547 		.data		= &ipv4_devconf.data[
1548 					NET_IPV4_CONF_FORWARDING - 1],
1549 		.maxlen		= sizeof(int),
1550 		.mode		= 0644,
1551 		.proc_handler	= devinet_sysctl_forward,
1552 		.strategy	= devinet_conf_sysctl,
1553 		.extra1		= &ipv4_devconf,
1554 		.extra2		= &init_net,
1555 	},
1556 	{ },
1557 };
1558 
1559 static __net_initdata struct ctl_path net_ipv4_path[] = {
1560 	{ .procname = "net", .ctl_name = CTL_NET, },
1561 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1562 	{ },
1563 };
1564 #endif
1565 
1566 static __net_init int devinet_init_net(struct net *net)
1567 {
1568 	int err;
1569 	struct ipv4_devconf *all, *dflt;
1570 #ifdef CONFIG_SYSCTL
1571 	struct ctl_table *tbl = ctl_forward_entry;
1572 	struct ctl_table_header *forw_hdr;
1573 #endif
1574 
1575 	err = -ENOMEM;
1576 	all = &ipv4_devconf;
1577 	dflt = &ipv4_devconf_dflt;
1578 
1579 	if (net != &init_net) {
1580 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1581 		if (all == NULL)
1582 			goto err_alloc_all;
1583 
1584 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1585 		if (dflt == NULL)
1586 			goto err_alloc_dflt;
1587 
1588 #ifdef CONFIG_SYSCTL
1589 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1590 		if (tbl == NULL)
1591 			goto err_alloc_ctl;
1592 
1593 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1594 		tbl[0].extra1 = all;
1595 		tbl[0].extra2 = net;
1596 #endif
1597 	}
1598 
1599 #ifdef CONFIG_SYSCTL
1600 	err = __devinet_sysctl_register(net, "all",
1601 			NET_PROTO_CONF_ALL, all);
1602 	if (err < 0)
1603 		goto err_reg_all;
1604 
1605 	err = __devinet_sysctl_register(net, "default",
1606 			NET_PROTO_CONF_DEFAULT, dflt);
1607 	if (err < 0)
1608 		goto err_reg_dflt;
1609 
1610 	err = -ENOMEM;
1611 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1612 	if (forw_hdr == NULL)
1613 		goto err_reg_ctl;
1614 	net->ipv4.forw_hdr = forw_hdr;
1615 #endif
1616 
1617 	net->ipv4.devconf_all = all;
1618 	net->ipv4.devconf_dflt = dflt;
1619 	return 0;
1620 
1621 #ifdef CONFIG_SYSCTL
1622 err_reg_ctl:
1623 	__devinet_sysctl_unregister(dflt);
1624 err_reg_dflt:
1625 	__devinet_sysctl_unregister(all);
1626 err_reg_all:
1627 	if (tbl != ctl_forward_entry)
1628 		kfree(tbl);
1629 err_alloc_ctl:
1630 #endif
1631 	if (dflt != &ipv4_devconf_dflt)
1632 		kfree(dflt);
1633 err_alloc_dflt:
1634 	if (all != &ipv4_devconf)
1635 		kfree(all);
1636 err_alloc_all:
1637 	return err;
1638 }
1639 
1640 static __net_exit void devinet_exit_net(struct net *net)
1641 {
1642 #ifdef CONFIG_SYSCTL
1643 	struct ctl_table *tbl;
1644 
1645 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1646 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1647 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1648 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1649 	kfree(tbl);
1650 #endif
1651 	kfree(net->ipv4.devconf_dflt);
1652 	kfree(net->ipv4.devconf_all);
1653 }
1654 
1655 static __net_initdata struct pernet_operations devinet_ops = {
1656 	.init = devinet_init_net,
1657 	.exit = devinet_exit_net,
1658 };
1659 
1660 void __init devinet_init(void)
1661 {
1662 	register_pernet_subsys(&devinet_ops);
1663 
1664 	register_gifconf(PF_INET, inet_gifconf);
1665 	register_netdevice_notifier(&ip_netdev_notifier);
1666 
1667 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1668 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1669 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1670 }
1671 
/* Symbols exported to modules. */

/* inetaddr notifier chain registration */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);

/* in_device helpers */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(inet_select_addr);
1677