xref: /openbmc/linux/net/ipv4/devinet.c (revision 9ac8d3fb)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57 
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 
65 static struct ipv4_devconf ipv4_devconf = {
66 	.data = {
67 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71 	},
72 };
73 
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 	.data = {
76 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81 	},
82 };
83 
/*
 * Read attribute 'attr' from the default devconf that
 * net->ipv4.devconf_dflt points to.  'net' is parenthesized so that any
 * pointer-valued expression can be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86 
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93 
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95 
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98 			 int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110 
111 /* Locks all the inet devices. */
112 
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116 
117 	if (ifa) {
118 		INIT_RCU_HEAD(&ifa->rcu_head);
119 	}
120 
121 	return ifa;
122 }
123 
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127 	if (ifa->ifa_dev)
128 		in_dev_put(ifa->ifa_dev);
129 	kfree(ifa);
130 }
131 
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136 
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139 	struct net_device *dev = idev->dev;
140 
141 	WARN_ON(idev->ifa_list);
142 	WARN_ON(idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145 	       idev, dev ? dev->name : "NIL");
146 #endif
147 	dev_put(dev);
148 	if (!idev->dead)
149 		printk("Freeing alive in_device %p\n", idev);
150 	else {
151 		kfree(idev);
152 	}
153 }
154 
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157 	struct in_device *in_dev;
158 
159 	ASSERT_RTNL();
160 
161 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162 	if (!in_dev)
163 		goto out;
164 	INIT_RCU_HEAD(&in_dev->rcu_head);
165 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166 			sizeof(in_dev->cnf));
167 	in_dev->cnf.sysctl = NULL;
168 	in_dev->dev = dev;
169 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170 		goto out_kfree;
171 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172 		dev_disable_lro(dev);
173 	/* Reference in_dev->dev */
174 	dev_hold(dev);
175 	/* Account for reference dev->ip_ptr (below) */
176 	in_dev_hold(in_dev);
177 
178 	devinet_sysctl_register(in_dev);
179 	ip_mc_init_dev(in_dev);
180 	if (dev->flags & IFF_UP)
181 		ip_mc_up(in_dev);
182 
183 	/* we can receive as soon as ip_ptr is set -- do this last */
184 	rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186 	return in_dev;
187 out_kfree:
188 	kfree(in_dev);
189 	in_dev = NULL;
190 	goto out;
191 }
192 
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
196 	in_dev_put(idev);
197 }
198 
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201 	struct in_ifaddr *ifa;
202 	struct net_device *dev;
203 
204 	ASSERT_RTNL();
205 
206 	dev = in_dev->dev;
207 
208 	in_dev->dead = 1;
209 
210 	ip_mc_destroy_dev(in_dev);
211 
212 	while ((ifa = in_dev->ifa_list) != NULL) {
213 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 		inet_free_ifa(ifa);
215 	}
216 
217 	dev->ip_ptr = NULL;
218 
219 	devinet_sysctl_unregister(in_dev);
220 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 	arp_ifdown(dev);
222 
223 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225 
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228 	rcu_read_lock();
229 	for_primary_ifa(in_dev) {
230 		if (inet_ifa_match(a, ifa)) {
231 			if (!b || inet_ifa_match(b, ifa)) {
232 				rcu_read_unlock();
233 				return 1;
234 			}
235 		}
236 	} endfor_ifa(in_dev);
237 	rcu_read_unlock();
238 	return 0;
239 }
240 
/*
 * Unlink the address referenced by *ifap from in_dev's address list and
 * announce the removal.
 *
 * @in_dev:   device whose ifa_list contains the address
 * @ifap:     the list link (head pointer or a predecessor's ifa_next)
 *            that currently points at the address to delete
 * @destroy:  when non-zero the deleted address is released with
 *            inet_free_ifa() at the end
 * @nlh, @pid: passed through to rtmsg_ifa() for the messages sent here
 *
 * If the address is a primary (IFA_F_SECONDARY clear), the secondaries
 * that have the same mask and match its address are handled too: they
 * are deleted one by one, or - when IN_DEV_PROMOTE_SECONDARIES() is set -
 * the first of them is promoted to primary and the remaining matching
 * entries after it are re-added to the FIB.  Must run under RTNL.
 */
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242 			 int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244 	struct in_ifaddr *promote = NULL;
245 	struct in_ifaddr *ifa, *ifa1 = *ifap;
246 	struct in_ifaddr *last_prim = in_dev->ifa_list;
247 	struct in_ifaddr *prev_prom = NULL;
248 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249 
250 	ASSERT_RTNL();
251 
252 	/* 1. Deleting primary ifaddr forces deletion all secondaries
253 	 * unless alias promotion is set
254 	 **/
255 
256 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258 
259 		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope is not below ifa1's;
			 * a promoted address is re-linked right after it. */
260 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261 			    ifa1->ifa_scope <= ifa->ifa_scope)
262 				last_prim = ifa;
263 
			/* Skip entries that are not secondaries of ifa1 (primary,
			 * different mask, or address outside ifa1's subnet). */
264 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265 			    ifa1->ifa_mask != ifa->ifa_mask ||
266 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
267 				ifap1 = &ifa->ifa_next;
268 				prev_prom = ifa;
269 				continue;
270 			}
271 
272 			if (!do_promote) {
				/* No promotion: unlink, announce and free this secondary. */
273 				*ifap1 = ifa->ifa_next;
274 
275 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276 				blocking_notifier_call_chain(&inetaddr_chain,
277 						NETDEV_DOWN, ifa);
278 				inet_free_ifa(ifa);
279 			} else {
				/* Promotion: the first matching secondary is the candidate. */
280 				promote = ifa;
281 				break;
282 			}
283 		}
284 	}
285 
286 	/* 2. Unlink it */
287 
288 	*ifap = ifa1->ifa_next;
289 
290 	/* 3. Announce address deletion */
291 
292 	/* Send message first, then call notifier.
293 	   At first sight, FIB update triggered by notifier
294 	   will refer to already deleted ifaddr, that could confuse
295 	   netlink listeners. It is not true: look, gated sees
296 	   that route deleted and if it still thinks that ifaddr
297 	   is valid, it will try to restore deleted routes... Grr.
298 	   So that, this order is correct.
299 	 */
300 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302 
303 	if (promote) {
304 
		/* prev_prom is only set when at least one non-matching entry
		 * sits between ifa1 and the candidate; in that case move the
		 * candidate so that it follows last_prim. */
305 		if (prev_prom) {
306 			prev_prom->ifa_next = promote->ifa_next;
307 			promote->ifa_next = last_prim->ifa_next;
308 			last_prim->ifa_next = promote;
309 		}
310 
311 		promote->ifa_flags &= ~IFA_F_SECONDARY;
312 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313 		blocking_notifier_call_chain(&inetaddr_chain,
314 				NETDEV_UP, promote);
		/* Re-add the remaining entries of ifa1's subnet to the FIB. */
315 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316 			if (ifa1->ifa_mask != ifa->ifa_mask ||
317 			    !inet_ifa_match(ifa1->ifa_address, ifa))
318 					continue;
319 			fib_add_ifaddr(ifa);
320 		}
321 
322 	}
323 	if (destroy)
324 		inet_free_ifa(ifa1);
325 }
326 
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 			 int destroy)
329 {
330 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332 
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 			     u32 pid)
335 {
336 	struct in_device *in_dev = ifa->ifa_dev;
337 	struct in_ifaddr *ifa1, **ifap, **last_primary;
338 
339 	ASSERT_RTNL();
340 
341 	if (!ifa->ifa_local) {
342 		inet_free_ifa(ifa);
343 		return 0;
344 	}
345 
346 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
347 	last_primary = &in_dev->ifa_list;
348 
349 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350 	     ifap = &ifa1->ifa_next) {
351 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352 		    ifa->ifa_scope <= ifa1->ifa_scope)
353 			last_primary = &ifa1->ifa_next;
354 		if (ifa1->ifa_mask == ifa->ifa_mask &&
355 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
356 			if (ifa1->ifa_local == ifa->ifa_local) {
357 				inet_free_ifa(ifa);
358 				return -EEXIST;
359 			}
360 			if (ifa1->ifa_scope != ifa->ifa_scope) {
361 				inet_free_ifa(ifa);
362 				return -EINVAL;
363 			}
364 			ifa->ifa_flags |= IFA_F_SECONDARY;
365 		}
366 	}
367 
368 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369 		net_srandom(ifa->ifa_local);
370 		ifap = last_primary;
371 	}
372 
373 	ifa->ifa_next = *ifap;
374 	*ifap = ifa;
375 
376 	/* Send message first, then call notifier.
377 	   Notifier will trigger FIB update, so that
378 	   listeners of netlink will know about new ifaddr */
379 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381 
382 	return 0;
383 }
384 
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387 	return __inet_insert_ifa(ifa, NULL, 0);
388 }
389 
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
393 
394 	ASSERT_RTNL();
395 
396 	if (!in_dev) {
397 		inet_free_ifa(ifa);
398 		return -ENOBUFS;
399 	}
400 	ipv4_devconf_setall(in_dev);
401 	if (ifa->ifa_dev != in_dev) {
402 		WARN_ON(ifa->ifa_dev);
403 		in_dev_hold(in_dev);
404 		ifa->ifa_dev = in_dev;
405 	}
406 	if (ipv4_is_loopback(ifa->ifa_local))
407 		ifa->ifa_scope = RT_SCOPE_HOST;
408 	return inet_insert_ifa(ifa);
409 }
410 
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413 	struct net_device *dev;
414 	struct in_device *in_dev = NULL;
415 	read_lock(&dev_base_lock);
416 	dev = __dev_get_by_index(net, ifindex);
417 	if (dev)
418 		in_dev = in_dev_get(dev);
419 	read_unlock(&dev_base_lock);
420 	return in_dev;
421 }
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	__in_dev_put(in_dev);
460 
461 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462 	     ifap = &ifa->ifa_next) {
463 		if (tb[IFA_LOCAL] &&
464 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 			continue;
466 
467 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 			continue;
469 
470 		if (tb[IFA_ADDRESS] &&
471 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 			continue;
474 
475 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476 		return 0;
477 	}
478 
479 	err = -EADDRNOTAVAIL;
480 errout:
481 	return err;
482 }
483 
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486 	struct nlattr *tb[IFA_MAX+1];
487 	struct in_ifaddr *ifa;
488 	struct ifaddrmsg *ifm;
489 	struct net_device *dev;
490 	struct in_device *in_dev;
491 	int err;
492 
493 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494 	if (err < 0)
495 		goto errout;
496 
497 	ifm = nlmsg_data(nlh);
498 	err = -EINVAL;
499 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 		goto errout;
501 
502 	dev = __dev_get_by_index(net, ifm->ifa_index);
503 	err = -ENODEV;
504 	if (dev == NULL)
505 		goto errout;
506 
507 	in_dev = __in_dev_get_rtnl(dev);
508 	err = -ENOBUFS;
509 	if (in_dev == NULL)
510 		goto errout;
511 
512 	ifa = inet_alloc_ifa();
513 	if (ifa == NULL)
514 		/*
515 		 * A potential indev allocation can be left alive, it stays
516 		 * assigned to its device and is destroy with it.
517 		 */
518 		goto errout;
519 
520 	ipv4_devconf_setall(in_dev);
521 	in_dev_hold(in_dev);
522 
523 	if (tb[IFA_ADDRESS] == NULL)
524 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525 
526 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528 	ifa->ifa_flags = ifm->ifa_flags;
529 	ifa->ifa_scope = ifm->ifa_scope;
530 	ifa->ifa_dev = in_dev;
531 
532 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534 
535 	if (tb[IFA_BROADCAST])
536 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 
538 	if (tb[IFA_LABEL])
539 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540 	else
541 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542 
543 	return ifa;
544 
545 errout:
546 	return ERR_PTR(err);
547 }
548 
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551 	struct net *net = sock_net(skb->sk);
552 	struct in_ifaddr *ifa;
553 
554 	ASSERT_RTNL();
555 
556 	ifa = rtm_to_ifaddr(net, nlh);
557 	if (IS_ERR(ifa))
558 		return PTR_ERR(ifa);
559 
560 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562 
563 /*
564  *	Determine a default network mask, based on the IP address.
565  */
566 
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569 	int rc = -1;	/* Something else, probably a multicast. */
570 
571 	if (ipv4_is_zeronet(addr))
572 		rc = 0;
573 	else {
574 		__u32 haddr = ntohl(addr);
575 
576 		if (IN_CLASSA(haddr))
577 			rc = 8;
578 		else if (IN_CLASSB(haddr))
579 			rc = 16;
580 		else if (IN_CLASSC(haddr))
581 			rc = 24;
582 	}
583 
584 	return rc;
585 }
586 
587 
/*
 * Handle the IPv4 interface-address ioctls for namespace @net:
 * SIOC{G,S}IFADDR, SIOC{G,S}IFBRDADDR, SIOC{G,S}IFDSTADDR,
 * SIOC{G,S}IFNETMASK and SIOCSIFFLAGS.
 *
 * @arg points to a user-space struct ifreq.  It is copied in first and,
 * for the "get" commands, copied back with the requested address.  A ':'
 * in ifr_name selects an alias: the device is looked up by the part
 * before the colon, the address by the full label.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         copying the ifreq from or to user space failed
 *   -EACCES         a "set" command without CAP_NET_ADMIN
 *   -EINVAL         unknown command, non-AF_INET family on a "set"
 *                   command, or an unacceptable address or netmask
 *   -ENODEV         no device with that name
 *   -EADDRNOTAVAIL  no matching address (except for SIOCSIFADDR and
 *                   non-alias SIOCSIFFLAGS, which do not need one)
 *   -ENOBUFS        address allocation failed in SIOCSIFADDR
 * SIOCSIFFLAGS without an alias returns whatever dev_change_flags()
 * returns; SIOCSIFADDR returns whatever inet_set_ifa() returns.
 */
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590 	struct ifreq ifr;
591 	struct sockaddr_in sin_orig;
592 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593 	struct in_device *in_dev;
594 	struct in_ifaddr **ifap = NULL;
595 	struct in_ifaddr *ifa = NULL;
596 	struct net_device *dev;
597 	char *colon;
598 	int ret = -EFAULT;
599 	int tryaddrmatch = 0;
600 
601 	/*
602 	 *	Fetch the caller's info block into kernel space
603 	 */
604 
605 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606 		goto out;
	/* Force termination: the name comes from user space. */
607 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
608 
609 	/* save original address for comparison */
610 	memcpy(&sin_orig, sin, sizeof(*sin));
611 
	/* Temporarily cut off an alias suffix so that the device lookups
	 * below see the bare device name; it is restored further down. */
612 	colon = strchr(ifr.ifr_name, ':');
613 	if (colon)
614 		*colon = 0;
615 
616 	dev_load(net, ifr.ifr_name);
617 
	/* First pass over cmd: permission and argument checks only. */
618 	switch (cmd) {
619 	case SIOCGIFADDR:	/* Get interface address */
620 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
621 	case SIOCGIFDSTADDR:	/* Get the destination address */
622 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
623 		/* Note that these ioctls will not sleep,
624 		   so that we do not impose a lock.
625 		   One day we will be forced to put shlock here (I mean SMP)
626 		 */
627 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
628 		memset(sin, 0, sizeof(*sin));
629 		sin->sin_family = AF_INET;
630 		break;
631 
632 	case SIOCSIFFLAGS:
633 		ret = -EACCES;
634 		if (!capable(CAP_NET_ADMIN))
635 			goto out;
636 		break;
637 	case SIOCSIFADDR:	/* Set interface address (and family) */
638 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
639 	case SIOCSIFDSTADDR:	/* Set the destination address */
640 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
641 		ret = -EACCES;
642 		if (!capable(CAP_NET_ADMIN))
643 			goto out;
644 		ret = -EINVAL;
645 		if (sin->sin_family != AF_INET)
646 			goto out;
647 		break;
648 	default:
649 		ret = -EINVAL;
650 		goto out;
651 	}
652 
653 	rtnl_lock();
654 
655 	ret = -ENODEV;
656 	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
657 		goto done;
658 
	/* Restore the full label for the comparisons against ifa_label. */
659 	if (colon)
660 		*colon = ':';
661 
662 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
663 		if (tryaddrmatch) {
664 			/* Matthias Andree */
665 			/* compare label and address (4.4BSD style) */
666 			/* note: we only do this for a limited set of ioctls
667 			   and only if the original address family was AF_INET.
668 			   This is checked above. */
669 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670 			     ifap = &ifa->ifa_next) {
671 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 				    sin_orig.sin_addr.s_addr ==
673 							ifa->ifa_address) {
674 					break; /* found */
675 				}
676 			}
677 		}
678 		/* we didn't get a match, maybe the application is
679 		   4.3BSD-style and passed in junk so we fall back to
680 		   comparing just the label */
681 		if (!ifa) {
682 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683 			     ifap = &ifa->ifa_next)
684 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685 					break;
686 		}
687 	}
688 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
	 * existing address. */
689 	ret = -EADDRNOTAVAIL;
690 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691 		goto done;
692 
	/* Second pass over cmd: perform the operation under RTNL. */
693 	switch (cmd) {
694 	case SIOCGIFADDR:	/* Get interface address */
695 		sin->sin_addr.s_addr = ifa->ifa_local;
696 		goto rarok;
697 
698 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
699 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
700 		goto rarok;
701 
702 	case SIOCGIFDSTADDR:	/* Get the destination address */
703 		sin->sin_addr.s_addr = ifa->ifa_address;
704 		goto rarok;
705 
706 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
707 		sin->sin_addr.s_addr = ifa->ifa_mask;
708 		goto rarok;
709 
710 	case SIOCSIFFLAGS:
		/* On an alias label, clearing IFF_UP deletes that address;
		 * the device flags themselves are left untouched. */
711 		if (colon) {
712 			ret = -EADDRNOTAVAIL;
713 			if (!ifa)
714 				break;
715 			ret = 0;
716 			if (!(ifr.ifr_flags & IFF_UP))
717 				inet_del_ifa(in_dev, ifap, 1);
718 			break;
719 		}
720 		ret = dev_change_flags(dev, ifr.ifr_flags);
721 		break;
722 
723 	case SIOCSIFADDR:	/* Set interface address (and family) */
724 		ret = -EINVAL;
725 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726 			break;
727 
728 		if (!ifa) {
729 			ret = -ENOBUFS;
730 			if ((ifa = inet_alloc_ifa()) == NULL)
731 				break;
732 			if (colon)
733 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734 			else
735 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736 		} else {
737 			ret = 0;
738 			if (ifa->ifa_local == sin->sin_addr.s_addr)
739 				break;
			/* Unlink without freeing (destroy == 0): the same
			 * structure is modified and re-inserted below. */
740 			inet_del_ifa(in_dev, ifap, 0);
741 			ifa->ifa_broadcast = 0;
742 			ifa->ifa_scope = 0;
743 		}
744 
745 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746 
		/* Non point-to-point devices get the classful default mask,
		 * point-to-point devices a /32. */
747 		if (!(dev->flags & IFF_POINTOPOINT)) {
748 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750 			if ((dev->flags & IFF_BROADCAST) &&
751 			    ifa->ifa_prefixlen < 31)
752 				ifa->ifa_broadcast = ifa->ifa_address |
753 						     ~ifa->ifa_mask;
754 		} else {
755 			ifa->ifa_prefixlen = 32;
756 			ifa->ifa_mask = inet_make_mask(32);
757 		}
758 		ret = inet_set_ifa(dev, ifa);
759 		break;
760 
761 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
762 		ret = 0;
763 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764 			inet_del_ifa(in_dev, ifap, 0);
765 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			/* NOTE(review): the result of inet_insert_ifa() is not
			 * propagated here; ret stays 0. */
766 			inet_insert_ifa(ifa);
767 		}
768 		break;
769 
770 	case SIOCSIFDSTADDR:	/* Set the destination address */
771 		ret = 0;
772 		if (ifa->ifa_address == sin->sin_addr.s_addr)
773 			break;
774 		ret = -EINVAL;
775 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776 			break;
777 		ret = 0;
778 		inet_del_ifa(in_dev, ifap, 0);
779 		ifa->ifa_address = sin->sin_addr.s_addr;
780 		inet_insert_ifa(ifa);
781 		break;
782 
783 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
784 
785 		/*
786 		 *	The mask we set must be legal.
787 		 */
788 		ret = -EINVAL;
789 		if (bad_mask(sin->sin_addr.s_addr, 0))
790 			break;
791 		ret = 0;
792 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793 			__be32 old_mask = ifa->ifa_mask;
794 			inet_del_ifa(in_dev, ifap, 0);
795 			ifa->ifa_mask = sin->sin_addr.s_addr;
796 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797 
798 			/* See if current broadcast address matches
799 			 * with current netmask, then recalculate
800 			 * the broadcast address. Otherwise it's a
801 			 * funny address, so don't touch it since
802 			 * the user seems to know what (s)he's doing...
803 			 */
804 			if ((dev->flags & IFF_BROADCAST) &&
805 			    (ifa->ifa_prefixlen < 31) &&
806 			    (ifa->ifa_broadcast ==
807 			     (ifa->ifa_local|~old_mask))) {
808 				ifa->ifa_broadcast = (ifa->ifa_local |
809 						      ~sin->sin_addr.s_addr);
810 			}
811 			inet_insert_ifa(ifa);
812 		}
813 		break;
814 	}
815 done:
816 	rtnl_unlock();
817 out:
818 	return ret;
	/* "get" commands end here: drop RTNL first, then copy the
	 * filled-in ifreq back to user space. */
819 rarok:
820 	rtnl_unlock();
821 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822 	goto out;
823 }
824 
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
828 	struct in_ifaddr *ifa;
829 	struct ifreq ifr;
830 	int done = 0;
831 
832 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
833 		goto out;
834 
835 	for (; ifa; ifa = ifa->ifa_next) {
836 		if (!buf) {
837 			done += sizeof(ifr);
838 			continue;
839 		}
840 		if (len < (int) sizeof(ifr))
841 			break;
842 		memset(&ifr, 0, sizeof(struct ifreq));
843 		if (ifa->ifa_label)
844 			strcpy(ifr.ifr_name, ifa->ifa_label);
845 		else
846 			strcpy(ifr.ifr_name, dev->name);
847 
848 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850 								ifa->ifa_local;
851 
852 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853 			done = -EFAULT;
854 			break;
855 		}
856 		buf  += sizeof(struct ifreq);
857 		len  -= sizeof(struct ifreq);
858 		done += sizeof(struct ifreq);
859 	}
860 out:
861 	return done;
862 }
863 
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866 	__be32 addr = 0;
867 	struct in_device *in_dev;
868 	struct net *net = dev_net(dev);
869 
870 	rcu_read_lock();
871 	in_dev = __in_dev_get_rcu(dev);
872 	if (!in_dev)
873 		goto no_in_dev;
874 
875 	for_primary_ifa(in_dev) {
876 		if (ifa->ifa_scope > scope)
877 			continue;
878 		if (!dst || inet_ifa_match(dst, ifa)) {
879 			addr = ifa->ifa_local;
880 			break;
881 		}
882 		if (!addr)
883 			addr = ifa->ifa_local;
884 	} endfor_ifa(in_dev);
885 no_in_dev:
886 	rcu_read_unlock();
887 
888 	if (addr)
889 		goto out;
890 
891 	/* Not loopback addresses on loopback should be preferred
892 	   in this case. It is importnat that lo is the first interface
893 	   in dev_base list.
894 	 */
895 	read_lock(&dev_base_lock);
896 	rcu_read_lock();
897 	for_each_netdev(net, dev) {
898 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
899 			continue;
900 
901 		for_primary_ifa(in_dev) {
902 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
903 			    ifa->ifa_scope <= scope) {
904 				addr = ifa->ifa_local;
905 				goto out_unlock_both;
906 			}
907 		} endfor_ifa(in_dev);
908 	}
909 out_unlock_both:
910 	read_unlock(&dev_base_lock);
911 	rcu_read_unlock();
912 out:
913 	return addr;
914 }
915 
/*
 * Search the addresses of @in_dev for a local address that satisfies the
 * wildcard query (@dst, @local, @scope); a zero @dst or @local means
 * "any".
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the first candidate whose ifa_local equals @local (or any
 *          address when @local is 0) and whose scope is <= @scope
 *   same - set once an entry's subnet contains @local and @dst
 *
 * Returns the selected address only if 'same' ended up set, else 0.
 */
916 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
917 			      __be32 local, int scope)
918 {
919 	int same = 0;
920 	__be32 addr = 0;
921 
922 	for_ifa(in_dev) {
		/* Pick the first acceptable candidate address. */
923 		if (!addr &&
924 		    (local == ifa->ifa_local || !local) &&
925 		    ifa->ifa_scope <= scope) {
926 			addr = ifa->ifa_local;
			/* A matching subnet was already seen: done. */
927 			if (same)
928 				break;
929 		}
930 		if (!same) {
931 			same = (!local || inet_ifa_match(local, ifa)) &&
932 				(!dst || inet_ifa_match(dst, ifa));
933 			if (same && addr) {
934 				if (local || !dst)
935 					break;
936 				/* Is the selected addr into dst subnet? */
937 				if (inet_ifa_match(addr, ifa))
938 					break;
939 				/* No, then can we use new local src? */
940 				if (ifa->ifa_scope <= scope) {
941 					addr = ifa->ifa_local;
942 					break;
943 				}
944 				/* search for large dst subnet for addr */
945 				same = 0;
946 			}
947 		}
948 	} endfor_ifa(in_dev);
949 
950 	return same? addr : 0;
951 }
952 
953 /*
954  * Confirm that local IP address exists using wildcards:
955  * - in_dev: only on this interface, 0=any interface
956  * - dst: only in the same subnet as dst, 0=any dst
957  * - local: address, 0=autoselect the local address
958  * - scope: maximum allowed scope value for the local address
959  */
960 __be32 inet_confirm_addr(struct in_device *in_dev,
961 			 __be32 dst, __be32 local, int scope)
962 {
963 	__be32 addr = 0;
964 	struct net_device *dev;
965 	struct net *net;
966 
967 	if (scope != RT_SCOPE_LINK)
968 		return confirm_addr_indev(in_dev, dst, local, scope);
969 
970 	net = dev_net(in_dev->dev);
971 	read_lock(&dev_base_lock);
972 	rcu_read_lock();
973 	for_each_netdev(net, dev) {
974 		if ((in_dev = __in_dev_get_rcu(dev))) {
975 			addr = confirm_addr_indev(in_dev, dst, local, scope);
976 			if (addr)
977 				break;
978 		}
979 	}
980 	rcu_read_unlock();
981 	read_unlock(&dev_base_lock);
982 
983 	return addr;
984 }
985 
986 /*
987  *	Device notifier
988  */
989 
990 int register_inetaddr_notifier(struct notifier_block *nb)
991 {
992 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
993 }
994 
995 int unregister_inetaddr_notifier(struct notifier_block *nb)
996 {
997 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
998 }
999 
1000 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1001  * alias numbering and to create unique labels if possible.
1002 */
1003 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1004 {
1005 	struct in_ifaddr *ifa;
1006 	int named = 0;
1007 
1008 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1009 		char old[IFNAMSIZ], *dot;
1010 
1011 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1012 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1013 		if (named++ == 0)
1014 			goto skip;
1015 		dot = strchr(old, ':');
1016 		if (dot == NULL) {
1017 			sprintf(old, ":%d", named);
1018 			dot = old;
1019 		}
1020 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1021 			strcat(ifa->ifa_label, dot);
1022 		} else {
1023 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1024 		}
1025 skip:
1026 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1027 	}
1028 }
1029 
/* An MTU is acceptable for IPv4 here when it is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1034 
1035 /* Called only under RTNL semaphore */
1036 
1037 static int inetdev_event(struct notifier_block *this, unsigned long event,
1038 			 void *ptr)
1039 {
1040 	struct net_device *dev = ptr;
1041 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1042 
1043 	ASSERT_RTNL();
1044 
1045 	if (!in_dev) {
1046 		if (event == NETDEV_REGISTER) {
1047 			in_dev = inetdev_init(dev);
1048 			if (!in_dev)
1049 				return notifier_from_errno(-ENOMEM);
1050 			if (dev->flags & IFF_LOOPBACK) {
1051 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1052 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1053 			}
1054 		} else if (event == NETDEV_CHANGEMTU) {
1055 			/* Re-enabling IP */
1056 			if (inetdev_valid_mtu(dev->mtu))
1057 				in_dev = inetdev_init(dev);
1058 		}
1059 		goto out;
1060 	}
1061 
1062 	switch (event) {
1063 	case NETDEV_REGISTER:
1064 		printk(KERN_DEBUG "inetdev_event: bug\n");
1065 		dev->ip_ptr = NULL;
1066 		break;
1067 	case NETDEV_UP:
1068 		if (!inetdev_valid_mtu(dev->mtu))
1069 			break;
1070 		if (dev->flags & IFF_LOOPBACK) {
1071 			struct in_ifaddr *ifa;
1072 			if ((ifa = inet_alloc_ifa()) != NULL) {
1073 				ifa->ifa_local =
1074 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1075 				ifa->ifa_prefixlen = 8;
1076 				ifa->ifa_mask = inet_make_mask(8);
1077 				in_dev_hold(in_dev);
1078 				ifa->ifa_dev = in_dev;
1079 				ifa->ifa_scope = RT_SCOPE_HOST;
1080 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1081 				inet_insert_ifa(ifa);
1082 			}
1083 		}
1084 		ip_mc_up(in_dev);
1085 		break;
1086 	case NETDEV_DOWN:
1087 		ip_mc_down(in_dev);
1088 		break;
1089 	case NETDEV_CHANGEMTU:
1090 		if (inetdev_valid_mtu(dev->mtu))
1091 			break;
1092 		/* disable IP when MTU is not enough */
1093 	case NETDEV_UNREGISTER:
1094 		inetdev_destroy(in_dev);
1095 		break;
1096 	case NETDEV_CHANGENAME:
1097 		/* Do not notify about label change, this event is
1098 		 * not interesting to applications using netlink.
1099 		 */
1100 		inetdev_changename(dev, in_dev);
1101 
1102 		devinet_sysctl_unregister(in_dev);
1103 		devinet_sysctl_register(in_dev);
1104 		break;
1105 	}
1106 out:
1107 	return NOTIFY_DONE;
1108 }
1109 
1110 static struct notifier_block ip_netdev_notifier = {
1111 	.notifier_call =inetdev_event,
1112 };
1113 
1114 static inline size_t inet_nlmsg_size(void)
1115 {
1116 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1117 	       + nla_total_size(4) /* IFA_ADDRESS */
1118 	       + nla_total_size(4) /* IFA_LOCAL */
1119 	       + nla_total_size(4) /* IFA_BROADCAST */
1120 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1121 }
1122 
1123 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1124 			    u32 pid, u32 seq, int event, unsigned int flags)
1125 {
1126 	struct ifaddrmsg *ifm;
1127 	struct nlmsghdr  *nlh;
1128 
1129 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1130 	if (nlh == NULL)
1131 		return -EMSGSIZE;
1132 
1133 	ifm = nlmsg_data(nlh);
1134 	ifm->ifa_family = AF_INET;
1135 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1136 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1137 	ifm->ifa_scope = ifa->ifa_scope;
1138 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1139 
1140 	if (ifa->ifa_address)
1141 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1142 
1143 	if (ifa->ifa_local)
1144 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1145 
1146 	if (ifa->ifa_broadcast)
1147 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1148 
1149 	if (ifa->ifa_label[0])
1150 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151 
1152 	return nlmsg_end(skb, nlh);
1153 
1154 nla_put_failure:
1155 	nlmsg_cancel(skb, nlh);
1156 	return -EMSGSIZE;
1157 }
1158 
1159 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1160 {
1161 	struct net *net = sock_net(skb->sk);
1162 	int idx, ip_idx;
1163 	struct net_device *dev;
1164 	struct in_device *in_dev;
1165 	struct in_ifaddr *ifa;
1166 	int s_ip_idx, s_idx = cb->args[0];
1167 
1168 	s_ip_idx = ip_idx = cb->args[1];
1169 	idx = 0;
1170 	for_each_netdev(net, dev) {
1171 		if (idx < s_idx)
1172 			goto cont;
1173 		if (idx > s_idx)
1174 			s_ip_idx = 0;
1175 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1176 			goto cont;
1177 
1178 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1179 		     ifa = ifa->ifa_next, ip_idx++) {
1180 			if (ip_idx < s_ip_idx)
1181 				continue;
1182 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1183 					     cb->nlh->nlmsg_seq,
1184 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1185 				goto done;
1186 		}
1187 cont:
1188 		idx++;
1189 	}
1190 
1191 done:
1192 	cb->args[0] = idx;
1193 	cb->args[1] = ip_idx;
1194 
1195 	return skb->len;
1196 }
1197 
1198 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1199 		      u32 pid)
1200 {
1201 	struct sk_buff *skb;
1202 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1203 	int err = -ENOBUFS;
1204 	struct net *net;
1205 
1206 	net = dev_net(ifa->ifa_dev->dev);
1207 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1208 	if (skb == NULL)
1209 		goto errout;
1210 
1211 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1212 	if (err < 0) {
1213 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1214 		WARN_ON(err == -EMSGSIZE);
1215 		kfree_skb(skb);
1216 		goto errout;
1217 	}
1218 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1219 errout:
1220 	if (err < 0)
1221 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1222 }
1223 
1224 #ifdef CONFIG_SYSCTL
1225 
1226 static void devinet_copy_dflt_conf(struct net *net, int i)
1227 {
1228 	struct net_device *dev;
1229 
1230 	read_lock(&dev_base_lock);
1231 	for_each_netdev(net, dev) {
1232 		struct in_device *in_dev;
1233 		rcu_read_lock();
1234 		in_dev = __in_dev_get_rcu(dev);
1235 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1236 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1237 		rcu_read_unlock();
1238 	}
1239 	read_unlock(&dev_base_lock);
1240 }
1241 
1242 static void inet_forward_change(struct net *net)
1243 {
1244 	struct net_device *dev;
1245 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1246 
1247 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1248 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1249 
1250 	read_lock(&dev_base_lock);
1251 	for_each_netdev(net, dev) {
1252 		struct in_device *in_dev;
1253 		if (on)
1254 			dev_disable_lro(dev);
1255 		rcu_read_lock();
1256 		in_dev = __in_dev_get_rcu(dev);
1257 		if (in_dev)
1258 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1259 		rcu_read_unlock();
1260 	}
1261 	read_unlock(&dev_base_lock);
1262 }
1263 
1264 static int devinet_conf_proc(ctl_table *ctl, int write,
1265 			     struct file* filp, void __user *buffer,
1266 			     size_t *lenp, loff_t *ppos)
1267 {
1268 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1269 
1270 	if (write) {
1271 		struct ipv4_devconf *cnf = ctl->extra1;
1272 		struct net *net = ctl->extra2;
1273 		int i = (int *)ctl->data - cnf->data;
1274 
1275 		set_bit(i, cnf->state);
1276 
1277 		if (cnf == net->ipv4.devconf_dflt)
1278 			devinet_copy_dflt_conf(net, i);
1279 	}
1280 
1281 	return ret;
1282 }
1283 
1284 static int devinet_conf_sysctl(ctl_table *table,
1285 			       void __user *oldval, size_t __user *oldlenp,
1286 			       void __user *newval, size_t newlen)
1287 {
1288 	struct ipv4_devconf *cnf;
1289 	struct net *net;
1290 	int *valp = table->data;
1291 	int new;
1292 	int i;
1293 
1294 	if (!newval || !newlen)
1295 		return 0;
1296 
1297 	if (newlen != sizeof(int))
1298 		return -EINVAL;
1299 
1300 	if (get_user(new, (int __user *)newval))
1301 		return -EFAULT;
1302 
1303 	if (new == *valp)
1304 		return 0;
1305 
1306 	if (oldval && oldlenp) {
1307 		size_t len;
1308 
1309 		if (get_user(len, oldlenp))
1310 			return -EFAULT;
1311 
1312 		if (len) {
1313 			if (len > table->maxlen)
1314 				len = table->maxlen;
1315 			if (copy_to_user(oldval, valp, len))
1316 				return -EFAULT;
1317 			if (put_user(len, oldlenp))
1318 				return -EFAULT;
1319 		}
1320 	}
1321 
1322 	*valp = new;
1323 
1324 	cnf = table->extra1;
1325 	net = table->extra2;
1326 	i = (int *)table->data - cnf->data;
1327 
1328 	set_bit(i, cnf->state);
1329 
1330 	if (cnf == net->ipv4.devconf_dflt)
1331 		devinet_copy_dflt_conf(net, i);
1332 
1333 	return 1;
1334 }
1335 
1336 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1337 				  struct file* filp, void __user *buffer,
1338 				  size_t *lenp, loff_t *ppos)
1339 {
1340 	int *valp = ctl->data;
1341 	int val = *valp;
1342 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1343 
1344 	if (write && *valp != val) {
1345 		struct net *net = ctl->extra2;
1346 
1347 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1348 			rtnl_lock();
1349 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1350 				inet_forward_change(net);
1351 			} else if (*valp) {
1352 				struct ipv4_devconf *cnf = ctl->extra1;
1353 				struct in_device *idev =
1354 					container_of(cnf, struct in_device, cnf);
1355 				dev_disable_lro(idev->dev);
1356 			}
1357 			rtnl_unlock();
1358 			rt_cache_flush(net, 0);
1359 		}
1360 	}
1361 
1362 	return ret;
1363 }
1364 
1365 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1366 			 struct file* filp, void __user *buffer,
1367 			 size_t *lenp, loff_t *ppos)
1368 {
1369 	int *valp = ctl->data;
1370 	int val = *valp;
1371 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1372 	struct net *net = ctl->extra2;
1373 
1374 	if (write && *valp != val)
1375 		rt_cache_flush(net, 0);
1376 
1377 	return ret;
1378 }
1379 
1380 int ipv4_doint_and_flush_strategy(ctl_table *table,
1381 				  void __user *oldval, size_t __user *oldlenp,
1382 				  void __user *newval, size_t newlen)
1383 {
1384 	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1385 	struct net *net = table->extra2;
1386 
1387 	if (ret == 1)
1388 		rt_cache_flush(net, 0);
1389 
1390 	return ret;
1391 }
1392 
1393 
/*
 * Initializer for one ctl_table entry describing devconf attribute
 * NET_IPV4_CONF_<attr>.  .data points at slot NET_IPV4_CONF_<attr> - 1
 * of the global ipv4_devconf and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc and strategy handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler, strat)

/* Mode 0644 entry handled by the ipv4_doint_and_flush*() pair. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1421 
1422 static struct devinet_sysctl_table {
1423 	struct ctl_table_header *sysctl_header;
1424 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1425 	char *dev_name;
1426 } devinet_sysctl = {
1427 	.devinet_vars = {
1428 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1429 					     devinet_sysctl_forward,
1430 					     devinet_conf_sysctl),
1431 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1432 
1433 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1434 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1435 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1436 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1437 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1438 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1439 					"accept_source_route"),
1440 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1441 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1442 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1443 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1444 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1445 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1446 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1447 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1449 
1450 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1451 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1452 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1453 					      "force_igmp_version"),
1454 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1455 					      "promote_secondaries"),
1456 	},
1457 };
1458 
1459 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1460 		int ctl_name, struct ipv4_devconf *p)
1461 {
1462 	int i;
1463 	struct devinet_sysctl_table *t;
1464 
1465 #define DEVINET_CTL_PATH_DEV	3
1466 
1467 	struct ctl_path devinet_ctl_path[] = {
1468 		{ .procname = "net", .ctl_name = CTL_NET, },
1469 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1470 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471 		{ /* to be set */ },
1472 		{ },
1473 	};
1474 
1475 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1476 	if (!t)
1477 		goto out;
1478 
1479 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1480 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1481 		t->devinet_vars[i].extra1 = p;
1482 		t->devinet_vars[i].extra2 = net;
1483 	}
1484 
1485 	/*
1486 	 * Make a copy of dev_name, because '.procname' is regarded as const
1487 	 * by sysctl and we wouldn't want anyone to change it under our feet
1488 	 * (see SIOCSIFNAME).
1489 	 */
1490 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491 	if (!t->dev_name)
1492 		goto free;
1493 
1494 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1495 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1496 
1497 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498 			t->devinet_vars);
1499 	if (!t->sysctl_header)
1500 		goto free_procname;
1501 
1502 	p->sysctl = t;
1503 	return 0;
1504 
1505 free_procname:
1506 	kfree(t->dev_name);
1507 free:
1508 	kfree(t);
1509 out:
1510 	return -ENOBUFS;
1511 }
1512 
1513 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514 {
1515 	struct devinet_sysctl_table *t = cnf->sysctl;
1516 
1517 	if (t == NULL)
1518 		return;
1519 
1520 	cnf->sysctl = NULL;
1521 	unregister_sysctl_table(t->sysctl_header);
1522 	kfree(t->dev_name);
1523 	kfree(t);
1524 }
1525 
1526 static void devinet_sysctl_register(struct in_device *idev)
1527 {
1528 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1529 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1530 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1531 			idev->dev->ifindex, &idev->cnf);
1532 }
1533 
1534 static void devinet_sysctl_unregister(struct in_device *idev)
1535 {
1536 	__devinet_sysctl_unregister(&idev->cnf);
1537 	neigh_sysctl_unregister(idev->arp_parms);
1538 }
1539 
1540 static struct ctl_table ctl_forward_entry[] = {
1541 	{
1542 		.ctl_name	= NET_IPV4_FORWARD,
1543 		.procname	= "ip_forward",
1544 		.data		= &ipv4_devconf.data[
1545 					NET_IPV4_CONF_FORWARDING - 1],
1546 		.maxlen		= sizeof(int),
1547 		.mode		= 0644,
1548 		.proc_handler	= devinet_sysctl_forward,
1549 		.strategy	= devinet_conf_sysctl,
1550 		.extra1		= &ipv4_devconf,
1551 		.extra2		= &init_net,
1552 	},
1553 	{ },
1554 };
1555 
1556 static __net_initdata struct ctl_path net_ipv4_path[] = {
1557 	{ .procname = "net", .ctl_name = CTL_NET, },
1558 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1559 	{ },
1560 };
1561 #endif
1562 
1563 static __net_init int devinet_init_net(struct net *net)
1564 {
1565 	int err;
1566 	struct ipv4_devconf *all, *dflt;
1567 #ifdef CONFIG_SYSCTL
1568 	struct ctl_table *tbl = ctl_forward_entry;
1569 	struct ctl_table_header *forw_hdr;
1570 #endif
1571 
1572 	err = -ENOMEM;
1573 	all = &ipv4_devconf;
1574 	dflt = &ipv4_devconf_dflt;
1575 
1576 	if (net != &init_net) {
1577 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578 		if (all == NULL)
1579 			goto err_alloc_all;
1580 
1581 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582 		if (dflt == NULL)
1583 			goto err_alloc_dflt;
1584 
1585 #ifdef CONFIG_SYSCTL
1586 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587 		if (tbl == NULL)
1588 			goto err_alloc_ctl;
1589 
1590 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591 		tbl[0].extra1 = all;
1592 		tbl[0].extra2 = net;
1593 #endif
1594 	}
1595 
1596 #ifdef CONFIG_SYSCTL
1597 	err = __devinet_sysctl_register(net, "all",
1598 			NET_PROTO_CONF_ALL, all);
1599 	if (err < 0)
1600 		goto err_reg_all;
1601 
1602 	err = __devinet_sysctl_register(net, "default",
1603 			NET_PROTO_CONF_DEFAULT, dflt);
1604 	if (err < 0)
1605 		goto err_reg_dflt;
1606 
1607 	err = -ENOMEM;
1608 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609 	if (forw_hdr == NULL)
1610 		goto err_reg_ctl;
1611 	net->ipv4.forw_hdr = forw_hdr;
1612 #endif
1613 
1614 	net->ipv4.devconf_all = all;
1615 	net->ipv4.devconf_dflt = dflt;
1616 	return 0;
1617 
1618 #ifdef CONFIG_SYSCTL
1619 err_reg_ctl:
1620 	__devinet_sysctl_unregister(dflt);
1621 err_reg_dflt:
1622 	__devinet_sysctl_unregister(all);
1623 err_reg_all:
1624 	if (tbl != ctl_forward_entry)
1625 		kfree(tbl);
1626 err_alloc_ctl:
1627 #endif
1628 	if (dflt != &ipv4_devconf_dflt)
1629 		kfree(dflt);
1630 err_alloc_dflt:
1631 	if (all != &ipv4_devconf)
1632 		kfree(all);
1633 err_alloc_all:
1634 	return err;
1635 }
1636 
1637 static __net_exit void devinet_exit_net(struct net *net)
1638 {
1639 #ifdef CONFIG_SYSCTL
1640 	struct ctl_table *tbl;
1641 
1642 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1646 	kfree(tbl);
1647 #endif
1648 	kfree(net->ipv4.devconf_dflt);
1649 	kfree(net->ipv4.devconf_all);
1650 }
1651 
1652 static __net_initdata struct pernet_operations devinet_ops = {
1653 	.init = devinet_init_net,
1654 	.exit = devinet_exit_net,
1655 };
1656 
1657 void __init devinet_init(void)
1658 {
1659 	register_pernet_subsys(&devinet_ops);
1660 
1661 	register_gifconf(PF_INET, inet_gifconf);
1662 	register_netdevice_notifier(&ip_netdev_notifier);
1663 
1664 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1665 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1666 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1667 }
1668 
/* Symbols exported from this file. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1674