xref: /openbmc/linux/net/ipv4/devinet.c (revision a0865368)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 static struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
/*
 * Read configuration attribute @attr from the table that
 * @net->ipv4.devconf_dflt points at.
 *
 * @net is parenthesised so that any pointer-valued expression (for example
 * "base + 1" or a conditional expression) expands correctly, not only a
 * plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87 
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 	[IFA_LOCAL]     	= { .type = NLA_U32 },
90 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111 
112 /* Locks all the inet devices. */
113 
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	WARN_ON(idev->ifa_list);
137 	WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		pr_err("Freeing alive in_device %p\n", idev);
145 	else
146 		kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 			sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 	if (!in_dev->arp_parms)
165 		goto out_kfree;
166 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 		dev_disable_lro(dev);
168 	/* Reference in_dev->dev */
169 	dev_hold(dev);
170 	/* Account for reference dev->ip_ptr (below) */
171 	in_dev_hold(in_dev);
172 
173 	devinet_sysctl_register(in_dev);
174 	ip_mc_init_dev(in_dev);
175 	if (dev->flags & IFF_UP)
176 		ip_mc_up(in_dev);
177 
178 	/* we can receive as soon as ip_ptr is set -- do this last */
179 	rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181 	return in_dev;
182 out_kfree:
183 	kfree(in_dev);
184 	in_dev = NULL;
185 	goto out;
186 }
187 
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191 	in_dev_put(idev);
192 }
193 
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196 	struct in_ifaddr *ifa;
197 	struct net_device *dev;
198 
199 	ASSERT_RTNL();
200 
201 	dev = in_dev->dev;
202 
203 	in_dev->dead = 1;
204 
205 	ip_mc_destroy_dev(in_dev);
206 
207 	while ((ifa = in_dev->ifa_list) != NULL) {
208 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 		inet_free_ifa(ifa);
210 	}
211 
212 	rcu_assign_pointer(dev->ip_ptr, NULL);
213 
214 	devinet_sysctl_unregister(in_dev);
215 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 	arp_ifdown(dev);
217 
218 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220 
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223 	rcu_read_lock();
224 	for_primary_ifa(in_dev) {
225 		if (inet_ifa_match(a, ifa)) {
226 			if (!b || inet_ifa_match(b, ifa)) {
227 				rcu_read_unlock();
228 				return 1;
229 			}
230 		}
231 	} endfor_ifa(in_dev);
232 	rcu_read_unlock();
233 	return 0;
234 }
235 
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237 			 int destroy, struct nlmsghdr *nlh, u32 pid)
238 {
239 	struct in_ifaddr *promote = NULL;
240 	struct in_ifaddr *ifa, *ifa1 = *ifap;
241 	struct in_ifaddr *last_prim = in_dev->ifa_list;
242 	struct in_ifaddr *prev_prom = NULL;
243 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244 
245 	ASSERT_RTNL();
246 
247 	/* 1. Deleting primary ifaddr forces deletion all secondaries
248 	 * unless alias promotion is set
249 	 **/
250 
251 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253 
254 		while ((ifa = *ifap1) != NULL) {
255 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256 			    ifa1->ifa_scope <= ifa->ifa_scope)
257 				last_prim = ifa;
258 
259 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260 			    ifa1->ifa_mask != ifa->ifa_mask ||
261 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
262 				ifap1 = &ifa->ifa_next;
263 				prev_prom = ifa;
264 				continue;
265 			}
266 
267 			if (!do_promote) {
268 				*ifap1 = ifa->ifa_next;
269 
270 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271 				blocking_notifier_call_chain(&inetaddr_chain,
272 						NETDEV_DOWN, ifa);
273 				inet_free_ifa(ifa);
274 			} else {
275 				promote = ifa;
276 				break;
277 			}
278 		}
279 	}
280 
281 	/* 2. Unlink it */
282 
283 	*ifap = ifa1->ifa_next;
284 
285 	/* 3. Announce address deletion */
286 
287 	/* Send message first, then call notifier.
288 	   At first sight, FIB update triggered by notifier
289 	   will refer to already deleted ifaddr, that could confuse
290 	   netlink listeners. It is not true: look, gated sees
291 	   that route deleted and if it still thinks that ifaddr
292 	   is valid, it will try to restore deleted routes... Grr.
293 	   So that, this order is correct.
294 	 */
295 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297 
298 	if (promote) {
299 
300 		if (prev_prom) {
301 			prev_prom->ifa_next = promote->ifa_next;
302 			promote->ifa_next = last_prim->ifa_next;
303 			last_prim->ifa_next = promote;
304 		}
305 
306 		promote->ifa_flags &= ~IFA_F_SECONDARY;
307 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 		blocking_notifier_call_chain(&inetaddr_chain,
309 				NETDEV_UP, promote);
310 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311 			if (ifa1->ifa_mask != ifa->ifa_mask ||
312 			    !inet_ifa_match(ifa1->ifa_address, ifa))
313 					continue;
314 			fib_add_ifaddr(ifa);
315 		}
316 
317 	}
318 	if (destroy)
319 		inet_free_ifa(ifa1);
320 }
321 
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323 			 int destroy)
324 {
325 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 }
327 
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329 			     u32 pid)
330 {
331 	struct in_device *in_dev = ifa->ifa_dev;
332 	struct in_ifaddr *ifa1, **ifap, **last_primary;
333 
334 	ASSERT_RTNL();
335 
336 	if (!ifa->ifa_local) {
337 		inet_free_ifa(ifa);
338 		return 0;
339 	}
340 
341 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 	last_primary = &in_dev->ifa_list;
343 
344 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 	     ifap = &ifa1->ifa_next) {
346 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 		    ifa->ifa_scope <= ifa1->ifa_scope)
348 			last_primary = &ifa1->ifa_next;
349 		if (ifa1->ifa_mask == ifa->ifa_mask &&
350 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
351 			if (ifa1->ifa_local == ifa->ifa_local) {
352 				inet_free_ifa(ifa);
353 				return -EEXIST;
354 			}
355 			if (ifa1->ifa_scope != ifa->ifa_scope) {
356 				inet_free_ifa(ifa);
357 				return -EINVAL;
358 			}
359 			ifa->ifa_flags |= IFA_F_SECONDARY;
360 		}
361 	}
362 
363 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 		net_srandom(ifa->ifa_local);
365 		ifap = last_primary;
366 	}
367 
368 	ifa->ifa_next = *ifap;
369 	*ifap = ifa;
370 
371 	/* Send message first, then call notifier.
372 	   Notifier will trigger FIB update, so that
373 	   listeners of netlink will know about new ifaddr */
374 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376 
377 	return 0;
378 }
379 
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382 	return __inet_insert_ifa(ifa, NULL, 0);
383 }
384 
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388 
389 	ASSERT_RTNL();
390 
391 	if (!in_dev) {
392 		inet_free_ifa(ifa);
393 		return -ENOBUFS;
394 	}
395 	ipv4_devconf_setall(in_dev);
396 	if (ifa->ifa_dev != in_dev) {
397 		WARN_ON(ifa->ifa_dev);
398 		in_dev_hold(in_dev);
399 		ifa->ifa_dev = in_dev;
400 	}
401 	if (ipv4_is_loopback(ifa->ifa_local))
402 		ifa->ifa_scope = RT_SCOPE_HOST;
403 	return inet_insert_ifa(ifa);
404 }
405 
406 /* Caller must hold RCU or RTNL :
407  * We dont take a reference on found in_device
408  */
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
410 {
411 	struct net_device *dev;
412 	struct in_device *in_dev = NULL;
413 
414 	rcu_read_lock();
415 	dev = dev_get_by_index_rcu(net, ifindex);
416 	if (dev)
417 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418 	rcu_read_unlock();
419 	return in_dev;
420 }
421 EXPORT_SYMBOL(inetdev_by_index);
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 	     ifap = &ifa->ifa_next) {
461 		if (tb[IFA_LOCAL] &&
462 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463 			continue;
464 
465 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466 			continue;
467 
468 		if (tb[IFA_ADDRESS] &&
469 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471 			continue;
472 
473 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474 		return 0;
475 	}
476 
477 	err = -EADDRNOTAVAIL;
478 errout:
479 	return err;
480 }
481 
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484 	struct nlattr *tb[IFA_MAX+1];
485 	struct in_ifaddr *ifa;
486 	struct ifaddrmsg *ifm;
487 	struct net_device *dev;
488 	struct in_device *in_dev;
489 	int err;
490 
491 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492 	if (err < 0)
493 		goto errout;
494 
495 	ifm = nlmsg_data(nlh);
496 	err = -EINVAL;
497 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498 		goto errout;
499 
500 	dev = __dev_get_by_index(net, ifm->ifa_index);
501 	err = -ENODEV;
502 	if (dev == NULL)
503 		goto errout;
504 
505 	in_dev = __in_dev_get_rtnl(dev);
506 	err = -ENOBUFS;
507 	if (in_dev == NULL)
508 		goto errout;
509 
510 	ifa = inet_alloc_ifa();
511 	if (ifa == NULL)
512 		/*
513 		 * A potential indev allocation can be left alive, it stays
514 		 * assigned to its device and is destroy with it.
515 		 */
516 		goto errout;
517 
518 	ipv4_devconf_setall(in_dev);
519 	in_dev_hold(in_dev);
520 
521 	if (tb[IFA_ADDRESS] == NULL)
522 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523 
524 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 	ifa->ifa_flags = ifm->ifa_flags;
527 	ifa->ifa_scope = ifm->ifa_scope;
528 	ifa->ifa_dev = in_dev;
529 
530 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532 
533 	if (tb[IFA_BROADCAST])
534 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535 
536 	if (tb[IFA_LABEL])
537 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538 	else
539 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540 
541 	return ifa;
542 
543 errout:
544 	return ERR_PTR(err);
545 }
546 
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549 	struct net *net = sock_net(skb->sk);
550 	struct in_ifaddr *ifa;
551 
552 	ASSERT_RTNL();
553 
554 	ifa = rtm_to_ifaddr(net, nlh);
555 	if (IS_ERR(ifa))
556 		return PTR_ERR(ifa);
557 
558 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560 
561 /*
562  *	Determine a default network mask, based on the IP address.
563  */
564 
565 static inline int inet_abc_len(__be32 addr)
566 {
567 	int rc = -1;	/* Something else, probably a multicast. */
568 
569 	if (ipv4_is_zeronet(addr))
570 		rc = 0;
571 	else {
572 		__u32 haddr = ntohl(addr);
573 
574 		if (IN_CLASSA(haddr))
575 			rc = 8;
576 		else if (IN_CLASSB(haddr))
577 			rc = 16;
578 		else if (IN_CLASSC(haddr))
579 			rc = 24;
580 	}
581 
582 	return rc;
583 }
584 
585 
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 {
588 	struct ifreq ifr;
589 	struct sockaddr_in sin_orig;
590 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591 	struct in_device *in_dev;
592 	struct in_ifaddr **ifap = NULL;
593 	struct in_ifaddr *ifa = NULL;
594 	struct net_device *dev;
595 	char *colon;
596 	int ret = -EFAULT;
597 	int tryaddrmatch = 0;
598 
599 	/*
600 	 *	Fetch the caller's info block into kernel space
601 	 */
602 
603 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604 		goto out;
605 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
606 
607 	/* save original address for comparison */
608 	memcpy(&sin_orig, sin, sizeof(*sin));
609 
610 	colon = strchr(ifr.ifr_name, ':');
611 	if (colon)
612 		*colon = 0;
613 
614 	dev_load(net, ifr.ifr_name);
615 
616 	switch (cmd) {
617 	case SIOCGIFADDR:	/* Get interface address */
618 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
619 	case SIOCGIFDSTADDR:	/* Get the destination address */
620 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
621 		/* Note that these ioctls will not sleep,
622 		   so that we do not impose a lock.
623 		   One day we will be forced to put shlock here (I mean SMP)
624 		 */
625 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
626 		memset(sin, 0, sizeof(*sin));
627 		sin->sin_family = AF_INET;
628 		break;
629 
630 	case SIOCSIFFLAGS:
631 		ret = -EACCES;
632 		if (!capable(CAP_NET_ADMIN))
633 			goto out;
634 		break;
635 	case SIOCSIFADDR:	/* Set interface address (and family) */
636 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
637 	case SIOCSIFDSTADDR:	/* Set the destination address */
638 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
639 		ret = -EACCES;
640 		if (!capable(CAP_NET_ADMIN))
641 			goto out;
642 		ret = -EINVAL;
643 		if (sin->sin_family != AF_INET)
644 			goto out;
645 		break;
646 	default:
647 		ret = -EINVAL;
648 		goto out;
649 	}
650 
651 	rtnl_lock();
652 
653 	ret = -ENODEV;
654 	dev = __dev_get_by_name(net, ifr.ifr_name);
655 	if (!dev)
656 		goto done;
657 
658 	if (colon)
659 		*colon = ':';
660 
661 	in_dev = __in_dev_get_rtnl(dev);
662 	if (in_dev) {
663 		if (tryaddrmatch) {
664 			/* Matthias Andree */
665 			/* compare label and address (4.4BSD style) */
666 			/* note: we only do this for a limited set of ioctls
667 			   and only if the original address family was AF_INET.
668 			   This is checked above. */
669 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670 			     ifap = &ifa->ifa_next) {
671 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 				    sin_orig.sin_addr.s_addr ==
673 							ifa->ifa_address) {
674 					break; /* found */
675 				}
676 			}
677 		}
678 		/* we didn't get a match, maybe the application is
679 		   4.3BSD-style and passed in junk so we fall back to
680 		   comparing just the label */
681 		if (!ifa) {
682 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683 			     ifap = &ifa->ifa_next)
684 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685 					break;
686 		}
687 	}
688 
689 	ret = -EADDRNOTAVAIL;
690 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691 		goto done;
692 
693 	switch (cmd) {
694 	case SIOCGIFADDR:	/* Get interface address */
695 		sin->sin_addr.s_addr = ifa->ifa_local;
696 		goto rarok;
697 
698 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
699 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
700 		goto rarok;
701 
702 	case SIOCGIFDSTADDR:	/* Get the destination address */
703 		sin->sin_addr.s_addr = ifa->ifa_address;
704 		goto rarok;
705 
706 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
707 		sin->sin_addr.s_addr = ifa->ifa_mask;
708 		goto rarok;
709 
710 	case SIOCSIFFLAGS:
711 		if (colon) {
712 			ret = -EADDRNOTAVAIL;
713 			if (!ifa)
714 				break;
715 			ret = 0;
716 			if (!(ifr.ifr_flags & IFF_UP))
717 				inet_del_ifa(in_dev, ifap, 1);
718 			break;
719 		}
720 		ret = dev_change_flags(dev, ifr.ifr_flags);
721 		break;
722 
723 	case SIOCSIFADDR:	/* Set interface address (and family) */
724 		ret = -EINVAL;
725 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726 			break;
727 
728 		if (!ifa) {
729 			ret = -ENOBUFS;
730 			ifa = inet_alloc_ifa();
731 			if (!ifa)
732 				break;
733 			if (colon)
734 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735 			else
736 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737 		} else {
738 			ret = 0;
739 			if (ifa->ifa_local == sin->sin_addr.s_addr)
740 				break;
741 			inet_del_ifa(in_dev, ifap, 0);
742 			ifa->ifa_broadcast = 0;
743 			ifa->ifa_scope = 0;
744 		}
745 
746 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747 
748 		if (!(dev->flags & IFF_POINTOPOINT)) {
749 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751 			if ((dev->flags & IFF_BROADCAST) &&
752 			    ifa->ifa_prefixlen < 31)
753 				ifa->ifa_broadcast = ifa->ifa_address |
754 						     ~ifa->ifa_mask;
755 		} else {
756 			ifa->ifa_prefixlen = 32;
757 			ifa->ifa_mask = inet_make_mask(32);
758 		}
759 		ret = inet_set_ifa(dev, ifa);
760 		break;
761 
762 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
763 		ret = 0;
764 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765 			inet_del_ifa(in_dev, ifap, 0);
766 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
767 			inet_insert_ifa(ifa);
768 		}
769 		break;
770 
771 	case SIOCSIFDSTADDR:	/* Set the destination address */
772 		ret = 0;
773 		if (ifa->ifa_address == sin->sin_addr.s_addr)
774 			break;
775 		ret = -EINVAL;
776 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777 			break;
778 		ret = 0;
779 		inet_del_ifa(in_dev, ifap, 0);
780 		ifa->ifa_address = sin->sin_addr.s_addr;
781 		inet_insert_ifa(ifa);
782 		break;
783 
784 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
785 
786 		/*
787 		 *	The mask we set must be legal.
788 		 */
789 		ret = -EINVAL;
790 		if (bad_mask(sin->sin_addr.s_addr, 0))
791 			break;
792 		ret = 0;
793 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794 			__be32 old_mask = ifa->ifa_mask;
795 			inet_del_ifa(in_dev, ifap, 0);
796 			ifa->ifa_mask = sin->sin_addr.s_addr;
797 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798 
799 			/* See if current broadcast address matches
800 			 * with current netmask, then recalculate
801 			 * the broadcast address. Otherwise it's a
802 			 * funny address, so don't touch it since
803 			 * the user seems to know what (s)he's doing...
804 			 */
805 			if ((dev->flags & IFF_BROADCAST) &&
806 			    (ifa->ifa_prefixlen < 31) &&
807 			    (ifa->ifa_broadcast ==
808 			     (ifa->ifa_local|~old_mask))) {
809 				ifa->ifa_broadcast = (ifa->ifa_local |
810 						      ~sin->sin_addr.s_addr);
811 			}
812 			inet_insert_ifa(ifa);
813 		}
814 		break;
815 	}
816 done:
817 	rtnl_unlock();
818 out:
819 	return ret;
820 rarok:
821 	rtnl_unlock();
822 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
823 	goto out;
824 }
825 
826 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 {
828 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
829 	struct in_ifaddr *ifa;
830 	struct ifreq ifr;
831 	int done = 0;
832 
833 	if (!in_dev)
834 		goto out;
835 
836 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837 		if (!buf) {
838 			done += sizeof(ifr);
839 			continue;
840 		}
841 		if (len < (int) sizeof(ifr))
842 			break;
843 		memset(&ifr, 0, sizeof(struct ifreq));
844 		if (ifa->ifa_label)
845 			strcpy(ifr.ifr_name, ifa->ifa_label);
846 		else
847 			strcpy(ifr.ifr_name, dev->name);
848 
849 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851 								ifa->ifa_local;
852 
853 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854 			done = -EFAULT;
855 			break;
856 		}
857 		buf  += sizeof(struct ifreq);
858 		len  -= sizeof(struct ifreq);
859 		done += sizeof(struct ifreq);
860 	}
861 out:
862 	return done;
863 }
864 
865 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866 {
867 	__be32 addr = 0;
868 	struct in_device *in_dev;
869 	struct net *net = dev_net(dev);
870 
871 	rcu_read_lock();
872 	in_dev = __in_dev_get_rcu(dev);
873 	if (!in_dev)
874 		goto no_in_dev;
875 
876 	for_primary_ifa(in_dev) {
877 		if (ifa->ifa_scope > scope)
878 			continue;
879 		if (!dst || inet_ifa_match(dst, ifa)) {
880 			addr = ifa->ifa_local;
881 			break;
882 		}
883 		if (!addr)
884 			addr = ifa->ifa_local;
885 	} endfor_ifa(in_dev);
886 
887 	if (addr)
888 		goto out_unlock;
889 no_in_dev:
890 
891 	/* Not loopback addresses on loopback should be preferred
892 	   in this case. It is importnat that lo is the first interface
893 	   in dev_base list.
894 	 */
895 	for_each_netdev_rcu(net, dev) {
896 		in_dev = __in_dev_get_rcu(dev);
897 		if (!in_dev)
898 			continue;
899 
900 		for_primary_ifa(in_dev) {
901 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
902 			    ifa->ifa_scope <= scope) {
903 				addr = ifa->ifa_local;
904 				goto out_unlock;
905 			}
906 		} endfor_ifa(in_dev);
907 	}
908 out_unlock:
909 	rcu_read_unlock();
910 	return addr;
911 }
912 EXPORT_SYMBOL(inet_select_addr);
913 
914 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
915 			      __be32 local, int scope)
916 {
917 	int same = 0;
918 	__be32 addr = 0;
919 
920 	for_ifa(in_dev) {
921 		if (!addr &&
922 		    (local == ifa->ifa_local || !local) &&
923 		    ifa->ifa_scope <= scope) {
924 			addr = ifa->ifa_local;
925 			if (same)
926 				break;
927 		}
928 		if (!same) {
929 			same = (!local || inet_ifa_match(local, ifa)) &&
930 				(!dst || inet_ifa_match(dst, ifa));
931 			if (same && addr) {
932 				if (local || !dst)
933 					break;
934 				/* Is the selected addr into dst subnet? */
935 				if (inet_ifa_match(addr, ifa))
936 					break;
937 				/* No, then can we use new local src? */
938 				if (ifa->ifa_scope <= scope) {
939 					addr = ifa->ifa_local;
940 					break;
941 				}
942 				/* search for large dst subnet for addr */
943 				same = 0;
944 			}
945 		}
946 	} endfor_ifa(in_dev);
947 
948 	return same ? addr : 0;
949 }
950 
951 /*
952  * Confirm that local IP address exists using wildcards:
953  * - in_dev: only on this interface, 0=any interface
954  * - dst: only in the same subnet as dst, 0=any dst
955  * - local: address, 0=autoselect the local address
956  * - scope: maximum allowed scope value for the local address
957  */
958 __be32 inet_confirm_addr(struct in_device *in_dev,
959 			 __be32 dst, __be32 local, int scope)
960 {
961 	__be32 addr = 0;
962 	struct net_device *dev;
963 	struct net *net;
964 
965 	if (scope != RT_SCOPE_LINK)
966 		return confirm_addr_indev(in_dev, dst, local, scope);
967 
968 	net = dev_net(in_dev->dev);
969 	rcu_read_lock();
970 	for_each_netdev_rcu(net, dev) {
971 		in_dev = __in_dev_get_rcu(dev);
972 		if (in_dev) {
973 			addr = confirm_addr_indev(in_dev, dst, local, scope);
974 			if (addr)
975 				break;
976 		}
977 	}
978 	rcu_read_unlock();
979 
980 	return addr;
981 }
982 
983 /*
984  *	Device notifier
985  */
986 
987 int register_inetaddr_notifier(struct notifier_block *nb)
988 {
989 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
990 }
991 EXPORT_SYMBOL(register_inetaddr_notifier);
992 
993 int unregister_inetaddr_notifier(struct notifier_block *nb)
994 {
995 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996 }
997 EXPORT_SYMBOL(unregister_inetaddr_notifier);
998 
999 /* Rename ifa_labels for a device name change. Make some effort to preserve
1000  * existing alias numbering and to create unique labels if possible.
1001 */
1002 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003 {
1004 	struct in_ifaddr *ifa;
1005 	int named = 0;
1006 
1007 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008 		char old[IFNAMSIZ], *dot;
1009 
1010 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012 		if (named++ == 0)
1013 			goto skip;
1014 		dot = strchr(old, ':');
1015 		if (dot == NULL) {
1016 			sprintf(old, ":%d", named);
1017 			dot = old;
1018 		}
1019 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020 			strcat(ifa->ifa_label, dot);
1021 		else
1022 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023 skip:
1024 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025 	}
1026 }
1027 
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * NOTE(review): 68 presumably is the IPv4 minimum MTU from RFC 791 - confirm.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	enum { INETDEV_MIN_MTU = 68 };

	return !(mtu < INETDEV_MIN_MTU);
}
1032 
1033 /* Called only under RTNL semaphore */
1034 
1035 static int inetdev_event(struct notifier_block *this, unsigned long event,
1036 			 void *ptr)
1037 {
1038 	struct net_device *dev = ptr;
1039 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1040 
1041 	ASSERT_RTNL();
1042 
1043 	if (!in_dev) {
1044 		if (event == NETDEV_REGISTER) {
1045 			in_dev = inetdev_init(dev);
1046 			if (!in_dev)
1047 				return notifier_from_errno(-ENOMEM);
1048 			if (dev->flags & IFF_LOOPBACK) {
1049 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1050 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1051 			}
1052 		} else if (event == NETDEV_CHANGEMTU) {
1053 			/* Re-enabling IP */
1054 			if (inetdev_valid_mtu(dev->mtu))
1055 				in_dev = inetdev_init(dev);
1056 		}
1057 		goto out;
1058 	}
1059 
1060 	switch (event) {
1061 	case NETDEV_REGISTER:
1062 		printk(KERN_DEBUG "inetdev_event: bug\n");
1063 		rcu_assign_pointer(dev->ip_ptr, NULL);
1064 		break;
1065 	case NETDEV_UP:
1066 		if (!inetdev_valid_mtu(dev->mtu))
1067 			break;
1068 		if (dev->flags & IFF_LOOPBACK) {
1069 			struct in_ifaddr *ifa = inet_alloc_ifa();
1070 
1071 			if (ifa) {
1072 				ifa->ifa_local =
1073 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1074 				ifa->ifa_prefixlen = 8;
1075 				ifa->ifa_mask = inet_make_mask(8);
1076 				in_dev_hold(in_dev);
1077 				ifa->ifa_dev = in_dev;
1078 				ifa->ifa_scope = RT_SCOPE_HOST;
1079 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1080 				inet_insert_ifa(ifa);
1081 			}
1082 		}
1083 		ip_mc_up(in_dev);
1084 		/* fall through */
1085 	case NETDEV_NOTIFY_PEERS:
1086 	case NETDEV_CHANGEADDR:
1087 		/* Send gratuitous ARP to notify of link change */
1088 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
1089 			struct in_ifaddr *ifa = in_dev->ifa_list;
1090 
1091 			if (ifa)
1092 				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093 					 ifa->ifa_address, dev,
1094 					 ifa->ifa_address, NULL,
1095 					 dev->dev_addr, NULL);
1096 		}
1097 		break;
1098 	case NETDEV_DOWN:
1099 		ip_mc_down(in_dev);
1100 		break;
1101 	case NETDEV_PRE_TYPE_CHANGE:
1102 		ip_mc_unmap(in_dev);
1103 		break;
1104 	case NETDEV_POST_TYPE_CHANGE:
1105 		ip_mc_remap(in_dev);
1106 		break;
1107 	case NETDEV_CHANGEMTU:
1108 		if (inetdev_valid_mtu(dev->mtu))
1109 			break;
1110 		/* disable IP when MTU is not enough */
1111 	case NETDEV_UNREGISTER:
1112 		inetdev_destroy(in_dev);
1113 		break;
1114 	case NETDEV_CHANGENAME:
1115 		/* Do not notify about label change, this event is
1116 		 * not interesting to applications using netlink.
1117 		 */
1118 		inetdev_changename(dev, in_dev);
1119 
1120 		devinet_sysctl_unregister(in_dev);
1121 		devinet_sysctl_register(in_dev);
1122 		break;
1123 	}
1124 out:
1125 	return NOTIFY_DONE;
1126 }
1127 
1128 static struct notifier_block ip_netdev_notifier = {
1129 	.notifier_call = inetdev_event,
1130 };
1131 
1132 static inline size_t inet_nlmsg_size(void)
1133 {
1134 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1135 	       + nla_total_size(4) /* IFA_ADDRESS */
1136 	       + nla_total_size(4) /* IFA_LOCAL */
1137 	       + nla_total_size(4) /* IFA_BROADCAST */
1138 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1139 }
1140 
1141 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1142 			    u32 pid, u32 seq, int event, unsigned int flags)
1143 {
1144 	struct ifaddrmsg *ifm;
1145 	struct nlmsghdr  *nlh;
1146 
1147 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148 	if (nlh == NULL)
1149 		return -EMSGSIZE;
1150 
1151 	ifm = nlmsg_data(nlh);
1152 	ifm->ifa_family = AF_INET;
1153 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1154 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1155 	ifm->ifa_scope = ifa->ifa_scope;
1156 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1157 
1158 	if (ifa->ifa_address)
1159 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1160 
1161 	if (ifa->ifa_local)
1162 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1163 
1164 	if (ifa->ifa_broadcast)
1165 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1166 
1167 	if (ifa->ifa_label[0])
1168 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169 
1170 	return nlmsg_end(skb, nlh);
1171 
1172 nla_put_failure:
1173 	nlmsg_cancel(skb, nlh);
1174 	return -EMSGSIZE;
1175 }
1176 
/*
 * Netlink dump callback (registered for RTM_GETADDR in devinet_init()):
 * emit one RTM_NEWADDR message per IPv4 address of every device in the
 * socket's network namespace.
 *
 * The dump is resumable.  cb->args[] carries the position between calls:
 *   args[0] - index of the dev_index_head hash bucket,
 *   args[1] - index of the device inside that bucket,
 *   args[2] - index of the address inside that device's ifa_list.
 *
 * Returns skb->len, i.e. the amount of data currently in @skb.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; reset it afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Device already fully dumped in an earlier call. */
			if (idx < s_idx)
				goto cont;
			/*
			 * Past the device we resumed on: its addresses must
			 * be dumped from the start.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/*
				 * A result <= 0 ends this pass; h, idx and
				 * ip_idx identify the address to retry.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1231 
1232 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1233 		      u32 pid)
1234 {
1235 	struct sk_buff *skb;
1236 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1237 	int err = -ENOBUFS;
1238 	struct net *net;
1239 
1240 	net = dev_net(ifa->ifa_dev->dev);
1241 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242 	if (skb == NULL)
1243 		goto errout;
1244 
1245 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246 	if (err < 0) {
1247 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1248 		WARN_ON(err == -EMSGSIZE);
1249 		kfree_skb(skb);
1250 		goto errout;
1251 	}
1252 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1253 	return;
1254 errout:
1255 	if (err < 0)
1256 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1257 }
1258 
1259 static size_t inet_get_link_af_size(const struct net_device *dev)
1260 {
1261 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1262 
1263 	if (!in_dev)
1264 		return 0;
1265 
1266 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1267 }
1268 
1269 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1270 {
1271 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272 	struct nlattr *nla;
1273 	int i;
1274 
1275 	if (!in_dev)
1276 		return -ENODATA;
1277 
1278 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1279 	if (nla == NULL)
1280 		return -EMSGSIZE;
1281 
1282 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1283 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1284 
1285 	return 0;
1286 }
1287 
/*
 * Netlink attribute policy used by inet_validate_link_af(): the
 * IFLA_INET_CONF attribute must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1291 
1292 static int inet_validate_link_af(const struct net_device *dev,
1293 				 const struct nlattr *nla)
1294 {
1295 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1296 	int err, rem;
1297 
1298 	if (dev && !__in_dev_get_rtnl(dev))
1299 		return -EAFNOSUPPORT;
1300 
1301 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1302 	if (err < 0)
1303 		return err;
1304 
1305 	if (tb[IFLA_INET_CONF]) {
1306 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1307 			int cfgid = nla_type(a);
1308 
1309 			if (nla_len(a) < 4)
1310 				return -EINVAL;
1311 
1312 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1313 				return -EINVAL;
1314 		}
1315 	}
1316 
1317 	return 0;
1318 }
1319 
1320 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1321 {
1322 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1323 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1324 	int rem;
1325 
1326 	if (!in_dev)
1327 		return -EAFNOSUPPORT;
1328 
1329 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1330 		BUG();
1331 
1332 	if (tb[IFLA_INET_CONF]) {
1333 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1334 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1335 	}
1336 
1337 	return 0;
1338 }
1339 
1340 #ifdef CONFIG_SYSCTL
1341 
1342 static void devinet_copy_dflt_conf(struct net *net, int i)
1343 {
1344 	struct net_device *dev;
1345 
1346 	rcu_read_lock();
1347 	for_each_netdev_rcu(net, dev) {
1348 		struct in_device *in_dev;
1349 
1350 		in_dev = __in_dev_get_rcu(dev);
1351 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1352 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1353 	}
1354 	rcu_read_unlock();
1355 }
1356 
1357 /* called with RTNL locked */
1358 static void inet_forward_change(struct net *net)
1359 {
1360 	struct net_device *dev;
1361 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1362 
1363 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1364 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1365 
1366 	for_each_netdev(net, dev) {
1367 		struct in_device *in_dev;
1368 		if (on)
1369 			dev_disable_lro(dev);
1370 		rcu_read_lock();
1371 		in_dev = __in_dev_get_rcu(dev);
1372 		if (in_dev)
1373 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1374 		rcu_read_unlock();
1375 	}
1376 }
1377 
1378 static int devinet_conf_proc(ctl_table *ctl, int write,
1379 			     void __user *buffer,
1380 			     size_t *lenp, loff_t *ppos)
1381 {
1382 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1383 
1384 	if (write) {
1385 		struct ipv4_devconf *cnf = ctl->extra1;
1386 		struct net *net = ctl->extra2;
1387 		int i = (int *)ctl->data - cnf->data;
1388 
1389 		set_bit(i, cnf->state);
1390 
1391 		if (cnf == net->ipv4.devconf_dflt)
1392 			devinet_copy_dflt_conf(net, i);
1393 	}
1394 
1395 	return ret;
1396 }
1397 
1398 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1399 				  void __user *buffer,
1400 				  size_t *lenp, loff_t *ppos)
1401 {
1402 	int *valp = ctl->data;
1403 	int val = *valp;
1404 	loff_t pos = *ppos;
1405 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1406 
1407 	if (write && *valp != val) {
1408 		struct net *net = ctl->extra2;
1409 
1410 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1411 			if (!rtnl_trylock()) {
1412 				/* Restore the original values before restarting */
1413 				*valp = val;
1414 				*ppos = pos;
1415 				return restart_syscall();
1416 			}
1417 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1418 				inet_forward_change(net);
1419 			} else if (*valp) {
1420 				struct ipv4_devconf *cnf = ctl->extra1;
1421 				struct in_device *idev =
1422 					container_of(cnf, struct in_device, cnf);
1423 				dev_disable_lro(idev->dev);
1424 			}
1425 			rtnl_unlock();
1426 			rt_cache_flush(net, 0);
1427 		}
1428 	}
1429 
1430 	return ret;
1431 }
1432 
1433 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1434 				void __user *buffer,
1435 				size_t *lenp, loff_t *ppos)
1436 {
1437 	int *valp = ctl->data;
1438 	int val = *valp;
1439 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1440 	struct net *net = ctl->extra2;
1441 
1442 	if (write && *valp != val)
1443 		rt_cache_flush(net, 0);
1444 
1445 	return ret;
1446 }
1447 
/*
 * Build one ctl_table initializer for the devconf item IPV4_DEVCONF_<attr>.
 *
 * @name is the procname, @mval the file mode and @proc the handler.
 * .data points into the global ipv4_devconf at index
 * IPV4_DEVCONF_<attr> - 1, and .extra1 at ipv4_devconf itself; both are
 * rebased onto the real devconf by __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1470 
/*
 * Template for a per-devconf sysctl directory (net/ipv4/conf/<name>/).
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * rebases each entry onto the devconf being registered; the template
 * itself always refers to the global ipv4_devconf.
 */
static struct devinet_sysctl_table {
	/* handle returned by register_net_sysctl_table() */
	struct ctl_table_header *sysctl_header;
	/* one entry per devconf item; the unused last slot stays zeroed */
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	/* private kstrdup() copy of the directory name */
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries below flush the routing cache when changed */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1510 
1511 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1512 					struct ipv4_devconf *p)
1513 {
1514 	int i;
1515 	struct devinet_sysctl_table *t;
1516 
1517 #define DEVINET_CTL_PATH_DEV	3
1518 
1519 	struct ctl_path devinet_ctl_path[] = {
1520 		{ .procname = "net",  },
1521 		{ .procname = "ipv4", },
1522 		{ .procname = "conf", },
1523 		{ /* to be set */ },
1524 		{ },
1525 	};
1526 
1527 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1528 	if (!t)
1529 		goto out;
1530 
1531 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1532 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1533 		t->devinet_vars[i].extra1 = p;
1534 		t->devinet_vars[i].extra2 = net;
1535 	}
1536 
1537 	/*
1538 	 * Make a copy of dev_name, because '.procname' is regarded as const
1539 	 * by sysctl and we wouldn't want anyone to change it under our feet
1540 	 * (see SIOCSIFNAME).
1541 	 */
1542 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1543 	if (!t->dev_name)
1544 		goto free;
1545 
1546 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1547 
1548 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1549 			t->devinet_vars);
1550 	if (!t->sysctl_header)
1551 		goto free_procname;
1552 
1553 	p->sysctl = t;
1554 	return 0;
1555 
1556 free_procname:
1557 	kfree(t->dev_name);
1558 free:
1559 	kfree(t);
1560 out:
1561 	return -ENOBUFS;
1562 }
1563 
1564 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1565 {
1566 	struct devinet_sysctl_table *t = cnf->sysctl;
1567 
1568 	if (t == NULL)
1569 		return;
1570 
1571 	cnf->sysctl = NULL;
1572 	unregister_sysctl_table(t->sysctl_header);
1573 	kfree(t->dev_name);
1574 	kfree(t);
1575 }
1576 
1577 static void devinet_sysctl_register(struct in_device *idev)
1578 {
1579 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1580 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1581 					&idev->cnf);
1582 }
1583 
1584 static void devinet_sysctl_unregister(struct in_device *idev)
1585 {
1586 	__devinet_sysctl_unregister(&idev->cnf);
1587 	neigh_sysctl_unregister(idev->arp_parms);
1588 }
1589 
/*
 * sysctl table for the "ip_forward" entry, bound here to the global
 * ipv4_devconf FORWARDING value and to init_net.  devinet_init_net()
 * registers it under net_ipv4_path; for other namespaces it registers a
 * kmemdup() copy with .data/.extra1/.extra2 repointed instead.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1603 
/* sysctl path "net/ipv4", under which the ip_forward table is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1609 #endif
1610 
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly; every
 * other namespace gets kmemdup() copies of them.  With CONFIG_SYSCTL the
 * "all" and "default" conf directories and the ip_forward entry are then
 * registered.  On success the devconf pointers are stored in net->ipv4.
 *
 * Returns 0 on success.  On failure returns -ENOMEM (allocation or
 * ip_forward registration failed) or the error from
 * __devinet_sysctl_register(), after undoing whatever had been set up.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial namespace: work on private copies. */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace's data. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwinding, in reverse order of setup.  The static objects
	 * used for init_net are never passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1682 
1683 static __net_exit void devinet_exit_net(struct net *net)
1684 {
1685 #ifdef CONFIG_SYSCTL
1686 	struct ctl_table *tbl;
1687 
1688 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1689 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1690 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1691 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1692 	kfree(tbl);
1693 #endif
1694 	kfree(net->ipv4.devconf_dflt);
1695 	kfree(net->ipv4.devconf_all);
1696 }
1697 
/*
 * Per-network-namespace hooks for the IPv4 device configuration;
 * registered with register_pernet_subsys() in devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1702 
/*
 * AF_INET handlers for the per-address-family part of link messages
 * (size, fill, validate and set of the IFLA_INET_CONF data); registered
 * with rtnl_af_register() in devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1710 
/*
 * One-time initialisation of the IPv4 device layer.  Registers, in order:
 * the per-namespace devconf operations, the PF_INET gifconf handler, the
 * netdevice notifier, the AF_INET link-attribute operations, and the
 * rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR and RTM_GETADDR (dump).
 *
 * NOTE(review): none of the registration return values are checked here.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_af_register(&inet_af_ops);

	/* RTM_GETADDR only has a dump handler, no doit handler. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1724 
1725