xref: /openbmc/linux/net/ipv4/devinet.c (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
/*
 * File-scope devconf instance.  Slots in .data are indexed by
 * (IPV4_DEVCONF_* - 1); only redirect handling and shared-media are
 * switched on here, every slot not listed is zero-initialized.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
74 
/*
 * Second file-scope devconf instance.  It enables the same four entries as
 * ipv4_devconf and additionally ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt that
 * inetdev_init() copies into new devices -- the wiring is not visible here.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
84 
/*
 * Read devconf attribute 'attr' from the default configuration set of the
 * network namespace 'net' (net->ipv4.devconf_dflt).  'net' is parenthesized
 * so that any pointer-valued expression may be passed, not just a plain
 * identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the address
 * add/delete handlers in this file: the three address attributes are
 * 32-bit values, the label is a string of at most IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
94 
/* Forward declaration: used below to announce RTM_NEWADDR / RTM_DELADDR. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Chain invoked with NETDEV_UP / NETDEV_DOWN as addresses come and go. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() needs it before the definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
/* Per-device sysctl hooks; only declared here. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
111 
112 /* Locks all the inet devices. */
113 
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
/*
 * Release an address: the actual free is deferred to inet_rcu_free_ifa()
 * via call_rcu(), so the structure is not freed synchronously here.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	WARN_ON(idev->ifa_list);
137 	WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		pr_err("Freeing alive in_device %p\n", idev);
145 	else
146 		kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 			sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 	if (!in_dev->arp_parms)
165 		goto out_kfree;
166 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 		dev_disable_lro(dev);
168 	/* Reference in_dev->dev */
169 	dev_hold(dev);
170 	/* Account for reference dev->ip_ptr (below) */
171 	in_dev_hold(in_dev);
172 
173 	devinet_sysctl_register(in_dev);
174 	ip_mc_init_dev(in_dev);
175 	if (dev->flags & IFF_UP)
176 		ip_mc_up(in_dev);
177 
178 	/* we can receive as soon as ip_ptr is set -- do this last */
179 	rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181 	return in_dev;
182 out_kfree:
183 	kfree(in_dev);
184 	in_dev = NULL;
185 	goto out;
186 }
187 
/*
 * RCU callback queued by inetdev_destroy(): drops one in_device reference
 * once the callback is invoked.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
193 
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196 	struct in_ifaddr *ifa;
197 	struct net_device *dev;
198 
199 	ASSERT_RTNL();
200 
201 	dev = in_dev->dev;
202 
203 	in_dev->dead = 1;
204 
205 	ip_mc_destroy_dev(in_dev);
206 
207 	while ((ifa = in_dev->ifa_list) != NULL) {
208 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 		inet_free_ifa(ifa);
210 	}
211 
212 	rcu_assign_pointer(dev->ip_ptr, NULL);
213 
214 	devinet_sysctl_unregister(in_dev);
215 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 	arp_ifdown(dev);
217 
218 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220 
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223 	rcu_read_lock();
224 	for_primary_ifa(in_dev) {
225 		if (inet_ifa_match(a, ifa)) {
226 			if (!b || inet_ifa_match(b, ifa)) {
227 				rcu_read_unlock();
228 				return 1;
229 			}
230 		}
231 	} endfor_ifa(in_dev);
232 	rcu_read_unlock();
233 	return 0;
234 }
235 
/*
 * Unlink the address *ifap from in_dev's list and announce the removal.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link pointer that currently points at the address to delete
 * @destroy: when non-zero the deleted address is also freed
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When the deleted address is a primary, its secondaries (same mask, same
 * subnet) are either deleted too or, if IN_DEV_PROMOTE_SECONDARIES is set,
 * the first of them is promoted to primary.  Must be called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope value is
			 * not below ifa1's; a promoted address is re-linked
			 * right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Not a secondary of ifa1's subnet: keep it. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Move the promoted entry from behind prev_prom to directly
		 * after last_prim; skipped when no entry preceded it in the
		 * scan above.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for the remaining addresses in the
		 * deleted primary's subnet.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
321 
/*
 * Delete *ifap as __inet_del_ifa() does, for callers that have no netlink
 * request to attribute the change to (nlh = NULL, pid = 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
327 
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * The function takes ownership of @ifa: on every early exit it is released
 * with inet_free_ifa().  Returns 0 on success (and also when ifa_local is
 * zero, in which case the address is silently dropped), -EEXIST when the
 * same local address already exists in the same subnet, and -EINVAL when an
 * address in the same subnet has a different scope.  RTNL must be held.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Assume primary; the scan below decides whether it is secondary. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Track the link after the last primary whose scope value is
		 * not below the new address's scope.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		/* An existing address in the same subnet makes this one a
		 * secondary, unless it is a duplicate or has another scope.
		 */
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* Primaries go after the last suitable primary; secondaries are
	 * appended at the end of the list (ifap after the loop).
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
379 
/*
 * Insert @ifa as __inet_insert_ifa() does, for callers without a netlink
 * request (nlh = NULL, pid = 0).  Returns that function's result.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
384 
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388 
389 	ASSERT_RTNL();
390 
391 	if (!in_dev) {
392 		inet_free_ifa(ifa);
393 		return -ENOBUFS;
394 	}
395 	ipv4_devconf_setall(in_dev);
396 	if (ifa->ifa_dev != in_dev) {
397 		WARN_ON(ifa->ifa_dev);
398 		in_dev_hold(in_dev);
399 		ifa->ifa_dev = in_dev;
400 	}
401 	if (ipv4_is_loopback(ifa->ifa_local))
402 		ifa->ifa_scope = RT_SCOPE_HOST;
403 	return inet_insert_ifa(ifa);
404 }
405 
406 /* Caller must hold RCU or RTNL :
407  * We dont take a reference on found in_device
408  */
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
410 {
411 	struct net_device *dev;
412 	struct in_device *in_dev = NULL;
413 
414 	rcu_read_lock();
415 	dev = dev_get_by_index_rcu(net, ifindex);
416 	if (dev)
417 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418 	rcu_read_unlock();
419 	return in_dev;
420 }
421 EXPORT_SYMBOL(inetdev_by_index);
422 
423 /* Called only from RTNL semaphored context. No locks. */
424 
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 				    __be32 mask)
427 {
428 	ASSERT_RTNL();
429 
430 	for_primary_ifa(in_dev) {
431 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 			return ifa;
433 	} endfor_ifa(in_dev);
434 	return NULL;
435 }
436 
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439 	struct net *net = sock_net(skb->sk);
440 	struct nlattr *tb[IFA_MAX+1];
441 	struct in_device *in_dev;
442 	struct ifaddrmsg *ifm;
443 	struct in_ifaddr *ifa, **ifap;
444 	int err = -EINVAL;
445 
446 	ASSERT_RTNL();
447 
448 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 	if (err < 0)
450 		goto errout;
451 
452 	ifm = nlmsg_data(nlh);
453 	in_dev = inetdev_by_index(net, ifm->ifa_index);
454 	if (in_dev == NULL) {
455 		err = -ENODEV;
456 		goto errout;
457 	}
458 
459 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 	     ifap = &ifa->ifa_next) {
461 		if (tb[IFA_LOCAL] &&
462 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463 			continue;
464 
465 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466 			continue;
467 
468 		if (tb[IFA_ADDRESS] &&
469 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471 			continue;
472 
473 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474 		return 0;
475 	}
476 
477 	err = -EADDRNOTAVAIL;
478 errout:
479 	return err;
480 }
481 
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484 	struct nlattr *tb[IFA_MAX+1];
485 	struct in_ifaddr *ifa;
486 	struct ifaddrmsg *ifm;
487 	struct net_device *dev;
488 	struct in_device *in_dev;
489 	int err;
490 
491 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492 	if (err < 0)
493 		goto errout;
494 
495 	ifm = nlmsg_data(nlh);
496 	err = -EINVAL;
497 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498 		goto errout;
499 
500 	dev = __dev_get_by_index(net, ifm->ifa_index);
501 	err = -ENODEV;
502 	if (dev == NULL)
503 		goto errout;
504 
505 	in_dev = __in_dev_get_rtnl(dev);
506 	err = -ENOBUFS;
507 	if (in_dev == NULL)
508 		goto errout;
509 
510 	ifa = inet_alloc_ifa();
511 	if (ifa == NULL)
512 		/*
513 		 * A potential indev allocation can be left alive, it stays
514 		 * assigned to its device and is destroy with it.
515 		 */
516 		goto errout;
517 
518 	ipv4_devconf_setall(in_dev);
519 	in_dev_hold(in_dev);
520 
521 	if (tb[IFA_ADDRESS] == NULL)
522 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523 
524 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 	ifa->ifa_flags = ifm->ifa_flags;
527 	ifa->ifa_scope = ifm->ifa_scope;
528 	ifa->ifa_dev = in_dev;
529 
530 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532 
533 	if (tb[IFA_BROADCAST])
534 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535 
536 	if (tb[IFA_LABEL])
537 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538 	else
539 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540 
541 	return ifa;
542 
543 errout:
544 	return ERR_PTR(err);
545 }
546 
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549 	struct net *net = sock_net(skb->sk);
550 	struct in_ifaddr *ifa;
551 
552 	ASSERT_RTNL();
553 
554 	ifa = rtm_to_ifaddr(net, nlh);
555 	if (IS_ERR(ifa))
556 		return PTR_ERR(ifa);
557 
558 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560 
561 /*
562  *	Determine a default network mask, based on the IP address.
563  */
564 
565 static inline int inet_abc_len(__be32 addr)
566 {
567 	int rc = -1;	/* Something else, probably a multicast. */
568 
569 	if (ipv4_is_zeronet(addr))
570 		rc = 0;
571 	else {
572 		__u32 haddr = ntohl(addr);
573 
574 		if (IN_CLASSA(haddr))
575 			rc = 8;
576 		else if (IN_CLASSB(haddr))
577 			rc = 16;
578 		else if (IN_CLASSC(haddr))
579 			rc = 24;
580 	}
581 
582 	return rc;
583 }
584 
585 
/*
 * Handle the IPv4 interface-address ioctls (SIOC[GS]IFADDR, ...BRDADDR,
 * ...DSTADDR, ...NETMASK and SIOCSIFFLAGS).
 *
 * @net: network namespace to look the interface up in
 * @cmd: ioctl command
 * @arg: user pointer to a struct ifreq (read, and written back for "get")
 *
 * Phase 1 (no lock): copy the request in, split off an alias suffix
 * ("eth0:1") for the device lookup, and do per-command permission and
 * address-family checks.  Phase 2 (rtnl_lock held): find the device, pick
 * the matching address and execute the command.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy), -EACCES
 * (missing CAP_NET_ADMIN), -EINVAL, -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or
 * whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Never trust the user to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias suffix so the name is the device name. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* The reply is built in place; start from a clean sockaddr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label ("dev:alias") for the comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias label, clearing IFF_UP deletes that address;
		 * otherwise the flags are applied to the device itself.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Reuse the existing entry: unlink it (without
			 * freeing), reset it, and re-insert it below.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask and matching broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* NOTE(review): the result of inet_insert_ifa() is
			 * ignored here; ret stays 0.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop the lock first, then copy the reply out. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
825 
826 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 {
828 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
829 	struct in_ifaddr *ifa;
830 	struct ifreq ifr;
831 	int done = 0;
832 
833 	if (!in_dev)
834 		goto out;
835 
836 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837 		if (!buf) {
838 			done += sizeof(ifr);
839 			continue;
840 		}
841 		if (len < (int) sizeof(ifr))
842 			break;
843 		memset(&ifr, 0, sizeof(struct ifreq));
844 		if (ifa->ifa_label)
845 			strcpy(ifr.ifr_name, ifa->ifa_label);
846 		else
847 			strcpy(ifr.ifr_name, dev->name);
848 
849 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851 								ifa->ifa_local;
852 
853 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854 			done = -EFAULT;
855 			break;
856 		}
857 		buf  += sizeof(struct ifreq);
858 		len  -= sizeof(struct ifreq);
859 		done += sizeof(struct ifreq);
860 	}
861 out:
862 	return done;
863 }
864 
865 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866 {
867 	__be32 addr = 0;
868 	struct in_device *in_dev;
869 	struct net *net = dev_net(dev);
870 
871 	rcu_read_lock();
872 	in_dev = __in_dev_get_rcu(dev);
873 	if (!in_dev)
874 		goto no_in_dev;
875 
876 	for_primary_ifa(in_dev) {
877 		if (ifa->ifa_scope > scope)
878 			continue;
879 		if (!dst || inet_ifa_match(dst, ifa)) {
880 			addr = ifa->ifa_local;
881 			break;
882 		}
883 		if (!addr)
884 			addr = ifa->ifa_local;
885 	} endfor_ifa(in_dev);
886 
887 	if (addr)
888 		goto out_unlock;
889 no_in_dev:
890 
891 	/* Not loopback addresses on loopback should be preferred
892 	   in this case. It is importnat that lo is the first interface
893 	   in dev_base list.
894 	 */
895 	for_each_netdev_rcu(net, dev) {
896 		in_dev = __in_dev_get_rcu(dev);
897 		if (!in_dev)
898 			continue;
899 
900 		for_primary_ifa(in_dev) {
901 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
902 			    ifa->ifa_scope <= scope) {
903 				addr = ifa->ifa_local;
904 				goto out_unlock;
905 			}
906 		} endfor_ifa(in_dev);
907 	}
908 out_unlock:
909 	rcu_read_unlock();
910 	return addr;
911 }
912 EXPORT_SYMBOL(inet_select_addr);
913 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of in_dev while tracking two things:
 *  - addr: the first address within @scope whose ifa_local equals @local
 *          (any address when @local is 0);
 *  - same: whether some address has a subnet that contains @local (if
 *          non-zero) and @dst (if non-zero).
 * Returns addr when 'same' was established, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the candidate source address once. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: we are done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* With an explicit local address, or with no
				 * dst constraint, the candidate stands.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
950 
951 /*
952  * Confirm that local IP address exists using wildcards:
953  * - in_dev: only on this interface, 0=any interface
954  * - dst: only in the same subnet as dst, 0=any dst
955  * - local: address, 0=autoselect the local address
956  * - scope: maximum allowed scope value for the local address
957  */
958 __be32 inet_confirm_addr(struct in_device *in_dev,
959 			 __be32 dst, __be32 local, int scope)
960 {
961 	__be32 addr = 0;
962 	struct net_device *dev;
963 	struct net *net;
964 
965 	if (scope != RT_SCOPE_LINK)
966 		return confirm_addr_indev(in_dev, dst, local, scope);
967 
968 	net = dev_net(in_dev->dev);
969 	rcu_read_lock();
970 	for_each_netdev_rcu(net, dev) {
971 		in_dev = __in_dev_get_rcu(dev);
972 		if (in_dev) {
973 			addr = confirm_addr_indev(in_dev, dst, local, scope);
974 			if (addr)
975 				break;
976 		}
977 	}
978 	rcu_read_unlock();
979 
980 	return addr;
981 }
982 
/*
 *	Device notifier
 */

/*
 * Add @nb to inetaddr_chain, the chain this file calls with NETDEV_UP /
 * NETDEV_DOWN and the affected in_ifaddr.  Returns the result of
 * blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
992 
/*
 * Remove @nb from inetaddr_chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
998 
999 /* Rename ifa_labels for a device name change. Make some effort to preserve
1000  * existing alias numbering and to create unique labels if possible.
1001 */
1002 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003 {
1004 	struct in_ifaddr *ifa;
1005 	int named = 0;
1006 
1007 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008 		char old[IFNAMSIZ], *dot;
1009 
1010 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012 		if (named++ == 0)
1013 			goto skip;
1014 		dot = strchr(old, ':');
1015 		if (dot == NULL) {
1016 			sprintf(old, ":%d", named);
1017 			dot = old;
1018 		}
1019 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020 			strcat(ifa->ifa_label, dot);
1021 		else
1022 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023 skip:
1024 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025 	}
1026 }
1027 
/*
 * True when @mtu is large enough to keep IPv4 enabled on a device.
 * The threshold of 68 bytes is presumably the RFC 791 minimum datagram
 * size every host must handle -- confirm against the RFC.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1032 
1033 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1034 					struct in_device *in_dev)
1035 
1036 {
1037 	struct in_ifaddr *ifa = in_dev->ifa_list;
1038 
1039 	if (!ifa)
1040 		return;
1041 
1042 	arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 		 ifa->ifa_address, dev,
1044 		 ifa->ifa_address, NULL,
1045 		 dev->dev_addr, NULL);
1046 }
1047 
/*
 * Netdevice notifier callback (wired up through ip_netdev_notifier).
 * Called only under RTNL semaphore.
 *
 * @this:  the notifier block (unused)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Creates the in_device on registration (or when the MTU becomes usable
 * again), destroys it on unregistration or when the MTU drops too low, and
 * keeps multicast, ARP and label state in step with the other events.
 * Returns NOTIFY_DONE, except that a failed inetdev_init() at
 * NETDEV_REGISTER is reported via notifier_from_errno(-ENOMEM).
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only events that can create it matter. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		rcu_assign_pointer(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback devices get 127.0.0.1/8 with host scope. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 *
		 * NOTE(review): inetdev_changename() does send an
		 * RTM_NEWADDR message per address, so the statement above
		 * looks stale -- confirm and update.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the per-device sysctl entries. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1137 
/*
 * Notifier block whose callback is inetdev_event().  Where it is
 * registered is not visible in this part of the file.
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1141 
1142 static inline size_t inet_nlmsg_size(void)
1143 {
1144 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1145 	       + nla_total_size(4) /* IFA_ADDRESS */
1146 	       + nla_total_size(4) /* IFA_LOCAL */
1147 	       + nla_total_size(4) /* IFA_BROADCAST */
1148 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1149 }
1150 
1151 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1152 			    u32 pid, u32 seq, int event, unsigned int flags)
1153 {
1154 	struct ifaddrmsg *ifm;
1155 	struct nlmsghdr  *nlh;
1156 
1157 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1158 	if (nlh == NULL)
1159 		return -EMSGSIZE;
1160 
1161 	ifm = nlmsg_data(nlh);
1162 	ifm->ifa_family = AF_INET;
1163 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1164 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1165 	ifm->ifa_scope = ifa->ifa_scope;
1166 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1167 
1168 	if (ifa->ifa_address)
1169 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1170 
1171 	if (ifa->ifa_local)
1172 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1173 
1174 	if (ifa->ifa_broadcast)
1175 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1176 
1177 	if (ifa->ifa_label[0])
1178 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1179 
1180 	return nlmsg_end(skb, nlh);
1181 
1182 nla_put_failure:
1183 	nlmsg_cancel(skb, nlh);
1184 	return -EMSGSIZE;
1185 }
1186 
1187 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1188 {
1189 	struct net *net = sock_net(skb->sk);
1190 	int h, s_h;
1191 	int idx, s_idx;
1192 	int ip_idx, s_ip_idx;
1193 	struct net_device *dev;
1194 	struct in_device *in_dev;
1195 	struct in_ifaddr *ifa;
1196 	struct hlist_head *head;
1197 	struct hlist_node *node;
1198 
1199 	s_h = cb->args[0];
1200 	s_idx = idx = cb->args[1];
1201 	s_ip_idx = ip_idx = cb->args[2];
1202 
1203 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1204 		idx = 0;
1205 		head = &net->dev_index_head[h];
1206 		rcu_read_lock();
1207 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1208 			if (idx < s_idx)
1209 				goto cont;
1210 			if (h > s_h || idx > s_idx)
1211 				s_ip_idx = 0;
1212 			in_dev = __in_dev_get_rcu(dev);
1213 			if (!in_dev)
1214 				goto cont;
1215 
1216 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1217 			     ifa = ifa->ifa_next, ip_idx++) {
1218 				if (ip_idx < s_ip_idx)
1219 					continue;
1220 				if (inet_fill_ifaddr(skb, ifa,
1221 					     NETLINK_CB(cb->skb).pid,
1222 					     cb->nlh->nlmsg_seq,
1223 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1224 					rcu_read_unlock();
1225 					goto done;
1226 				}
1227 			}
1228 cont:
1229 			idx++;
1230 		}
1231 		rcu_read_unlock();
1232 	}
1233 
1234 done:
1235 	cb->args[0] = h;
1236 	cb->args[1] = idx;
1237 	cb->args[2] = ip_idx;
1238 
1239 	return skb->len;
1240 }
1241 
1242 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1243 		      u32 pid)
1244 {
1245 	struct sk_buff *skb;
1246 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1247 	int err = -ENOBUFS;
1248 	struct net *net;
1249 
1250 	net = dev_net(ifa->ifa_dev->dev);
1251 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1252 	if (skb == NULL)
1253 		goto errout;
1254 
1255 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1256 	if (err < 0) {
1257 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1258 		WARN_ON(err == -EMSGSIZE);
1259 		kfree_skb(skb);
1260 		goto errout;
1261 	}
1262 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1263 	return;
1264 errout:
1265 	if (err < 0)
1266 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1267 }
1268 
1269 static size_t inet_get_link_af_size(const struct net_device *dev)
1270 {
1271 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272 
1273 	if (!in_dev)
1274 		return 0;
1275 
1276 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1277 }
1278 
1279 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1280 {
1281 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1282 	struct nlattr *nla;
1283 	int i;
1284 
1285 	if (!in_dev)
1286 		return -ENODATA;
1287 
1288 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1289 	if (nla == NULL)
1290 		return -EMSGSIZE;
1291 
1292 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1293 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1294 
1295 	return 0;
1296 }
1297 
1298 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1299 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1300 };
1301 
1302 static int inet_validate_link_af(const struct net_device *dev,
1303 				 const struct nlattr *nla)
1304 {
1305 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1306 	int err, rem;
1307 
1308 	if (dev && !__in_dev_get_rtnl(dev))
1309 		return -EAFNOSUPPORT;
1310 
1311 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1312 	if (err < 0)
1313 		return err;
1314 
1315 	if (tb[IFLA_INET_CONF]) {
1316 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1317 			int cfgid = nla_type(a);
1318 
1319 			if (nla_len(a) < 4)
1320 				return -EINVAL;
1321 
1322 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1323 				return -EINVAL;
1324 		}
1325 	}
1326 
1327 	return 0;
1328 }
1329 
1330 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1331 {
1332 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1333 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1334 	int rem;
1335 
1336 	if (!in_dev)
1337 		return -EAFNOSUPPORT;
1338 
1339 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1340 		BUG();
1341 
1342 	if (tb[IFLA_INET_CONF]) {
1343 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1344 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1345 	}
1346 
1347 	return 0;
1348 }
1349 
1350 #ifdef CONFIG_SYSCTL
1351 
1352 static void devinet_copy_dflt_conf(struct net *net, int i)
1353 {
1354 	struct net_device *dev;
1355 
1356 	rcu_read_lock();
1357 	for_each_netdev_rcu(net, dev) {
1358 		struct in_device *in_dev;
1359 
1360 		in_dev = __in_dev_get_rcu(dev);
1361 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1362 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1363 	}
1364 	rcu_read_unlock();
1365 }
1366 
1367 /* called with RTNL locked */
1368 static void inet_forward_change(struct net *net)
1369 {
1370 	struct net_device *dev;
1371 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1372 
1373 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1374 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1375 
1376 	for_each_netdev(net, dev) {
1377 		struct in_device *in_dev;
1378 		if (on)
1379 			dev_disable_lro(dev);
1380 		rcu_read_lock();
1381 		in_dev = __in_dev_get_rcu(dev);
1382 		if (in_dev)
1383 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1384 		rcu_read_unlock();
1385 	}
1386 }
1387 
1388 static int devinet_conf_proc(ctl_table *ctl, int write,
1389 			     void __user *buffer,
1390 			     size_t *lenp, loff_t *ppos)
1391 {
1392 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1393 
1394 	if (write) {
1395 		struct ipv4_devconf *cnf = ctl->extra1;
1396 		struct net *net = ctl->extra2;
1397 		int i = (int *)ctl->data - cnf->data;
1398 
1399 		set_bit(i, cnf->state);
1400 
1401 		if (cnf == net->ipv4.devconf_dflt)
1402 			devinet_copy_dflt_conf(net, i);
1403 	}
1404 
1405 	return ret;
1406 }
1407 
1408 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1409 				  void __user *buffer,
1410 				  size_t *lenp, loff_t *ppos)
1411 {
1412 	int *valp = ctl->data;
1413 	int val = *valp;
1414 	loff_t pos = *ppos;
1415 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1416 
1417 	if (write && *valp != val) {
1418 		struct net *net = ctl->extra2;
1419 
1420 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1421 			if (!rtnl_trylock()) {
1422 				/* Restore the original values before restarting */
1423 				*valp = val;
1424 				*ppos = pos;
1425 				return restart_syscall();
1426 			}
1427 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1428 				inet_forward_change(net);
1429 			} else if (*valp) {
1430 				struct ipv4_devconf *cnf = ctl->extra1;
1431 				struct in_device *idev =
1432 					container_of(cnf, struct in_device, cnf);
1433 				dev_disable_lro(idev->dev);
1434 			}
1435 			rtnl_unlock();
1436 			rt_cache_flush(net, 0);
1437 		}
1438 	}
1439 
1440 	return ret;
1441 }
1442 
1443 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1444 				void __user *buffer,
1445 				size_t *lenp, loff_t *ppos)
1446 {
1447 	int *valp = ctl->data;
1448 	int val = *valp;
1449 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1450 	struct net *net = ctl->extra2;
1451 
1452 	if (write && *valp != val)
1453 		rt_cache_flush(net, 0);
1454 
1455 	return ret;
1456 }
1457 
/*
 * Build one ctl_table initializer for devconf option IPV4_DEVCONF_<attr>.
 * .data and .extra1 refer to the global ipv4_devconf; option ids are
 * 1-based, hence the "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
1480 
1481 static struct devinet_sysctl_table {
1482 	struct ctl_table_header *sysctl_header;
1483 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1484 	char *dev_name;
1485 } devinet_sysctl = {
1486 	.devinet_vars = {
1487 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1488 					     devinet_sysctl_forward),
1489 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1490 
1491 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1492 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1493 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1494 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1495 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1496 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1497 					"accept_source_route"),
1498 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1499 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1500 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1501 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1502 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1503 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1504 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1505 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1506 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1507 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1508 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1509 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1510 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1511 
1512 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1513 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1514 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1515 					      "force_igmp_version"),
1516 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1517 					      "promote_secondaries"),
1518 	},
1519 };
1520 
1521 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1522 					struct ipv4_devconf *p)
1523 {
1524 	int i;
1525 	struct devinet_sysctl_table *t;
1526 
1527 #define DEVINET_CTL_PATH_DEV	3
1528 
1529 	struct ctl_path devinet_ctl_path[] = {
1530 		{ .procname = "net",  },
1531 		{ .procname = "ipv4", },
1532 		{ .procname = "conf", },
1533 		{ /* to be set */ },
1534 		{ },
1535 	};
1536 
1537 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1538 	if (!t)
1539 		goto out;
1540 
1541 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1542 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1543 		t->devinet_vars[i].extra1 = p;
1544 		t->devinet_vars[i].extra2 = net;
1545 	}
1546 
1547 	/*
1548 	 * Make a copy of dev_name, because '.procname' is regarded as const
1549 	 * by sysctl and we wouldn't want anyone to change it under our feet
1550 	 * (see SIOCSIFNAME).
1551 	 */
1552 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1553 	if (!t->dev_name)
1554 		goto free;
1555 
1556 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1557 
1558 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1559 			t->devinet_vars);
1560 	if (!t->sysctl_header)
1561 		goto free_procname;
1562 
1563 	p->sysctl = t;
1564 	return 0;
1565 
1566 free_procname:
1567 	kfree(t->dev_name);
1568 free:
1569 	kfree(t);
1570 out:
1571 	return -ENOBUFS;
1572 }
1573 
1574 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1575 {
1576 	struct devinet_sysctl_table *t = cnf->sysctl;
1577 
1578 	if (t == NULL)
1579 		return;
1580 
1581 	cnf->sysctl = NULL;
1582 	unregister_sysctl_table(t->sysctl_header);
1583 	kfree(t->dev_name);
1584 	kfree(t);
1585 }
1586 
1587 static void devinet_sysctl_register(struct in_device *idev)
1588 {
1589 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1590 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1591 					&idev->cnf);
1592 }
1593 
1594 static void devinet_sysctl_unregister(struct in_device *idev)
1595 {
1596 	__devinet_sysctl_unregister(&idev->cnf);
1597 	neigh_sysctl_unregister(idev->arp_parms);
1598 }
1599 
1600 static struct ctl_table ctl_forward_entry[] = {
1601 	{
1602 		.procname	= "ip_forward",
1603 		.data		= &ipv4_devconf.data[
1604 					IPV4_DEVCONF_FORWARDING - 1],
1605 		.maxlen		= sizeof(int),
1606 		.mode		= 0644,
1607 		.proc_handler	= devinet_sysctl_forward,
1608 		.extra1		= &ipv4_devconf,
1609 		.extra2		= &init_net,
1610 	},
1611 	{ },
1612 };
1613 
1614 static __net_initdata struct ctl_path net_ipv4_path[] = {
1615 	{ .procname = "net", },
1616 	{ .procname = "ipv4", },
1617 	{ },
1618 };
1619 #endif
1620 
1621 static __net_init int devinet_init_net(struct net *net)
1622 {
1623 	int err;
1624 	struct ipv4_devconf *all, *dflt;
1625 #ifdef CONFIG_SYSCTL
1626 	struct ctl_table *tbl = ctl_forward_entry;
1627 	struct ctl_table_header *forw_hdr;
1628 #endif
1629 
1630 	err = -ENOMEM;
1631 	all = &ipv4_devconf;
1632 	dflt = &ipv4_devconf_dflt;
1633 
1634 	if (!net_eq(net, &init_net)) {
1635 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1636 		if (all == NULL)
1637 			goto err_alloc_all;
1638 
1639 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1640 		if (dflt == NULL)
1641 			goto err_alloc_dflt;
1642 
1643 #ifdef CONFIG_SYSCTL
1644 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1645 		if (tbl == NULL)
1646 			goto err_alloc_ctl;
1647 
1648 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1649 		tbl[0].extra1 = all;
1650 		tbl[0].extra2 = net;
1651 #endif
1652 	}
1653 
1654 #ifdef CONFIG_SYSCTL
1655 	err = __devinet_sysctl_register(net, "all", all);
1656 	if (err < 0)
1657 		goto err_reg_all;
1658 
1659 	err = __devinet_sysctl_register(net, "default", dflt);
1660 	if (err < 0)
1661 		goto err_reg_dflt;
1662 
1663 	err = -ENOMEM;
1664 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1665 	if (forw_hdr == NULL)
1666 		goto err_reg_ctl;
1667 	net->ipv4.forw_hdr = forw_hdr;
1668 #endif
1669 
1670 	net->ipv4.devconf_all = all;
1671 	net->ipv4.devconf_dflt = dflt;
1672 	return 0;
1673 
1674 #ifdef CONFIG_SYSCTL
1675 err_reg_ctl:
1676 	__devinet_sysctl_unregister(dflt);
1677 err_reg_dflt:
1678 	__devinet_sysctl_unregister(all);
1679 err_reg_all:
1680 	if (tbl != ctl_forward_entry)
1681 		kfree(tbl);
1682 err_alloc_ctl:
1683 #endif
1684 	if (dflt != &ipv4_devconf_dflt)
1685 		kfree(dflt);
1686 err_alloc_dflt:
1687 	if (all != &ipv4_devconf)
1688 		kfree(all);
1689 err_alloc_all:
1690 	return err;
1691 }
1692 
1693 static __net_exit void devinet_exit_net(struct net *net)
1694 {
1695 #ifdef CONFIG_SYSCTL
1696 	struct ctl_table *tbl;
1697 
1698 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1699 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1700 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1701 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1702 	kfree(tbl);
1703 #endif
1704 	kfree(net->ipv4.devconf_dflt);
1705 	kfree(net->ipv4.devconf_all);
1706 }
1707 
1708 static __net_initdata struct pernet_operations devinet_ops = {
1709 	.init = devinet_init_net,
1710 	.exit = devinet_exit_net,
1711 };
1712 
1713 static struct rtnl_af_ops inet_af_ops = {
1714 	.family		  = AF_INET,
1715 	.fill_link_af	  = inet_fill_link_af,
1716 	.get_link_af_size = inet_get_link_af_size,
1717 	.validate_link_af = inet_validate_link_af,
1718 	.set_link_af	  = inet_set_link_af,
1719 };
1720 
1721 void __init devinet_init(void)
1722 {
1723 	register_pernet_subsys(&devinet_ops);
1724 
1725 	register_gifconf(PF_INET, inet_gifconf);
1726 	register_netdevice_notifier(&ip_netdev_notifier);
1727 
1728 	rtnl_af_register(&inet_af_ops);
1729 
1730 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1731 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1732 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1733 }
1734 
1735