xref: /openbmc/linux/net/ipv4/devinet.c (revision 6ecc07b9)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 #include "fib_lookup.h"
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 	},
76 };
77 
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79 	.data = {
80 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85 	},
86 };
87 
/*
 * Read configuration attribute @attr from the default devconf of network
 * namespace @net.  @net is parenthesized so that any pointer expression
 * (e.g. "base + 1") expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90 
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92 	[IFA_LOCAL]     	= { .type = NLA_U32 },
93 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
94 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
95 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97 
98 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
99  * value.  So if you change this define, make appropriate changes to
100  * inet_addr_hash as well.
101  */
102 #define IN4_ADDR_HSIZE	256
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109 
110 	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111 		(IN4_ADDR_HSIZE - 1));
112 }
113 
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116 	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117 
118 	spin_lock(&inet_addr_hash_lock);
119 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 	spin_unlock(&inet_addr_hash_lock);
121 }
122 
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125 	spin_lock(&inet_addr_hash_lock);
126 	hlist_del_init_rcu(&ifa->hash);
127 	spin_unlock(&inet_addr_hash_lock);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	unsigned int hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 	struct hlist_node *node;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147 		struct net_device *dev = ifa->ifa_dev->dev;
148 
149 		if (!net_eq(dev_net(dev), net))
150 			continue;
151 		if (ifa->ifa_local == addr) {
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176 
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 			 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static void devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static inline void devinet_sysctl_register(struct in_device *idev)
187 {
188 }
189 static inline void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 	if (ifa->ifa_dev)
205 		in_dev_put(ifa->ifa_dev);
206 	kfree(ifa);
207 }
208 
209 static inline void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213 
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216 	struct net_device *dev = idev->dev;
217 
218 	WARN_ON(idev->ifa_list);
219 	WARN_ON(idev->mc_list);
220 #ifdef NET_REFCNT_DEBUG
221 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222 	       idev, dev ? dev->name : "NIL");
223 #endif
224 	dev_put(dev);
225 	if (!idev->dead)
226 		pr_err("Freeing alive in_device %p\n", idev);
227 	else
228 		kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231 
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234 	struct in_device *in_dev;
235 
236 	ASSERT_RTNL();
237 
238 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 	if (!in_dev)
240 		goto out;
241 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 			sizeof(in_dev->cnf));
243 	in_dev->cnf.sysctl = NULL;
244 	in_dev->dev = dev;
245 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 	if (!in_dev->arp_parms)
247 		goto out_kfree;
248 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 		dev_disable_lro(dev);
250 	/* Reference in_dev->dev */
251 	dev_hold(dev);
252 	/* Account for reference dev->ip_ptr (below) */
253 	in_dev_hold(in_dev);
254 
255 	devinet_sysctl_register(in_dev);
256 	ip_mc_init_dev(in_dev);
257 	if (dev->flags & IFF_UP)
258 		ip_mc_up(in_dev);
259 
260 	/* we can receive as soon as ip_ptr is set -- do this last */
261 	rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263 	return in_dev;
264 out_kfree:
265 	kfree(in_dev);
266 	in_dev = NULL;
267 	goto out;
268 }
269 
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
273 	in_dev_put(idev);
274 }
275 
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278 	struct in_ifaddr *ifa;
279 	struct net_device *dev;
280 
281 	ASSERT_RTNL();
282 
283 	dev = in_dev->dev;
284 
285 	in_dev->dead = 1;
286 
287 	ip_mc_destroy_dev(in_dev);
288 
289 	while ((ifa = in_dev->ifa_list) != NULL) {
290 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 		inet_free_ifa(ifa);
292 	}
293 
294 	rcu_assign_pointer(dev->ip_ptr, NULL);
295 
296 	devinet_sysctl_unregister(in_dev);
297 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 	arp_ifdown(dev);
299 
300 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302 
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305 	rcu_read_lock();
306 	for_primary_ifa(in_dev) {
307 		if (inet_ifa_match(a, ifa)) {
308 			if (!b || inet_ifa_match(b, ifa)) {
309 				rcu_read_unlock();
310 				return 1;
311 			}
312 		}
313 	} endfor_ifa(in_dev);
314 	rcu_read_unlock();
315 	return 0;
316 }
317 
318 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319 			 int destroy, struct nlmsghdr *nlh, u32 pid)
320 {
321 	struct in_ifaddr *promote = NULL;
322 	struct in_ifaddr *ifa, *ifa1 = *ifap;
323 	struct in_ifaddr *last_prim = in_dev->ifa_list;
324 	struct in_ifaddr *prev_prom = NULL;
325 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 
327 	ASSERT_RTNL();
328 
329 	/* 1. Deleting primary ifaddr forces deletion all secondaries
330 	 * unless alias promotion is set
331 	 **/
332 
333 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
334 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
335 
336 		while ((ifa = *ifap1) != NULL) {
337 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
338 			    ifa1->ifa_scope <= ifa->ifa_scope)
339 				last_prim = ifa;
340 
341 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
342 			    ifa1->ifa_mask != ifa->ifa_mask ||
343 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
344 				ifap1 = &ifa->ifa_next;
345 				prev_prom = ifa;
346 				continue;
347 			}
348 
349 			if (!do_promote) {
350 				inet_hash_remove(ifa);
351 				*ifap1 = ifa->ifa_next;
352 
353 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
354 				blocking_notifier_call_chain(&inetaddr_chain,
355 						NETDEV_DOWN, ifa);
356 				inet_free_ifa(ifa);
357 			} else {
358 				promote = ifa;
359 				break;
360 			}
361 		}
362 	}
363 
364 	/* On promotion all secondaries from subnet are changing
365 	 * the primary IP, we must remove all their routes silently
366 	 * and later to add them back with new prefsrc. Do this
367 	 * while all addresses are on the device list.
368 	 */
369 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370 		if (ifa1->ifa_mask == ifa->ifa_mask &&
371 		    inet_ifa_match(ifa1->ifa_address, ifa))
372 			fib_del_ifaddr(ifa, ifa1);
373 	}
374 
375 	/* 2. Unlink it */
376 
377 	*ifap = ifa1->ifa_next;
378 	inet_hash_remove(ifa1);
379 
380 	/* 3. Announce address deletion */
381 
382 	/* Send message first, then call notifier.
383 	   At first sight, FIB update triggered by notifier
384 	   will refer to already deleted ifaddr, that could confuse
385 	   netlink listeners. It is not true: look, gated sees
386 	   that route deleted and if it still thinks that ifaddr
387 	   is valid, it will try to restore deleted routes... Grr.
388 	   So that, this order is correct.
389 	 */
390 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
391 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392 
393 	if (promote) {
394 		struct in_ifaddr *next_sec = promote->ifa_next;
395 
396 		if (prev_prom) {
397 			prev_prom->ifa_next = promote->ifa_next;
398 			promote->ifa_next = last_prim->ifa_next;
399 			last_prim->ifa_next = promote;
400 		}
401 
402 		promote->ifa_flags &= ~IFA_F_SECONDARY;
403 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
404 		blocking_notifier_call_chain(&inetaddr_chain,
405 				NETDEV_UP, promote);
406 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
407 			if (ifa1->ifa_mask != ifa->ifa_mask ||
408 			    !inet_ifa_match(ifa1->ifa_address, ifa))
409 					continue;
410 			fib_add_ifaddr(ifa);
411 		}
412 
413 	}
414 	if (destroy)
415 		inet_free_ifa(ifa1);
416 }
417 
418 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419 			 int destroy)
420 {
421 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
422 }
423 
424 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
425 			     u32 pid)
426 {
427 	struct in_device *in_dev = ifa->ifa_dev;
428 	struct in_ifaddr *ifa1, **ifap, **last_primary;
429 
430 	ASSERT_RTNL();
431 
432 	if (!ifa->ifa_local) {
433 		inet_free_ifa(ifa);
434 		return 0;
435 	}
436 
437 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
438 	last_primary = &in_dev->ifa_list;
439 
440 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
441 	     ifap = &ifa1->ifa_next) {
442 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
443 		    ifa->ifa_scope <= ifa1->ifa_scope)
444 			last_primary = &ifa1->ifa_next;
445 		if (ifa1->ifa_mask == ifa->ifa_mask &&
446 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
447 			if (ifa1->ifa_local == ifa->ifa_local) {
448 				inet_free_ifa(ifa);
449 				return -EEXIST;
450 			}
451 			if (ifa1->ifa_scope != ifa->ifa_scope) {
452 				inet_free_ifa(ifa);
453 				return -EINVAL;
454 			}
455 			ifa->ifa_flags |= IFA_F_SECONDARY;
456 		}
457 	}
458 
459 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
460 		net_srandom(ifa->ifa_local);
461 		ifap = last_primary;
462 	}
463 
464 	ifa->ifa_next = *ifap;
465 	*ifap = ifa;
466 
467 	inet_hash_insert(dev_net(in_dev->dev), ifa);
468 
469 	/* Send message first, then call notifier.
470 	   Notifier will trigger FIB update, so that
471 	   listeners of netlink will know about new ifaddr */
472 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
473 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
474 
475 	return 0;
476 }
477 
478 static int inet_insert_ifa(struct in_ifaddr *ifa)
479 {
480 	return __inet_insert_ifa(ifa, NULL, 0);
481 }
482 
483 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484 {
485 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
486 
487 	ASSERT_RTNL();
488 
489 	if (!in_dev) {
490 		inet_free_ifa(ifa);
491 		return -ENOBUFS;
492 	}
493 	ipv4_devconf_setall(in_dev);
494 	if (ifa->ifa_dev != in_dev) {
495 		WARN_ON(ifa->ifa_dev);
496 		in_dev_hold(in_dev);
497 		ifa->ifa_dev = in_dev;
498 	}
499 	if (ipv4_is_loopback(ifa->ifa_local))
500 		ifa->ifa_scope = RT_SCOPE_HOST;
501 	return inet_insert_ifa(ifa);
502 }
503 
504 /* Caller must hold RCU or RTNL :
505  * We dont take a reference on found in_device
506  */
507 struct in_device *inetdev_by_index(struct net *net, int ifindex)
508 {
509 	struct net_device *dev;
510 	struct in_device *in_dev = NULL;
511 
512 	rcu_read_lock();
513 	dev = dev_get_by_index_rcu(net, ifindex);
514 	if (dev)
515 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
516 	rcu_read_unlock();
517 	return in_dev;
518 }
519 EXPORT_SYMBOL(inetdev_by_index);
520 
521 /* Called only from RTNL semaphored context. No locks. */
522 
523 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
524 				    __be32 mask)
525 {
526 	ASSERT_RTNL();
527 
528 	for_primary_ifa(in_dev) {
529 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
530 			return ifa;
531 	} endfor_ifa(in_dev);
532 	return NULL;
533 }
534 
535 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536 {
537 	struct net *net = sock_net(skb->sk);
538 	struct nlattr *tb[IFA_MAX+1];
539 	struct in_device *in_dev;
540 	struct ifaddrmsg *ifm;
541 	struct in_ifaddr *ifa, **ifap;
542 	int err = -EINVAL;
543 
544 	ASSERT_RTNL();
545 
546 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
547 	if (err < 0)
548 		goto errout;
549 
550 	ifm = nlmsg_data(nlh);
551 	in_dev = inetdev_by_index(net, ifm->ifa_index);
552 	if (in_dev == NULL) {
553 		err = -ENODEV;
554 		goto errout;
555 	}
556 
557 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
558 	     ifap = &ifa->ifa_next) {
559 		if (tb[IFA_LOCAL] &&
560 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
561 			continue;
562 
563 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
564 			continue;
565 
566 		if (tb[IFA_ADDRESS] &&
567 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
568 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
569 			continue;
570 
571 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
572 		return 0;
573 	}
574 
575 	err = -EADDRNOTAVAIL;
576 errout:
577 	return err;
578 }
579 
580 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581 {
582 	struct nlattr *tb[IFA_MAX+1];
583 	struct in_ifaddr *ifa;
584 	struct ifaddrmsg *ifm;
585 	struct net_device *dev;
586 	struct in_device *in_dev;
587 	int err;
588 
589 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
590 	if (err < 0)
591 		goto errout;
592 
593 	ifm = nlmsg_data(nlh);
594 	err = -EINVAL;
595 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
596 		goto errout;
597 
598 	dev = __dev_get_by_index(net, ifm->ifa_index);
599 	err = -ENODEV;
600 	if (dev == NULL)
601 		goto errout;
602 
603 	in_dev = __in_dev_get_rtnl(dev);
604 	err = -ENOBUFS;
605 	if (in_dev == NULL)
606 		goto errout;
607 
608 	ifa = inet_alloc_ifa();
609 	if (ifa == NULL)
610 		/*
611 		 * A potential indev allocation can be left alive, it stays
612 		 * assigned to its device and is destroy with it.
613 		 */
614 		goto errout;
615 
616 	ipv4_devconf_setall(in_dev);
617 	in_dev_hold(in_dev);
618 
619 	if (tb[IFA_ADDRESS] == NULL)
620 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621 
622 	INIT_HLIST_NODE(&ifa->hash);
623 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
624 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
625 	ifa->ifa_flags = ifm->ifa_flags;
626 	ifa->ifa_scope = ifm->ifa_scope;
627 	ifa->ifa_dev = in_dev;
628 
629 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
630 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631 
632 	if (tb[IFA_BROADCAST])
633 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634 
635 	if (tb[IFA_LABEL])
636 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
637 	else
638 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639 
640 	return ifa;
641 
642 errout:
643 	return ERR_PTR(err);
644 }
645 
646 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647 {
648 	struct net *net = sock_net(skb->sk);
649 	struct in_ifaddr *ifa;
650 
651 	ASSERT_RTNL();
652 
653 	ifa = rtm_to_ifaddr(net, nlh);
654 	if (IS_ERR(ifa))
655 		return PTR_ERR(ifa);
656 
657 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
658 }
659 
660 /*
661  *	Determine a default network mask, based on the IP address.
662  */
663 
664 static inline int inet_abc_len(__be32 addr)
665 {
666 	int rc = -1;	/* Something else, probably a multicast. */
667 
668 	if (ipv4_is_zeronet(addr))
669 		rc = 0;
670 	else {
671 		__u32 haddr = ntohl(addr);
672 
673 		if (IN_CLASSA(haddr))
674 			rc = 8;
675 		else if (IN_CLASSB(haddr))
676 			rc = 16;
677 		else if (IN_CLASSC(haddr))
678 			rc = 24;
679 	}
680 
681 	return rc;
682 }
683 
684 
685 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
686 {
687 	struct ifreq ifr;
688 	struct sockaddr_in sin_orig;
689 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
690 	struct in_device *in_dev;
691 	struct in_ifaddr **ifap = NULL;
692 	struct in_ifaddr *ifa = NULL;
693 	struct net_device *dev;
694 	char *colon;
695 	int ret = -EFAULT;
696 	int tryaddrmatch = 0;
697 
698 	/*
699 	 *	Fetch the caller's info block into kernel space
700 	 */
701 
702 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
703 		goto out;
704 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
705 
706 	/* save original address for comparison */
707 	memcpy(&sin_orig, sin, sizeof(*sin));
708 
709 	colon = strchr(ifr.ifr_name, ':');
710 	if (colon)
711 		*colon = 0;
712 
713 	dev_load(net, ifr.ifr_name);
714 
715 	switch (cmd) {
716 	case SIOCGIFADDR:	/* Get interface address */
717 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
718 	case SIOCGIFDSTADDR:	/* Get the destination address */
719 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
720 		/* Note that these ioctls will not sleep,
721 		   so that we do not impose a lock.
722 		   One day we will be forced to put shlock here (I mean SMP)
723 		 */
724 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
725 		memset(sin, 0, sizeof(*sin));
726 		sin->sin_family = AF_INET;
727 		break;
728 
729 	case SIOCSIFFLAGS:
730 		ret = -EACCES;
731 		if (!capable(CAP_NET_ADMIN))
732 			goto out;
733 		break;
734 	case SIOCSIFADDR:	/* Set interface address (and family) */
735 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
736 	case SIOCSIFDSTADDR:	/* Set the destination address */
737 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
738 		ret = -EACCES;
739 		if (!capable(CAP_NET_ADMIN))
740 			goto out;
741 		ret = -EINVAL;
742 		if (sin->sin_family != AF_INET)
743 			goto out;
744 		break;
745 	default:
746 		ret = -EINVAL;
747 		goto out;
748 	}
749 
750 	rtnl_lock();
751 
752 	ret = -ENODEV;
753 	dev = __dev_get_by_name(net, ifr.ifr_name);
754 	if (!dev)
755 		goto done;
756 
757 	if (colon)
758 		*colon = ':';
759 
760 	in_dev = __in_dev_get_rtnl(dev);
761 	if (in_dev) {
762 		if (tryaddrmatch) {
763 			/* Matthias Andree */
764 			/* compare label and address (4.4BSD style) */
765 			/* note: we only do this for a limited set of ioctls
766 			   and only if the original address family was AF_INET.
767 			   This is checked above. */
768 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
769 			     ifap = &ifa->ifa_next) {
770 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
771 				    sin_orig.sin_addr.s_addr ==
772 							ifa->ifa_local) {
773 					break; /* found */
774 				}
775 			}
776 		}
777 		/* we didn't get a match, maybe the application is
778 		   4.3BSD-style and passed in junk so we fall back to
779 		   comparing just the label */
780 		if (!ifa) {
781 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
782 			     ifap = &ifa->ifa_next)
783 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
784 					break;
785 		}
786 	}
787 
788 	ret = -EADDRNOTAVAIL;
789 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
790 		goto done;
791 
792 	switch (cmd) {
793 	case SIOCGIFADDR:	/* Get interface address */
794 		sin->sin_addr.s_addr = ifa->ifa_local;
795 		goto rarok;
796 
797 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
798 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
799 		goto rarok;
800 
801 	case SIOCGIFDSTADDR:	/* Get the destination address */
802 		sin->sin_addr.s_addr = ifa->ifa_address;
803 		goto rarok;
804 
805 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
806 		sin->sin_addr.s_addr = ifa->ifa_mask;
807 		goto rarok;
808 
809 	case SIOCSIFFLAGS:
810 		if (colon) {
811 			ret = -EADDRNOTAVAIL;
812 			if (!ifa)
813 				break;
814 			ret = 0;
815 			if (!(ifr.ifr_flags & IFF_UP))
816 				inet_del_ifa(in_dev, ifap, 1);
817 			break;
818 		}
819 		ret = dev_change_flags(dev, ifr.ifr_flags);
820 		break;
821 
822 	case SIOCSIFADDR:	/* Set interface address (and family) */
823 		ret = -EINVAL;
824 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
825 			break;
826 
827 		if (!ifa) {
828 			ret = -ENOBUFS;
829 			ifa = inet_alloc_ifa();
830 			INIT_HLIST_NODE(&ifa->hash);
831 			if (!ifa)
832 				break;
833 			if (colon)
834 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
835 			else
836 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837 		} else {
838 			ret = 0;
839 			if (ifa->ifa_local == sin->sin_addr.s_addr)
840 				break;
841 			inet_del_ifa(in_dev, ifap, 0);
842 			ifa->ifa_broadcast = 0;
843 			ifa->ifa_scope = 0;
844 		}
845 
846 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
847 
848 		if (!(dev->flags & IFF_POINTOPOINT)) {
849 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
850 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
851 			if ((dev->flags & IFF_BROADCAST) &&
852 			    ifa->ifa_prefixlen < 31)
853 				ifa->ifa_broadcast = ifa->ifa_address |
854 						     ~ifa->ifa_mask;
855 		} else {
856 			ifa->ifa_prefixlen = 32;
857 			ifa->ifa_mask = inet_make_mask(32);
858 		}
859 		ret = inet_set_ifa(dev, ifa);
860 		break;
861 
862 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
863 		ret = 0;
864 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
865 			inet_del_ifa(in_dev, ifap, 0);
866 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
867 			inet_insert_ifa(ifa);
868 		}
869 		break;
870 
871 	case SIOCSIFDSTADDR:	/* Set the destination address */
872 		ret = 0;
873 		if (ifa->ifa_address == sin->sin_addr.s_addr)
874 			break;
875 		ret = -EINVAL;
876 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
877 			break;
878 		ret = 0;
879 		inet_del_ifa(in_dev, ifap, 0);
880 		ifa->ifa_address = sin->sin_addr.s_addr;
881 		inet_insert_ifa(ifa);
882 		break;
883 
884 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
885 
886 		/*
887 		 *	The mask we set must be legal.
888 		 */
889 		ret = -EINVAL;
890 		if (bad_mask(sin->sin_addr.s_addr, 0))
891 			break;
892 		ret = 0;
893 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
894 			__be32 old_mask = ifa->ifa_mask;
895 			inet_del_ifa(in_dev, ifap, 0);
896 			ifa->ifa_mask = sin->sin_addr.s_addr;
897 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
898 
899 			/* See if current broadcast address matches
900 			 * with current netmask, then recalculate
901 			 * the broadcast address. Otherwise it's a
902 			 * funny address, so don't touch it since
903 			 * the user seems to know what (s)he's doing...
904 			 */
905 			if ((dev->flags & IFF_BROADCAST) &&
906 			    (ifa->ifa_prefixlen < 31) &&
907 			    (ifa->ifa_broadcast ==
908 			     (ifa->ifa_local|~old_mask))) {
909 				ifa->ifa_broadcast = (ifa->ifa_local |
910 						      ~sin->sin_addr.s_addr);
911 			}
912 			inet_insert_ifa(ifa);
913 		}
914 		break;
915 	}
916 done:
917 	rtnl_unlock();
918 out:
919 	return ret;
920 rarok:
921 	rtnl_unlock();
922 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
923 	goto out;
924 }
925 
926 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
927 {
928 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
929 	struct in_ifaddr *ifa;
930 	struct ifreq ifr;
931 	int done = 0;
932 
933 	if (!in_dev)
934 		goto out;
935 
936 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
937 		if (!buf) {
938 			done += sizeof(ifr);
939 			continue;
940 		}
941 		if (len < (int) sizeof(ifr))
942 			break;
943 		memset(&ifr, 0, sizeof(struct ifreq));
944 		if (ifa->ifa_label)
945 			strcpy(ifr.ifr_name, ifa->ifa_label);
946 		else
947 			strcpy(ifr.ifr_name, dev->name);
948 
949 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
950 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
951 								ifa->ifa_local;
952 
953 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
954 			done = -EFAULT;
955 			break;
956 		}
957 		buf  += sizeof(struct ifreq);
958 		len  -= sizeof(struct ifreq);
959 		done += sizeof(struct ifreq);
960 	}
961 out:
962 	return done;
963 }
964 
965 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
966 {
967 	__be32 addr = 0;
968 	struct in_device *in_dev;
969 	struct net *net = dev_net(dev);
970 
971 	rcu_read_lock();
972 	in_dev = __in_dev_get_rcu(dev);
973 	if (!in_dev)
974 		goto no_in_dev;
975 
976 	for_primary_ifa(in_dev) {
977 		if (ifa->ifa_scope > scope)
978 			continue;
979 		if (!dst || inet_ifa_match(dst, ifa)) {
980 			addr = ifa->ifa_local;
981 			break;
982 		}
983 		if (!addr)
984 			addr = ifa->ifa_local;
985 	} endfor_ifa(in_dev);
986 
987 	if (addr)
988 		goto out_unlock;
989 no_in_dev:
990 
991 	/* Not loopback addresses on loopback should be preferred
992 	   in this case. It is importnat that lo is the first interface
993 	   in dev_base list.
994 	 */
995 	for_each_netdev_rcu(net, dev) {
996 		in_dev = __in_dev_get_rcu(dev);
997 		if (!in_dev)
998 			continue;
999 
1000 		for_primary_ifa(in_dev) {
1001 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1002 			    ifa->ifa_scope <= scope) {
1003 				addr = ifa->ifa_local;
1004 				goto out_unlock;
1005 			}
1006 		} endfor_ifa(in_dev);
1007 	}
1008 out_unlock:
1009 	rcu_read_unlock();
1010 	return addr;
1011 }
1012 EXPORT_SYMBOL(inet_select_addr);
1013 
1014 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1015 			      __be32 local, int scope)
1016 {
1017 	int same = 0;
1018 	__be32 addr = 0;
1019 
1020 	for_ifa(in_dev) {
1021 		if (!addr &&
1022 		    (local == ifa->ifa_local || !local) &&
1023 		    ifa->ifa_scope <= scope) {
1024 			addr = ifa->ifa_local;
1025 			if (same)
1026 				break;
1027 		}
1028 		if (!same) {
1029 			same = (!local || inet_ifa_match(local, ifa)) &&
1030 				(!dst || inet_ifa_match(dst, ifa));
1031 			if (same && addr) {
1032 				if (local || !dst)
1033 					break;
1034 				/* Is the selected addr into dst subnet? */
1035 				if (inet_ifa_match(addr, ifa))
1036 					break;
1037 				/* No, then can we use new local src? */
1038 				if (ifa->ifa_scope <= scope) {
1039 					addr = ifa->ifa_local;
1040 					break;
1041 				}
1042 				/* search for large dst subnet for addr */
1043 				same = 0;
1044 			}
1045 		}
1046 	} endfor_ifa(in_dev);
1047 
1048 	return same ? addr : 0;
1049 }
1050 
1051 /*
1052  * Confirm that local IP address exists using wildcards:
1053  * - in_dev: only on this interface, 0=any interface
1054  * - dst: only in the same subnet as dst, 0=any dst
1055  * - local: address, 0=autoselect the local address
1056  * - scope: maximum allowed scope value for the local address
1057  */
1058 __be32 inet_confirm_addr(struct in_device *in_dev,
1059 			 __be32 dst, __be32 local, int scope)
1060 {
1061 	__be32 addr = 0;
1062 	struct net_device *dev;
1063 	struct net *net;
1064 
1065 	if (scope != RT_SCOPE_LINK)
1066 		return confirm_addr_indev(in_dev, dst, local, scope);
1067 
1068 	net = dev_net(in_dev->dev);
1069 	rcu_read_lock();
1070 	for_each_netdev_rcu(net, dev) {
1071 		in_dev = __in_dev_get_rcu(dev);
1072 		if (in_dev) {
1073 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1074 			if (addr)
1075 				break;
1076 		}
1077 	}
1078 	rcu_read_unlock();
1079 
1080 	return addr;
1081 }
1082 
1083 /*
1084  *	Device notifier
1085  */
1086 
1087 int register_inetaddr_notifier(struct notifier_block *nb)
1088 {
1089 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1090 }
1091 EXPORT_SYMBOL(register_inetaddr_notifier);
1092 
1093 int unregister_inetaddr_notifier(struct notifier_block *nb)
1094 {
1095 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1096 }
1097 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098 
1099 /* Rename ifa_labels for a device name change. Make some effort to preserve
1100  * existing alias numbering and to create unique labels if possible.
1101 */
1102 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103 {
1104 	struct in_ifaddr *ifa;
1105 	int named = 0;
1106 
1107 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108 		char old[IFNAMSIZ], *dot;
1109 
1110 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112 		if (named++ == 0)
1113 			goto skip;
1114 		dot = strchr(old, ':');
1115 		if (dot == NULL) {
1116 			sprintf(old, ":%d", named);
1117 			dot = old;
1118 		}
1119 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120 			strcat(ifa->ifa_label, dot);
1121 		else
1122 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123 skip:
1124 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125 	}
1126 }
1127 
/* True unless @mtu is below 68, the smallest MTU this file keeps IP enabled on. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1132 
1133 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134 					struct in_device *in_dev)
1135 
1136 {
1137 	struct in_ifaddr *ifa;
1138 
1139 	for (ifa = in_dev->ifa_list; ifa;
1140 	     ifa = ifa->ifa_next) {
1141 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142 			 ifa->ifa_local, dev,
1143 			 ifa->ifa_local, NULL,
1144 			 dev->dev_addr, NULL);
1145 	}
1146 }
1147 
1148 /* Called only under RTNL semaphore */
1149 
1150 static int inetdev_event(struct notifier_block *this, unsigned long event,
1151 			 void *ptr)
1152 {
1153 	struct net_device *dev = ptr;
1154 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1155 
1156 	ASSERT_RTNL();
1157 
1158 	if (!in_dev) {
1159 		if (event == NETDEV_REGISTER) {
1160 			in_dev = inetdev_init(dev);
1161 			if (!in_dev)
1162 				return notifier_from_errno(-ENOMEM);
1163 			if (dev->flags & IFF_LOOPBACK) {
1164 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1166 			}
1167 		} else if (event == NETDEV_CHANGEMTU) {
1168 			/* Re-enabling IP */
1169 			if (inetdev_valid_mtu(dev->mtu))
1170 				in_dev = inetdev_init(dev);
1171 		}
1172 		goto out;
1173 	}
1174 
1175 	switch (event) {
1176 	case NETDEV_REGISTER:
1177 		printk(KERN_DEBUG "inetdev_event: bug\n");
1178 		rcu_assign_pointer(dev->ip_ptr, NULL);
1179 		break;
1180 	case NETDEV_UP:
1181 		if (!inetdev_valid_mtu(dev->mtu))
1182 			break;
1183 		if (dev->flags & IFF_LOOPBACK) {
1184 			struct in_ifaddr *ifa = inet_alloc_ifa();
1185 
1186 			if (ifa) {
1187 				INIT_HLIST_NODE(&ifa->hash);
1188 				ifa->ifa_local =
1189 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190 				ifa->ifa_prefixlen = 8;
1191 				ifa->ifa_mask = inet_make_mask(8);
1192 				in_dev_hold(in_dev);
1193 				ifa->ifa_dev = in_dev;
1194 				ifa->ifa_scope = RT_SCOPE_HOST;
1195 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196 				inet_insert_ifa(ifa);
1197 			}
1198 		}
1199 		ip_mc_up(in_dev);
1200 		/* fall through */
1201 	case NETDEV_CHANGEADDR:
1202 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1203 			break;
1204 		/* fall through */
1205 	case NETDEV_NOTIFY_PEERS:
1206 		/* Send gratuitous ARP to notify of link change */
1207 		inetdev_send_gratuitous_arp(dev, in_dev);
1208 		break;
1209 	case NETDEV_DOWN:
1210 		ip_mc_down(in_dev);
1211 		break;
1212 	case NETDEV_PRE_TYPE_CHANGE:
1213 		ip_mc_unmap(in_dev);
1214 		break;
1215 	case NETDEV_POST_TYPE_CHANGE:
1216 		ip_mc_remap(in_dev);
1217 		break;
1218 	case NETDEV_CHANGEMTU:
1219 		if (inetdev_valid_mtu(dev->mtu))
1220 			break;
1221 		/* disable IP when MTU is not enough */
1222 	case NETDEV_UNREGISTER:
1223 		inetdev_destroy(in_dev);
1224 		break;
1225 	case NETDEV_CHANGENAME:
1226 		/* Do not notify about label change, this event is
1227 		 * not interesting to applications using netlink.
1228 		 */
1229 		inetdev_changename(dev, in_dev);
1230 
1231 		devinet_sysctl_unregister(in_dev);
1232 		devinet_sysctl_register(in_dev);
1233 		break;
1234 	}
1235 out:
1236 	return NOTIFY_DONE;
1237 }
1238 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1242 
1243 static inline size_t inet_nlmsg_size(void)
1244 {
1245 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246 	       + nla_total_size(4) /* IFA_ADDRESS */
1247 	       + nla_total_size(4) /* IFA_LOCAL */
1248 	       + nla_total_size(4) /* IFA_BROADCAST */
1249 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250 }
1251 
1252 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253 			    u32 pid, u32 seq, int event, unsigned int flags)
1254 {
1255 	struct ifaddrmsg *ifm;
1256 	struct nlmsghdr  *nlh;
1257 
1258 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259 	if (nlh == NULL)
1260 		return -EMSGSIZE;
1261 
1262 	ifm = nlmsg_data(nlh);
1263 	ifm->ifa_family = AF_INET;
1264 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266 	ifm->ifa_scope = ifa->ifa_scope;
1267 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268 
1269 	if (ifa->ifa_address)
1270 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1271 
1272 	if (ifa->ifa_local)
1273 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1274 
1275 	if (ifa->ifa_broadcast)
1276 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1277 
1278 	if (ifa->ifa_label[0])
1279 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1280 
1281 	return nlmsg_end(skb, nlh);
1282 
1283 nla_put_failure:
1284 	nlmsg_cancel(skb, nlh);
1285 	return -EMSGSIZE;
1286 }
1287 
/*
 * Netlink dump callback (registered for RTM_GETADDR in devinet_init()):
 * emit one RTM_NEWADDR message, flagged NLM_F_MULTI, for every IPv4 address
 * of every device in the namespace of the requesting socket.
 *
 * The position to resume from is kept in cb->args[]:
 *   [0] hash bucket in net->dev_index_head,
 *   [1] position of the device inside that bucket,
 *   [2] position of the address inside that device's ifa_list.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Load the cursor saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; later ones start at 0. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices that lie before the saved device position. */
			if (idx < s_idx)
				goto cont;
			/* Past the saved device: start at its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/*
				 * Fill failed: stop here so that h/idx/ip_idx
				 * still name this address when they are saved.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1342 
1343 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344 		      u32 pid)
1345 {
1346 	struct sk_buff *skb;
1347 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348 	int err = -ENOBUFS;
1349 	struct net *net;
1350 
1351 	net = dev_net(ifa->ifa_dev->dev);
1352 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353 	if (skb == NULL)
1354 		goto errout;
1355 
1356 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357 	if (err < 0) {
1358 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359 		WARN_ON(err == -EMSGSIZE);
1360 		kfree_skb(skb);
1361 		goto errout;
1362 	}
1363 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364 	return;
1365 errout:
1366 	if (err < 0)
1367 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1368 }
1369 
1370 static size_t inet_get_link_af_size(const struct net_device *dev)
1371 {
1372 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373 
1374 	if (!in_dev)
1375 		return 0;
1376 
1377 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1378 }
1379 
1380 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381 {
1382 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383 	struct nlattr *nla;
1384 	int i;
1385 
1386 	if (!in_dev)
1387 		return -ENODATA;
1388 
1389 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390 	if (nla == NULL)
1391 		return -EMSGSIZE;
1392 
1393 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1395 
1396 	return 0;
1397 }
1398 
/*
 * Parse policy used by inet_validate_link_af(): IFLA_INET_CONF must be a
 * nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1402 
1403 static int inet_validate_link_af(const struct net_device *dev,
1404 				 const struct nlattr *nla)
1405 {
1406 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407 	int err, rem;
1408 
1409 	if (dev && !__in_dev_get_rtnl(dev))
1410 		return -EAFNOSUPPORT;
1411 
1412 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413 	if (err < 0)
1414 		return err;
1415 
1416 	if (tb[IFLA_INET_CONF]) {
1417 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418 			int cfgid = nla_type(a);
1419 
1420 			if (nla_len(a) < 4)
1421 				return -EINVAL;
1422 
1423 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424 				return -EINVAL;
1425 		}
1426 	}
1427 
1428 	return 0;
1429 }
1430 
1431 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1432 {
1433 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435 	int rem;
1436 
1437 	if (!in_dev)
1438 		return -EAFNOSUPPORT;
1439 
1440 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441 		BUG();
1442 
1443 	if (tb[IFLA_INET_CONF]) {
1444 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1446 	}
1447 
1448 	return 0;
1449 }
1450 
1451 #ifdef CONFIG_SYSCTL
1452 
1453 static void devinet_copy_dflt_conf(struct net *net, int i)
1454 {
1455 	struct net_device *dev;
1456 
1457 	rcu_read_lock();
1458 	for_each_netdev_rcu(net, dev) {
1459 		struct in_device *in_dev;
1460 
1461 		in_dev = __in_dev_get_rcu(dev);
1462 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1463 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1464 	}
1465 	rcu_read_unlock();
1466 }
1467 
1468 /* called with RTNL locked */
1469 static void inet_forward_change(struct net *net)
1470 {
1471 	struct net_device *dev;
1472 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1473 
1474 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1476 
1477 	for_each_netdev(net, dev) {
1478 		struct in_device *in_dev;
1479 		if (on)
1480 			dev_disable_lro(dev);
1481 		rcu_read_lock();
1482 		in_dev = __in_dev_get_rcu(dev);
1483 		if (in_dev)
1484 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485 		rcu_read_unlock();
1486 	}
1487 }
1488 
1489 static int devinet_conf_proc(ctl_table *ctl, int write,
1490 			     void __user *buffer,
1491 			     size_t *lenp, loff_t *ppos)
1492 {
1493 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1494 
1495 	if (write) {
1496 		struct ipv4_devconf *cnf = ctl->extra1;
1497 		struct net *net = ctl->extra2;
1498 		int i = (int *)ctl->data - cnf->data;
1499 
1500 		set_bit(i, cnf->state);
1501 
1502 		if (cnf == net->ipv4.devconf_dflt)
1503 			devinet_copy_dflt_conf(net, i);
1504 	}
1505 
1506 	return ret;
1507 }
1508 
1509 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510 				  void __user *buffer,
1511 				  size_t *lenp, loff_t *ppos)
1512 {
1513 	int *valp = ctl->data;
1514 	int val = *valp;
1515 	loff_t pos = *ppos;
1516 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517 
1518 	if (write && *valp != val) {
1519 		struct net *net = ctl->extra2;
1520 
1521 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522 			if (!rtnl_trylock()) {
1523 				/* Restore the original values before restarting */
1524 				*valp = val;
1525 				*ppos = pos;
1526 				return restart_syscall();
1527 			}
1528 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 				inet_forward_change(net);
1530 			} else if (*valp) {
1531 				struct ipv4_devconf *cnf = ctl->extra1;
1532 				struct in_device *idev =
1533 					container_of(cnf, struct in_device, cnf);
1534 				dev_disable_lro(idev->dev);
1535 			}
1536 			rtnl_unlock();
1537 			rt_cache_flush(net, 0);
1538 		}
1539 	}
1540 
1541 	return ret;
1542 }
1543 
1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545 				void __user *buffer,
1546 				size_t *lenp, loff_t *ppos)
1547 {
1548 	int *valp = ctl->data;
1549 	int val = *valp;
1550 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551 	struct net *net = ctl->extra2;
1552 
1553 	if (write && *valp != val)
1554 		rt_cache_flush(net, 0);
1555 
1556 	return ret;
1557 }
1558 
/*
 * Initializer helpers for the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) expands to one ctl_table
 * initializer: procname @name, mode @mval, handler @proc, an int-sized
 * .data pointing at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf, and .extra1 pointing at ipv4_devconf itself.
 *
 * The wrappers only fix some of the arguments:
 *   RW_ENTRY       - mode 0644, handler devinet_conf_proc
 *   RO_ENTRY       - mode 0444, handler devinet_conf_proc
 *   COMPLEX_ENTRY  - mode 0644, caller-supplied handler
 *   FLUSHING_ENTRY - COMPLEX_ENTRY with handler ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581 
/*
 * Template for the per-configuration sysctl tables.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * rebases .data / sets .extra1 and .extra2 of the entries for the target
 * ipv4_devconf; its loop leaves the last devinet_vars[] slot untouched.
 * sysctl_header and dev_name are filled in on the copy at registration
 * time.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler; mc_forwarding is read-only. */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* Plain read-write integers handled by devinet_conf_proc. */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* Entries whose change also flushes the route cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1621 
1622 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1623 					struct ipv4_devconf *p)
1624 {
1625 	int i;
1626 	struct devinet_sysctl_table *t;
1627 
1628 #define DEVINET_CTL_PATH_DEV	3
1629 
1630 	struct ctl_path devinet_ctl_path[] = {
1631 		{ .procname = "net",  },
1632 		{ .procname = "ipv4", },
1633 		{ .procname = "conf", },
1634 		{ /* to be set */ },
1635 		{ },
1636 	};
1637 
1638 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1639 	if (!t)
1640 		goto out;
1641 
1642 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1643 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1644 		t->devinet_vars[i].extra1 = p;
1645 		t->devinet_vars[i].extra2 = net;
1646 	}
1647 
1648 	/*
1649 	 * Make a copy of dev_name, because '.procname' is regarded as const
1650 	 * by sysctl and we wouldn't want anyone to change it under our feet
1651 	 * (see SIOCSIFNAME).
1652 	 */
1653 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1654 	if (!t->dev_name)
1655 		goto free;
1656 
1657 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1658 
1659 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1660 			t->devinet_vars);
1661 	if (!t->sysctl_header)
1662 		goto free_procname;
1663 
1664 	p->sysctl = t;
1665 	return 0;
1666 
1667 free_procname:
1668 	kfree(t->dev_name);
1669 free:
1670 	kfree(t);
1671 out:
1672 	return -ENOBUFS;
1673 }
1674 
1675 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1676 {
1677 	struct devinet_sysctl_table *t = cnf->sysctl;
1678 
1679 	if (t == NULL)
1680 		return;
1681 
1682 	cnf->sysctl = NULL;
1683 	unregister_net_sysctl_table(t->sysctl_header);
1684 	kfree(t->dev_name);
1685 	kfree(t);
1686 }
1687 
1688 static void devinet_sysctl_register(struct in_device *idev)
1689 {
1690 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1691 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1692 					&idev->cnf);
1693 }
1694 
1695 static void devinet_sysctl_unregister(struct in_device *idev)
1696 {
1697 	__devinet_sysctl_unregister(&idev->cnf);
1698 	neigh_sysctl_unregister(idev->arp_parms);
1699 }
1700 
/*
 * "ip_forward" sysctl entry as used for init_net: it points at the
 * FORWARDING slot of the global ipv4_devconf and is handled by
 * devinet_sysctl_forward().  devinet_init_net() duplicates this table for
 * other namespaces and redirects .data/.extra1/.extra2 on the copy.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1714 
/* sysctl path "net/ipv4" under which devinet_init_net() registers ip_forward. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1720 #endif
1721 
/*
 * Per-namespace initialisation: set up net->ipv4.devconf_all and
 * net->ipv4.devconf_dflt and, with CONFIG_SYSCTL, register the "all" and
 * "default" configuration directories plus the ip_forward entry.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects directly; every other namespace gets
 * kmemdup() copies of them.
 *
 * Returns 0 on success or a negative errno, with everything allocated or
 * registered so far undone.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Private copies, seeded from the current global values. */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace's data. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Unwind in reverse order of setup.  The pointer comparisons keep the
	 * static objects used by init_net from being passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1793 
1794 static __net_exit void devinet_exit_net(struct net *net)
1795 {
1796 #ifdef CONFIG_SYSCTL
1797 	struct ctl_table *tbl;
1798 
1799 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1800 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1801 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1802 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1803 	kfree(tbl);
1804 #endif
1805 	kfree(net->ipv4.devconf_dflt);
1806 	kfree(net->ipv4.devconf_all);
1807 }
1808 
/* Per-namespace hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1813 
/*
 * AF_INET link attribute operations, registered from devinet_init() via
 * rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1821 
1822 void __init devinet_init(void)
1823 {
1824 	int i;
1825 
1826 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1827 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1828 
1829 	register_pernet_subsys(&devinet_ops);
1830 
1831 	register_gifconf(PF_INET, inet_gifconf);
1832 	register_netdevice_notifier(&ip_netdev_notifier);
1833 
1834 	rtnl_af_register(&inet_af_ops);
1835 
1836 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1837 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1838 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1839 }
1840 
1841