xref: /openbmc/linux/net/ipv4/devinet.c (revision 9cdb81c7)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 #include "fib_lookup.h"
68 
/* Initial values for one set of IPv4 per-device configuration knobs.
 * Slots in .data are indexed by IPV4_DEVCONF_* - 1; only the four listed
 * here start enabled, every other slot is zero-initialized.
 * NOTE(review): presumably this is the "all devices" set, as opposed to
 * ipv4_devconf_dflt below -- confirm against its users.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
77 
/* Initial values for the "default" IPv4 device configuration.
 * Same as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE enabled.
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt starts
 * from, which inetdev_init() copies into each new in_device -- confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
87 
/* Read knob @attr from the default device configuration of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90 
/* Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr(): the three address attributes are 32-bit values and
 * the label is a string of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
97 
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value.  So if you change this define, make appropriate changes to
 * inet_addr_hash as well.
 */
#define IN4_ADDR_HSIZE	256
/* Hash table of configured local addresses; the bucket index comes from
 * inet_addr_hash().  Insert/remove take inet_addr_hash_lock, lookups in
 * __ip_dev_find() walk a bucket under rcu_read_lock().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109 
110 	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111 		(IN4_ADDR_HSIZE - 1));
112 }
113 
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116 	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117 
118 	spin_lock(&inet_addr_hash_lock);
119 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 	spin_unlock(&inet_addr_hash_lock);
121 }
122 
/* Unlink @ifa from the address hash under inet_addr_hash_lock.
 * Uses hlist_del_init_rcu(), the RCU-aware counterpart of the
 * hlist_add_head_rcu() call in inet_hash_insert().
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	unsigned int hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 	struct hlist_node *node;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147 		struct net_device *dev = ifa->ifa_dev->dev;
148 
149 		if (!net_eq(dev_net(dev), net))
150 			continue;
151 		if (ifa->ifa_local == addr) {
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176 
/* Forward declaration: emits an address netlink message for @event. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when addresses are inserted or deleted in this file.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
/* Without CONFIG_SYSCTL the register/unregister hooks are empty stubs. */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 	if (ifa->ifa_dev)
205 		in_dev_put(ifa->ifa_dev);
206 	kfree(ifa);
207 }
208 
/* Free @ifa via call_rcu(): the actual release is done later by
 * inet_rcu_free_ifa(), not in this call.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213 
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216 	struct net_device *dev = idev->dev;
217 
218 	WARN_ON(idev->ifa_list);
219 	WARN_ON(idev->mc_list);
220 #ifdef NET_REFCNT_DEBUG
221 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222 	       idev, dev ? dev->name : "NIL");
223 #endif
224 	dev_put(dev);
225 	if (!idev->dead)
226 		pr_err("Freeing alive in_device %p\n", idev);
227 	else
228 		kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231 
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234 	struct in_device *in_dev;
235 
236 	ASSERT_RTNL();
237 
238 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 	if (!in_dev)
240 		goto out;
241 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 			sizeof(in_dev->cnf));
243 	in_dev->cnf.sysctl = NULL;
244 	in_dev->dev = dev;
245 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 	if (!in_dev->arp_parms)
247 		goto out_kfree;
248 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 		dev_disable_lro(dev);
250 	/* Reference in_dev->dev */
251 	dev_hold(dev);
252 	/* Account for reference dev->ip_ptr (below) */
253 	in_dev_hold(in_dev);
254 
255 	devinet_sysctl_register(in_dev);
256 	ip_mc_init_dev(in_dev);
257 	if (dev->flags & IFF_UP)
258 		ip_mc_up(in_dev);
259 
260 	/* we can receive as soon as ip_ptr is set -- do this last */
261 	rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263 	return in_dev;
264 out_kfree:
265 	kfree(in_dev);
266 	in_dev = NULL;
267 	goto out;
268 }
269 
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
273 	in_dev_put(idev);
274 }
275 
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278 	struct in_ifaddr *ifa;
279 	struct net_device *dev;
280 
281 	ASSERT_RTNL();
282 
283 	dev = in_dev->dev;
284 
285 	in_dev->dead = 1;
286 
287 	ip_mc_destroy_dev(in_dev);
288 
289 	while ((ifa = in_dev->ifa_list) != NULL) {
290 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 		inet_free_ifa(ifa);
292 	}
293 
294 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
295 
296 	devinet_sysctl_unregister(in_dev);
297 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 	arp_ifdown(dev);
299 
300 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302 
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305 	rcu_read_lock();
306 	for_primary_ifa(in_dev) {
307 		if (inet_ifa_match(a, ifa)) {
308 			if (!b || inet_ifa_match(b, ifa)) {
309 				rcu_read_unlock();
310 				return 1;
311 			}
312 		}
313 	} endfor_ifa(in_dev);
314 	rcu_read_unlock();
315 	return 0;
316 }
317 
/* Remove the address referenced by *@ifap from @in_dev (RTNL held).
 *
 * @in_dev:  device state owning the address list
 * @ifap:    the list link that currently points at the address to delete
 * @destroy: when non-zero the deleted address is released via inet_free_ifa()
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * If the address is a primary, its secondaries in the same subnet are either
 * deleted too or, when IN_DEV_PROMOTE_SECONDARIES() is set, the first of
 * them is promoted to primary and the remaining ones get their routes
 * re-added.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary after ifa1 whose scope is
			 * not smaller; a promoted address is linked after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet, remembering the predecessor of a possible
			 * promotion candidate.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Saved before relinking, so the loop below still visits the
		 * addresses that followed the promoted one.
		 */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Relink only when something sat between ifa1 and promote. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
417 
/* Delete the address at *@ifap without a netlink request context:
 * __inet_del_ifa() is called with a NULL nlmsghdr and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
423 
424 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
425 			     u32 pid)
426 {
427 	struct in_device *in_dev = ifa->ifa_dev;
428 	struct in_ifaddr *ifa1, **ifap, **last_primary;
429 
430 	ASSERT_RTNL();
431 
432 	if (!ifa->ifa_local) {
433 		inet_free_ifa(ifa);
434 		return 0;
435 	}
436 
437 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
438 	last_primary = &in_dev->ifa_list;
439 
440 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
441 	     ifap = &ifa1->ifa_next) {
442 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
443 		    ifa->ifa_scope <= ifa1->ifa_scope)
444 			last_primary = &ifa1->ifa_next;
445 		if (ifa1->ifa_mask == ifa->ifa_mask &&
446 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
447 			if (ifa1->ifa_local == ifa->ifa_local) {
448 				inet_free_ifa(ifa);
449 				return -EEXIST;
450 			}
451 			if (ifa1->ifa_scope != ifa->ifa_scope) {
452 				inet_free_ifa(ifa);
453 				return -EINVAL;
454 			}
455 			ifa->ifa_flags |= IFA_F_SECONDARY;
456 		}
457 	}
458 
459 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
460 		net_srandom(ifa->ifa_local);
461 		ifap = last_primary;
462 	}
463 
464 	ifa->ifa_next = *ifap;
465 	*ifap = ifa;
466 
467 	inet_hash_insert(dev_net(in_dev->dev), ifa);
468 
469 	/* Send message first, then call notifier.
470 	   Notifier will trigger FIB update, so that
471 	   listeners of netlink will know about new ifaddr */
472 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
473 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
474 
475 	return 0;
476 }
477 
/* Insert @ifa without a netlink request context: __inet_insert_ifa() is
 * called with a NULL nlmsghdr and pid 0.  Returns its result.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
482 
483 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484 {
485 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
486 
487 	ASSERT_RTNL();
488 
489 	if (!in_dev) {
490 		inet_free_ifa(ifa);
491 		return -ENOBUFS;
492 	}
493 	ipv4_devconf_setall(in_dev);
494 	if (ifa->ifa_dev != in_dev) {
495 		WARN_ON(ifa->ifa_dev);
496 		in_dev_hold(in_dev);
497 		ifa->ifa_dev = in_dev;
498 	}
499 	if (ipv4_is_loopback(ifa->ifa_local))
500 		ifa->ifa_scope = RT_SCOPE_HOST;
501 	return inet_insert_ifa(ifa);
502 }
503 
504 /* Caller must hold RCU or RTNL :
505  * We dont take a reference on found in_device
506  */
507 struct in_device *inetdev_by_index(struct net *net, int ifindex)
508 {
509 	struct net_device *dev;
510 	struct in_device *in_dev = NULL;
511 
512 	rcu_read_lock();
513 	dev = dev_get_by_index_rcu(net, ifindex);
514 	if (dev)
515 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
516 	rcu_read_unlock();
517 	return in_dev;
518 }
519 EXPORT_SYMBOL(inetdev_by_index);
520 
521 /* Called only from RTNL semaphored context. No locks. */
522 
523 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
524 				    __be32 mask)
525 {
526 	ASSERT_RTNL();
527 
528 	for_primary_ifa(in_dev) {
529 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
530 			return ifa;
531 	} endfor_ifa(in_dev);
532 	return NULL;
533 }
534 
535 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536 {
537 	struct net *net = sock_net(skb->sk);
538 	struct nlattr *tb[IFA_MAX+1];
539 	struct in_device *in_dev;
540 	struct ifaddrmsg *ifm;
541 	struct in_ifaddr *ifa, **ifap;
542 	int err = -EINVAL;
543 
544 	ASSERT_RTNL();
545 
546 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
547 	if (err < 0)
548 		goto errout;
549 
550 	ifm = nlmsg_data(nlh);
551 	in_dev = inetdev_by_index(net, ifm->ifa_index);
552 	if (in_dev == NULL) {
553 		err = -ENODEV;
554 		goto errout;
555 	}
556 
557 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
558 	     ifap = &ifa->ifa_next) {
559 		if (tb[IFA_LOCAL] &&
560 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
561 			continue;
562 
563 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
564 			continue;
565 
566 		if (tb[IFA_ADDRESS] &&
567 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
568 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
569 			continue;
570 
571 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
572 		return 0;
573 	}
574 
575 	err = -EADDRNOTAVAIL;
576 errout:
577 	return err;
578 }
579 
580 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581 {
582 	struct nlattr *tb[IFA_MAX+1];
583 	struct in_ifaddr *ifa;
584 	struct ifaddrmsg *ifm;
585 	struct net_device *dev;
586 	struct in_device *in_dev;
587 	int err;
588 
589 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
590 	if (err < 0)
591 		goto errout;
592 
593 	ifm = nlmsg_data(nlh);
594 	err = -EINVAL;
595 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
596 		goto errout;
597 
598 	dev = __dev_get_by_index(net, ifm->ifa_index);
599 	err = -ENODEV;
600 	if (dev == NULL)
601 		goto errout;
602 
603 	in_dev = __in_dev_get_rtnl(dev);
604 	err = -ENOBUFS;
605 	if (in_dev == NULL)
606 		goto errout;
607 
608 	ifa = inet_alloc_ifa();
609 	if (ifa == NULL)
610 		/*
611 		 * A potential indev allocation can be left alive, it stays
612 		 * assigned to its device and is destroy with it.
613 		 */
614 		goto errout;
615 
616 	ipv4_devconf_setall(in_dev);
617 	in_dev_hold(in_dev);
618 
619 	if (tb[IFA_ADDRESS] == NULL)
620 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621 
622 	INIT_HLIST_NODE(&ifa->hash);
623 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
624 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
625 	ifa->ifa_flags = ifm->ifa_flags;
626 	ifa->ifa_scope = ifm->ifa_scope;
627 	ifa->ifa_dev = in_dev;
628 
629 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
630 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631 
632 	if (tb[IFA_BROADCAST])
633 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634 
635 	if (tb[IFA_LABEL])
636 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
637 	else
638 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639 
640 	return ifa;
641 
642 errout:
643 	return ERR_PTR(err);
644 }
645 
646 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647 {
648 	struct net *net = sock_net(skb->sk);
649 	struct in_ifaddr *ifa;
650 
651 	ASSERT_RTNL();
652 
653 	ifa = rtm_to_ifaddr(net, nlh);
654 	if (IS_ERR(ifa))
655 		return PTR_ERR(ifa);
656 
657 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
658 }
659 
660 /*
661  *	Determine a default network mask, based on the IP address.
662  */
663 
664 static inline int inet_abc_len(__be32 addr)
665 {
666 	int rc = -1;	/* Something else, probably a multicast. */
667 
668 	if (ipv4_is_zeronet(addr))
669 		rc = 0;
670 	else {
671 		__u32 haddr = ntohl(addr);
672 
673 		if (IN_CLASSA(haddr))
674 			rc = 8;
675 		else if (IN_CLASSB(haddr))
676 			rc = 16;
677 		else if (IN_CLASSC(haddr))
678 			rc = 24;
679 	}
680 
681 	return rc;
682 }
683 
684 
685 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
686 {
687 	struct ifreq ifr;
688 	struct sockaddr_in sin_orig;
689 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
690 	struct in_device *in_dev;
691 	struct in_ifaddr **ifap = NULL;
692 	struct in_ifaddr *ifa = NULL;
693 	struct net_device *dev;
694 	char *colon;
695 	int ret = -EFAULT;
696 	int tryaddrmatch = 0;
697 
698 	/*
699 	 *	Fetch the caller's info block into kernel space
700 	 */
701 
702 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
703 		goto out;
704 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
705 
706 	/* save original address for comparison */
707 	memcpy(&sin_orig, sin, sizeof(*sin));
708 
709 	colon = strchr(ifr.ifr_name, ':');
710 	if (colon)
711 		*colon = 0;
712 
713 	dev_load(net, ifr.ifr_name);
714 
715 	switch (cmd) {
716 	case SIOCGIFADDR:	/* Get interface address */
717 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
718 	case SIOCGIFDSTADDR:	/* Get the destination address */
719 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
720 		/* Note that these ioctls will not sleep,
721 		   so that we do not impose a lock.
722 		   One day we will be forced to put shlock here (I mean SMP)
723 		 */
724 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
725 		memset(sin, 0, sizeof(*sin));
726 		sin->sin_family = AF_INET;
727 		break;
728 
729 	case SIOCSIFFLAGS:
730 		ret = -EACCES;
731 		if (!capable(CAP_NET_ADMIN))
732 			goto out;
733 		break;
734 	case SIOCSIFADDR:	/* Set interface address (and family) */
735 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
736 	case SIOCSIFDSTADDR:	/* Set the destination address */
737 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
738 		ret = -EACCES;
739 		if (!capable(CAP_NET_ADMIN))
740 			goto out;
741 		ret = -EINVAL;
742 		if (sin->sin_family != AF_INET)
743 			goto out;
744 		break;
745 	default:
746 		ret = -EINVAL;
747 		goto out;
748 	}
749 
750 	rtnl_lock();
751 
752 	ret = -ENODEV;
753 	dev = __dev_get_by_name(net, ifr.ifr_name);
754 	if (!dev)
755 		goto done;
756 
757 	if (colon)
758 		*colon = ':';
759 
760 	in_dev = __in_dev_get_rtnl(dev);
761 	if (in_dev) {
762 		if (tryaddrmatch) {
763 			/* Matthias Andree */
764 			/* compare label and address (4.4BSD style) */
765 			/* note: we only do this for a limited set of ioctls
766 			   and only if the original address family was AF_INET.
767 			   This is checked above. */
768 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
769 			     ifap = &ifa->ifa_next) {
770 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
771 				    sin_orig.sin_addr.s_addr ==
772 							ifa->ifa_local) {
773 					break; /* found */
774 				}
775 			}
776 		}
777 		/* we didn't get a match, maybe the application is
778 		   4.3BSD-style and passed in junk so we fall back to
779 		   comparing just the label */
780 		if (!ifa) {
781 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
782 			     ifap = &ifa->ifa_next)
783 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
784 					break;
785 		}
786 	}
787 
788 	ret = -EADDRNOTAVAIL;
789 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
790 		goto done;
791 
792 	switch (cmd) {
793 	case SIOCGIFADDR:	/* Get interface address */
794 		sin->sin_addr.s_addr = ifa->ifa_local;
795 		goto rarok;
796 
797 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
798 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
799 		goto rarok;
800 
801 	case SIOCGIFDSTADDR:	/* Get the destination address */
802 		sin->sin_addr.s_addr = ifa->ifa_address;
803 		goto rarok;
804 
805 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
806 		sin->sin_addr.s_addr = ifa->ifa_mask;
807 		goto rarok;
808 
809 	case SIOCSIFFLAGS:
810 		if (colon) {
811 			ret = -EADDRNOTAVAIL;
812 			if (!ifa)
813 				break;
814 			ret = 0;
815 			if (!(ifr.ifr_flags & IFF_UP))
816 				inet_del_ifa(in_dev, ifap, 1);
817 			break;
818 		}
819 		ret = dev_change_flags(dev, ifr.ifr_flags);
820 		break;
821 
822 	case SIOCSIFADDR:	/* Set interface address (and family) */
823 		ret = -EINVAL;
824 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
825 			break;
826 
827 		if (!ifa) {
828 			ret = -ENOBUFS;
829 			ifa = inet_alloc_ifa();
830 			INIT_HLIST_NODE(&ifa->hash);
831 			if (!ifa)
832 				break;
833 			if (colon)
834 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
835 			else
836 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837 		} else {
838 			ret = 0;
839 			if (ifa->ifa_local == sin->sin_addr.s_addr)
840 				break;
841 			inet_del_ifa(in_dev, ifap, 0);
842 			ifa->ifa_broadcast = 0;
843 			ifa->ifa_scope = 0;
844 		}
845 
846 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
847 
848 		if (!(dev->flags & IFF_POINTOPOINT)) {
849 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
850 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
851 			if ((dev->flags & IFF_BROADCAST) &&
852 			    ifa->ifa_prefixlen < 31)
853 				ifa->ifa_broadcast = ifa->ifa_address |
854 						     ~ifa->ifa_mask;
855 		} else {
856 			ifa->ifa_prefixlen = 32;
857 			ifa->ifa_mask = inet_make_mask(32);
858 		}
859 		ret = inet_set_ifa(dev, ifa);
860 		break;
861 
862 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
863 		ret = 0;
864 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
865 			inet_del_ifa(in_dev, ifap, 0);
866 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
867 			inet_insert_ifa(ifa);
868 		}
869 		break;
870 
871 	case SIOCSIFDSTADDR:	/* Set the destination address */
872 		ret = 0;
873 		if (ifa->ifa_address == sin->sin_addr.s_addr)
874 			break;
875 		ret = -EINVAL;
876 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
877 			break;
878 		ret = 0;
879 		inet_del_ifa(in_dev, ifap, 0);
880 		ifa->ifa_address = sin->sin_addr.s_addr;
881 		inet_insert_ifa(ifa);
882 		break;
883 
884 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
885 
886 		/*
887 		 *	The mask we set must be legal.
888 		 */
889 		ret = -EINVAL;
890 		if (bad_mask(sin->sin_addr.s_addr, 0))
891 			break;
892 		ret = 0;
893 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
894 			__be32 old_mask = ifa->ifa_mask;
895 			inet_del_ifa(in_dev, ifap, 0);
896 			ifa->ifa_mask = sin->sin_addr.s_addr;
897 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
898 
899 			/* See if current broadcast address matches
900 			 * with current netmask, then recalculate
901 			 * the broadcast address. Otherwise it's a
902 			 * funny address, so don't touch it since
903 			 * the user seems to know what (s)he's doing...
904 			 */
905 			if ((dev->flags & IFF_BROADCAST) &&
906 			    (ifa->ifa_prefixlen < 31) &&
907 			    (ifa->ifa_broadcast ==
908 			     (ifa->ifa_local|~old_mask))) {
909 				ifa->ifa_broadcast = (ifa->ifa_local |
910 						      ~sin->sin_addr.s_addr);
911 			}
912 			inet_insert_ifa(ifa);
913 		}
914 		break;
915 	}
916 done:
917 	rtnl_unlock();
918 out:
919 	return ret;
920 rarok:
921 	rtnl_unlock();
922 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
923 	goto out;
924 }
925 
926 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
927 {
928 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
929 	struct in_ifaddr *ifa;
930 	struct ifreq ifr;
931 	int done = 0;
932 
933 	if (!in_dev)
934 		goto out;
935 
936 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
937 		if (!buf) {
938 			done += sizeof(ifr);
939 			continue;
940 		}
941 		if (len < (int) sizeof(ifr))
942 			break;
943 		memset(&ifr, 0, sizeof(struct ifreq));
944 		if (ifa->ifa_label)
945 			strcpy(ifr.ifr_name, ifa->ifa_label);
946 		else
947 			strcpy(ifr.ifr_name, dev->name);
948 
949 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
950 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
951 								ifa->ifa_local;
952 
953 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
954 			done = -EFAULT;
955 			break;
956 		}
957 		buf  += sizeof(struct ifreq);
958 		len  -= sizeof(struct ifreq);
959 		done += sizeof(struct ifreq);
960 	}
961 out:
962 	return done;
963 }
964 
965 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
966 {
967 	__be32 addr = 0;
968 	struct in_device *in_dev;
969 	struct net *net = dev_net(dev);
970 
971 	rcu_read_lock();
972 	in_dev = __in_dev_get_rcu(dev);
973 	if (!in_dev)
974 		goto no_in_dev;
975 
976 	for_primary_ifa(in_dev) {
977 		if (ifa->ifa_scope > scope)
978 			continue;
979 		if (!dst || inet_ifa_match(dst, ifa)) {
980 			addr = ifa->ifa_local;
981 			break;
982 		}
983 		if (!addr)
984 			addr = ifa->ifa_local;
985 	} endfor_ifa(in_dev);
986 
987 	if (addr)
988 		goto out_unlock;
989 no_in_dev:
990 
991 	/* Not loopback addresses on loopback should be preferred
992 	   in this case. It is importnat that lo is the first interface
993 	   in dev_base list.
994 	 */
995 	for_each_netdev_rcu(net, dev) {
996 		in_dev = __in_dev_get_rcu(dev);
997 		if (!in_dev)
998 			continue;
999 
1000 		for_primary_ifa(in_dev) {
1001 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1002 			    ifa->ifa_scope <= scope) {
1003 				addr = ifa->ifa_local;
1004 				goto out_unlock;
1005 			}
1006 		} endfor_ifa(in_dev);
1007 	}
1008 out_unlock:
1009 	rcu_read_unlock();
1010 	return addr;
1011 }
1012 EXPORT_SYMBOL(inet_select_addr);
1013 
/* Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev keeping two pieces of state:
 *   addr - candidate result: the first address that equals @local (or any
 *          address when @local is 0) and whose scope is <= @scope;
 *   same - whether some address's subnet contains @local (if non-zero) and
 *          @dst (if non-zero).
 * Returns the candidate when a matching subnet was found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1050 
1051 /*
1052  * Confirm that local IP address exists using wildcards:
1053  * - in_dev: only on this interface, 0=any interface
1054  * - dst: only in the same subnet as dst, 0=any dst
1055  * - local: address, 0=autoselect the local address
1056  * - scope: maximum allowed scope value for the local address
1057  */
1058 __be32 inet_confirm_addr(struct in_device *in_dev,
1059 			 __be32 dst, __be32 local, int scope)
1060 {
1061 	__be32 addr = 0;
1062 	struct net_device *dev;
1063 	struct net *net;
1064 
1065 	if (scope != RT_SCOPE_LINK)
1066 		return confirm_addr_indev(in_dev, dst, local, scope);
1067 
1068 	net = dev_net(in_dev->dev);
1069 	rcu_read_lock();
1070 	for_each_netdev_rcu(net, dev) {
1071 		in_dev = __in_dev_get_rcu(dev);
1072 		if (in_dev) {
1073 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1074 			if (addr)
1075 				break;
1076 		}
1077 	}
1078 	rcu_read_unlock();
1079 
1080 	return addr;
1081 }
1082 EXPORT_SYMBOL(inet_confirm_addr);
1083 
1084 /*
1085  *	Device notifier
1086  */
1087 
/* Add @nb to inetaddr_chain, the chain this file calls with NETDEV_UP /
 * NETDEV_DOWN and the affected in_ifaddr.  Returns the result of
 * blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1093 
/* Remove @nb from inetaddr_chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1099 
1100 /* Rename ifa_labels for a device name change. Make some effort to preserve
1101  * existing alias numbering and to create unique labels if possible.
1102 */
1103 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1104 {
1105 	struct in_ifaddr *ifa;
1106 	int named = 0;
1107 
1108 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1109 		char old[IFNAMSIZ], *dot;
1110 
1111 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1112 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1113 		if (named++ == 0)
1114 			goto skip;
1115 		dot = strchr(old, ':');
1116 		if (dot == NULL) {
1117 			sprintf(old, ":%d", named);
1118 			dot = old;
1119 		}
1120 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1121 			strcat(ifa->ifa_label, dot);
1122 		else
1123 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1124 skip:
1125 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1126 	}
1127 }
1128 
/*
 * Tell whether @mtu is large enough to run IPv4 on the device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum; the value
 * itself is all this file establishes).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1133 
1134 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1135 					struct in_device *in_dev)
1136 
1137 {
1138 	struct in_ifaddr *ifa;
1139 
1140 	for (ifa = in_dev->ifa_list; ifa;
1141 	     ifa = ifa->ifa_next) {
1142 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1143 			 ifa->ifa_local, dev,
1144 			 ifa->ifa_local, NULL,
1145 			 dev->dev_addr, NULL);
1146 	}
1147 }
1148 
1149 /* Called only under RTNL semaphore */
1150 
1151 static int inetdev_event(struct notifier_block *this, unsigned long event,
1152 			 void *ptr)
1153 {
1154 	struct net_device *dev = ptr;
1155 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1156 
1157 	ASSERT_RTNL();
1158 
1159 	if (!in_dev) {
1160 		if (event == NETDEV_REGISTER) {
1161 			in_dev = inetdev_init(dev);
1162 			if (!in_dev)
1163 				return notifier_from_errno(-ENOMEM);
1164 			if (dev->flags & IFF_LOOPBACK) {
1165 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1166 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1167 			}
1168 		} else if (event == NETDEV_CHANGEMTU) {
1169 			/* Re-enabling IP */
1170 			if (inetdev_valid_mtu(dev->mtu))
1171 				in_dev = inetdev_init(dev);
1172 		}
1173 		goto out;
1174 	}
1175 
1176 	switch (event) {
1177 	case NETDEV_REGISTER:
1178 		printk(KERN_DEBUG "inetdev_event: bug\n");
1179 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1180 		break;
1181 	case NETDEV_UP:
1182 		if (!inetdev_valid_mtu(dev->mtu))
1183 			break;
1184 		if (dev->flags & IFF_LOOPBACK) {
1185 			struct in_ifaddr *ifa = inet_alloc_ifa();
1186 
1187 			if (ifa) {
1188 				INIT_HLIST_NODE(&ifa->hash);
1189 				ifa->ifa_local =
1190 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1191 				ifa->ifa_prefixlen = 8;
1192 				ifa->ifa_mask = inet_make_mask(8);
1193 				in_dev_hold(in_dev);
1194 				ifa->ifa_dev = in_dev;
1195 				ifa->ifa_scope = RT_SCOPE_HOST;
1196 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1197 				inet_insert_ifa(ifa);
1198 			}
1199 		}
1200 		ip_mc_up(in_dev);
1201 		/* fall through */
1202 	case NETDEV_CHANGEADDR:
1203 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1204 			break;
1205 		/* fall through */
1206 	case NETDEV_NOTIFY_PEERS:
1207 		/* Send gratuitous ARP to notify of link change */
1208 		inetdev_send_gratuitous_arp(dev, in_dev);
1209 		break;
1210 	case NETDEV_DOWN:
1211 		ip_mc_down(in_dev);
1212 		break;
1213 	case NETDEV_PRE_TYPE_CHANGE:
1214 		ip_mc_unmap(in_dev);
1215 		break;
1216 	case NETDEV_POST_TYPE_CHANGE:
1217 		ip_mc_remap(in_dev);
1218 		break;
1219 	case NETDEV_CHANGEMTU:
1220 		if (inetdev_valid_mtu(dev->mtu))
1221 			break;
1222 		/* disable IP when MTU is not enough */
1223 	case NETDEV_UNREGISTER:
1224 		inetdev_destroy(in_dev);
1225 		break;
1226 	case NETDEV_CHANGENAME:
1227 		/* Do not notify about label change, this event is
1228 		 * not interesting to applications using netlink.
1229 		 */
1230 		inetdev_changename(dev, in_dev);
1231 
1232 		devinet_sysctl_unregister(in_dev);
1233 		devinet_sysctl_register(in_dev);
1234 		break;
1235 	}
1236 out:
1237 	return NOTIFY_DONE;
1238 }
1239 
1240 static struct notifier_block ip_netdev_notifier = {
1241 	.notifier_call = inetdev_event,
1242 };
1243 
1244 static inline size_t inet_nlmsg_size(void)
1245 {
1246 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1247 	       + nla_total_size(4) /* IFA_ADDRESS */
1248 	       + nla_total_size(4) /* IFA_LOCAL */
1249 	       + nla_total_size(4) /* IFA_BROADCAST */
1250 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1251 }
1252 
1253 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1254 			    u32 pid, u32 seq, int event, unsigned int flags)
1255 {
1256 	struct ifaddrmsg *ifm;
1257 	struct nlmsghdr  *nlh;
1258 
1259 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1260 	if (nlh == NULL)
1261 		return -EMSGSIZE;
1262 
1263 	ifm = nlmsg_data(nlh);
1264 	ifm->ifa_family = AF_INET;
1265 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1266 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1267 	ifm->ifa_scope = ifa->ifa_scope;
1268 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1269 
1270 	if (ifa->ifa_address)
1271 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1272 
1273 	if (ifa->ifa_local)
1274 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1275 
1276 	if (ifa->ifa_broadcast)
1277 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1278 
1279 	if (ifa->ifa_label[0])
1280 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1281 
1282 	return nlmsg_end(skb, nlh);
1283 
1284 nla_put_failure:
1285 	nlmsg_cancel(skb, nlh);
1286 	return -EMSGSIZE;
1287 }
1288 
1289 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1290 {
1291 	struct net *net = sock_net(skb->sk);
1292 	int h, s_h;
1293 	int idx, s_idx;
1294 	int ip_idx, s_ip_idx;
1295 	struct net_device *dev;
1296 	struct in_device *in_dev;
1297 	struct in_ifaddr *ifa;
1298 	struct hlist_head *head;
1299 	struct hlist_node *node;
1300 
1301 	s_h = cb->args[0];
1302 	s_idx = idx = cb->args[1];
1303 	s_ip_idx = ip_idx = cb->args[2];
1304 
1305 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1306 		idx = 0;
1307 		head = &net->dev_index_head[h];
1308 		rcu_read_lock();
1309 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1310 			if (idx < s_idx)
1311 				goto cont;
1312 			if (h > s_h || idx > s_idx)
1313 				s_ip_idx = 0;
1314 			in_dev = __in_dev_get_rcu(dev);
1315 			if (!in_dev)
1316 				goto cont;
1317 
1318 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1319 			     ifa = ifa->ifa_next, ip_idx++) {
1320 				if (ip_idx < s_ip_idx)
1321 					continue;
1322 				if (inet_fill_ifaddr(skb, ifa,
1323 					     NETLINK_CB(cb->skb).pid,
1324 					     cb->nlh->nlmsg_seq,
1325 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1326 					rcu_read_unlock();
1327 					goto done;
1328 				}
1329 			}
1330 cont:
1331 			idx++;
1332 		}
1333 		rcu_read_unlock();
1334 	}
1335 
1336 done:
1337 	cb->args[0] = h;
1338 	cb->args[1] = idx;
1339 	cb->args[2] = ip_idx;
1340 
1341 	return skb->len;
1342 }
1343 
1344 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1345 		      u32 pid)
1346 {
1347 	struct sk_buff *skb;
1348 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1349 	int err = -ENOBUFS;
1350 	struct net *net;
1351 
1352 	net = dev_net(ifa->ifa_dev->dev);
1353 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1354 	if (skb == NULL)
1355 		goto errout;
1356 
1357 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1358 	if (err < 0) {
1359 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1360 		WARN_ON(err == -EMSGSIZE);
1361 		kfree_skb(skb);
1362 		goto errout;
1363 	}
1364 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1365 	return;
1366 errout:
1367 	if (err < 0)
1368 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1369 }
1370 
1371 static size_t inet_get_link_af_size(const struct net_device *dev)
1372 {
1373 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1374 
1375 	if (!in_dev)
1376 		return 0;
1377 
1378 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1379 }
1380 
1381 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1382 {
1383 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1384 	struct nlattr *nla;
1385 	int i;
1386 
1387 	if (!in_dev)
1388 		return -ENODATA;
1389 
1390 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1391 	if (nla == NULL)
1392 		return -EMSGSIZE;
1393 
1394 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1395 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1396 
1397 	return 0;
1398 }
1399 
1400 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1401 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1402 };
1403 
1404 static int inet_validate_link_af(const struct net_device *dev,
1405 				 const struct nlattr *nla)
1406 {
1407 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1408 	int err, rem;
1409 
1410 	if (dev && !__in_dev_get_rtnl(dev))
1411 		return -EAFNOSUPPORT;
1412 
1413 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1414 	if (err < 0)
1415 		return err;
1416 
1417 	if (tb[IFLA_INET_CONF]) {
1418 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1419 			int cfgid = nla_type(a);
1420 
1421 			if (nla_len(a) < 4)
1422 				return -EINVAL;
1423 
1424 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1425 				return -EINVAL;
1426 		}
1427 	}
1428 
1429 	return 0;
1430 }
1431 
1432 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1433 {
1434 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1435 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1436 	int rem;
1437 
1438 	if (!in_dev)
1439 		return -EAFNOSUPPORT;
1440 
1441 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1442 		BUG();
1443 
1444 	if (tb[IFLA_INET_CONF]) {
1445 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1446 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1447 	}
1448 
1449 	return 0;
1450 }
1451 
1452 #ifdef CONFIG_SYSCTL
1453 
1454 static void devinet_copy_dflt_conf(struct net *net, int i)
1455 {
1456 	struct net_device *dev;
1457 
1458 	rcu_read_lock();
1459 	for_each_netdev_rcu(net, dev) {
1460 		struct in_device *in_dev;
1461 
1462 		in_dev = __in_dev_get_rcu(dev);
1463 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1464 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1465 	}
1466 	rcu_read_unlock();
1467 }
1468 
1469 /* called with RTNL locked */
1470 static void inet_forward_change(struct net *net)
1471 {
1472 	struct net_device *dev;
1473 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1474 
1475 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1476 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1477 
1478 	for_each_netdev(net, dev) {
1479 		struct in_device *in_dev;
1480 		if (on)
1481 			dev_disable_lro(dev);
1482 		rcu_read_lock();
1483 		in_dev = __in_dev_get_rcu(dev);
1484 		if (in_dev)
1485 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1486 		rcu_read_unlock();
1487 	}
1488 }
1489 
1490 static int devinet_conf_proc(ctl_table *ctl, int write,
1491 			     void __user *buffer,
1492 			     size_t *lenp, loff_t *ppos)
1493 {
1494 	int old_value = *(int *)ctl->data;
1495 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1496 	int new_value = *(int *)ctl->data;
1497 
1498 	if (write) {
1499 		struct ipv4_devconf *cnf = ctl->extra1;
1500 		struct net *net = ctl->extra2;
1501 		int i = (int *)ctl->data - cnf->data;
1502 
1503 		set_bit(i, cnf->state);
1504 
1505 		if (cnf == net->ipv4.devconf_dflt)
1506 			devinet_copy_dflt_conf(net, i);
1507 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1508 			if ((new_value == 0) && (old_value != 0))
1509 				rt_cache_flush(net, 0);
1510 	}
1511 
1512 	return ret;
1513 }
1514 
1515 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1516 				  void __user *buffer,
1517 				  size_t *lenp, loff_t *ppos)
1518 {
1519 	int *valp = ctl->data;
1520 	int val = *valp;
1521 	loff_t pos = *ppos;
1522 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523 
1524 	if (write && *valp != val) {
1525 		struct net *net = ctl->extra2;
1526 
1527 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1528 			if (!rtnl_trylock()) {
1529 				/* Restore the original values before restarting */
1530 				*valp = val;
1531 				*ppos = pos;
1532 				return restart_syscall();
1533 			}
1534 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1535 				inet_forward_change(net);
1536 			} else if (*valp) {
1537 				struct ipv4_devconf *cnf = ctl->extra1;
1538 				struct in_device *idev =
1539 					container_of(cnf, struct in_device, cnf);
1540 				dev_disable_lro(idev->dev);
1541 			}
1542 			rtnl_unlock();
1543 			rt_cache_flush(net, 0);
1544 		}
1545 	}
1546 
1547 	return ret;
1548 }
1549 
1550 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1551 				void __user *buffer,
1552 				size_t *lenp, loff_t *ppos)
1553 {
1554 	int *valp = ctl->data;
1555 	int val = *valp;
1556 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1557 	struct net *net = ctl->extra2;
1558 
1559 	if (write && *valp != val)
1560 		rt_cache_flush(net, 0);
1561 
1562 	return ret;
1563 }
1564 
/*
 * Template ctl_table entry for IPV4_DEVCONF_<attr>.  .data and .extra1
 * point into the global ipv4_devconf; devconf ids are 1-based, hence the
 * "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= proc, \
	}

/* Read-write (0644) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry whose handler is ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1587 
1588 static struct devinet_sysctl_table {
1589 	struct ctl_table_header *sysctl_header;
1590 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1591 	char *dev_name;
1592 } devinet_sysctl = {
1593 	.devinet_vars = {
1594 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1595 					     devinet_sysctl_forward),
1596 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1597 
1598 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1599 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1600 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1601 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1602 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1603 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1604 					"accept_source_route"),
1605 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1606 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1607 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1608 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1609 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1610 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1611 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1612 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1613 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1614 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1615 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1616 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1617 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1618 
1619 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1620 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1621 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1622 					      "force_igmp_version"),
1623 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1624 					      "promote_secondaries"),
1625 	},
1626 };
1627 
1628 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1629 					struct ipv4_devconf *p)
1630 {
1631 	int i;
1632 	struct devinet_sysctl_table *t;
1633 
1634 #define DEVINET_CTL_PATH_DEV	3
1635 
1636 	struct ctl_path devinet_ctl_path[] = {
1637 		{ .procname = "net",  },
1638 		{ .procname = "ipv4", },
1639 		{ .procname = "conf", },
1640 		{ /* to be set */ },
1641 		{ },
1642 	};
1643 
1644 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1645 	if (!t)
1646 		goto out;
1647 
1648 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1649 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1650 		t->devinet_vars[i].extra1 = p;
1651 		t->devinet_vars[i].extra2 = net;
1652 	}
1653 
1654 	/*
1655 	 * Make a copy of dev_name, because '.procname' is regarded as const
1656 	 * by sysctl and we wouldn't want anyone to change it under our feet
1657 	 * (see SIOCSIFNAME).
1658 	 */
1659 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1660 	if (!t->dev_name)
1661 		goto free;
1662 
1663 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1664 
1665 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1666 			t->devinet_vars);
1667 	if (!t->sysctl_header)
1668 		goto free_procname;
1669 
1670 	p->sysctl = t;
1671 	return 0;
1672 
1673 free_procname:
1674 	kfree(t->dev_name);
1675 free:
1676 	kfree(t);
1677 out:
1678 	return -ENOBUFS;
1679 }
1680 
1681 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1682 {
1683 	struct devinet_sysctl_table *t = cnf->sysctl;
1684 
1685 	if (t == NULL)
1686 		return;
1687 
1688 	cnf->sysctl = NULL;
1689 	unregister_net_sysctl_table(t->sysctl_header);
1690 	kfree(t->dev_name);
1691 	kfree(t);
1692 }
1693 
1694 static void devinet_sysctl_register(struct in_device *idev)
1695 {
1696 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1697 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1698 					&idev->cnf);
1699 }
1700 
1701 static void devinet_sysctl_unregister(struct in_device *idev)
1702 {
1703 	__devinet_sysctl_unregister(&idev->cnf);
1704 	neigh_sysctl_unregister(idev->arp_parms);
1705 }
1706 
1707 static struct ctl_table ctl_forward_entry[] = {
1708 	{
1709 		.procname	= "ip_forward",
1710 		.data		= &ipv4_devconf.data[
1711 					IPV4_DEVCONF_FORWARDING - 1],
1712 		.maxlen		= sizeof(int),
1713 		.mode		= 0644,
1714 		.proc_handler	= devinet_sysctl_forward,
1715 		.extra1		= &ipv4_devconf,
1716 		.extra2		= &init_net,
1717 	},
1718 	{ },
1719 };
1720 
1721 static __net_initdata struct ctl_path net_ipv4_path[] = {
1722 	{ .procname = "net", },
1723 	{ .procname = "ipv4", },
1724 	{ },
1725 };
1726 #endif
1727 
1728 static __net_init int devinet_init_net(struct net *net)
1729 {
1730 	int err;
1731 	struct ipv4_devconf *all, *dflt;
1732 #ifdef CONFIG_SYSCTL
1733 	struct ctl_table *tbl = ctl_forward_entry;
1734 	struct ctl_table_header *forw_hdr;
1735 #endif
1736 
1737 	err = -ENOMEM;
1738 	all = &ipv4_devconf;
1739 	dflt = &ipv4_devconf_dflt;
1740 
1741 	if (!net_eq(net, &init_net)) {
1742 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1743 		if (all == NULL)
1744 			goto err_alloc_all;
1745 
1746 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1747 		if (dflt == NULL)
1748 			goto err_alloc_dflt;
1749 
1750 #ifdef CONFIG_SYSCTL
1751 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1752 		if (tbl == NULL)
1753 			goto err_alloc_ctl;
1754 
1755 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1756 		tbl[0].extra1 = all;
1757 		tbl[0].extra2 = net;
1758 #endif
1759 	}
1760 
1761 #ifdef CONFIG_SYSCTL
1762 	err = __devinet_sysctl_register(net, "all", all);
1763 	if (err < 0)
1764 		goto err_reg_all;
1765 
1766 	err = __devinet_sysctl_register(net, "default", dflt);
1767 	if (err < 0)
1768 		goto err_reg_dflt;
1769 
1770 	err = -ENOMEM;
1771 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1772 	if (forw_hdr == NULL)
1773 		goto err_reg_ctl;
1774 	net->ipv4.forw_hdr = forw_hdr;
1775 #endif
1776 
1777 	net->ipv4.devconf_all = all;
1778 	net->ipv4.devconf_dflt = dflt;
1779 	return 0;
1780 
1781 #ifdef CONFIG_SYSCTL
1782 err_reg_ctl:
1783 	__devinet_sysctl_unregister(dflt);
1784 err_reg_dflt:
1785 	__devinet_sysctl_unregister(all);
1786 err_reg_all:
1787 	if (tbl != ctl_forward_entry)
1788 		kfree(tbl);
1789 err_alloc_ctl:
1790 #endif
1791 	if (dflt != &ipv4_devconf_dflt)
1792 		kfree(dflt);
1793 err_alloc_dflt:
1794 	if (all != &ipv4_devconf)
1795 		kfree(all);
1796 err_alloc_all:
1797 	return err;
1798 }
1799 
1800 static __net_exit void devinet_exit_net(struct net *net)
1801 {
1802 #ifdef CONFIG_SYSCTL
1803 	struct ctl_table *tbl;
1804 
1805 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1806 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1807 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1808 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1809 	kfree(tbl);
1810 #endif
1811 	kfree(net->ipv4.devconf_dflt);
1812 	kfree(net->ipv4.devconf_all);
1813 }
1814 
1815 static __net_initdata struct pernet_operations devinet_ops = {
1816 	.init = devinet_init_net,
1817 	.exit = devinet_exit_net,
1818 };
1819 
1820 static struct rtnl_af_ops inet_af_ops = {
1821 	.family		  = AF_INET,
1822 	.fill_link_af	  = inet_fill_link_af,
1823 	.get_link_af_size = inet_get_link_af_size,
1824 	.validate_link_af = inet_validate_link_af,
1825 	.set_link_af	  = inet_set_link_af,
1826 };
1827 
1828 void __init devinet_init(void)
1829 {
1830 	int i;
1831 
1832 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1833 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1834 
1835 	register_pernet_subsys(&devinet_ops);
1836 
1837 	register_gifconf(PF_INET, inet_gifconf);
1838 	register_netdevice_notifier(&ip_netdev_notifier);
1839 
1840 	rtnl_af_register(&inet_af_ops);
1841 
1842 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1843 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1844 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1845 }
1846 
1847