xref: /openbmc/linux/net/ipv4/devinet.c (revision ca79522c)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer-valued expression can be
 * passed safely, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
/* Number of hash bits used for the address table, and its bucket count. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/* Hash table of in_ifaddr entries; buckets are chosen by inet_addr_hash(). */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Held around insertions into and removals from inet_addr_lst. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fallback to FIB local table so that communication
160 		 * over loopback subnets work.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
/* Forward declaration: rtmsg_ifa() is used before its definition. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN on address changes. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inet_del_ifa() is used before its definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations are provided elsewhere when sysctl is configured. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL both hooks are empty stubs. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
191 
192 /* Locks all the inet devices. */
193 
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221 	dev_put(dev);
222 	if (!idev->dead)
223 		pr_err("Freeing alive in_device %p\n", idev);
224 	else
225 		kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228 
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231 	struct in_device *in_dev;
232 
233 	ASSERT_RTNL();
234 
235 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236 	if (!in_dev)
237 		goto out;
238 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 			sizeof(in_dev->cnf));
240 	in_dev->cnf.sysctl = NULL;
241 	in_dev->dev = dev;
242 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 	if (!in_dev->arp_parms)
244 		goto out_kfree;
245 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 		dev_disable_lro(dev);
247 	/* Reference in_dev->dev */
248 	dev_hold(dev);
249 	/* Account for reference dev->ip_ptr (below) */
250 	in_dev_hold(in_dev);
251 
252 	devinet_sysctl_register(in_dev);
253 	ip_mc_init_dev(in_dev);
254 	if (dev->flags & IFF_UP)
255 		ip_mc_up(in_dev);
256 
257 	/* we can receive as soon as ip_ptr is set -- do this last */
258 	rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260 	return in_dev;
261 out_kfree:
262 	kfree(in_dev);
263 	in_dev = NULL;
264 	goto out;
265 }
266 
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
270 	in_dev_put(idev);
271 }
272 
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275 	struct in_ifaddr *ifa;
276 	struct net_device *dev;
277 
278 	ASSERT_RTNL();
279 
280 	dev = in_dev->dev;
281 
282 	in_dev->dead = 1;
283 
284 	ip_mc_destroy_dev(in_dev);
285 
286 	while ((ifa = in_dev->ifa_list) != NULL) {
287 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288 		inet_free_ifa(ifa);
289 	}
290 
291 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
292 
293 	devinet_sysctl_unregister(in_dev);
294 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295 	arp_ifdown(dev);
296 
297 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299 
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302 	rcu_read_lock();
303 	for_primary_ifa(in_dev) {
304 		if (inet_ifa_match(a, ifa)) {
305 			if (!b || inet_ifa_match(b, ifa)) {
306 				rcu_read_unlock();
307 				return 1;
308 			}
309 		}
310 	} endfor_ifa(in_dev);
311 	rcu_read_unlock();
312 	return 0;
313 }
314 
/*
 * Remove the address *@ifap from @in_dev's address list.  Caller holds RTNL.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the list slot) that currently points at the
 *           address to delete
 * @destroy: when non-zero the deleted address is also freed with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh, @portid: forwarded unchanged to rtmsg_ifa() for the notifications
 *
 * When the deleted address is a primary one, its matching secondaries are
 * either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set, the
 * first matching secondary is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;	/* secondary chosen for promotion */
	struct in_ifaddr *ifa, *ifa1 = *ifap;	/* ifa1 is the address being deleted */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;	/* last entry skipped before "promote" */
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope is not
			 * narrower than ifa1's; a promoted address is
			 * re-linked right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip anything that is not a secondary of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: unlink, announce and free
				 * this secondary, then keep scanning.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before "promote" is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move "promote" from its current position to just after
		 * last_prim.  With no prev_prom it is left where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries that were
		 * removed silently above.
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
414 
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416 			 int destroy)
417 {
418 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420 
/* Forward declaration: the work handler is defined further down the file. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item whose handler is check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424 
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426 			     u32 portid)
427 {
428 	struct in_device *in_dev = ifa->ifa_dev;
429 	struct in_ifaddr *ifa1, **ifap, **last_primary;
430 
431 	ASSERT_RTNL();
432 
433 	if (!ifa->ifa_local) {
434 		inet_free_ifa(ifa);
435 		return 0;
436 	}
437 
438 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 	last_primary = &in_dev->ifa_list;
440 
441 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 	     ifap = &ifa1->ifa_next) {
443 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 		    ifa->ifa_scope <= ifa1->ifa_scope)
445 			last_primary = &ifa1->ifa_next;
446 		if (ifa1->ifa_mask == ifa->ifa_mask &&
447 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
448 			if (ifa1->ifa_local == ifa->ifa_local) {
449 				inet_free_ifa(ifa);
450 				return -EEXIST;
451 			}
452 			if (ifa1->ifa_scope != ifa->ifa_scope) {
453 				inet_free_ifa(ifa);
454 				return -EINVAL;
455 			}
456 			ifa->ifa_flags |= IFA_F_SECONDARY;
457 		}
458 	}
459 
460 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 		net_srandom(ifa->ifa_local);
462 		ifap = last_primary;
463 	}
464 
465 	ifa->ifa_next = *ifap;
466 	*ifap = ifa;
467 
468 	inet_hash_insert(dev_net(in_dev->dev), ifa);
469 
470 	cancel_delayed_work(&check_lifetime_work);
471 	schedule_delayed_work(&check_lifetime_work, 0);
472 
473 	/* Send message first, then call notifier.
474 	   Notifier will trigger FIB update, so that
475 	   listeners of netlink will know about new ifaddr */
476 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478 
479 	return 0;
480 }
481 
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484 	return __inet_insert_ifa(ifa, NULL, 0);
485 }
486 
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
490 
491 	ASSERT_RTNL();
492 
493 	if (!in_dev) {
494 		inet_free_ifa(ifa);
495 		return -ENOBUFS;
496 	}
497 	ipv4_devconf_setall(in_dev);
498 	if (ifa->ifa_dev != in_dev) {
499 		WARN_ON(ifa->ifa_dev);
500 		in_dev_hold(in_dev);
501 		ifa->ifa_dev = in_dev;
502 	}
503 	if (ipv4_is_loopback(ifa->ifa_local))
504 		ifa->ifa_scope = RT_SCOPE_HOST;
505 	return inet_insert_ifa(ifa);
506 }
507 
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513 	struct net_device *dev;
514 	struct in_device *in_dev = NULL;
515 
516 	rcu_read_lock();
517 	dev = dev_get_by_index_rcu(net, ifindex);
518 	if (dev)
519 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520 	rcu_read_unlock();
521 	return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524 
525 /* Called only from RTNL semaphored context. No locks. */
526 
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528 				    __be32 mask)
529 {
530 	ASSERT_RTNL();
531 
532 	for_primary_ifa(in_dev) {
533 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534 			return ifa;
535 	} endfor_ifa(in_dev);
536 	return NULL;
537 }
538 
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541 	struct net *net = sock_net(skb->sk);
542 	struct nlattr *tb[IFA_MAX+1];
543 	struct in_device *in_dev;
544 	struct ifaddrmsg *ifm;
545 	struct in_ifaddr *ifa, **ifap;
546 	int err = -EINVAL;
547 
548 	ASSERT_RTNL();
549 
550 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551 	if (err < 0)
552 		goto errout;
553 
554 	ifm = nlmsg_data(nlh);
555 	in_dev = inetdev_by_index(net, ifm->ifa_index);
556 	if (in_dev == NULL) {
557 		err = -ENODEV;
558 		goto errout;
559 	}
560 
561 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 	     ifap = &ifa->ifa_next) {
563 		if (tb[IFA_LOCAL] &&
564 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565 			continue;
566 
567 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568 			continue;
569 
570 		if (tb[IFA_ADDRESS] &&
571 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573 			continue;
574 
575 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576 		return 0;
577 	}
578 
579 	err = -EADDRNOTAVAIL;
580 errout:
581 	return err;
582 }
583 
/* Lifetime value that the lifetime checks in this file treat as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585 
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of inet_addr_lst is scanned twice: first under
 * rcu_read_lock() only, to decide whether anything has to change and to
 * compute the next wake-up time, and then - only if a change is needed -
 * again under the RTNL lock to delete expired addresses and flag
 * addresses past their preferred lifetime as deprecated.
 * At the end the work item re-arms itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Latest acceptable time for the next run; pulled earlier below. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan, no RTNL required. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime is over: must be deleted. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Preferred lifetime is over: wake up again
				 * no later than the end of the valid
				 * lifetime.
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: wake up when that ends. */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply the changes with RTNL held. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* inet_del_ifa() needs the list link that
				 * points at ifa, so search the device list
				 * for it.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				/* Newly deprecated: flag it and notify. */
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	schedule_delayed_work(&check_lifetime_work, next_sched - now);
}
682 
683 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
684 			     __u32 prefered_lft)
685 {
686 	unsigned long timeout;
687 
688 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
689 
690 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
691 	if (addrconf_finite_timeout(timeout))
692 		ifa->ifa_valid_lft = timeout;
693 	else
694 		ifa->ifa_flags |= IFA_F_PERMANENT;
695 
696 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
697 	if (addrconf_finite_timeout(timeout)) {
698 		if (timeout == 0)
699 			ifa->ifa_flags |= IFA_F_DEPRECATED;
700 		ifa->ifa_preferred_lft = timeout;
701 	}
702 	ifa->ifa_tstamp = jiffies;
703 	if (!ifa->ifa_cstamp)
704 		ifa->ifa_cstamp = ifa->ifa_tstamp;
705 }
706 
707 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
708 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
709 {
710 	struct nlattr *tb[IFA_MAX+1];
711 	struct in_ifaddr *ifa;
712 	struct ifaddrmsg *ifm;
713 	struct net_device *dev;
714 	struct in_device *in_dev;
715 	int err;
716 
717 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
718 	if (err < 0)
719 		goto errout;
720 
721 	ifm = nlmsg_data(nlh);
722 	err = -EINVAL;
723 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
724 		goto errout;
725 
726 	dev = __dev_get_by_index(net, ifm->ifa_index);
727 	err = -ENODEV;
728 	if (dev == NULL)
729 		goto errout;
730 
731 	in_dev = __in_dev_get_rtnl(dev);
732 	err = -ENOBUFS;
733 	if (in_dev == NULL)
734 		goto errout;
735 
736 	ifa = inet_alloc_ifa();
737 	if (ifa == NULL)
738 		/*
739 		 * A potential indev allocation can be left alive, it stays
740 		 * assigned to its device and is destroy with it.
741 		 */
742 		goto errout;
743 
744 	ipv4_devconf_setall(in_dev);
745 	in_dev_hold(in_dev);
746 
747 	if (tb[IFA_ADDRESS] == NULL)
748 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
749 
750 	INIT_HLIST_NODE(&ifa->hash);
751 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
752 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
753 	ifa->ifa_flags = ifm->ifa_flags;
754 	ifa->ifa_scope = ifm->ifa_scope;
755 	ifa->ifa_dev = in_dev;
756 
757 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
758 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
759 
760 	if (tb[IFA_BROADCAST])
761 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
762 
763 	if (tb[IFA_LABEL])
764 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
765 	else
766 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
767 
768 	if (tb[IFA_CACHEINFO]) {
769 		struct ifa_cacheinfo *ci;
770 
771 		ci = nla_data(tb[IFA_CACHEINFO]);
772 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
773 			err = -EINVAL;
774 			goto errout;
775 		}
776 		*pvalid_lft = ci->ifa_valid;
777 		*pprefered_lft = ci->ifa_prefered;
778 	}
779 
780 	return ifa;
781 
782 errout:
783 	return ERR_PTR(err);
784 }
785 
786 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
787 {
788 	struct in_device *in_dev = ifa->ifa_dev;
789 	struct in_ifaddr *ifa1, **ifap;
790 
791 	if (!ifa->ifa_local)
792 		return NULL;
793 
794 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
795 	     ifap = &ifa1->ifa_next) {
796 		if (ifa1->ifa_mask == ifa->ifa_mask &&
797 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
798 		    ifa1->ifa_local == ifa->ifa_local)
799 			return ifa1;
800 	}
801 	return NULL;
802 }
803 
804 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
805 {
806 	struct net *net = sock_net(skb->sk);
807 	struct in_ifaddr *ifa;
808 	struct in_ifaddr *ifa_existing;
809 	__u32 valid_lft = INFINITY_LIFE_TIME;
810 	__u32 prefered_lft = INFINITY_LIFE_TIME;
811 
812 	ASSERT_RTNL();
813 
814 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
815 	if (IS_ERR(ifa))
816 		return PTR_ERR(ifa);
817 
818 	ifa_existing = find_matching_ifa(ifa);
819 	if (!ifa_existing) {
820 		/* It would be best to check for !NLM_F_CREATE here but
821 		 * userspace alreay relies on not having to provide this.
822 		 */
823 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
824 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
825 	} else {
826 		inet_free_ifa(ifa);
827 
828 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
829 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
830 			return -EEXIST;
831 		ifa = ifa_existing;
832 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 		cancel_delayed_work(&check_lifetime_work);
834 		schedule_delayed_work(&check_lifetime_work, 0);
835 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
836 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
837 	}
838 	return 0;
839 }
840 
841 /*
842  *	Determine a default network mask, based on the IP address.
843  */
844 
845 static int inet_abc_len(__be32 addr)
846 {
847 	int rc = -1;	/* Something else, probably a multicast. */
848 
849 	if (ipv4_is_zeronet(addr))
850 		rc = 0;
851 	else {
852 		__u32 haddr = ntohl(addr);
853 
854 		if (IN_CLASSA(haddr))
855 			rc = 8;
856 		else if (IN_CLASSB(haddr))
857 			rc = 16;
858 		else if (IN_CLASSC(haddr))
859 			rc = 24;
860 	}
861 
862 	return rc;
863 }
864 
865 
/*
 * Handle the IPv4 address ioctls (SIOCGIF* and SIOCSIF* for ADDR, BRDADDR,
 * DSTADDR and NETMASK, plus SIOCSIFFLAGS).
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: ioctl number
 * @arg: user pointer to a struct ifreq; read on entry and, for the "get"
 *       requests, written back with the result
 *
 * Returns 0 on success or a negative errno: -EFAULT for a failed user
 * copy, -EPERM without CAP_NET_ADMIN on "set" requests, -EINVAL for an
 * unknown command or bad argument, -ENODEV for an unknown interface,
 * -EADDRNOTAVAIL when no matching address exists, or whatever the invoked
 * helper returns.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Make sure the user-supplied name is NUL-terminated. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias label ("name:suffix") down to the device
	 * name; the colon is put back once the device has been found.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias label: clearing IFF_UP deletes that
			 * address; the device flags are not touched.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Same address: nothing to do.  Otherwise unlink
			 * the entry (without freeing it) so it can be
			 * re-inserted with the new values.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive prefix, mask and broadcast from the
			 * address class.
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		/* Unlink, modify, re-insert. */
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" requests: drop RTNL first, then copy the result to user. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1107 
1108 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1109 {
1110 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1111 	struct in_ifaddr *ifa;
1112 	struct ifreq ifr;
1113 	int done = 0;
1114 
1115 	if (!in_dev)
1116 		goto out;
1117 
1118 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1119 		if (!buf) {
1120 			done += sizeof(ifr);
1121 			continue;
1122 		}
1123 		if (len < (int) sizeof(ifr))
1124 			break;
1125 		memset(&ifr, 0, sizeof(struct ifreq));
1126 		if (ifa->ifa_label)
1127 			strcpy(ifr.ifr_name, ifa->ifa_label);
1128 		else
1129 			strcpy(ifr.ifr_name, dev->name);
1130 
1131 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1132 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1133 								ifa->ifa_local;
1134 
1135 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1136 			done = -EFAULT;
1137 			break;
1138 		}
1139 		buf  += sizeof(struct ifreq);
1140 		len  -= sizeof(struct ifreq);
1141 		done += sizeof(struct ifreq);
1142 	}
1143 out:
1144 	return done;
1145 }
1146 
1147 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1148 {
1149 	__be32 addr = 0;
1150 	struct in_device *in_dev;
1151 	struct net *net = dev_net(dev);
1152 
1153 	rcu_read_lock();
1154 	in_dev = __in_dev_get_rcu(dev);
1155 	if (!in_dev)
1156 		goto no_in_dev;
1157 
1158 	for_primary_ifa(in_dev) {
1159 		if (ifa->ifa_scope > scope)
1160 			continue;
1161 		if (!dst || inet_ifa_match(dst, ifa)) {
1162 			addr = ifa->ifa_local;
1163 			break;
1164 		}
1165 		if (!addr)
1166 			addr = ifa->ifa_local;
1167 	} endfor_ifa(in_dev);
1168 
1169 	if (addr)
1170 		goto out_unlock;
1171 no_in_dev:
1172 
1173 	/* Not loopback addresses on loopback should be preferred
1174 	   in this case. It is importnat that lo is the first interface
1175 	   in dev_base list.
1176 	 */
1177 	for_each_netdev_rcu(net, dev) {
1178 		in_dev = __in_dev_get_rcu(dev);
1179 		if (!in_dev)
1180 			continue;
1181 
1182 		for_primary_ifa(in_dev) {
1183 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1184 			    ifa->ifa_scope <= scope) {
1185 				addr = ifa->ifa_local;
1186 				goto out_unlock;
1187 			}
1188 		} endfor_ifa(in_dev);
1189 	}
1190 out_unlock:
1191 	rcu_read_unlock();
1192 	return addr;
1193 }
1194 EXPORT_SYMBOL(inet_select_addr);
1195 
/*
 * Look for a usable local address on a single interface.
 *
 * Walks the addresses of @in_dev while tracking two pieces of state:
 *   addr - the first ifa_local with ifa_scope <= @scope that equals @local
 *          (any address qualifies when @local is 0);
 *   same - set once an address is found that matches both @local and @dst
 *          via inet_ifa_match() (a zero @local or @dst matches anything).
 *
 * Returns addr if "same" ended up set, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* No candidate yet: take this one if it fits @local and @scope */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* A matching subnet was already seen: nothing left to do */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit @local or wildcard @dst: addr is final */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1232 
1233 /*
1234  * Confirm that local IP address exists using wildcards:
1235  * - in_dev: only on this interface, 0=any interface
1236  * - dst: only in the same subnet as dst, 0=any dst
1237  * - local: address, 0=autoselect the local address
1238  * - scope: maximum allowed scope value for the local address
1239  */
1240 __be32 inet_confirm_addr(struct in_device *in_dev,
1241 			 __be32 dst, __be32 local, int scope)
1242 {
1243 	__be32 addr = 0;
1244 	struct net_device *dev;
1245 	struct net *net;
1246 
1247 	if (scope != RT_SCOPE_LINK)
1248 		return confirm_addr_indev(in_dev, dst, local, scope);
1249 
1250 	net = dev_net(in_dev->dev);
1251 	rcu_read_lock();
1252 	for_each_netdev_rcu(net, dev) {
1253 		in_dev = __in_dev_get_rcu(dev);
1254 		if (in_dev) {
1255 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1256 			if (addr)
1257 				break;
1258 		}
1259 	}
1260 	rcu_read_unlock();
1261 
1262 	return addr;
1263 }
1264 EXPORT_SYMBOL(inet_confirm_addr);
1265 
1266 /*
1267  *	Device notifier
1268  */
1269 
1270 int register_inetaddr_notifier(struct notifier_block *nb)
1271 {
1272 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1273 }
1274 EXPORT_SYMBOL(register_inetaddr_notifier);
1275 
1276 int unregister_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1281 
1282 /* Rename ifa_labels for a device name change. Make some effort to preserve
1283  * existing alias numbering and to create unique labels if possible.
1284 */
1285 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1286 {
1287 	struct in_ifaddr *ifa;
1288 	int named = 0;
1289 
1290 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1291 		char old[IFNAMSIZ], *dot;
1292 
1293 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1294 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1295 		if (named++ == 0)
1296 			goto skip;
1297 		dot = strchr(old, ':');
1298 		if (dot == NULL) {
1299 			sprintf(old, ":%d", named);
1300 			dot = old;
1301 		}
1302 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1303 			strcat(ifa->ifa_label, dot);
1304 		else
1305 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1306 skip:
1307 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1308 	}
1309 }
1310 
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum - confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1315 
1316 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1317 					struct in_device *in_dev)
1318 
1319 {
1320 	struct in_ifaddr *ifa;
1321 
1322 	for (ifa = in_dev->ifa_list; ifa;
1323 	     ifa = ifa->ifa_next) {
1324 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1325 			 ifa->ifa_local, dev,
1326 			 ifa->ifa_local, NULL,
1327 			 dev->dev_addr, NULL);
1328 	}
1329 }
1330 
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with net_device events.
 *
 * @this:  notifier block (not used in this function)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, where notifier_from_errno(-ENOMEM) is returned.
 *
 * Called only under RTNL semaphore.
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			/* Loopback devices get NOXFRM and NOPOLICY set */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback coming up: configure 127.0.0.1/8 with host scope */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* Gratuitous ARP is sent only when ARP_NOTIFY is enabled */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does send RTM_NEWADDR per
		 * address, which seems at odds with the comment above - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries use the new device name */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1423 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1427 
1428 static size_t inet_nlmsg_size(void)
1429 {
1430 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1431 	       + nla_total_size(4) /* IFA_ADDRESS */
1432 	       + nla_total_size(4) /* IFA_LOCAL */
1433 	       + nla_total_size(4) /* IFA_BROADCAST */
1434 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1435 }
1436 
1437 static inline u32 cstamp_delta(unsigned long cstamp)
1438 {
1439 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1440 }
1441 
1442 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1443 			 unsigned long tstamp, u32 preferred, u32 valid)
1444 {
1445 	struct ifa_cacheinfo ci;
1446 
1447 	ci.cstamp = cstamp_delta(cstamp);
1448 	ci.tstamp = cstamp_delta(tstamp);
1449 	ci.ifa_prefered = preferred;
1450 	ci.ifa_valid = valid;
1451 
1452 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1453 }
1454 
1455 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1456 			    u32 portid, u32 seq, int event, unsigned int flags)
1457 {
1458 	struct ifaddrmsg *ifm;
1459 	struct nlmsghdr  *nlh;
1460 	u32 preferred, valid;
1461 
1462 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1463 	if (nlh == NULL)
1464 		return -EMSGSIZE;
1465 
1466 	ifm = nlmsg_data(nlh);
1467 	ifm->ifa_family = AF_INET;
1468 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1469 	ifm->ifa_flags = ifa->ifa_flags;
1470 	ifm->ifa_scope = ifa->ifa_scope;
1471 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1472 
1473 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1474 		preferred = ifa->ifa_preferred_lft;
1475 		valid = ifa->ifa_valid_lft;
1476 		if (preferred != INFINITY_LIFE_TIME) {
1477 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1478 
1479 			if (preferred > tval)
1480 				preferred -= tval;
1481 			else
1482 				preferred = 0;
1483 			if (valid != INFINITY_LIFE_TIME) {
1484 				if (valid > tval)
1485 					valid -= tval;
1486 				else
1487 					valid = 0;
1488 			}
1489 		}
1490 	} else {
1491 		preferred = INFINITY_LIFE_TIME;
1492 		valid = INFINITY_LIFE_TIME;
1493 	}
1494 	if ((ifa->ifa_address &&
1495 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1496 	    (ifa->ifa_local &&
1497 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1498 	    (ifa->ifa_broadcast &&
1499 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1500 	    (ifa->ifa_label[0] &&
1501 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1502 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1503 			  preferred, valid))
1504 		goto nla_put_failure;
1505 
1506 	return nlmsg_end(skb, nlh);
1507 
1508 nla_put_failure:
1509 	nlmsg_cancel(skb, nlh);
1510 	return -EMSGSIZE;
1511 }
1512 
/*
 * Netlink dump callback: emit one RTM_NEWADDR message per IPv4 address of
 * every device in the socket's network namespace.
 *
 * The dump is resumable. The cursor is kept in cb->args:
 *   args[0] - device index hash bucket,
 *   args[1] - position of the device inside that bucket,
 *   args[2] - position of the address inside that device's list.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Restore the cursor left by the previous invocation */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx applies to the first bucket only; later buckets start at 0 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value consumed by nl_dump_check_consistent() below */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in an earlier pass */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start from its first address */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* A result <= 0 ends this pass; the cursor is saved at done */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1569 
1570 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1571 		      u32 portid)
1572 {
1573 	struct sk_buff *skb;
1574 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1575 	int err = -ENOBUFS;
1576 	struct net *net;
1577 
1578 	net = dev_net(ifa->ifa_dev->dev);
1579 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1580 	if (skb == NULL)
1581 		goto errout;
1582 
1583 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1584 	if (err < 0) {
1585 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1586 		WARN_ON(err == -EMSGSIZE);
1587 		kfree_skb(skb);
1588 		goto errout;
1589 	}
1590 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1591 	return;
1592 errout:
1593 	if (err < 0)
1594 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1595 }
1596 
1597 static size_t inet_get_link_af_size(const struct net_device *dev)
1598 {
1599 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1600 
1601 	if (!in_dev)
1602 		return 0;
1603 
1604 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1605 }
1606 
1607 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1608 {
1609 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1610 	struct nlattr *nla;
1611 	int i;
1612 
1613 	if (!in_dev)
1614 		return -ENODATA;
1615 
1616 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1617 	if (nla == NULL)
1618 		return -EMSGSIZE;
1619 
1620 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1621 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1622 
1623 	return 0;
1624 }
1625 
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1629 
1630 static int inet_validate_link_af(const struct net_device *dev,
1631 				 const struct nlattr *nla)
1632 {
1633 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1634 	int err, rem;
1635 
1636 	if (dev && !__in_dev_get_rtnl(dev))
1637 		return -EAFNOSUPPORT;
1638 
1639 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1640 	if (err < 0)
1641 		return err;
1642 
1643 	if (tb[IFLA_INET_CONF]) {
1644 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1645 			int cfgid = nla_type(a);
1646 
1647 			if (nla_len(a) < 4)
1648 				return -EINVAL;
1649 
1650 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1651 				return -EINVAL;
1652 		}
1653 	}
1654 
1655 	return 0;
1656 }
1657 
1658 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1659 {
1660 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1661 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1662 	int rem;
1663 
1664 	if (!in_dev)
1665 		return -EAFNOSUPPORT;
1666 
1667 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1668 		BUG();
1669 
1670 	if (tb[IFLA_INET_CONF]) {
1671 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1672 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1673 	}
1674 
1675 	return 0;
1676 }
1677 
1678 static int inet_netconf_msgsize_devconf(int type)
1679 {
1680 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1681 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1682 
1683 	/* type -1 is used for ALL */
1684 	if (type == -1 || type == NETCONFA_FORWARDING)
1685 		size += nla_total_size(4);
1686 	if (type == -1 || type == NETCONFA_RP_FILTER)
1687 		size += nla_total_size(4);
1688 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1689 		size += nla_total_size(4);
1690 
1691 	return size;
1692 }
1693 
1694 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1695 				     struct ipv4_devconf *devconf, u32 portid,
1696 				     u32 seq, int event, unsigned int flags,
1697 				     int type)
1698 {
1699 	struct nlmsghdr  *nlh;
1700 	struct netconfmsg *ncm;
1701 
1702 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1703 			flags);
1704 	if (nlh == NULL)
1705 		return -EMSGSIZE;
1706 
1707 	ncm = nlmsg_data(nlh);
1708 	ncm->ncm_family = AF_INET;
1709 
1710 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1711 		goto nla_put_failure;
1712 
1713 	/* type -1 is used for ALL */
1714 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1715 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1716 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1717 		goto nla_put_failure;
1718 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1719 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1720 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1721 		goto nla_put_failure;
1722 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1723 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1724 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1725 		goto nla_put_failure;
1726 
1727 	return nlmsg_end(skb, nlh);
1728 
1729 nla_put_failure:
1730 	nlmsg_cancel(skb, nlh);
1731 	return -EMSGSIZE;
1732 }
1733 
1734 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1735 				 struct ipv4_devconf *devconf)
1736 {
1737 	struct sk_buff *skb;
1738 	int err = -ENOBUFS;
1739 
1740 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1741 	if (skb == NULL)
1742 		goto errout;
1743 
1744 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1745 					RTM_NEWNETCONF, 0, type);
1746 	if (err < 0) {
1747 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1748 		WARN_ON(err == -EMSGSIZE);
1749 		kfree_skb(skb);
1750 		goto errout;
1751 	}
1752 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1753 	return;
1754 errout:
1755 	if (err < 0)
1756 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1757 }
1758 
/*
 * Attribute policy used by inet_netconf_get_devconf() when parsing a
 * request; each listed attribute has .len set to sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
};
1764 
1765 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1766 				    struct nlmsghdr *nlh)
1767 {
1768 	struct net *net = sock_net(in_skb->sk);
1769 	struct nlattr *tb[NETCONFA_MAX+1];
1770 	struct netconfmsg *ncm;
1771 	struct sk_buff *skb;
1772 	struct ipv4_devconf *devconf;
1773 	struct in_device *in_dev;
1774 	struct net_device *dev;
1775 	int ifindex;
1776 	int err;
1777 
1778 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1779 			  devconf_ipv4_policy);
1780 	if (err < 0)
1781 		goto errout;
1782 
1783 	err = EINVAL;
1784 	if (!tb[NETCONFA_IFINDEX])
1785 		goto errout;
1786 
1787 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1788 	switch (ifindex) {
1789 	case NETCONFA_IFINDEX_ALL:
1790 		devconf = net->ipv4.devconf_all;
1791 		break;
1792 	case NETCONFA_IFINDEX_DEFAULT:
1793 		devconf = net->ipv4.devconf_dflt;
1794 		break;
1795 	default:
1796 		dev = __dev_get_by_index(net, ifindex);
1797 		if (dev == NULL)
1798 			goto errout;
1799 		in_dev = __in_dev_get_rtnl(dev);
1800 		if (in_dev == NULL)
1801 			goto errout;
1802 		devconf = &in_dev->cnf;
1803 		break;
1804 	}
1805 
1806 	err = -ENOBUFS;
1807 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1808 	if (skb == NULL)
1809 		goto errout;
1810 
1811 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1812 					NETLINK_CB(in_skb).portid,
1813 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1814 					-1);
1815 	if (err < 0) {
1816 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1817 		WARN_ON(err == -EMSGSIZE);
1818 		kfree_skb(skb);
1819 		goto errout;
1820 	}
1821 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1822 errout:
1823 	return err;
1824 }
1825 
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message per device that
 * has an in_device, followed by one for the "all" configuration and one
 * for the "default" configuration.
 *
 * The dump is resumable. cb->args[0] holds the hash bucket (with the two
 * values past NETDEV_HASHENTRIES standing for the "all" and "default"
 * entries) and cb->args[1] the device position inside the bucket.
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	/* Restore the cursor left by the previous invocation */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx applies to the first bucket only; later buckets start at 0 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value consumed by nl_dump_check_consistent() below */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in an earlier pass */
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			/* A result <= 0 ends this pass; the cursor is saved at done */
			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      -1) <= 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets done: append the "all" pseudo-entry */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" pseudo-entry */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
done:
	/* Save the cursor for the next invocation */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1896 
1897 #ifdef CONFIG_SYSCTL
1898 
1899 static void devinet_copy_dflt_conf(struct net *net, int i)
1900 {
1901 	struct net_device *dev;
1902 
1903 	rcu_read_lock();
1904 	for_each_netdev_rcu(net, dev) {
1905 		struct in_device *in_dev;
1906 
1907 		in_dev = __in_dev_get_rcu(dev);
1908 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1909 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1910 	}
1911 	rcu_read_unlock();
1912 }
1913 
1914 /* called with RTNL locked */
1915 static void inet_forward_change(struct net *net)
1916 {
1917 	struct net_device *dev;
1918 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1919 
1920 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1921 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1922 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1923 				    NETCONFA_IFINDEX_ALL,
1924 				    net->ipv4.devconf_all);
1925 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1926 				    NETCONFA_IFINDEX_DEFAULT,
1927 				    net->ipv4.devconf_dflt);
1928 
1929 	for_each_netdev(net, dev) {
1930 		struct in_device *in_dev;
1931 		if (on)
1932 			dev_disable_lro(dev);
1933 		rcu_read_lock();
1934 		in_dev = __in_dev_get_rcu(dev);
1935 		if (in_dev) {
1936 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1937 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1938 						    dev->ifindex, &in_dev->cnf);
1939 		}
1940 		rcu_read_unlock();
1941 	}
1942 }
1943 
1944 static int devinet_conf_proc(ctl_table *ctl, int write,
1945 			     void __user *buffer,
1946 			     size_t *lenp, loff_t *ppos)
1947 {
1948 	int old_value = *(int *)ctl->data;
1949 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1950 	int new_value = *(int *)ctl->data;
1951 
1952 	if (write) {
1953 		struct ipv4_devconf *cnf = ctl->extra1;
1954 		struct net *net = ctl->extra2;
1955 		int i = (int *)ctl->data - cnf->data;
1956 
1957 		set_bit(i, cnf->state);
1958 
1959 		if (cnf == net->ipv4.devconf_dflt)
1960 			devinet_copy_dflt_conf(net, i);
1961 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1962 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1963 			if ((new_value == 0) && (old_value != 0))
1964 				rt_cache_flush(net);
1965 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1966 		    new_value != old_value) {
1967 			int ifindex;
1968 
1969 			if (cnf == net->ipv4.devconf_dflt)
1970 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1971 			else if (cnf == net->ipv4.devconf_all)
1972 				ifindex = NETCONFA_IFINDEX_ALL;
1973 			else {
1974 				struct in_device *idev =
1975 					container_of(cnf, struct in_device,
1976 						     cnf);
1977 				ifindex = idev->dev->ifindex;
1978 			}
1979 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1980 						    ifindex, cnf);
1981 		}
1982 	}
1983 
1984 	return ret;
1985 }
1986 
/*
 * sysctl handler for the "forwarding" entries (and "ip_forward").
 *
 * Delegates the read/write to proc_dointvec(). When a write changed the
 * value:
 *  - for the "default" config only a netconf notification is sent;
 *  - otherwise RTNL is taken with rtnl_trylock(); if that fails the old
 *    value and file position are restored and the syscall is restarted.
 *    With the lock held, a change to the "all" config is applied through
 *    inet_forward_change(); a change to a single device disables LRO on it
 *    when forwarding was turned on and sends a netconf notification. The
 *    route cache is flushed after the lock is dropped.
 *
 * ctl->extra1 is the ipv4_devconf being edited, ctl->extra2 the namespace.
 * Returns the proc_dointvec() result, or the restart_syscall() result.
 */
static int devinet_sysctl_forward(ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;	/* value before the write */
	loff_t pos = *ppos;	/* file position before the write */
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: cnf is embedded in its in_device */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2029 
2030 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2031 				void __user *buffer,
2032 				size_t *lenp, loff_t *ppos)
2033 {
2034 	int *valp = ctl->data;
2035 	int val = *valp;
2036 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2037 	struct net *net = ctl->extra2;
2038 
2039 	if (write && *valp != val)
2040 		rt_cache_flush(net);
2041 
2042 	return ret;
2043 }
2044 
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 *
 * @attr: IPV4_DEVCONF_ suffix; .data points at slot (IPV4_DEVCONF_<attr> - 1)
 *        of the global ipv4_devconf.data array
 * @name: procname of the entry
 * @mval: file mode
 * @proc: proc_handler
 *
 * .extra1 points at the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Entry with mode 0644 handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Entry with mode 0444 handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Entry with mode 0644 and a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Entry with mode 0644 handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2067 
/*
 * Template for the per-configuration sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this structure for each
 * ipv4_devconf and rebases the .data / .extra1 pointers, which here refer
 * to the global ipv4_devconf.
 *
 * NOTE(review): the array has __IPV4_DEVCONF_MAX slots and the register
 * loop stops one short of the end; presumably the last slot is the empty
 * terminator - confirm.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* read-only (mode 0444) */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read/write entries handled by devinet_conf_proc() */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2108 
2109 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2110 					struct ipv4_devconf *p)
2111 {
2112 	int i;
2113 	struct devinet_sysctl_table *t;
2114 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2115 
2116 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2117 	if (!t)
2118 		goto out;
2119 
2120 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2121 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2122 		t->devinet_vars[i].extra1 = p;
2123 		t->devinet_vars[i].extra2 = net;
2124 	}
2125 
2126 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2127 
2128 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2129 	if (!t->sysctl_header)
2130 		goto free;
2131 
2132 	p->sysctl = t;
2133 	return 0;
2134 
2135 free:
2136 	kfree(t);
2137 out:
2138 	return -ENOBUFS;
2139 }
2140 
2141 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2142 {
2143 	struct devinet_sysctl_table *t = cnf->sysctl;
2144 
2145 	if (t == NULL)
2146 		return;
2147 
2148 	cnf->sysctl = NULL;
2149 	unregister_net_sysctl_table(t->sysctl_header);
2150 	kfree(t);
2151 }
2152 
2153 static void devinet_sysctl_register(struct in_device *idev)
2154 {
2155 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2156 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2157 					&idev->cnf);
2158 }
2159 
2160 static void devinet_sysctl_unregister(struct in_device *idev)
2161 {
2162 	__devinet_sysctl_unregister(&idev->cnf);
2163 	neigh_sysctl_unregister(idev->arp_parms);
2164 }
2165 
/*
 * sysctl table holding the single "ip_forward" entry. As defined here it
 * is bound to the global ipv4_devconf FORWARDING slot and to init_net, and
 * is handled by devinet_sysctl_forward().
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* terminator */
};
2179 #endif
2180 
2181 static __net_init int devinet_init_net(struct net *net)
2182 {
2183 	int err;
2184 	struct ipv4_devconf *all, *dflt;
2185 #ifdef CONFIG_SYSCTL
2186 	struct ctl_table *tbl = ctl_forward_entry;
2187 	struct ctl_table_header *forw_hdr;
2188 #endif
2189 
2190 	err = -ENOMEM;
2191 	all = &ipv4_devconf;
2192 	dflt = &ipv4_devconf_dflt;
2193 
2194 	if (!net_eq(net, &init_net)) {
2195 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2196 		if (all == NULL)
2197 			goto err_alloc_all;
2198 
2199 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2200 		if (dflt == NULL)
2201 			goto err_alloc_dflt;
2202 
2203 #ifdef CONFIG_SYSCTL
2204 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2205 		if (tbl == NULL)
2206 			goto err_alloc_ctl;
2207 
2208 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2209 		tbl[0].extra1 = all;
2210 		tbl[0].extra2 = net;
2211 #endif
2212 	}
2213 
2214 #ifdef CONFIG_SYSCTL
2215 	err = __devinet_sysctl_register(net, "all", all);
2216 	if (err < 0)
2217 		goto err_reg_all;
2218 
2219 	err = __devinet_sysctl_register(net, "default", dflt);
2220 	if (err < 0)
2221 		goto err_reg_dflt;
2222 
2223 	err = -ENOMEM;
2224 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2225 	if (forw_hdr == NULL)
2226 		goto err_reg_ctl;
2227 	net->ipv4.forw_hdr = forw_hdr;
2228 #endif
2229 
2230 	net->ipv4.devconf_all = all;
2231 	net->ipv4.devconf_dflt = dflt;
2232 	return 0;
2233 
2234 #ifdef CONFIG_SYSCTL
2235 err_reg_ctl:
2236 	__devinet_sysctl_unregister(dflt);
2237 err_reg_dflt:
2238 	__devinet_sysctl_unregister(all);
2239 err_reg_all:
2240 	if (tbl != ctl_forward_entry)
2241 		kfree(tbl);
2242 err_alloc_ctl:
2243 #endif
2244 	if (dflt != &ipv4_devconf_dflt)
2245 		kfree(dflt);
2246 err_alloc_dflt:
2247 	if (all != &ipv4_devconf)
2248 		kfree(all);
2249 err_alloc_all:
2250 	return err;
2251 }
2252 
2253 static __net_exit void devinet_exit_net(struct net *net)
2254 {
2255 #ifdef CONFIG_SYSCTL
2256 	struct ctl_table *tbl;
2257 
2258 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2259 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2260 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2261 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2262 	kfree(tbl);
2263 #endif
2264 	kfree(net->ipv4.devconf_dflt);
2265 	kfree(net->ipv4.devconf_all);
2266 }
2267 
2268 static __net_initdata struct pernet_operations devinet_ops = {
2269 	.init = devinet_init_net,
2270 	.exit = devinet_exit_net,
2271 };
2272 
2273 static struct rtnl_af_ops inet_af_ops = {
2274 	.family		  = AF_INET,
2275 	.fill_link_af	  = inet_fill_link_af,
2276 	.get_link_af_size = inet_get_link_af_size,
2277 	.validate_link_af = inet_validate_link_af,
2278 	.set_link_af	  = inet_set_link_af,
2279 };
2280 
2281 void __init devinet_init(void)
2282 {
2283 	int i;
2284 
2285 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2286 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2287 
2288 	register_pernet_subsys(&devinet_ops);
2289 
2290 	register_gifconf(PF_INET, inet_gifconf);
2291 	register_netdevice_notifier(&ip_netdev_notifier);
2292 
2293 	schedule_delayed_work(&check_lifetime_work, 0);
2294 
2295 	rtnl_af_register(&inet_af_ops);
2296 
2297 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2298 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2299 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2300 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2301 		      inet_netconf_dump_devconf, NULL);
2302 }
2303 
2304