xref: /openbmc/linux/net/ipv4/devinet.c (revision 8c0b9ee8)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
/*
 * Statically initialised IPv4 device configuration block.
 *
 * Slots in .data are addressed as IPV4_DEVCONF_<attr> - 1.  Redirect
 * handling and shared-media are switched on, and the IGMPv2/IGMPv3
 * unsolicited report intervals are preset (values in milliseconds).
 * Every slot not listed here is zero.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
80 
/*
 * Second statically initialised IPv4 configuration block.  It carries the
 * same settings as ipv4_devconf above and additionally enables
 * ACCEPT_SOURCE_ROUTE.
 *
 * NOTE(review): presumably this is the template behind
 * net->ipv4.devconf_dflt that inetdev_init() copies into new devices;
 * the assignment is not visible in this part of the file - confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
92 
/*
 * Access attribute @attr in the default devconf block of namespace @net.
 * @net is parenthesized so that any pointer-valued expression can be
 * passed, not only a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95 
/*
 * Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse() by the RTM address handlers in this file: addresses and
 * flags are 32-bit values, the label is a string of at most
 * IFNAMSIZ - 1 characters, and the cache info attribute is sized by
 * struct ifa_cacheinfo.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
104 
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT (= 256) buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of configured addresses, chained through in_ifaddr::hash
 * and indexed by inet_addr_hash().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
/*
 * Unlink @ifa from the address hash table and reinitialise its hash node.
 * RTNL must be held (asserted).
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	u32 hash = inet_addr_hash(net, addr);
142 	struct net_device *result = NULL;
143 	struct in_ifaddr *ifa;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 		if (ifa->ifa_local == addr) {
148 			struct net_device *dev = ifa->ifa_dev->dev;
149 
150 			if (!net_eq(dev_net(dev), net))
151 				continue;
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176 
/* Forward declaration: the definition is not in this part of the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is added or removed in this file.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration trivially succeeds ... */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
/* ... and unregistration has nothing to undo. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
194 
195 /* Locks all the inet devices. */
196 
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201 
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 	if (ifa->ifa_dev)
206 		in_dev_put(ifa->ifa_dev);
207 	kfree(ifa);
208 }
209 
/*
 * Schedule @ifa for release through call_rcu(); the actual freeing is
 * done later by inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
214 
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217 	struct net_device *dev = idev->dev;
218 
219 	WARN_ON(idev->ifa_list);
220 	WARN_ON(idev->mc_list);
221 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 	dev_put(dev);
226 	if (!idev->dead)
227 		pr_err("Freeing alive in_device %p\n", idev);
228 	else
229 		kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232 
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235 	struct in_device *in_dev;
236 	int err = -ENOMEM;
237 
238 	ASSERT_RTNL();
239 
240 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 	if (!in_dev)
242 		goto out;
243 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 			sizeof(in_dev->cnf));
245 	in_dev->cnf.sysctl = NULL;
246 	in_dev->dev = dev;
247 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 	if (!in_dev->arp_parms)
249 		goto out_kfree;
250 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 		dev_disable_lro(dev);
252 	/* Reference in_dev->dev */
253 	dev_hold(dev);
254 	/* Account for reference dev->ip_ptr (below) */
255 	in_dev_hold(in_dev);
256 
257 	err = devinet_sysctl_register(in_dev);
258 	if (err) {
259 		in_dev->dead = 1;
260 		in_dev_put(in_dev);
261 		in_dev = NULL;
262 		goto out;
263 	}
264 	ip_mc_init_dev(in_dev);
265 	if (dev->flags & IFF_UP)
266 		ip_mc_up(in_dev);
267 
268 	/* we can receive as soon as ip_ptr is set -- do this last */
269 	rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 	return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 	kfree(in_dev);
274 	in_dev = NULL;
275 	goto out;
276 }
277 
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281 	in_dev_put(idev);
282 }
283 
/*
 * inetdev_destroy - tear down the IPv4 state attached to a device.
 *
 * Marks @in_dev dead, destroys its multicast state, deletes and frees
 * every configured address, clears dev->ip_ptr, unregisters the sysctl
 * entries, releases the ARP neighbour parameters and finally drops a
 * reference on @in_dev from an RCU callback.  RTNL must be held
 * (asserted).
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	/* Always delete the list head; destroy=0 because the address is
	 * freed explicitly right after being unlinked.
	 */
	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	/* Defer the put through call_rcu() instead of dropping it now. */
	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
310 
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313 	rcu_read_lock();
314 	for_primary_ifa(in_dev) {
315 		if (inet_ifa_match(a, ifa)) {
316 			if (!b || inet_ifa_match(b, ifa)) {
317 				rcu_read_unlock();
318 				return 1;
319 			}
320 		}
321 	} endfor_ifa(in_dev);
322 	rcu_read_unlock();
323 	return 0;
324 }
325 
/*
 * __inet_del_ifa - remove one address from a device's address list.
 * @in_dev:  device owning the address
 * @ifap:    pointer to the list link that currently points at the
 *           address to delete (*ifap is the victim)
 * @destroy: non-zero to free the address once it has been unlinked
 * @nlh:     netlink request header passed through to rtmsg_ifa()
 * @portid:  netlink port id passed through to rtmsg_ifa()
 *
 * When the victim is a primary address, the secondaries that share its
 * mask and match its prefix are either deleted with it or, if secondary
 * promotion is enabled on the device, the first of them is promoted to
 * primary.  Address removal is announced with RTM_DELADDR followed by a
 * NETDEV_DOWN notification; a promotion is announced with RTM_NEWADDR
 * followed by NETDEV_UP.  RTNL must be held (asserted).
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope is not
			 * narrower than the victim's: a promoted address is
			 * re-linked right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: delete the secondary now. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet change their
	 * primary IP; we must remove all their routes silently
	 * and add them back later with the new prefsrc. Do this
	 * while all addresses are still on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Saved before re-linking: the remaining secondaries to
		 * re-add routes for start here.
		 */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted entry so it follows last_prim; when
		 * prev_prom is NULL the entry is left where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes removed above for the other
		 * secondaries of the same subnet.
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
425 
/*
 * Delete the address referenced by *@ifap from @in_dev without a netlink
 * request context (NULL header, port id 0).  See __inet_del_ifa().
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
431 
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(); it is (re)queued whenever
 * an address is inserted or has its lifetime changed, and by
 * check_lifetime() itself.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435 
/*
 * __inet_insert_ifa - link a prepared address into its device's list.
 * @ifa:    address to insert; ifa->ifa_dev must already be set
 * @nlh:    netlink request header passed through to rtmsg_ifa()
 * @portid: netlink port id passed through to rtmsg_ifa()
 *
 * The address becomes a secondary when an existing entry has the same
 * mask and matches its prefix; otherwise it is a primary and is placed
 * after the last primary whose scope is not narrower than its own.
 * On success the address is hashed, the lifetime worker is kicked, and
 * RTM_NEWADDR followed by NETDEV_UP is emitted.
 *
 * @ifa is consumed (freed) on every path that does not insert it.
 * RTNL must be held (asserted).
 *
 * Returns 0 on success and also when @ifa has no local address (it is
 * freed in that case), -EEXIST if the same local address is already
 * present in the subnet, or -EINVAL if the scope differs from the
 * matching existing entry.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start out as primary; the scan below may demote it. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* After the loop ifap points at the tail link, which is where a
	 * secondary is appended; a primary goes to last_primary instead.
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-run the lifetime check immediately. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
492 
/*
 * Insert @ifa without a netlink request context (NULL header, port id 0).
 * Return value and ownership rules are those of __inet_insert_ifa().
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
497 
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
501 
502 	ASSERT_RTNL();
503 
504 	if (!in_dev) {
505 		inet_free_ifa(ifa);
506 		return -ENOBUFS;
507 	}
508 	ipv4_devconf_setall(in_dev);
509 	neigh_parms_data_state_setall(in_dev->arp_parms);
510 	if (ifa->ifa_dev != in_dev) {
511 		WARN_ON(ifa->ifa_dev);
512 		in_dev_hold(in_dev);
513 		ifa->ifa_dev = in_dev;
514 	}
515 	if (ipv4_is_loopback(ifa->ifa_local))
516 		ifa->ifa_scope = RT_SCOPE_HOST;
517 	return inet_insert_ifa(ifa);
518 }
519 
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525 	struct net_device *dev;
526 	struct in_device *in_dev = NULL;
527 
528 	rcu_read_lock();
529 	dev = dev_get_by_index_rcu(net, ifindex);
530 	if (dev)
531 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532 	rcu_read_unlock();
533 	return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536 
537 /* Called only from RTNL semaphored context. No locks. */
538 
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540 				    __be32 mask)
541 {
542 	ASSERT_RTNL();
543 
544 	for_primary_ifa(in_dev) {
545 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546 			return ifa;
547 	} endfor_ifa(in_dev);
548 	return NULL;
549 }
550 
551 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
552 {
553 	struct net *net = sock_net(skb->sk);
554 	struct nlattr *tb[IFA_MAX+1];
555 	struct in_device *in_dev;
556 	struct ifaddrmsg *ifm;
557 	struct in_ifaddr *ifa, **ifap;
558 	int err = -EINVAL;
559 
560 	ASSERT_RTNL();
561 
562 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
563 	if (err < 0)
564 		goto errout;
565 
566 	ifm = nlmsg_data(nlh);
567 	in_dev = inetdev_by_index(net, ifm->ifa_index);
568 	if (in_dev == NULL) {
569 		err = -ENODEV;
570 		goto errout;
571 	}
572 
573 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574 	     ifap = &ifa->ifa_next) {
575 		if (tb[IFA_LOCAL] &&
576 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
577 			continue;
578 
579 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
580 			continue;
581 
582 		if (tb[IFA_ADDRESS] &&
583 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
585 			continue;
586 
587 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
588 		return 0;
589 	}
590 
591 	err = -EADDRNOTAVAIL;
592 errout:
593 	return err;
594 }
595 
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
597 
/*
 * check_lifetime - delayed-work handler that expires or deprecates
 * addresses whose lifetimes have run out.
 *
 * Each hash bucket is scanned twice.  The first pass runs under
 * rcu_read_lock() only: it decides whether anything in the bucket needs
 * changing and tracks the earliest time at which the next check is
 * needed.  Only when a change is needed is the bucket walked again under
 * RTNL, where expired addresses are deleted and addresses past their
 * preferred lifetime are flagged IFA_F_DEPRECATED and announced with
 * RTM_NEWADDR.  Addresses flagged IFA_F_PERMANENT are skipped in both
 * passes.  The handler re-queues itself before returning.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan, no RTNL taken. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Expired: must be deleted in pass 2. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Past preferred lifetime: next event is the
				 * end of the valid lifetime.
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: next event is the end of
				 * the preferred lifetime.
				 */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply changes under RTNL.  The _safe iterator is
		 * used because entries may be unhashed while walking.
		 */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* inet_del_ifa() needs the list link that
				 * points at ifa, so search the device list.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
695 
696 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
697 			     __u32 prefered_lft)
698 {
699 	unsigned long timeout;
700 
701 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
702 
703 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
704 	if (addrconf_finite_timeout(timeout))
705 		ifa->ifa_valid_lft = timeout;
706 	else
707 		ifa->ifa_flags |= IFA_F_PERMANENT;
708 
709 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710 	if (addrconf_finite_timeout(timeout)) {
711 		if (timeout == 0)
712 			ifa->ifa_flags |= IFA_F_DEPRECATED;
713 		ifa->ifa_preferred_lft = timeout;
714 	}
715 	ifa->ifa_tstamp = jiffies;
716 	if (!ifa->ifa_cstamp)
717 		ifa->ifa_cstamp = ifa->ifa_tstamp;
718 }
719 
/*
 * rtm_to_ifaddr - build an in_ifaddr from an RTM_NEWADDR request.
 * @net:           namespace the request was received in
 * @nlh:           netlink message to parse
 * @pvalid_lft:    written with the requested valid lifetime, only when
 *                 IFA_CACHEINFO is present
 * @pprefered_lft: written with the requested preferred lifetime, only
 *                 when IFA_CACHEINFO is present
 *
 * The returned address holds a reference on the device's in_device and
 * is not yet linked anywhere.
 *
 * Returns the new address or an ERR_PTR():
 *   the nlmsg_parse() error for a malformed message;
 *   -EINVAL  if the prefix length exceeds 32, IFA_LOCAL is missing, or
 *            the cache info is invalid (zero valid lifetime, or
 *            preferred > valid);
 *   -ENODEV  if the interface index is unknown;
 *   -ENOBUFS if the device has no in_device, or the address allocation
 *            fails.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (dev == NULL)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (in_dev == NULL)
		goto errout;

	ifa = inet_alloc_ifa();
	if (ifa == NULL)
		/*
		 * A potential indev allocation can be left alive; it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference owned by ifa->ifa_dev (assigned below); it is dropped
	 * when the address is freed.
	 */
	in_dev_hold(in_dev);

	/* Without an explicit IFA_ADDRESS, the address equals the local one. */
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* The 32-bit IFA_FLAGS attribute overrides the header's flags field. */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);

	/* The label defaults to the device name. */
	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	/* Frees ifa and, through ifa->ifa_dev, drops the in_dev reference. */
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
802 
803 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
804 {
805 	struct in_device *in_dev = ifa->ifa_dev;
806 	struct in_ifaddr *ifa1, **ifap;
807 
808 	if (!ifa->ifa_local)
809 		return NULL;
810 
811 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812 	     ifap = &ifa1->ifa_next) {
813 		if (ifa1->ifa_mask == ifa->ifa_mask &&
814 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
815 		    ifa1->ifa_local == ifa->ifa_local)
816 			return ifa1;
817 	}
818 	return NULL;
819 }
820 
821 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
822 {
823 	struct net *net = sock_net(skb->sk);
824 	struct in_ifaddr *ifa;
825 	struct in_ifaddr *ifa_existing;
826 	__u32 valid_lft = INFINITY_LIFE_TIME;
827 	__u32 prefered_lft = INFINITY_LIFE_TIME;
828 
829 	ASSERT_RTNL();
830 
831 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
832 	if (IS_ERR(ifa))
833 		return PTR_ERR(ifa);
834 
835 	ifa_existing = find_matching_ifa(ifa);
836 	if (!ifa_existing) {
837 		/* It would be best to check for !NLM_F_CREATE here but
838 		 * userspace already relies on not having to provide this.
839 		 */
840 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
842 	} else {
843 		inet_free_ifa(ifa);
844 
845 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
846 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
847 			return -EEXIST;
848 		ifa = ifa_existing;
849 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850 		cancel_delayed_work(&check_lifetime_work);
851 		queue_delayed_work(system_power_efficient_wq,
852 				&check_lifetime_work, 0);
853 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
855 	}
856 	return 0;
857 }
858 
859 /*
860  *	Determine a default network mask, based on the IP address.
861  */
862 
863 static int inet_abc_len(__be32 addr)
864 {
865 	int rc = -1;	/* Something else, probably a multicast. */
866 
867 	if (ipv4_is_zeronet(addr))
868 		rc = 0;
869 	else {
870 		__u32 haddr = ntohl(addr);
871 
872 		if (IN_CLASSA(haddr))
873 			rc = 8;
874 		else if (IN_CLASSB(haddr))
875 			rc = 16;
876 		else if (IN_CLASSC(haddr))
877 			rc = 24;
878 	}
879 
880 	return rc;
881 }
882 
883 
/*
 * devinet_ioctl - handle the IPv4 interface address ioctls.
 * @net: namespace the request applies to
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq
 *
 * The ifreq is copied in, the interface name is split at an optional
 * ':' (alias label), and the device is looked up under RTNL.  The
 * address to operate on is selected by label, and for the "get"
 * commands first by label plus address when the caller passed an
 * AF_INET address.  "Get" commands copy the ifreq back to user space;
 * "set" commands require CAP_NET_ADMIN in the namespace's user ns and an
 * AF_INET address (SIOCSIFFLAGS requires only the capability).
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        copying to/from user space failed;
 *   -EPERM         missing CAP_NET_ADMIN for a "set" command;
 *   -EINVAL        unknown command, wrong address family, or an
 *                  address/netmask rejected by validation;
 *   -ENODEV        no such interface;
 *   -EADDRNOTAVAIL no matching address where one is required;
 *   -ENOBUFS       address allocation failed, or the device has no
 *                  in_device (via inet_set_ifa());
 *   or whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias label ("eth0:1") down to the device
	 * name; the ':' is restored once the device has been found.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Clear the reply area; the answer is filled in below. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (creates one) and SIOCSIFFLAGS (may act on the
	 * device as a whole) can proceed without an existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias label, clearing IFF_UP deletes that address;
		 * the device flags themselves are not touched.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; the same structure is
			 * re-inserted with the new address below.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Derive mask and broadcast from the address class, except
		 * on point-to-point links which get a /32.
		 */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert the same structure. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop RTNL first, then copy the reply out. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1125 
1126 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1127 {
1128 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129 	struct in_ifaddr *ifa;
1130 	struct ifreq ifr;
1131 	int done = 0;
1132 
1133 	if (!in_dev)
1134 		goto out;
1135 
1136 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1137 		if (!buf) {
1138 			done += sizeof(ifr);
1139 			continue;
1140 		}
1141 		if (len < (int) sizeof(ifr))
1142 			break;
1143 		memset(&ifr, 0, sizeof(struct ifreq));
1144 		strcpy(ifr.ifr_name, ifa->ifa_label);
1145 
1146 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1148 								ifa->ifa_local;
1149 
1150 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1151 			done = -EFAULT;
1152 			break;
1153 		}
1154 		buf  += sizeof(struct ifreq);
1155 		len  -= sizeof(struct ifreq);
1156 		done += sizeof(struct ifreq);
1157 	}
1158 out:
1159 	return done;
1160 }
1161 
1162 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1163 {
1164 	__be32 addr = 0;
1165 	struct in_device *in_dev;
1166 	struct net *net = dev_net(dev);
1167 
1168 	rcu_read_lock();
1169 	in_dev = __in_dev_get_rcu(dev);
1170 	if (!in_dev)
1171 		goto no_in_dev;
1172 
1173 	for_primary_ifa(in_dev) {
1174 		if (ifa->ifa_scope > scope)
1175 			continue;
1176 		if (!dst || inet_ifa_match(dst, ifa)) {
1177 			addr = ifa->ifa_local;
1178 			break;
1179 		}
1180 		if (!addr)
1181 			addr = ifa->ifa_local;
1182 	} endfor_ifa(in_dev);
1183 
1184 	if (addr)
1185 		goto out_unlock;
1186 no_in_dev:
1187 
1188 	/* Not loopback addresses on loopback should be preferred
1189 	   in this case. It is important that lo is the first interface
1190 	   in dev_base list.
1191 	 */
1192 	for_each_netdev_rcu(net, dev) {
1193 		in_dev = __in_dev_get_rcu(dev);
1194 		if (!in_dev)
1195 			continue;
1196 
1197 		for_primary_ifa(in_dev) {
1198 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1199 			    ifa->ifa_scope <= scope) {
1200 				addr = ifa->ifa_local;
1201 				goto out_unlock;
1202 			}
1203 		} endfor_ifa(in_dev);
1204 	}
1205 out_unlock:
1206 	rcu_read_unlock();
1207 	return addr;
1208 }
1209 EXPORT_SYMBOL(inet_select_addr);
1210 
1211 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1212 			      __be32 local, int scope)
1213 {
1214 	int same = 0;
1215 	__be32 addr = 0;
1216 
1217 	for_ifa(in_dev) {
1218 		if (!addr &&
1219 		    (local == ifa->ifa_local || !local) &&
1220 		    ifa->ifa_scope <= scope) {
1221 			addr = ifa->ifa_local;
1222 			if (same)
1223 				break;
1224 		}
1225 		if (!same) {
1226 			same = (!local || inet_ifa_match(local, ifa)) &&
1227 				(!dst || inet_ifa_match(dst, ifa));
1228 			if (same && addr) {
1229 				if (local || !dst)
1230 					break;
1231 				/* Is the selected addr into dst subnet? */
1232 				if (inet_ifa_match(addr, ifa))
1233 					break;
1234 				/* No, then can we use new local src? */
1235 				if (ifa->ifa_scope <= scope) {
1236 					addr = ifa->ifa_local;
1237 					break;
1238 				}
1239 				/* search for large dst subnet for addr */
1240 				same = 0;
1241 			}
1242 		}
1243 	} endfor_ifa(in_dev);
1244 
1245 	return same ? addr : 0;
1246 }
1247 
1248 /*
1249  * Confirm that local IP address exists using wildcards:
1250  * - net: netns to check, cannot be NULL
1251  * - in_dev: only on this interface, NULL=any interface
1252  * - dst: only in the same subnet as dst, 0=any dst
1253  * - local: address, 0=autoselect the local address
1254  * - scope: maximum allowed scope value for the local address
1255  */
1256 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257 			 __be32 dst, __be32 local, int scope)
1258 {
1259 	__be32 addr = 0;
1260 	struct net_device *dev;
1261 
1262 	if (in_dev != NULL)
1263 		return confirm_addr_indev(in_dev, dst, local, scope);
1264 
1265 	rcu_read_lock();
1266 	for_each_netdev_rcu(net, dev) {
1267 		in_dev = __in_dev_get_rcu(dev);
1268 		if (in_dev) {
1269 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1270 			if (addr)
1271 				break;
1272 		}
1273 	}
1274 	rcu_read_unlock();
1275 
1276 	return addr;
1277 }
1278 EXPORT_SYMBOL(inet_confirm_addr);
1279 
1280 /*
1281  *	Device notifier
1282  */
1283 
1284 int register_inetaddr_notifier(struct notifier_block *nb)
1285 {
1286 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1287 }
1288 EXPORT_SYMBOL(register_inetaddr_notifier);
1289 
1290 int unregister_inetaddr_notifier(struct notifier_block *nb)
1291 {
1292 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1293 }
1294 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1295 
1296 /* Rename ifa_labels for a device name change. Make some effort to preserve
1297  * existing alias numbering and to create unique labels if possible.
1298 */
1299 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1300 {
1301 	struct in_ifaddr *ifa;
1302 	int named = 0;
1303 
1304 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305 		char old[IFNAMSIZ], *dot;
1306 
1307 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1309 		if (named++ == 0)
1310 			goto skip;
1311 		dot = strchr(old, ':');
1312 		if (dot == NULL) {
1313 			sprintf(old, ":%d", named);
1314 			dot = old;
1315 		}
1316 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317 			strcat(ifa->ifa_label, dot);
1318 		else
1319 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1320 skip:
1321 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1322 	}
1323 }
1324 
/* Return true when @mtu is at least 68 bytes, the smallest MTU this file
 * accepts for running IPv4 on a device (presumably the RFC 791 minimum).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1329 
1330 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331 					struct in_device *in_dev)
1332 
1333 {
1334 	struct in_ifaddr *ifa;
1335 
1336 	for (ifa = in_dev->ifa_list; ifa;
1337 	     ifa = ifa->ifa_next) {
1338 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339 			 ifa->ifa_local, dev,
1340 			 ifa->ifa_local, NULL,
1341 			 dev->dev_addr, NULL);
1342 	}
1343 }
1344 
1345 /* Called only under RTNL semaphore */
1346 
1347 static int inetdev_event(struct notifier_block *this, unsigned long event,
1348 			 void *ptr)
1349 {
1350 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1351 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1352 
1353 	ASSERT_RTNL();
1354 
1355 	if (!in_dev) {
1356 		if (event == NETDEV_REGISTER) {
1357 			in_dev = inetdev_init(dev);
1358 			if (IS_ERR(in_dev))
1359 				return notifier_from_errno(PTR_ERR(in_dev));
1360 			if (dev->flags & IFF_LOOPBACK) {
1361 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1362 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1363 			}
1364 		} else if (event == NETDEV_CHANGEMTU) {
1365 			/* Re-enabling IP */
1366 			if (inetdev_valid_mtu(dev->mtu))
1367 				in_dev = inetdev_init(dev);
1368 		}
1369 		goto out;
1370 	}
1371 
1372 	switch (event) {
1373 	case NETDEV_REGISTER:
1374 		pr_debug("%s: bug\n", __func__);
1375 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1376 		break;
1377 	case NETDEV_UP:
1378 		if (!inetdev_valid_mtu(dev->mtu))
1379 			break;
1380 		if (dev->flags & IFF_LOOPBACK) {
1381 			struct in_ifaddr *ifa = inet_alloc_ifa();
1382 
1383 			if (ifa) {
1384 				INIT_HLIST_NODE(&ifa->hash);
1385 				ifa->ifa_local =
1386 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1387 				ifa->ifa_prefixlen = 8;
1388 				ifa->ifa_mask = inet_make_mask(8);
1389 				in_dev_hold(in_dev);
1390 				ifa->ifa_dev = in_dev;
1391 				ifa->ifa_scope = RT_SCOPE_HOST;
1392 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1393 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1394 						 INFINITY_LIFE_TIME);
1395 				ipv4_devconf_setall(in_dev);
1396 				neigh_parms_data_state_setall(in_dev->arp_parms);
1397 				inet_insert_ifa(ifa);
1398 			}
1399 		}
1400 		ip_mc_up(in_dev);
1401 		/* fall through */
1402 	case NETDEV_CHANGEADDR:
1403 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1404 			break;
1405 		/* fall through */
1406 	case NETDEV_NOTIFY_PEERS:
1407 		/* Send gratuitous ARP to notify of link change */
1408 		inetdev_send_gratuitous_arp(dev, in_dev);
1409 		break;
1410 	case NETDEV_DOWN:
1411 		ip_mc_down(in_dev);
1412 		break;
1413 	case NETDEV_PRE_TYPE_CHANGE:
1414 		ip_mc_unmap(in_dev);
1415 		break;
1416 	case NETDEV_POST_TYPE_CHANGE:
1417 		ip_mc_remap(in_dev);
1418 		break;
1419 	case NETDEV_CHANGEMTU:
1420 		if (inetdev_valid_mtu(dev->mtu))
1421 			break;
1422 		/* disable IP when MTU is not enough */
1423 	case NETDEV_UNREGISTER:
1424 		inetdev_destroy(in_dev);
1425 		break;
1426 	case NETDEV_CHANGENAME:
1427 		/* Do not notify about label change, this event is
1428 		 * not interesting to applications using netlink.
1429 		 */
1430 		inetdev_changename(dev, in_dev);
1431 
1432 		devinet_sysctl_unregister(in_dev);
1433 		devinet_sysctl_register(in_dev);
1434 		break;
1435 	}
1436 out:
1437 	return NOTIFY_DONE;
1438 }
1439 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1443 
1444 static size_t inet_nlmsg_size(void)
1445 {
1446 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447 	       + nla_total_size(4) /* IFA_ADDRESS */
1448 	       + nla_total_size(4) /* IFA_LOCAL */
1449 	       + nla_total_size(4) /* IFA_BROADCAST */
1450 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451 	       + nla_total_size(4)  /* IFA_FLAGS */
1452 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1453 }
1454 
1455 static inline u32 cstamp_delta(unsigned long cstamp)
1456 {
1457 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1458 }
1459 
1460 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461 			 unsigned long tstamp, u32 preferred, u32 valid)
1462 {
1463 	struct ifa_cacheinfo ci;
1464 
1465 	ci.cstamp = cstamp_delta(cstamp);
1466 	ci.tstamp = cstamp_delta(tstamp);
1467 	ci.ifa_prefered = preferred;
1468 	ci.ifa_valid = valid;
1469 
1470 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1471 }
1472 
1473 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474 			    u32 portid, u32 seq, int event, unsigned int flags)
1475 {
1476 	struct ifaddrmsg *ifm;
1477 	struct nlmsghdr  *nlh;
1478 	u32 preferred, valid;
1479 
1480 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1481 	if (nlh == NULL)
1482 		return -EMSGSIZE;
1483 
1484 	ifm = nlmsg_data(nlh);
1485 	ifm->ifa_family = AF_INET;
1486 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487 	ifm->ifa_flags = ifa->ifa_flags;
1488 	ifm->ifa_scope = ifa->ifa_scope;
1489 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1490 
1491 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492 		preferred = ifa->ifa_preferred_lft;
1493 		valid = ifa->ifa_valid_lft;
1494 		if (preferred != INFINITY_LIFE_TIME) {
1495 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1496 
1497 			if (preferred > tval)
1498 				preferred -= tval;
1499 			else
1500 				preferred = 0;
1501 			if (valid != INFINITY_LIFE_TIME) {
1502 				if (valid > tval)
1503 					valid -= tval;
1504 				else
1505 					valid = 0;
1506 			}
1507 		}
1508 	} else {
1509 		preferred = INFINITY_LIFE_TIME;
1510 		valid = INFINITY_LIFE_TIME;
1511 	}
1512 	if ((ifa->ifa_address &&
1513 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1514 	    (ifa->ifa_local &&
1515 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516 	    (ifa->ifa_broadcast &&
1517 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518 	    (ifa->ifa_label[0] &&
1519 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1522 			  preferred, valid))
1523 		goto nla_put_failure;
1524 
1525 	nlmsg_end(skb, nlh);
1526 	return 0;
1527 
1528 nla_put_failure:
1529 	nlmsg_cancel(skb, nlh);
1530 	return -EMSGSIZE;
1531 }
1532 
1533 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1534 {
1535 	struct net *net = sock_net(skb->sk);
1536 	int h, s_h;
1537 	int idx, s_idx;
1538 	int ip_idx, s_ip_idx;
1539 	struct net_device *dev;
1540 	struct in_device *in_dev;
1541 	struct in_ifaddr *ifa;
1542 	struct hlist_head *head;
1543 
1544 	s_h = cb->args[0];
1545 	s_idx = idx = cb->args[1];
1546 	s_ip_idx = ip_idx = cb->args[2];
1547 
1548 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1549 		idx = 0;
1550 		head = &net->dev_index_head[h];
1551 		rcu_read_lock();
1552 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1553 			  net->dev_base_seq;
1554 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1555 			if (idx < s_idx)
1556 				goto cont;
1557 			if (h > s_h || idx > s_idx)
1558 				s_ip_idx = 0;
1559 			in_dev = __in_dev_get_rcu(dev);
1560 			if (!in_dev)
1561 				goto cont;
1562 
1563 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1564 			     ifa = ifa->ifa_next, ip_idx++) {
1565 				if (ip_idx < s_ip_idx)
1566 					continue;
1567 				if (inet_fill_ifaddr(skb, ifa,
1568 					     NETLINK_CB(cb->skb).portid,
1569 					     cb->nlh->nlmsg_seq,
1570 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1571 					rcu_read_unlock();
1572 					goto done;
1573 				}
1574 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1575 			}
1576 cont:
1577 			idx++;
1578 		}
1579 		rcu_read_unlock();
1580 	}
1581 
1582 done:
1583 	cb->args[0] = h;
1584 	cb->args[1] = idx;
1585 	cb->args[2] = ip_idx;
1586 
1587 	return skb->len;
1588 }
1589 
1590 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1591 		      u32 portid)
1592 {
1593 	struct sk_buff *skb;
1594 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1595 	int err = -ENOBUFS;
1596 	struct net *net;
1597 
1598 	net = dev_net(ifa->ifa_dev->dev);
1599 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1600 	if (skb == NULL)
1601 		goto errout;
1602 
1603 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1604 	if (err < 0) {
1605 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1606 		WARN_ON(err == -EMSGSIZE);
1607 		kfree_skb(skb);
1608 		goto errout;
1609 	}
1610 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1611 	return;
1612 errout:
1613 	if (err < 0)
1614 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1615 }
1616 
1617 static size_t inet_get_link_af_size(const struct net_device *dev)
1618 {
1619 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1620 
1621 	if (!in_dev)
1622 		return 0;
1623 
1624 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1625 }
1626 
1627 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1628 {
1629 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1630 	struct nlattr *nla;
1631 	int i;
1632 
1633 	if (!in_dev)
1634 		return -ENODATA;
1635 
1636 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1637 	if (nla == NULL)
1638 		return -EMSGSIZE;
1639 
1640 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1641 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1642 
1643 	return 0;
1644 }
1645 
/* Parsing policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1649 
1650 static int inet_validate_link_af(const struct net_device *dev,
1651 				 const struct nlattr *nla)
1652 {
1653 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1654 	int err, rem;
1655 
1656 	if (dev && !__in_dev_get_rtnl(dev))
1657 		return -EAFNOSUPPORT;
1658 
1659 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1660 	if (err < 0)
1661 		return err;
1662 
1663 	if (tb[IFLA_INET_CONF]) {
1664 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1665 			int cfgid = nla_type(a);
1666 
1667 			if (nla_len(a) < 4)
1668 				return -EINVAL;
1669 
1670 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1671 				return -EINVAL;
1672 		}
1673 	}
1674 
1675 	return 0;
1676 }
1677 
1678 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1679 {
1680 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1681 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1682 	int rem;
1683 
1684 	if (!in_dev)
1685 		return -EAFNOSUPPORT;
1686 
1687 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1688 		BUG();
1689 
1690 	if (tb[IFLA_INET_CONF]) {
1691 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1692 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1693 	}
1694 
1695 	return 0;
1696 }
1697 
1698 static int inet_netconf_msgsize_devconf(int type)
1699 {
1700 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1701 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1702 
1703 	/* type -1 is used for ALL */
1704 	if (type == -1 || type == NETCONFA_FORWARDING)
1705 		size += nla_total_size(4);
1706 	if (type == -1 || type == NETCONFA_RP_FILTER)
1707 		size += nla_total_size(4);
1708 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1709 		size += nla_total_size(4);
1710 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1711 		size += nla_total_size(4);
1712 
1713 	return size;
1714 }
1715 
1716 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1717 				     struct ipv4_devconf *devconf, u32 portid,
1718 				     u32 seq, int event, unsigned int flags,
1719 				     int type)
1720 {
1721 	struct nlmsghdr  *nlh;
1722 	struct netconfmsg *ncm;
1723 
1724 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1725 			flags);
1726 	if (nlh == NULL)
1727 		return -EMSGSIZE;
1728 
1729 	ncm = nlmsg_data(nlh);
1730 	ncm->ncm_family = AF_INET;
1731 
1732 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1733 		goto nla_put_failure;
1734 
1735 	/* type -1 is used for ALL */
1736 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1737 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1738 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1739 		goto nla_put_failure;
1740 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1741 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1742 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1743 		goto nla_put_failure;
1744 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1745 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1746 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1747 		goto nla_put_failure;
1748 	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1749 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1750 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1751 		goto nla_put_failure;
1752 
1753 	nlmsg_end(skb, nlh);
1754 	return 0;
1755 
1756 nla_put_failure:
1757 	nlmsg_cancel(skb, nlh);
1758 	return -EMSGSIZE;
1759 }
1760 
1761 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1762 				 struct ipv4_devconf *devconf)
1763 {
1764 	struct sk_buff *skb;
1765 	int err = -ENOBUFS;
1766 
1767 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1768 	if (skb == NULL)
1769 		goto errout;
1770 
1771 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1772 					RTM_NEWNETCONF, 0, type);
1773 	if (err < 0) {
1774 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1775 		WARN_ON(err == -EMSGSIZE);
1776 		kfree_skb(skb);
1777 		goto errout;
1778 	}
1779 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1780 	return;
1781 errout:
1782 	if (err < 0)
1783 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1784 }
1785 
/* Parsing policy for netconf requests: each listed attribute carries a
 * length constraint of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
};
1792 
1793 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1794 				    struct nlmsghdr *nlh)
1795 {
1796 	struct net *net = sock_net(in_skb->sk);
1797 	struct nlattr *tb[NETCONFA_MAX+1];
1798 	struct netconfmsg *ncm;
1799 	struct sk_buff *skb;
1800 	struct ipv4_devconf *devconf;
1801 	struct in_device *in_dev;
1802 	struct net_device *dev;
1803 	int ifindex;
1804 	int err;
1805 
1806 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1807 			  devconf_ipv4_policy);
1808 	if (err < 0)
1809 		goto errout;
1810 
1811 	err = EINVAL;
1812 	if (!tb[NETCONFA_IFINDEX])
1813 		goto errout;
1814 
1815 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1816 	switch (ifindex) {
1817 	case NETCONFA_IFINDEX_ALL:
1818 		devconf = net->ipv4.devconf_all;
1819 		break;
1820 	case NETCONFA_IFINDEX_DEFAULT:
1821 		devconf = net->ipv4.devconf_dflt;
1822 		break;
1823 	default:
1824 		dev = __dev_get_by_index(net, ifindex);
1825 		if (dev == NULL)
1826 			goto errout;
1827 		in_dev = __in_dev_get_rtnl(dev);
1828 		if (in_dev == NULL)
1829 			goto errout;
1830 		devconf = &in_dev->cnf;
1831 		break;
1832 	}
1833 
1834 	err = -ENOBUFS;
1835 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1836 	if (skb == NULL)
1837 		goto errout;
1838 
1839 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1840 					NETLINK_CB(in_skb).portid,
1841 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1842 					-1);
1843 	if (err < 0) {
1844 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1845 		WARN_ON(err == -EMSGSIZE);
1846 		kfree_skb(skb);
1847 		goto errout;
1848 	}
1849 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1850 errout:
1851 	return err;
1852 }
1853 
1854 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1855 				     struct netlink_callback *cb)
1856 {
1857 	struct net *net = sock_net(skb->sk);
1858 	int h, s_h;
1859 	int idx, s_idx;
1860 	struct net_device *dev;
1861 	struct in_device *in_dev;
1862 	struct hlist_head *head;
1863 
1864 	s_h = cb->args[0];
1865 	s_idx = idx = cb->args[1];
1866 
1867 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1868 		idx = 0;
1869 		head = &net->dev_index_head[h];
1870 		rcu_read_lock();
1871 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1872 			  net->dev_base_seq;
1873 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1874 			if (idx < s_idx)
1875 				goto cont;
1876 			in_dev = __in_dev_get_rcu(dev);
1877 			if (!in_dev)
1878 				goto cont;
1879 
1880 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1881 						      &in_dev->cnf,
1882 						      NETLINK_CB(cb->skb).portid,
1883 						      cb->nlh->nlmsg_seq,
1884 						      RTM_NEWNETCONF,
1885 						      NLM_F_MULTI,
1886 						      -1) < 0) {
1887 				rcu_read_unlock();
1888 				goto done;
1889 			}
1890 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1891 cont:
1892 			idx++;
1893 		}
1894 		rcu_read_unlock();
1895 	}
1896 	if (h == NETDEV_HASHENTRIES) {
1897 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1898 					      net->ipv4.devconf_all,
1899 					      NETLINK_CB(cb->skb).portid,
1900 					      cb->nlh->nlmsg_seq,
1901 					      RTM_NEWNETCONF, NLM_F_MULTI,
1902 					      -1) < 0)
1903 			goto done;
1904 		else
1905 			h++;
1906 	}
1907 	if (h == NETDEV_HASHENTRIES + 1) {
1908 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1909 					      net->ipv4.devconf_dflt,
1910 					      NETLINK_CB(cb->skb).portid,
1911 					      cb->nlh->nlmsg_seq,
1912 					      RTM_NEWNETCONF, NLM_F_MULTI,
1913 					      -1) < 0)
1914 			goto done;
1915 		else
1916 			h++;
1917 	}
1918 done:
1919 	cb->args[0] = h;
1920 	cb->args[1] = idx;
1921 
1922 	return skb->len;
1923 }
1924 
1925 #ifdef CONFIG_SYSCTL
1926 
1927 static void devinet_copy_dflt_conf(struct net *net, int i)
1928 {
1929 	struct net_device *dev;
1930 
1931 	rcu_read_lock();
1932 	for_each_netdev_rcu(net, dev) {
1933 		struct in_device *in_dev;
1934 
1935 		in_dev = __in_dev_get_rcu(dev);
1936 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1937 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1938 	}
1939 	rcu_read_unlock();
1940 }
1941 
1942 /* called with RTNL locked */
1943 static void inet_forward_change(struct net *net)
1944 {
1945 	struct net_device *dev;
1946 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1947 
1948 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1949 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1950 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1951 				    NETCONFA_IFINDEX_ALL,
1952 				    net->ipv4.devconf_all);
1953 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1954 				    NETCONFA_IFINDEX_DEFAULT,
1955 				    net->ipv4.devconf_dflt);
1956 
1957 	for_each_netdev(net, dev) {
1958 		struct in_device *in_dev;
1959 		if (on)
1960 			dev_disable_lro(dev);
1961 		rcu_read_lock();
1962 		in_dev = __in_dev_get_rcu(dev);
1963 		if (in_dev) {
1964 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1965 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1966 						    dev->ifindex, &in_dev->cnf);
1967 		}
1968 		rcu_read_unlock();
1969 	}
1970 }
1971 
1972 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1973 {
1974 	if (cnf == net->ipv4.devconf_dflt)
1975 		return NETCONFA_IFINDEX_DEFAULT;
1976 	else if (cnf == net->ipv4.devconf_all)
1977 		return NETCONFA_IFINDEX_ALL;
1978 	else {
1979 		struct in_device *idev
1980 			= container_of(cnf, struct in_device, cnf);
1981 		return idev->dev->ifindex;
1982 	}
1983 }
1984 
1985 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1986 			     void __user *buffer,
1987 			     size_t *lenp, loff_t *ppos)
1988 {
1989 	int old_value = *(int *)ctl->data;
1990 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1991 	int new_value = *(int *)ctl->data;
1992 
1993 	if (write) {
1994 		struct ipv4_devconf *cnf = ctl->extra1;
1995 		struct net *net = ctl->extra2;
1996 		int i = (int *)ctl->data - cnf->data;
1997 		int ifindex;
1998 
1999 		set_bit(i, cnf->state);
2000 
2001 		if (cnf == net->ipv4.devconf_dflt)
2002 			devinet_copy_dflt_conf(net, i);
2003 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2004 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2005 			if ((new_value == 0) && (old_value != 0))
2006 				rt_cache_flush(net);
2007 
2008 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2009 		    new_value != old_value) {
2010 			ifindex = devinet_conf_ifindex(net, cnf);
2011 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2012 						    ifindex, cnf);
2013 		}
2014 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2015 		    new_value != old_value) {
2016 			ifindex = devinet_conf_ifindex(net, cnf);
2017 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2018 						    ifindex, cnf);
2019 		}
2020 	}
2021 
2022 	return ret;
2023 }
2024 
2025 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2026 				  void __user *buffer,
2027 				  size_t *lenp, loff_t *ppos)
2028 {
2029 	int *valp = ctl->data;
2030 	int val = *valp;
2031 	loff_t pos = *ppos;
2032 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2033 
2034 	if (write && *valp != val) {
2035 		struct net *net = ctl->extra2;
2036 
2037 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2038 			if (!rtnl_trylock()) {
2039 				/* Restore the original values before restarting */
2040 				*valp = val;
2041 				*ppos = pos;
2042 				return restart_syscall();
2043 			}
2044 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2045 				inet_forward_change(net);
2046 			} else {
2047 				struct ipv4_devconf *cnf = ctl->extra1;
2048 				struct in_device *idev =
2049 					container_of(cnf, struct in_device, cnf);
2050 				if (*valp)
2051 					dev_disable_lro(idev->dev);
2052 				inet_netconf_notify_devconf(net,
2053 							    NETCONFA_FORWARDING,
2054 							    idev->dev->ifindex,
2055 							    cnf);
2056 			}
2057 			rtnl_unlock();
2058 			rt_cache_flush(net);
2059 		} else
2060 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2061 						    NETCONFA_IFINDEX_DEFAULT,
2062 						    net->ipv4.devconf_dflt);
2063 	}
2064 
2065 	return ret;
2066 }
2067 
2068 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2069 				void __user *buffer,
2070 				size_t *lenp, loff_t *ppos)
2071 {
2072 	int *valp = ctl->data;
2073 	int val = *valp;
2074 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2075 	struct net *net = ctl->extra2;
2076 
2077 	if (write && *valp != val)
2078 		rt_cache_flush(net);
2079 
2080 	return ret;
2081 }
2082 
/* Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the template
 * ipv4_devconf.data array and .extra1 at the template block itself;
 * __devinet_sysctl_register() rebases both onto the real config block.
 * @name is the procfs file name, @mval its mode and @proc its handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2105 
/* Template sysctl table for one devconf block.
 *
 * __devinet_sysctl_register() duplicates this structure for each config
 * block and points the copy's entries at that block.  sysctl_header holds
 * the registration handle of the copy.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding needs its own handler (see devinet_sysctl_forward) */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2150 
2151 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2152 					struct ipv4_devconf *p)
2153 {
2154 	int i;
2155 	struct devinet_sysctl_table *t;
2156 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2157 
2158 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2159 	if (!t)
2160 		goto out;
2161 
2162 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2163 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2164 		t->devinet_vars[i].extra1 = p;
2165 		t->devinet_vars[i].extra2 = net;
2166 	}
2167 
2168 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2169 
2170 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2171 	if (!t->sysctl_header)
2172 		goto free;
2173 
2174 	p->sysctl = t;
2175 	return 0;
2176 
2177 free:
2178 	kfree(t);
2179 out:
2180 	return -ENOBUFS;
2181 }
2182 
2183 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2184 {
2185 	struct devinet_sysctl_table *t = cnf->sysctl;
2186 
2187 	if (t == NULL)
2188 		return;
2189 
2190 	cnf->sysctl = NULL;
2191 	unregister_net_sysctl_table(t->sysctl_header);
2192 	kfree(t);
2193 }
2194 
2195 static int devinet_sysctl_register(struct in_device *idev)
2196 {
2197 	int err;
2198 
2199 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2200 		return -EINVAL;
2201 
2202 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2203 	if (err)
2204 		return err;
2205 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2206 					&idev->cnf);
2207 	if (err)
2208 		neigh_sysctl_unregister(idev->arp_parms);
2209 	return err;
2210 }
2211 
2212 static void devinet_sysctl_unregister(struct in_device *idev)
2213 {
2214 	__devinet_sysctl_unregister(&idev->cnf);
2215 	neigh_sysctl_unregister(idev->arp_parms);
2216 }
2217 
2218 static struct ctl_table ctl_forward_entry[] = {
2219 	{
2220 		.procname	= "ip_forward",
2221 		.data		= &ipv4_devconf.data[
2222 					IPV4_DEVCONF_FORWARDING - 1],
2223 		.maxlen		= sizeof(int),
2224 		.mode		= 0644,
2225 		.proc_handler	= devinet_sysctl_forward,
2226 		.extra1		= &ipv4_devconf,
2227 		.extra2		= &init_net,
2228 	},
2229 	{ },
2230 };
2231 #endif
2232 
2233 static __net_init int devinet_init_net(struct net *net)
2234 {
2235 	int err;
2236 	struct ipv4_devconf *all, *dflt;
2237 #ifdef CONFIG_SYSCTL
2238 	struct ctl_table *tbl = ctl_forward_entry;
2239 	struct ctl_table_header *forw_hdr;
2240 #endif
2241 
2242 	err = -ENOMEM;
2243 	all = &ipv4_devconf;
2244 	dflt = &ipv4_devconf_dflt;
2245 
2246 	if (!net_eq(net, &init_net)) {
2247 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2248 		if (all == NULL)
2249 			goto err_alloc_all;
2250 
2251 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2252 		if (dflt == NULL)
2253 			goto err_alloc_dflt;
2254 
2255 #ifdef CONFIG_SYSCTL
2256 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2257 		if (tbl == NULL)
2258 			goto err_alloc_ctl;
2259 
2260 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2261 		tbl[0].extra1 = all;
2262 		tbl[0].extra2 = net;
2263 #endif
2264 	}
2265 
2266 #ifdef CONFIG_SYSCTL
2267 	err = __devinet_sysctl_register(net, "all", all);
2268 	if (err < 0)
2269 		goto err_reg_all;
2270 
2271 	err = __devinet_sysctl_register(net, "default", dflt);
2272 	if (err < 0)
2273 		goto err_reg_dflt;
2274 
2275 	err = -ENOMEM;
2276 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2277 	if (forw_hdr == NULL)
2278 		goto err_reg_ctl;
2279 	net->ipv4.forw_hdr = forw_hdr;
2280 #endif
2281 
2282 	net->ipv4.devconf_all = all;
2283 	net->ipv4.devconf_dflt = dflt;
2284 	return 0;
2285 
2286 #ifdef CONFIG_SYSCTL
2287 err_reg_ctl:
2288 	__devinet_sysctl_unregister(dflt);
2289 err_reg_dflt:
2290 	__devinet_sysctl_unregister(all);
2291 err_reg_all:
2292 	if (tbl != ctl_forward_entry)
2293 		kfree(tbl);
2294 err_alloc_ctl:
2295 #endif
2296 	if (dflt != &ipv4_devconf_dflt)
2297 		kfree(dflt);
2298 err_alloc_dflt:
2299 	if (all != &ipv4_devconf)
2300 		kfree(all);
2301 err_alloc_all:
2302 	return err;
2303 }
2304 
2305 static __net_exit void devinet_exit_net(struct net *net)
2306 {
2307 #ifdef CONFIG_SYSCTL
2308 	struct ctl_table *tbl;
2309 
2310 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2311 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2312 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2313 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2314 	kfree(tbl);
2315 #endif
2316 	kfree(net->ipv4.devconf_dflt);
2317 	kfree(net->ipv4.devconf_all);
2318 }
2319 
2320 static __net_initdata struct pernet_operations devinet_ops = {
2321 	.init = devinet_init_net,
2322 	.exit = devinet_exit_net,
2323 };
2324 
2325 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2326 	.family		  = AF_INET,
2327 	.fill_link_af	  = inet_fill_link_af,
2328 	.get_link_af_size = inet_get_link_af_size,
2329 	.validate_link_af = inet_validate_link_af,
2330 	.set_link_af	  = inet_set_link_af,
2331 };
2332 
2333 void __init devinet_init(void)
2334 {
2335 	int i;
2336 
2337 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2338 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2339 
2340 	register_pernet_subsys(&devinet_ops);
2341 
2342 	register_gifconf(PF_INET, inet_gifconf);
2343 	register_netdevice_notifier(&ip_netdev_notifier);
2344 
2345 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2346 
2347 	rtnl_af_register(&inet_af_ops);
2348 
2349 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2350 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2351 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2352 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2353 		      inet_netconf_dump_devconf, NULL);
2354 }
2355 
2356