xref: /openbmc/linux/net/ipv4/devinet.c (revision d2999e1b)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127 	ASSERT_RTNL();
128 	hlist_del_init_rcu(&ifa->hash);
129 }
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	u32 hash = inet_addr_hash(net, addr);
142 	struct net_device *result = NULL;
143 	struct in_ifaddr *ifa;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 		if (ifa->ifa_local == addr) {
148 			struct net_device *dev = ifa->ifa_dev->dev;
149 
150 			if (!net_eq(dev_net(dev), net))
151 				continue;
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fall back to the FIB local table so that communication
162 		 * over loopback subnets works.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
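/* Illustrative usage sketch (added comment, not part of the original source):
 * a caller that only needs the device inside an RCU section can pass
 * devref=false and must not touch the pointer after rcu_read_unlock():
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		mtu = dev->mtu;
 *	rcu_read_unlock();
 *
 * With devref=true a reference is taken on the result and the caller must
 * balance it with dev_put(dev) when done.
 */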
176 
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 			 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static void devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static void devinet_sysctl_register(struct in_device *idev)
187 {
188 }
189 static void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 	if (ifa->ifa_dev)
205 		in_dev_put(ifa->ifa_dev);
206 	kfree(ifa);
207 }
208 
209 static void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213 
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216 	struct net_device *dev = idev->dev;
217 
218 	WARN_ON(idev->ifa_list);
219 	WARN_ON(idev->mc_list);
220 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224 	dev_put(dev);
225 	if (!idev->dead)
226 		pr_err("Freeing alive in_device %p\n", idev);
227 	else
228 		kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231 
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234 	struct in_device *in_dev;
235 
236 	ASSERT_RTNL();
237 
238 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 	if (!in_dev)
240 		goto out;
241 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 			sizeof(in_dev->cnf));
243 	in_dev->cnf.sysctl = NULL;
244 	in_dev->dev = dev;
245 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 	if (!in_dev->arp_parms)
247 		goto out_kfree;
248 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 		dev_disable_lro(dev);
250 	/* Reference in_dev->dev */
251 	dev_hold(dev);
252 	/* Account for reference dev->ip_ptr (below) */
253 	in_dev_hold(in_dev);
254 
255 	devinet_sysctl_register(in_dev);
256 	ip_mc_init_dev(in_dev);
257 	if (dev->flags & IFF_UP)
258 		ip_mc_up(in_dev);
259 
260 	/* we can receive as soon as ip_ptr is set -- do this last */
261 	rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263 	return in_dev;
264 out_kfree:
265 	kfree(in_dev);
266 	in_dev = NULL;
267 	goto out;
268 }
269 
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
273 	in_dev_put(idev);
274 }
275 
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278 	struct in_ifaddr *ifa;
279 	struct net_device *dev;
280 
281 	ASSERT_RTNL();
282 
283 	dev = in_dev->dev;
284 
285 	in_dev->dead = 1;
286 
287 	ip_mc_destroy_dev(in_dev);
288 
289 	while ((ifa = in_dev->ifa_list) != NULL) {
290 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 		inet_free_ifa(ifa);
292 	}
293 
294 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
295 
296 	devinet_sysctl_unregister(in_dev);
297 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 	arp_ifdown(dev);
299 
300 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302 
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305 	rcu_read_lock();
306 	for_primary_ifa(in_dev) {
307 		if (inet_ifa_match(a, ifa)) {
308 			if (!b || inet_ifa_match(b, ifa)) {
309 				rcu_read_unlock();
310 				return 1;
311 			}
312 		}
313 	} endfor_ifa(in_dev);
314 	rcu_read_unlock();
315 	return 0;
316 }
317 
318 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319 			 int destroy, struct nlmsghdr *nlh, u32 portid)
320 {
321 	struct in_ifaddr *promote = NULL;
322 	struct in_ifaddr *ifa, *ifa1 = *ifap;
323 	struct in_ifaddr *last_prim = in_dev->ifa_list;
324 	struct in_ifaddr *prev_prom = NULL;
325 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 
327 	ASSERT_RTNL();
328 
329 	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
330 	 * unless alias promotion is enabled.
331 	 */
332 
333 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
334 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
335 
336 		while ((ifa = *ifap1) != NULL) {
337 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
338 			    ifa1->ifa_scope <= ifa->ifa_scope)
339 				last_prim = ifa;
340 
341 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
342 			    ifa1->ifa_mask != ifa->ifa_mask ||
343 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
344 				ifap1 = &ifa->ifa_next;
345 				prev_prom = ifa;
346 				continue;
347 			}
348 
349 			if (!do_promote) {
350 				inet_hash_remove(ifa);
351 				*ifap1 = ifa->ifa_next;
352 
353 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
354 				blocking_notifier_call_chain(&inetaddr_chain,
355 						NETDEV_DOWN, ifa);
356 				inet_free_ifa(ifa);
357 			} else {
358 				promote = ifa;
359 				break;
360 			}
361 		}
362 	}
363 
364 	/* On promotion all secondaries from the subnet are changing
365 	 * the primary IP; we must remove all their routes silently
366 	 * and later add them back with the new prefsrc. Do this
367 	 * while all addresses are still on the device list.
368 	 */
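	/* Illustrative scenario (comment added for clarity, not in the
	 * original source): with promote_secondaries enabled, deleting
	 * primary 192.0.2.1/24 while secondary 192.0.2.2/24 exists makes
	 * 192.0.2.2 the new primary; its routes are dropped here and
	 * re-added below with 192.0.2.2 as the new preferred source.
	 */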
369 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370 		if (ifa1->ifa_mask == ifa->ifa_mask &&
371 		    inet_ifa_match(ifa1->ifa_address, ifa))
372 			fib_del_ifaddr(ifa, ifa1);
373 	}
374 
375 	/* 2. Unlink it */
376 
377 	*ifap = ifa1->ifa_next;
378 	inet_hash_remove(ifa1);
379 
380 	/* 3. Announce address deletion */
381 
382 	/* Send the message first, then call the notifier.
383 	   At first sight, the FIB updates triggered by the notifier
384 	   will refer to an already deleted ifaddr, which could confuse
385 	   netlink listeners. That is not a problem: if the routes were
386 	   deleted first, a daemon such as gated would see them disappear
387 	   while it still considers the ifaddr valid, and would try to
388 	   restore the deleted routes. So this order is correct.
389 	 */
390 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
391 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392 
393 	if (promote) {
394 		struct in_ifaddr *next_sec = promote->ifa_next;
395 
396 		if (prev_prom) {
397 			prev_prom->ifa_next = promote->ifa_next;
398 			promote->ifa_next = last_prim->ifa_next;
399 			last_prim->ifa_next = promote;
400 		}
401 
402 		promote->ifa_flags &= ~IFA_F_SECONDARY;
403 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
404 		blocking_notifier_call_chain(&inetaddr_chain,
405 				NETDEV_UP, promote);
406 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
407 			if (ifa1->ifa_mask != ifa->ifa_mask ||
408 			    !inet_ifa_match(ifa1->ifa_address, ifa))
409 					continue;
410 			fib_add_ifaddr(ifa);
411 		}
412 
413 	}
414 	if (destroy)
415 		inet_free_ifa(ifa1);
416 }
417 
418 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419 			 int destroy)
420 {
421 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
422 }
423 
424 static void check_lifetime(struct work_struct *work);
425 
426 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
427 
428 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429 			     u32 portid)
430 {
431 	struct in_device *in_dev = ifa->ifa_dev;
432 	struct in_ifaddr *ifa1, **ifap, **last_primary;
433 
434 	ASSERT_RTNL();
435 
436 	if (!ifa->ifa_local) {
437 		inet_free_ifa(ifa);
438 		return 0;
439 	}
440 
441 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
442 	last_primary = &in_dev->ifa_list;
443 
444 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
445 	     ifap = &ifa1->ifa_next) {
446 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
447 		    ifa->ifa_scope <= ifa1->ifa_scope)
448 			last_primary = &ifa1->ifa_next;
449 		if (ifa1->ifa_mask == ifa->ifa_mask &&
450 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
451 			if (ifa1->ifa_local == ifa->ifa_local) {
452 				inet_free_ifa(ifa);
453 				return -EEXIST;
454 			}
455 			if (ifa1->ifa_scope != ifa->ifa_scope) {
456 				inet_free_ifa(ifa);
457 				return -EINVAL;
458 			}
459 			ifa->ifa_flags |= IFA_F_SECONDARY;
460 		}
461 	}
462 
463 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
464 		prandom_seed((__force u32) ifa->ifa_local);
465 		ifap = last_primary;
466 	}
467 
468 	ifa->ifa_next = *ifap;
469 	*ifap = ifa;
470 
471 	inet_hash_insert(dev_net(in_dev->dev), ifa);
472 
473 	cancel_delayed_work(&check_lifetime_work);
474 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
475 
476 	/* Send the message first, then call the notifier.
477 	   The notifier will trigger the FIB update, so that
478 	   netlink listeners will know about the new ifaddr. */
479 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
480 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
481 
482 	return 0;
483 }
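/* Behaviour note (comment added for clarity, not in the original source):
 * an address whose mask and network match an existing address on the device
 * is flagged IFA_F_SECONDARY and appended at the tail of ifa_list, while a
 * new primary is inserted among the existing primaries at a position
 * determined by scope.  For example, adding 192.0.2.2/24 to a device that
 * already has 192.0.2.1/24 creates a secondary ("alias") address.
 */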
484 
485 static int inet_insert_ifa(struct in_ifaddr *ifa)
486 {
487 	return __inet_insert_ifa(ifa, NULL, 0);
488 }
489 
490 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
491 {
492 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
493 
494 	ASSERT_RTNL();
495 
496 	if (!in_dev) {
497 		inet_free_ifa(ifa);
498 		return -ENOBUFS;
499 	}
500 	ipv4_devconf_setall(in_dev);
501 	neigh_parms_data_state_setall(in_dev->arp_parms);
502 	if (ifa->ifa_dev != in_dev) {
503 		WARN_ON(ifa->ifa_dev);
504 		in_dev_hold(in_dev);
505 		ifa->ifa_dev = in_dev;
506 	}
507 	if (ipv4_is_loopback(ifa->ifa_local))
508 		ifa->ifa_scope = RT_SCOPE_HOST;
509 	return inet_insert_ifa(ifa);
510 }
511 
512 /* Caller must hold RCU or RTNL:
513  * we don't take a reference on the found in_device.
514  */
515 struct in_device *inetdev_by_index(struct net *net, int ifindex)
516 {
517 	struct net_device *dev;
518 	struct in_device *in_dev = NULL;
519 
520 	rcu_read_lock();
521 	dev = dev_get_by_index_rcu(net, ifindex);
522 	if (dev)
523 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
524 	rcu_read_unlock();
525 	return in_dev;
526 }
527 EXPORT_SYMBOL(inetdev_by_index);
528 
529 /* Called only from RTNL semaphored context. No locks. */
530 
531 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532 				    __be32 mask)
533 {
534 	ASSERT_RTNL();
535 
536 	for_primary_ifa(in_dev) {
537 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
538 			return ifa;
539 	} endfor_ifa(in_dev);
540 	return NULL;
541 }
542 
543 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
544 {
545 	struct net *net = sock_net(skb->sk);
546 	struct nlattr *tb[IFA_MAX+1];
547 	struct in_device *in_dev;
548 	struct ifaddrmsg *ifm;
549 	struct in_ifaddr *ifa, **ifap;
550 	int err = -EINVAL;
551 
552 	ASSERT_RTNL();
553 
554 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
555 	if (err < 0)
556 		goto errout;
557 
558 	ifm = nlmsg_data(nlh);
559 	in_dev = inetdev_by_index(net, ifm->ifa_index);
560 	if (in_dev == NULL) {
561 		err = -ENODEV;
562 		goto errout;
563 	}
564 
565 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
566 	     ifap = &ifa->ifa_next) {
567 		if (tb[IFA_LOCAL] &&
568 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
569 			continue;
570 
571 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
572 			continue;
573 
574 		if (tb[IFA_ADDRESS] &&
575 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
576 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
577 			continue;
578 
579 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
580 		return 0;
581 	}
582 
583 	err = -EADDRNOTAVAIL;
584 errout:
585 	return err;
586 }
587 
588 #define INFINITY_LIFE_TIME	0xFFFFFFFF
589 
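/* check_lifetime() walks the address hash in two passes: a cheap RCU-protected
 * scan that only decides whether any address needs attention, and, only if so,
 * a second pass under RTNL that deletes addresses whose valid_lft has run out
 * and marks as IFA_F_DEPRECATED those whose preferred_lft has.  The work is
 * then rescheduled for the earliest upcoming deadline.
 * (Descriptive comment added for clarity; not in the original source.)
 */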
590 static void check_lifetime(struct work_struct *work)
591 {
592 	unsigned long now, next, next_sec, next_sched;
593 	struct in_ifaddr *ifa;
594 	struct hlist_node *n;
595 	int i;
596 
597 	now = jiffies;
598 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
599 
600 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
601 		bool change_needed = false;
602 
603 		rcu_read_lock();
604 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
605 			unsigned long age;
606 
607 			if (ifa->ifa_flags & IFA_F_PERMANENT)
608 				continue;
609 
610 			/* We try to batch several events at once. */
611 			age = (now - ifa->ifa_tstamp +
612 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
613 
614 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
615 			    age >= ifa->ifa_valid_lft) {
616 				change_needed = true;
617 			} else if (ifa->ifa_preferred_lft ==
618 				   INFINITY_LIFE_TIME) {
619 				continue;
620 			} else if (age >= ifa->ifa_preferred_lft) {
621 				if (time_before(ifa->ifa_tstamp +
622 						ifa->ifa_valid_lft * HZ, next))
623 					next = ifa->ifa_tstamp +
624 					       ifa->ifa_valid_lft * HZ;
625 
626 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
627 					change_needed = true;
628 			} else if (time_before(ifa->ifa_tstamp +
629 					       ifa->ifa_preferred_lft * HZ,
630 					       next)) {
631 				next = ifa->ifa_tstamp +
632 				       ifa->ifa_preferred_lft * HZ;
633 			}
634 		}
635 		rcu_read_unlock();
636 		if (!change_needed)
637 			continue;
638 		rtnl_lock();
639 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
640 			unsigned long age;
641 
642 			if (ifa->ifa_flags & IFA_F_PERMANENT)
643 				continue;
644 
645 			/* We try to batch several events at once. */
646 			age = (now - ifa->ifa_tstamp +
647 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
648 
649 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
650 			    age >= ifa->ifa_valid_lft) {
651 				struct in_ifaddr **ifap;
652 
653 				for (ifap = &ifa->ifa_dev->ifa_list;
654 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
655 					if (*ifap == ifa) {
656 						inet_del_ifa(ifa->ifa_dev,
657 							     ifap, 1);
658 						break;
659 					}
660 				}
661 			} else if (ifa->ifa_preferred_lft !=
662 				   INFINITY_LIFE_TIME &&
663 				   age >= ifa->ifa_preferred_lft &&
664 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
665 				ifa->ifa_flags |= IFA_F_DEPRECATED;
666 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
667 			}
668 		}
669 		rtnl_unlock();
670 	}
671 
672 	next_sec = round_jiffies_up(next);
673 	next_sched = next;
674 
675 	/* If rounded timeout is accurate enough, accept it. */
676 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
677 		next_sched = next_sec;
678 
679 	now = jiffies;
680 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
681 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
682 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
683 
684 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
685 			next_sched - now);
686 }
687 
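/* Lifetime semantics (descriptive comment added for clarity, not in the
 * original source): an infinite valid_lft marks the address IFA_F_PERMANENT;
 * a finite preferred_lft of zero marks it IFA_F_DEPRECATED immediately.
 * Lifetimes are kept in seconds and evaluated against ifa_tstamp by
 * check_lifetime() above.
 */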
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689 			     __u32 prefered_lft)
690 {
691 	unsigned long timeout;
692 
693 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
694 
695 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
696 	if (addrconf_finite_timeout(timeout))
697 		ifa->ifa_valid_lft = timeout;
698 	else
699 		ifa->ifa_flags |= IFA_F_PERMANENT;
700 
701 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702 	if (addrconf_finite_timeout(timeout)) {
703 		if (timeout == 0)
704 			ifa->ifa_flags |= IFA_F_DEPRECATED;
705 		ifa->ifa_preferred_lft = timeout;
706 	}
707 	ifa->ifa_tstamp = jiffies;
708 	if (!ifa->ifa_cstamp)
709 		ifa->ifa_cstamp = ifa->ifa_tstamp;
710 }
711 
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
714 {
715 	struct nlattr *tb[IFA_MAX+1];
716 	struct in_ifaddr *ifa;
717 	struct ifaddrmsg *ifm;
718 	struct net_device *dev;
719 	struct in_device *in_dev;
720 	int err;
721 
722 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723 	if (err < 0)
724 		goto errout;
725 
726 	ifm = nlmsg_data(nlh);
727 	err = -EINVAL;
728 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729 		goto errout;
730 
731 	dev = __dev_get_by_index(net, ifm->ifa_index);
732 	err = -ENODEV;
733 	if (dev == NULL)
734 		goto errout;
735 
736 	in_dev = __in_dev_get_rtnl(dev);
737 	err = -ENOBUFS;
738 	if (in_dev == NULL)
739 		goto errout;
740 
741 	ifa = inet_alloc_ifa();
742 	if (ifa == NULL)
743 		/*
744 		 * A potential in_dev allocation can be left alive; it stays
745 		 * assigned to its device and is destroyed with it.
746 		 */
747 		goto errout;
748 
749 	ipv4_devconf_setall(in_dev);
750 	neigh_parms_data_state_setall(in_dev->arp_parms);
751 	in_dev_hold(in_dev);
752 
753 	if (tb[IFA_ADDRESS] == NULL)
754 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
755 
756 	INIT_HLIST_NODE(&ifa->hash);
757 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
758 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
759 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
760 					 ifm->ifa_flags;
761 	ifa->ifa_scope = ifm->ifa_scope;
762 	ifa->ifa_dev = in_dev;
763 
764 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
765 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
766 
767 	if (tb[IFA_BROADCAST])
768 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
769 
770 	if (tb[IFA_LABEL])
771 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
772 	else
773 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
774 
775 	if (tb[IFA_CACHEINFO]) {
776 		struct ifa_cacheinfo *ci;
777 
778 		ci = nla_data(tb[IFA_CACHEINFO]);
779 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
780 			err = -EINVAL;
781 			goto errout_free;
782 		}
783 		*pvalid_lft = ci->ifa_valid;
784 		*pprefered_lft = ci->ifa_prefered;
785 	}
786 
787 	return ifa;
788 
789 errout_free:
790 	inet_free_ifa(ifa);
791 errout:
792 	return ERR_PTR(err);
793 }
794 
795 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
796 {
797 	struct in_device *in_dev = ifa->ifa_dev;
798 	struct in_ifaddr *ifa1, **ifap;
799 
800 	if (!ifa->ifa_local)
801 		return NULL;
802 
803 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
804 	     ifap = &ifa1->ifa_next) {
805 		if (ifa1->ifa_mask == ifa->ifa_mask &&
806 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
807 		    ifa1->ifa_local == ifa->ifa_local)
808 			return ifa1;
809 	}
810 	return NULL;
811 }
812 
813 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
814 {
815 	struct net *net = sock_net(skb->sk);
816 	struct in_ifaddr *ifa;
817 	struct in_ifaddr *ifa_existing;
818 	__u32 valid_lft = INFINITY_LIFE_TIME;
819 	__u32 prefered_lft = INFINITY_LIFE_TIME;
820 
821 	ASSERT_RTNL();
822 
823 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
824 	if (IS_ERR(ifa))
825 		return PTR_ERR(ifa);
826 
827 	ifa_existing = find_matching_ifa(ifa);
828 	if (!ifa_existing) {
829 		/* It would be best to check for !NLM_F_CREATE here but
830 		 * userspace already relies on not having to provide this.
831 		 */
832 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
834 	} else {
835 		inet_free_ifa(ifa);
836 
837 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
838 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
839 			return -EEXIST;
840 		ifa = ifa_existing;
841 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
842 		cancel_delayed_work(&check_lifetime_work);
843 		queue_delayed_work(system_power_efficient_wq,
844 				&check_lifetime_work, 0);
845 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
846 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
847 	}
848 	return 0;
849 }
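/* Userspace view (illustrative comment, not part of the original source):
 * iproute2 reaches this handler with RTM_NEWADDR, e.g.
 *
 *	ip addr add 192.0.2.1/24 dev eth0 valid_lft 600 preferred_lft 300
 *
 * For an already existing address the request only succeeds when
 * NLM_F_REPLACE is set and NLM_F_EXCL is not ("ip addr change", for
 * instance); otherwise -EEXIST is returned as above.
 */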
850 
851 /*
852  *	Determine a default network mask, based on the IP address.
853  */
854 
855 static int inet_abc_len(__be32 addr)
856 {
857 	int rc = -1;	/* Something else, probably a multicast. */
858 
859 	if (ipv4_is_zeronet(addr))
860 		rc = 0;
861 	else {
862 		__u32 haddr = ntohl(addr);
863 
864 		if (IN_CLASSA(haddr))
865 			rc = 8;
866 		else if (IN_CLASSB(haddr))
867 			rc = 16;
868 		else if (IN_CLASSC(haddr))
869 			rc = 24;
870 	}
871 
872 	return rc;
873 }
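/* Worked examples (comment added for clarity): the classful defaults give
 * 10.1.2.3 a /8, 172.16.1.2 a /16 and 192.168.1.2 a /24; 0.0.0.0 yields 0,
 * and a multicast or class E address yields -1, which devinet_ioctl() below
 * treats as invalid for SIOCSIFADDR/SIOCSIFDSTADDR.
 */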
874 
875 
876 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
877 {
878 	struct ifreq ifr;
879 	struct sockaddr_in sin_orig;
880 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
881 	struct in_device *in_dev;
882 	struct in_ifaddr **ifap = NULL;
883 	struct in_ifaddr *ifa = NULL;
884 	struct net_device *dev;
885 	char *colon;
886 	int ret = -EFAULT;
887 	int tryaddrmatch = 0;
888 
889 	/*
890 	 *	Fetch the caller's info block into kernel space
891 	 */
892 
893 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
894 		goto out;
895 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
896 
897 	/* save original address for comparison */
898 	memcpy(&sin_orig, sin, sizeof(*sin));
899 
900 	colon = strchr(ifr.ifr_name, ':');
901 	if (colon)
902 		*colon = 0;
903 
904 	dev_load(net, ifr.ifr_name);
905 
906 	switch (cmd) {
907 	case SIOCGIFADDR:	/* Get interface address */
908 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
909 	case SIOCGIFDSTADDR:	/* Get the destination address */
910 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
911 		/* Note that these ioctls will not sleep,
912 		   so that we do not impose a lock.
913 		   One day we will be forced to put shlock here (I mean SMP)
914 		 */
915 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
916 		memset(sin, 0, sizeof(*sin));
917 		sin->sin_family = AF_INET;
918 		break;
919 
920 	case SIOCSIFFLAGS:
921 		ret = -EPERM;
922 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
923 			goto out;
924 		break;
925 	case SIOCSIFADDR:	/* Set interface address (and family) */
926 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
927 	case SIOCSIFDSTADDR:	/* Set the destination address */
928 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
929 		ret = -EPERM;
930 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931 			goto out;
932 		ret = -EINVAL;
933 		if (sin->sin_family != AF_INET)
934 			goto out;
935 		break;
936 	default:
937 		ret = -EINVAL;
938 		goto out;
939 	}
940 
941 	rtnl_lock();
942 
943 	ret = -ENODEV;
944 	dev = __dev_get_by_name(net, ifr.ifr_name);
945 	if (!dev)
946 		goto done;
947 
948 	if (colon)
949 		*colon = ':';
950 
951 	in_dev = __in_dev_get_rtnl(dev);
952 	if (in_dev) {
953 		if (tryaddrmatch) {
954 			/* Matthias Andree */
955 			/* compare label and address (4.4BSD style) */
956 			/* note: we only do this for a limited set of ioctls
957 			   and only if the original address family was AF_INET.
958 			   This is checked above. */
959 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
960 			     ifap = &ifa->ifa_next) {
961 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
962 				    sin_orig.sin_addr.s_addr ==
963 							ifa->ifa_local) {
964 					break; /* found */
965 				}
966 			}
967 		}
968 		/* we didn't get a match, maybe the application is
969 		   4.3BSD-style and passed in junk so we fall back to
970 		   comparing just the label */
971 		if (!ifa) {
972 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
973 			     ifap = &ifa->ifa_next)
974 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
975 					break;
976 		}
977 	}
978 
979 	ret = -EADDRNOTAVAIL;
980 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
981 		goto done;
982 
983 	switch (cmd) {
984 	case SIOCGIFADDR:	/* Get interface address */
985 		sin->sin_addr.s_addr = ifa->ifa_local;
986 		goto rarok;
987 
988 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
989 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
990 		goto rarok;
991 
992 	case SIOCGIFDSTADDR:	/* Get the destination address */
993 		sin->sin_addr.s_addr = ifa->ifa_address;
994 		goto rarok;
995 
996 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
997 		sin->sin_addr.s_addr = ifa->ifa_mask;
998 		goto rarok;
999 
1000 	case SIOCSIFFLAGS:
1001 		if (colon) {
1002 			ret = -EADDRNOTAVAIL;
1003 			if (!ifa)
1004 				break;
1005 			ret = 0;
1006 			if (!(ifr.ifr_flags & IFF_UP))
1007 				inet_del_ifa(in_dev, ifap, 1);
1008 			break;
1009 		}
1010 		ret = dev_change_flags(dev, ifr.ifr_flags);
1011 		break;
1012 
1013 	case SIOCSIFADDR:	/* Set interface address (and family) */
1014 		ret = -EINVAL;
1015 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1016 			break;
1017 
1018 		if (!ifa) {
1019 			ret = -ENOBUFS;
1020 			ifa = inet_alloc_ifa();
1021 			if (!ifa)
1022 				break;
1023 			INIT_HLIST_NODE(&ifa->hash);
1024 			if (colon)
1025 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1026 			else
1027 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1028 		} else {
1029 			ret = 0;
1030 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1031 				break;
1032 			inet_del_ifa(in_dev, ifap, 0);
1033 			ifa->ifa_broadcast = 0;
1034 			ifa->ifa_scope = 0;
1035 		}
1036 
1037 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1038 
1039 		if (!(dev->flags & IFF_POINTOPOINT)) {
1040 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1041 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1042 			if ((dev->flags & IFF_BROADCAST) &&
1043 			    ifa->ifa_prefixlen < 31)
1044 				ifa->ifa_broadcast = ifa->ifa_address |
1045 						     ~ifa->ifa_mask;
1046 		} else {
1047 			ifa->ifa_prefixlen = 32;
1048 			ifa->ifa_mask = inet_make_mask(32);
1049 		}
1050 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1051 		ret = inet_set_ifa(dev, ifa);
1052 		break;
1053 
1054 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1055 		ret = 0;
1056 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1057 			inet_del_ifa(in_dev, ifap, 0);
1058 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1059 			inet_insert_ifa(ifa);
1060 		}
1061 		break;
1062 
1063 	case SIOCSIFDSTADDR:	/* Set the destination address */
1064 		ret = 0;
1065 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1066 			break;
1067 		ret = -EINVAL;
1068 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1069 			break;
1070 		ret = 0;
1071 		inet_del_ifa(in_dev, ifap, 0);
1072 		ifa->ifa_address = sin->sin_addr.s_addr;
1073 		inet_insert_ifa(ifa);
1074 		break;
1075 
1076 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1077 
1078 		/*
1079 		 *	The mask we set must be legal.
1080 		 */
1081 		ret = -EINVAL;
1082 		if (bad_mask(sin->sin_addr.s_addr, 0))
1083 			break;
1084 		ret = 0;
1085 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1086 			__be32 old_mask = ifa->ifa_mask;
1087 			inet_del_ifa(in_dev, ifap, 0);
1088 			ifa->ifa_mask = sin->sin_addr.s_addr;
1089 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1090 
1091 			/* See if the current broadcast address matches
1092 			 * the old netmask; if so, recalculate
1093 			 * the broadcast address. Otherwise it's a
1094 			 * funny address, so don't touch it since
1095 			 * the user seems to know what (s)he's doing...
1096 			 */
1097 			if ((dev->flags & IFF_BROADCAST) &&
1098 			    (ifa->ifa_prefixlen < 31) &&
1099 			    (ifa->ifa_broadcast ==
1100 			     (ifa->ifa_local|~old_mask))) {
1101 				ifa->ifa_broadcast = (ifa->ifa_local |
1102 						      ~sin->sin_addr.s_addr);
1103 			}
1104 			inet_insert_ifa(ifa);
1105 		}
1106 		break;
1107 	}
1108 done:
1109 	rtnl_unlock();
1110 out:
1111 	return ret;
1112 rarok:
1113 	rtnl_unlock();
1114 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1115 	goto out;
1116 }
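/* Illustrative userspace usage (not part of the original source): the classic
 * ifconfig-style way to read an interface address ends up in this handler,
 * roughly:
 *
 *	struct ifreq ifr = { 0 };
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGIFADDR, &ifr);
 *	sin = (struct sockaddr_in *)&ifr.ifr_addr;
 *
 * On success sin->sin_addr holds the device's primary IPv4 address; fd is any
 * AF_INET socket and "eth0" is only an example name.
 */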
1117 
1118 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1119 {
1120 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1121 	struct in_ifaddr *ifa;
1122 	struct ifreq ifr;
1123 	int done = 0;
1124 
1125 	if (!in_dev)
1126 		goto out;
1127 
1128 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1129 		if (!buf) {
1130 			done += sizeof(ifr);
1131 			continue;
1132 		}
1133 		if (len < (int) sizeof(ifr))
1134 			break;
1135 		memset(&ifr, 0, sizeof(struct ifreq));
1136 		strcpy(ifr.ifr_name, ifa->ifa_label);
1137 
1138 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1139 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1140 								ifa->ifa_local;
1141 
1142 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1143 			done = -EFAULT;
1144 			break;
1145 		}
1146 		buf  += sizeof(struct ifreq);
1147 		len  -= sizeof(struct ifreq);
1148 		done += sizeof(struct ifreq);
1149 	}
1150 out:
1151 	return done;
1152 }
1153 
1154 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1155 {
1156 	__be32 addr = 0;
1157 	struct in_device *in_dev;
1158 	struct net *net = dev_net(dev);
1159 
1160 	rcu_read_lock();
1161 	in_dev = __in_dev_get_rcu(dev);
1162 	if (!in_dev)
1163 		goto no_in_dev;
1164 
1165 	for_primary_ifa(in_dev) {
1166 		if (ifa->ifa_scope > scope)
1167 			continue;
1168 		if (!dst || inet_ifa_match(dst, ifa)) {
1169 			addr = ifa->ifa_local;
1170 			break;
1171 		}
1172 		if (!addr)
1173 			addr = ifa->ifa_local;
1174 	} endfor_ifa(in_dev);
1175 
1176 	if (addr)
1177 		goto out_unlock;
1178 no_in_dev:
1179 
1180 	/* Non-loopback addresses configured on the loopback device should
1181 	   be preferred in this case. It is important that lo is the first
1182 	   interface in the dev_base list.
1183 	 */
1184 	for_each_netdev_rcu(net, dev) {
1185 		in_dev = __in_dev_get_rcu(dev);
1186 		if (!in_dev)
1187 			continue;
1188 
1189 		for_primary_ifa(in_dev) {
1190 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1191 			    ifa->ifa_scope <= scope) {
1192 				addr = ifa->ifa_local;
1193 				goto out_unlock;
1194 			}
1195 		} endfor_ifa(in_dev);
1196 	}
1197 out_unlock:
1198 	rcu_read_unlock();
1199 	return addr;
1200 }
1201 EXPORT_SYMBOL(inet_select_addr);
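/* Selection order (descriptive comment added for clarity, not in the original
 * source): the first primary address on dev whose scope fits and whose subnet
 * contains dst wins; failing that, the first suitably scoped primary on dev;
 * failing that, the search widens to every device in the namespace, skipping
 * link-scoped addresses.
 */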
1202 
1203 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1204 			      __be32 local, int scope)
1205 {
1206 	int same = 0;
1207 	__be32 addr = 0;
1208 
1209 	for_ifa(in_dev) {
1210 		if (!addr &&
1211 		    (local == ifa->ifa_local || !local) &&
1212 		    ifa->ifa_scope <= scope) {
1213 			addr = ifa->ifa_local;
1214 			if (same)
1215 				break;
1216 		}
1217 		if (!same) {
1218 			same = (!local || inet_ifa_match(local, ifa)) &&
1219 				(!dst || inet_ifa_match(dst, ifa));
1220 			if (same && addr) {
1221 				if (local || !dst)
1222 					break;
1223 				/* Is the selected addr in the dst subnet? */
1224 				if (inet_ifa_match(addr, ifa))
1225 					break;
1226 				/* No, then can we use new local src? */
1227 				if (ifa->ifa_scope <= scope) {
1228 					addr = ifa->ifa_local;
1229 					break;
1230 				}
1231 				/* search for large dst subnet for addr */
1232 				same = 0;
1233 			}
1234 		}
1235 	} endfor_ifa(in_dev);
1236 
1237 	return same ? addr : 0;
1238 }
1239 
1240 /*
1241  * Confirm that local IP address exists using wildcards:
1242  * - net: netns to check, cannot be NULL
1243  * - in_dev: only on this interface, NULL=any interface
1244  * - dst: only in the same subnet as dst, 0=any dst
1245  * - local: address, 0=autoselect the local address
1246  * - scope: maximum allowed scope value for the local address
1247  */
1248 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1249 			 __be32 dst, __be32 local, int scope)
1250 {
1251 	__be32 addr = 0;
1252 	struct net_device *dev;
1253 
1254 	if (in_dev != NULL)
1255 		return confirm_addr_indev(in_dev, dst, local, scope);
1256 
1257 	rcu_read_lock();
1258 	for_each_netdev_rcu(net, dev) {
1259 		in_dev = __in_dev_get_rcu(dev);
1260 		if (in_dev) {
1261 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1262 			if (addr)
1263 				break;
1264 		}
1265 	}
1266 	rcu_read_unlock();
1267 
1268 	return addr;
1269 }
1270 EXPORT_SYMBOL(inet_confirm_addr);
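/* Usage note (comment added for clarity; hedged): the ARP input path, for
 * example, uses this to check whether a matching local address exists before
 * answering a request (see arp_ignore() in net/ipv4/arp.c).
 */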
1271 
1272 /*
1273  *	Device notifier
1274  */
1275 
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
1281 
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1283 {
1284 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1285 }
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1287 
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289  * existing alias numbering and to create unique labels if possible.
1290 */
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1292 {
1293 	struct in_ifaddr *ifa;
1294 	int named = 0;
1295 
1296 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297 		char old[IFNAMSIZ], *dot;
1298 
1299 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301 		if (named++ == 0)
1302 			goto skip;
1303 		dot = strchr(old, ':');
1304 		if (dot == NULL) {
1305 			sprintf(old, ":%d", named);
1306 			dot = old;
1307 		}
1308 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309 			strcat(ifa->ifa_label, dot);
1310 		else
1311 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1312 skip:
1313 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1314 	}
1315 }
1316 
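/* 68 is the minimum IPv4 MTU required by RFC 791: a 60-byte maximum header
 * plus an 8-byte minimum fragment.  (Comment added for clarity.)
 */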
1317 static bool inetdev_valid_mtu(unsigned int mtu)
1318 {
1319 	return mtu >= 68;
1320 }
1321 
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323 					struct in_device *in_dev)
1324 
1325 {
1326 	struct in_ifaddr *ifa;
1327 
1328 	for (ifa = in_dev->ifa_list; ifa;
1329 	     ifa = ifa->ifa_next) {
1330 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331 			 ifa->ifa_local, dev,
1332 			 ifa->ifa_local, NULL,
1333 			 dev->dev_addr, NULL);
1334 	}
1335 }
1336 
1337 /* Called only under RTNL semaphore */
1338 
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340 			 void *ptr)
1341 {
1342 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1343 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1344 
1345 	ASSERT_RTNL();
1346 
1347 	if (!in_dev) {
1348 		if (event == NETDEV_REGISTER) {
1349 			in_dev = inetdev_init(dev);
1350 			if (!in_dev)
1351 				return notifier_from_errno(-ENOMEM);
1352 			if (dev->flags & IFF_LOOPBACK) {
1353 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1355 			}
1356 		} else if (event == NETDEV_CHANGEMTU) {
1357 			/* Re-enabling IP */
1358 			if (inetdev_valid_mtu(dev->mtu))
1359 				in_dev = inetdev_init(dev);
1360 		}
1361 		goto out;
1362 	}
1363 
1364 	switch (event) {
1365 	case NETDEV_REGISTER:
1366 		pr_debug("%s: bug\n", __func__);
1367 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368 		break;
1369 	case NETDEV_UP:
1370 		if (!inetdev_valid_mtu(dev->mtu))
1371 			break;
1372 		if (dev->flags & IFF_LOOPBACK) {
1373 			struct in_ifaddr *ifa = inet_alloc_ifa();
1374 
1375 			if (ifa) {
1376 				INIT_HLIST_NODE(&ifa->hash);
1377 				ifa->ifa_local =
1378 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379 				ifa->ifa_prefixlen = 8;
1380 				ifa->ifa_mask = inet_make_mask(8);
1381 				in_dev_hold(in_dev);
1382 				ifa->ifa_dev = in_dev;
1383 				ifa->ifa_scope = RT_SCOPE_HOST;
1384 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386 						 INFINITY_LIFE_TIME);
1387 				ipv4_devconf_setall(in_dev);
1388 				neigh_parms_data_state_setall(in_dev->arp_parms);
1389 				inet_insert_ifa(ifa);
1390 			}
1391 		}
1392 		ip_mc_up(in_dev);
1393 		/* fall through */
1394 	case NETDEV_CHANGEADDR:
1395 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1396 			break;
1397 		/* fall through */
1398 	case NETDEV_NOTIFY_PEERS:
1399 		/* Send gratuitous ARP to notify of link change */
1400 		inetdev_send_gratuitous_arp(dev, in_dev);
1401 		break;
1402 	case NETDEV_DOWN:
1403 		ip_mc_down(in_dev);
1404 		break;
1405 	case NETDEV_PRE_TYPE_CHANGE:
1406 		ip_mc_unmap(in_dev);
1407 		break;
1408 	case NETDEV_POST_TYPE_CHANGE:
1409 		ip_mc_remap(in_dev);
1410 		break;
1411 	case NETDEV_CHANGEMTU:
1412 		if (inetdev_valid_mtu(dev->mtu))
1413 			break;
1414 		/* disable IP when MTU is not enough */
1415 	case NETDEV_UNREGISTER:
1416 		inetdev_destroy(in_dev);
1417 		break;
1418 	case NETDEV_CHANGENAME:
1419 		/* Do not notify about label change, this event is
1420 		 * not interesting to applications using netlink.
1421 		 */
1422 		inetdev_changename(dev, in_dev);
1423 
1424 		devinet_sysctl_unregister(in_dev);
1425 		devinet_sysctl_register(in_dev);
1426 		break;
1427 	}
1428 out:
1429 	return NOTIFY_DONE;
1430 }
1431 
1432 static struct notifier_block ip_netdev_notifier = {
1433 	.notifier_call = inetdev_event,
1434 };
1435 
1436 static size_t inet_nlmsg_size(void)
1437 {
1438 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439 	       + nla_total_size(4) /* IFA_ADDRESS */
1440 	       + nla_total_size(4) /* IFA_LOCAL */
1441 	       + nla_total_size(4) /* IFA_BROADCAST */
1442 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443 	       + nla_total_size(4)  /* IFA_FLAGS */
1444 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1445 }
1446 
1447 static inline u32 cstamp_delta(unsigned long cstamp)
1448 {
1449 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1450 }
1451 
1452 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453 			 unsigned long tstamp, u32 preferred, u32 valid)
1454 {
1455 	struct ifa_cacheinfo ci;
1456 
1457 	ci.cstamp = cstamp_delta(cstamp);
1458 	ci.tstamp = cstamp_delta(tstamp);
1459 	ci.ifa_prefered = preferred;
1460 	ci.ifa_valid = valid;
1461 
1462 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1463 }
1464 
1465 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466 			    u32 portid, u32 seq, int event, unsigned int flags)
1467 {
1468 	struct ifaddrmsg *ifm;
1469 	struct nlmsghdr  *nlh;
1470 	u32 preferred, valid;
1471 
1472 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473 	if (nlh == NULL)
1474 		return -EMSGSIZE;
1475 
1476 	ifm = nlmsg_data(nlh);
1477 	ifm->ifa_family = AF_INET;
1478 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479 	ifm->ifa_flags = ifa->ifa_flags;
1480 	ifm->ifa_scope = ifa->ifa_scope;
1481 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482 
1483 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484 		preferred = ifa->ifa_preferred_lft;
1485 		valid = ifa->ifa_valid_lft;
1486 		if (preferred != INFINITY_LIFE_TIME) {
1487 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488 
1489 			if (preferred > tval)
1490 				preferred -= tval;
1491 			else
1492 				preferred = 0;
1493 			if (valid != INFINITY_LIFE_TIME) {
1494 				if (valid > tval)
1495 					valid -= tval;
1496 				else
1497 					valid = 0;
1498 			}
1499 		}
1500 	} else {
1501 		preferred = INFINITY_LIFE_TIME;
1502 		valid = INFINITY_LIFE_TIME;
1503 	}
1504 	if ((ifa->ifa_address &&
1505 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506 	    (ifa->ifa_local &&
1507 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508 	    (ifa->ifa_broadcast &&
1509 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510 	    (ifa->ifa_label[0] &&
1511 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514 			  preferred, valid))
1515 		goto nla_put_failure;
1516 
1517 	return nlmsg_end(skb, nlh);
1518 
1519 nla_put_failure:
1520 	nlmsg_cancel(skb, nlh);
1521 	return -EMSGSIZE;
1522 }
1523 
1524 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525 {
1526 	struct net *net = sock_net(skb->sk);
1527 	int h, s_h;
1528 	int idx, s_idx;
1529 	int ip_idx, s_ip_idx;
1530 	struct net_device *dev;
1531 	struct in_device *in_dev;
1532 	struct in_ifaddr *ifa;
1533 	struct hlist_head *head;
1534 
1535 	s_h = cb->args[0];
1536 	s_idx = idx = cb->args[1];
1537 	s_ip_idx = ip_idx = cb->args[2];
1538 
1539 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540 		idx = 0;
1541 		head = &net->dev_index_head[h];
1542 		rcu_read_lock();
1543 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544 			  net->dev_base_seq;
1545 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546 			if (idx < s_idx)
1547 				goto cont;
1548 			if (h > s_h || idx > s_idx)
1549 				s_ip_idx = 0;
1550 			in_dev = __in_dev_get_rcu(dev);
1551 			if (!in_dev)
1552 				goto cont;
1553 
1554 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555 			     ifa = ifa->ifa_next, ip_idx++) {
1556 				if (ip_idx < s_ip_idx)
1557 					continue;
1558 				if (inet_fill_ifaddr(skb, ifa,
1559 					     NETLINK_CB(cb->skb).portid,
1560 					     cb->nlh->nlmsg_seq,
1561 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562 					rcu_read_unlock();
1563 					goto done;
1564 				}
1565 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1566 			}
1567 cont:
1568 			idx++;
1569 		}
1570 		rcu_read_unlock();
1571 	}
1572 
1573 done:
1574 	cb->args[0] = h;
1575 	cb->args[1] = idx;
1576 	cb->args[2] = ip_idx;
1577 
1578 	return skb->len;
1579 }
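/* Dump bookkeeping (descriptive comment added for clarity, not in the original
 * source): cb->args[0..2] record the hash bucket, device index and address
 * index reached so far, so a dump that overflows one skb resumes from the same
 * position on the next netlink read; nl_dump_check_consistent() marks the dump
 * inconsistent if the address set changed in between.
 */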
1580 
1581 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582 		      u32 portid)
1583 {
1584 	struct sk_buff *skb;
1585 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586 	int err = -ENOBUFS;
1587 	struct net *net;
1588 
1589 	net = dev_net(ifa->ifa_dev->dev);
1590 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591 	if (skb == NULL)
1592 		goto errout;
1593 
1594 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595 	if (err < 0) {
1596 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597 		WARN_ON(err == -EMSGSIZE);
1598 		kfree_skb(skb);
1599 		goto errout;
1600 	}
1601 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602 	return;
1603 errout:
1604 	if (err < 0)
1605 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1606 }
1607 
1608 static size_t inet_get_link_af_size(const struct net_device *dev)
1609 {
1610 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611 
1612 	if (!in_dev)
1613 		return 0;
1614 
1615 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1616 }
1617 
1618 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619 {
1620 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621 	struct nlattr *nla;
1622 	int i;
1623 
1624 	if (!in_dev)
1625 		return -ENODATA;
1626 
1627 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628 	if (nla == NULL)
1629 		return -EMSGSIZE;
1630 
1631 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1633 
1634 	return 0;
1635 }
1636 
1637 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1639 };
1640 
1641 static int inet_validate_link_af(const struct net_device *dev,
1642 				 const struct nlattr *nla)
1643 {
1644 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645 	int err, rem;
1646 
1647 	if (dev && !__in_dev_get_rtnl(dev))
1648 		return -EAFNOSUPPORT;
1649 
1650 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651 	if (err < 0)
1652 		return err;
1653 
1654 	if (tb[IFLA_INET_CONF]) {
1655 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656 			int cfgid = nla_type(a);
1657 
1658 			if (nla_len(a) < 4)
1659 				return -EINVAL;
1660 
1661 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662 				return -EINVAL;
1663 		}
1664 	}
1665 
1666 	return 0;
1667 }
1668 
1669 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670 {
1671 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673 	int rem;
1674 
1675 	if (!in_dev)
1676 		return -EAFNOSUPPORT;
1677 
1678 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679 		BUG();
1680 
1681 	if (tb[IFLA_INET_CONF]) {
1682 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1684 	}
1685 
1686 	return 0;
1687 }
1688 
1689 static int inet_netconf_msgsize_devconf(int type)
1690 {
1691 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1693 
1694 	/* type -1 is used for ALL */
1695 	if (type == -1 || type == NETCONFA_FORWARDING)
1696 		size += nla_total_size(4);
1697 	if (type == -1 || type == NETCONFA_RP_FILTER)
1698 		size += nla_total_size(4);
1699 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700 		size += nla_total_size(4);
1701 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702 		size += nla_total_size(4);
1703 
1704 	return size;
1705 }
1706 
1707 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708 				     struct ipv4_devconf *devconf, u32 portid,
1709 				     u32 seq, int event, unsigned int flags,
1710 				     int type)
1711 {
1712 	struct nlmsghdr  *nlh;
1713 	struct netconfmsg *ncm;
1714 
1715 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716 			flags);
1717 	if (nlh == NULL)
1718 		return -EMSGSIZE;
1719 
1720 	ncm = nlmsg_data(nlh);
1721 	ncm->ncm_family = AF_INET;
1722 
1723 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724 		goto nla_put_failure;
1725 
1726 	/* type -1 is used for ALL */
1727 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1729 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730 		goto nla_put_failure;
1731 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1733 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734 		goto nla_put_failure;
1735 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738 		goto nla_put_failure;
1739 	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742 		goto nla_put_failure;
1743 
1744 	return nlmsg_end(skb, nlh);
1745 
1746 nla_put_failure:
1747 	nlmsg_cancel(skb, nlh);
1748 	return -EMSGSIZE;
1749 }
1750 
1751 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752 				 struct ipv4_devconf *devconf)
1753 {
1754 	struct sk_buff *skb;
1755 	int err = -ENOBUFS;
1756 
1757 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758 	if (skb == NULL)
1759 		goto errout;
1760 
1761 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762 					RTM_NEWNETCONF, 0, type);
1763 	if (err < 0) {
1764 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765 		WARN_ON(err == -EMSGSIZE);
1766 		kfree_skb(skb);
1767 		goto errout;
1768 	}
1769 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770 	return;
1771 errout:
1772 	if (err < 0)
1773 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1774 }
1775 
1776 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1777 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1778 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1779 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1780 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1781 };
1782 
1783 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784 				    struct nlmsghdr *nlh)
1785 {
1786 	struct net *net = sock_net(in_skb->sk);
1787 	struct nlattr *tb[NETCONFA_MAX+1];
1788 	struct netconfmsg *ncm;
1789 	struct sk_buff *skb;
1790 	struct ipv4_devconf *devconf;
1791 	struct in_device *in_dev;
1792 	struct net_device *dev;
1793 	int ifindex;
1794 	int err;
1795 
1796 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797 			  devconf_ipv4_policy);
1798 	if (err < 0)
1799 		goto errout;
1800 
1801 	err = -EINVAL;
1802 	if (!tb[NETCONFA_IFINDEX])
1803 		goto errout;
1804 
1805 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806 	switch (ifindex) {
1807 	case NETCONFA_IFINDEX_ALL:
1808 		devconf = net->ipv4.devconf_all;
1809 		break;
1810 	case NETCONFA_IFINDEX_DEFAULT:
1811 		devconf = net->ipv4.devconf_dflt;
1812 		break;
1813 	default:
1814 		dev = __dev_get_by_index(net, ifindex);
1815 		if (dev == NULL)
1816 			goto errout;
1817 		in_dev = __in_dev_get_rtnl(dev);
1818 		if (in_dev == NULL)
1819 			goto errout;
1820 		devconf = &in_dev->cnf;
1821 		break;
1822 	}
1823 
1824 	err = -ENOBUFS;
1825 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826 	if (skb == NULL)
1827 		goto errout;
1828 
1829 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830 					NETLINK_CB(in_skb).portid,
1831 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832 					-1);
1833 	if (err < 0) {
1834 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835 		WARN_ON(err == -EMSGSIZE);
1836 		kfree_skb(skb);
1837 		goto errout;
1838 	}
1839 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840 errout:
1841 	return err;
1842 }
1843 
1844 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845 				     struct netlink_callback *cb)
1846 {
1847 	struct net *net = sock_net(skb->sk);
1848 	int h, s_h;
1849 	int idx, s_idx;
1850 	struct net_device *dev;
1851 	struct in_device *in_dev;
1852 	struct hlist_head *head;
1853 
1854 	s_h = cb->args[0];
1855 	s_idx = idx = cb->args[1];
1856 
1857 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858 		idx = 0;
1859 		head = &net->dev_index_head[h];
1860 		rcu_read_lock();
1861 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862 			  net->dev_base_seq;
1863 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864 			if (idx < s_idx)
1865 				goto cont;
1866 			in_dev = __in_dev_get_rcu(dev);
1867 			if (!in_dev)
1868 				goto cont;
1869 
1870 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871 						      &in_dev->cnf,
1872 						      NETLINK_CB(cb->skb).portid,
1873 						      cb->nlh->nlmsg_seq,
1874 						      RTM_NEWNETCONF,
1875 						      NLM_F_MULTI,
1876 						      -1) <= 0) {
1877 				rcu_read_unlock();
1878 				goto done;
1879 			}
1880 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881 cont:
1882 			idx++;
1883 		}
1884 		rcu_read_unlock();
1885 	}
1886 	if (h == NETDEV_HASHENTRIES) {
1887 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888 					      net->ipv4.devconf_all,
1889 					      NETLINK_CB(cb->skb).portid,
1890 					      cb->nlh->nlmsg_seq,
1891 					      RTM_NEWNETCONF, NLM_F_MULTI,
1892 					      -1) <= 0)
1893 			goto done;
1894 		else
1895 			h++;
1896 	}
1897 	if (h == NETDEV_HASHENTRIES + 1) {
1898 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899 					      net->ipv4.devconf_dflt,
1900 					      NETLINK_CB(cb->skb).portid,
1901 					      cb->nlh->nlmsg_seq,
1902 					      RTM_NEWNETCONF, NLM_F_MULTI,
1903 					      -1) <= 0)
1904 			goto done;
1905 		else
1906 			h++;
1907 	}
1908 done:
1909 	cb->args[0] = h;
1910 	cb->args[1] = idx;
1911 
1912 	return skb->len;
1913 }
1914 
1915 #ifdef CONFIG_SYSCTL
1916 
1917 static void devinet_copy_dflt_conf(struct net *net, int i)
1918 {
1919 	struct net_device *dev;
1920 
1921 	rcu_read_lock();
1922 	for_each_netdev_rcu(net, dev) {
1923 		struct in_device *in_dev;
1924 
1925 		in_dev = __in_dev_get_rcu(dev);
1926 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1927 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1928 	}
1929 	rcu_read_unlock();
1930 }
1931 
1932 /* called with RTNL locked */
1933 static void inet_forward_change(struct net *net)
1934 {
1935 	struct net_device *dev;
1936 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1937 
1938 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941 				    NETCONFA_IFINDEX_ALL,
1942 				    net->ipv4.devconf_all);
1943 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944 				    NETCONFA_IFINDEX_DEFAULT,
1945 				    net->ipv4.devconf_dflt);
1946 
1947 	for_each_netdev(net, dev) {
1948 		struct in_device *in_dev;
1949 		if (on)
1950 			dev_disable_lro(dev);
1951 		rcu_read_lock();
1952 		in_dev = __in_dev_get_rcu(dev);
1953 		if (in_dev) {
1954 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956 						    dev->ifindex, &in_dev->cnf);
1957 		}
1958 		rcu_read_unlock();
1959 	}
1960 }
1961 
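/*
 * Map a devconf table back to the ifindex used in netconf messages:
 * the "default" and "all" tables get their reserved pseudo-indices,
 * anything else is embedded in an in_device and reports the real
 * ifindex of its device.
 */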
1962 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1963 {
1964 	if (cnf == net->ipv4.devconf_dflt)
1965 		return NETCONFA_IFINDEX_DEFAULT;
1966 	else if (cnf == net->ipv4.devconf_all)
1967 		return NETCONFA_IFINDEX_ALL;
1968 	else {
1969 		struct in_device *idev
1970 			= container_of(cnf, struct in_device, cnf);
1971 		return idev->dev->ifindex;
1972 	}
1973 }
1974 
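/*
 * Generic proc handler for the per-interface sysctls declared below.
 * On a successful write it marks the entry as explicitly configured
 * (cnf->state), propagates writes to the "default" table onto devices
 * that have not overridden it, flushes the route cache when
 * accept_local or route_localnet is cleared, and emits netconf
 * notifications for rp_filter and proxy_arp changes.
 */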
1975 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976 			     void __user *buffer,
1977 			     size_t *lenp, loff_t *ppos)
1978 {
1979 	int old_value = *(int *)ctl->data;
1980 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981 	int new_value = *(int *)ctl->data;
1982 
1983 	if (write) {
1984 		struct ipv4_devconf *cnf = ctl->extra1;
1985 		struct net *net = ctl->extra2;
1986 		int i = (int *)ctl->data - cnf->data;
1987 		int ifindex;
1988 
1989 		set_bit(i, cnf->state);
1990 
1991 		if (cnf == net->ipv4.devconf_dflt)
1992 			devinet_copy_dflt_conf(net, i);
1993 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995 			if ((new_value == 0) && (old_value != 0))
1996 				rt_cache_flush(net);
1997 
1998 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999 		    new_value != old_value) {
2000 			ifindex = devinet_conf_ifindex(net, cnf);
2001 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002 						    ifindex, cnf);
2003 		}
2004 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005 		    new_value != old_value) {
2006 			ifindex = devinet_conf_ifindex(net, cnf);
2007 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008 						    ifindex, cnf);
2009 		}
2010 	}
2011 
2012 	return ret;
2013 }
2014 
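/*
 * Handler for the forwarding sysctls.  Changing anything other than
 * the "default" value requires the RTNL: if the trylock fails, the old
 * value and file position are restored and the syscall is restarted so
 * userspace simply retries.  conf.all.forwarding fans out through
 * inet_forward_change(); a per-device write disables LRO on that
 * device (when enabling) and sends a targeted netconf notification.
 * The route cache is flushed once the lock is dropped.
 */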
2015 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016 				  void __user *buffer,
2017 				  size_t *lenp, loff_t *ppos)
2018 {
2019 	int *valp = ctl->data;
2020 	int val = *valp;
2021 	loff_t pos = *ppos;
2022 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2023 
2024 	if (write && *valp != val) {
2025 		struct net *net = ctl->extra2;
2026 
2027 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028 			if (!rtnl_trylock()) {
2029 				/* Restore the original values before restarting */
2030 				*valp = val;
2031 				*ppos = pos;
2032 				return restart_syscall();
2033 			}
2034 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035 				inet_forward_change(net);
2036 			} else {
2037 				struct ipv4_devconf *cnf = ctl->extra1;
2038 				struct in_device *idev =
2039 					container_of(cnf, struct in_device, cnf);
2040 				if (*valp)
2041 					dev_disable_lro(idev->dev);
2042 				inet_netconf_notify_devconf(net,
2043 							    NETCONFA_FORWARDING,
2044 							    idev->dev->ifindex,
2045 							    cnf);
2046 			}
2047 			rtnl_unlock();
2048 			rt_cache_flush(net);
2049 		} else
2050 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051 						    NETCONFA_IFINDEX_DEFAULT,
2052 						    net->ipv4.devconf_dflt);
2053 	}
2054 
2055 	return ret;
2056 }
2057 
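/*
 * Plain integer handler whose only side effect is flushing the route
 * cache when the value actually changes; used for the "flushing"
 * entries below (disable_xfrm, disable_policy, promote_secondaries,
 * route_localnet).
 */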
2058 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059 				void __user *buffer,
2060 				size_t *lenp, loff_t *ppos)
2061 {
2062 	int *valp = ctl->data;
2063 	int val = *valp;
2064 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065 	struct net *net = ctl->extra2;
2066 
2067 	if (write && *valp != val)
2068 		rt_cache_flush(net);
2069 
2070 	return ret;
2071 }
2072 
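/*
 * The DEVINET_SYSCTL_* macros build ctl_table entries whose .data
 * pointers initially reference the static ipv4_devconf template;
 * __devinet_sysctl_register() rebases them onto whichever ipv4_devconf
 * instance (all, default, or per-device) is actually being registered.
 */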
2073 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2074 	{ \
2075 		.procname	= name, \
2076 		.data		= ipv4_devconf.data + \
2077 				  IPV4_DEVCONF_ ## attr - 1, \
2078 		.maxlen		= sizeof(int), \
2079 		.mode		= mval, \
2080 		.proc_handler	= proc, \
2081 		.extra1		= &ipv4_devconf, \
2082 	}
2083 
2084 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2085 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2086 
2087 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2088 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2089 
2090 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2091 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2092 
2093 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2094 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2095 
2096 static struct devinet_sysctl_table {
2097 	struct ctl_table_header *sysctl_header;
2098 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2099 } devinet_sysctl = {
2100 	.devinet_vars = {
2101 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2102 					     devinet_sysctl_forward),
2103 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2104 
2105 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2106 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2107 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2108 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2109 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2110 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2111 					"accept_source_route"),
2112 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2113 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2114 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2115 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2116 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2117 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2118 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2119 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2120 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2121 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2122 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2123 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2124 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2125 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2126 					"force_igmp_version"),
2127 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2128 					"igmpv2_unsolicited_report_interval"),
2129 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2130 					"igmpv3_unsolicited_report_interval"),
2131 
2132 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2133 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2134 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2135 					      "promote_secondaries"),
2136 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2137 					      "route_localnet"),
2138 	},
2139 };
2140 
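/*
 * Duplicate the template table, rebase each entry's .data onto the
 * given devconf (recording the devconf and owning struct net in
 * extra1/extra2), and register the result under
 * "net/ipv4/conf/<dev_name>".  The terminating sentinel entry is left
 * untouched, hence the ARRAY_SIZE() - 1 loop bound.
 */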
2141 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142 					struct ipv4_devconf *p)
2143 {
2144 	int i;
2145 	struct devinet_sysctl_table *t;
2146 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2147 
2148 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149 	if (!t)
2150 		goto out;
2151 
2152 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154 		t->devinet_vars[i].extra1 = p;
2155 		t->devinet_vars[i].extra2 = net;
2156 	}
2157 
2158 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2159 
2160 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161 	if (!t->sysctl_header)
2162 		goto free;
2163 
2164 	p->sysctl = t;
2165 	return 0;
2166 
2167 free:
2168 	kfree(t);
2169 out:
2170 	return -ENOBUFS;
2171 }
2172 
2173 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2174 {
2175 	struct devinet_sysctl_table *t = cnf->sysctl;
2176 
2177 	if (t == NULL)
2178 		return;
2179 
2180 	cnf->sysctl = NULL;
2181 	unregister_net_sysctl_table(t->sysctl_header);
2182 	kfree(t);
2183 }
2184 
2185 static void devinet_sysctl_register(struct in_device *idev)
2186 {
2187 	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189 					&idev->cnf);
2190 }
2191 
2192 static void devinet_sysctl_unregister(struct in_device *idev)
2193 {
2194 	__devinet_sysctl_unregister(&idev->cnf);
2195 	neigh_sysctl_unregister(idev->arp_parms);
2196 }
2197 
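/*
 * The legacy /proc/sys/net/ipv4/ip_forward knob: it aliases the "all"
 * forwarding entry of init_net here, and is re-pointed at each other
 * namespace's own "all" table in devinet_init_net().
 */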
2198 static struct ctl_table ctl_forward_entry[] = {
2199 	{
2200 		.procname	= "ip_forward",
2201 		.data		= &ipv4_devconf.data[
2202 					IPV4_DEVCONF_FORWARDING - 1],
2203 		.maxlen		= sizeof(int),
2204 		.mode		= 0644,
2205 		.proc_handler	= devinet_sysctl_forward,
2206 		.extra1		= &ipv4_devconf,
2207 		.extra2		= &init_net,
2208 	},
2209 	{ },
2210 };
2211 #endif
2212 
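/*
 * Per-namespace setup: the initial namespace keeps the static
 * ipv4_devconf, ipv4_devconf_dflt and ctl_forward_entry objects, while
 * every other namespace works on kmemdup'd copies so its sysctl writes
 * stay private.  Registers the "all" and "default" conf directories
 * plus the top-level ip_forward entry, unwinding in reverse order on
 * error.
 */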
2213 static __net_init int devinet_init_net(struct net *net)
2214 {
2215 	int err;
2216 	struct ipv4_devconf *all, *dflt;
2217 #ifdef CONFIG_SYSCTL
2218 	struct ctl_table *tbl = ctl_forward_entry;
2219 	struct ctl_table_header *forw_hdr;
2220 #endif
2221 
2222 	err = -ENOMEM;
2223 	all = &ipv4_devconf;
2224 	dflt = &ipv4_devconf_dflt;
2225 
2226 	if (!net_eq(net, &init_net)) {
2227 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2228 		if (all == NULL)
2229 			goto err_alloc_all;
2230 
2231 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2232 		if (dflt == NULL)
2233 			goto err_alloc_dflt;
2234 
2235 #ifdef CONFIG_SYSCTL
2236 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2237 		if (tbl == NULL)
2238 			goto err_alloc_ctl;
2239 
2240 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2241 		tbl[0].extra1 = all;
2242 		tbl[0].extra2 = net;
2243 #endif
2244 	}
2245 
2246 #ifdef CONFIG_SYSCTL
2247 	err = __devinet_sysctl_register(net, "all", all);
2248 	if (err < 0)
2249 		goto err_reg_all;
2250 
2251 	err = __devinet_sysctl_register(net, "default", dflt);
2252 	if (err < 0)
2253 		goto err_reg_dflt;
2254 
2255 	err = -ENOMEM;
2256 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2257 	if (forw_hdr == NULL)
2258 		goto err_reg_ctl;
2259 	net->ipv4.forw_hdr = forw_hdr;
2260 #endif
2261 
2262 	net->ipv4.devconf_all = all;
2263 	net->ipv4.devconf_dflt = dflt;
2264 	return 0;
2265 
2266 #ifdef CONFIG_SYSCTL
2267 err_reg_ctl:
2268 	__devinet_sysctl_unregister(dflt);
2269 err_reg_dflt:
2270 	__devinet_sysctl_unregister(all);
2271 err_reg_all:
2272 	if (tbl != ctl_forward_entry)
2273 		kfree(tbl);
2274 err_alloc_ctl:
2275 #endif
2276 	if (dflt != &ipv4_devconf_dflt)
2277 		kfree(dflt);
2278 err_alloc_dflt:
2279 	if (all != &ipv4_devconf)
2280 		kfree(all);
2281 err_alloc_all:
2282 	return err;
2283 }
2284 
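/*
 * Per-namespace teardown: unregister the sysctl tables in the reverse
 * order of devinet_init_net() and free the namespace-private copies.
 * (For init_net the kfree()s would hit the static objects, but the
 * initial namespace is never torn down.)
 */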
2285 static __net_exit void devinet_exit_net(struct net *net)
2286 {
2287 #ifdef CONFIG_SYSCTL
2288 	struct ctl_table *tbl;
2289 
2290 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2294 	kfree(tbl);
2295 #endif
2296 	kfree(net->ipv4.devconf_dflt);
2297 	kfree(net->ipv4.devconf_all);
2298 }
2299 
2300 static __net_initdata struct pernet_operations devinet_ops = {
2301 	.init = devinet_init_net,
2302 	.exit = devinet_exit_net,
2303 };
2304 
2305 static struct rtnl_af_ops inet_af_ops = {
2306 	.family		  = AF_INET,
2307 	.fill_link_af	  = inet_fill_link_af,
2308 	.get_link_af_size = inet_get_link_af_size,
2309 	.validate_link_af = inet_validate_link_af,
2310 	.set_link_af	  = inet_set_link_af,
2311 };
2312 
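/*
 * Subsystem bring-up, called once early in IPv4 initialisation:
 * initialise the ifaddr hash, register the per-namespace operations,
 * the legacy SIOCGIFCONF helper and the netdevice notifier, kick off
 * the address-lifetime worker, and hook up the AF_INET rtnetlink
 * address and netconf message handlers.
 */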
2313 void __init devinet_init(void)
2314 {
2315 	int i;
2316 
2317 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2318 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2319 
2320 	register_pernet_subsys(&devinet_ops);
2321 
2322 	register_gifconf(PF_INET, inet_gifconf);
2323 	register_netdevice_notifier(&ip_netdev_notifier);
2324 
2325 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2326 
2327 	rtnl_af_register(&inet_af_ops);
2328 
2329 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2330 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2331 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2332 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2333 		      inet_netconf_dump_devconf, NULL);
2334 }
2335 