xref: /openbmc/linux/net/ipv4/devinet.c (revision b34e08d5)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
110 
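/* inet_addr_lst chains are keyed on (ifa_local, netns) via net_hash_mix():
 * writers serialize on inet_addr_hash_lock, readers walk the chains under
 * RCU (see __ip_dev_find() below).
 */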
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113 	u32 val = (__force u32) addr ^ net_hash_mix(net);
114 
115 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117 
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 
122 	spin_lock(&inet_addr_hash_lock);
123 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 	spin_unlock(&inet_addr_hash_lock);
125 }
126 
127 static void inet_hash_remove(struct in_ifaddr *ifa)
128 {
129 	spin_lock(&inet_addr_hash_lock);
130 	hlist_del_init_rcu(&ifa->hash);
131 	spin_unlock(&inet_addr_hash_lock);
132 }
133 
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU or RTNL.
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144 	u32 hash = inet_addr_hash(net, addr);
145 	struct net_device *result = NULL;
146 	struct in_ifaddr *ifa;
147 
148 	rcu_read_lock();
149 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150 		if (ifa->ifa_local == addr) {
151 			struct net_device *dev = ifa->ifa_dev->dev;
152 
153 			if (!net_eq(dev_net(dev), net))
154 				continue;
155 			result = dev;
156 			break;
157 		}
158 	}
159 	if (!result) {
160 		struct flowi4 fl4 = { .daddr = addr };
161 		struct fib_result res = { 0 };
162 		struct fib_table *local;
163 
164 		/* Fall back to the FIB local table so that communication
165 		 * over loopback subnets works.
166 		 */
167 		local = fib_get_table(net, RT_TABLE_LOCAL);
168 		if (local &&
169 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 		    res.type == RTN_LOCAL)
171 			result = FIB_RES_DEV(res);
172 	}
173 	if (result && devref)
174 		dev_hold(result);
175 	rcu_read_unlock();
176 	return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
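
/* Usage sketch (hypothetical caller, not part of this file): with
 * devref=false the result is only valid inside the caller's own RCU
 * read-side section (or under RTNL); with devref=true the caller owns a
 * reference and must drop it with dev_put().
 *
 *	static bool addr_is_local(struct net *net, __be32 addr)
 *	{
 *		bool ret;
 *
 *		rcu_read_lock();
 *		ret = __ip_dev_find(net, addr, false) != NULL;
 *		rcu_read_unlock();
 *		return ret;
 *	}
 */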
179 
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181 
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184 			 int destroy);
185 #ifdef CONFIG_SYSCTL
186 static void devinet_sysctl_register(struct in_device *idev);
187 static void devinet_sysctl_unregister(struct in_device *idev);
188 #else
189 static void devinet_sysctl_register(struct in_device *idev)
190 {
191 }
192 static void devinet_sysctl_unregister(struct in_device *idev)
193 {
194 }
195 #endif
196 
197 /* Locks all the inet devices. */
198 
199 static struct in_ifaddr *inet_alloc_ifa(void)
200 {
201 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 }
203 
204 static void inet_rcu_free_ifa(struct rcu_head *head)
205 {
206 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207 	if (ifa->ifa_dev)
208 		in_dev_put(ifa->ifa_dev);
209 	kfree(ifa);
210 }
211 
212 static void inet_free_ifa(struct in_ifaddr *ifa)
213 {
214 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215 }
216 
217 void in_dev_finish_destroy(struct in_device *idev)
218 {
219 	struct net_device *dev = idev->dev;
220 
221 	WARN_ON(idev->ifa_list);
222 	WARN_ON(idev->mc_list);
223 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
224 #ifdef NET_REFCNT_DEBUG
225 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
226 #endif
227 	dev_put(dev);
228 	if (!idev->dead)
229 		pr_err("Freeing alive in_device %p\n", idev);
230 	else
231 		kfree(idev);
232 }
233 EXPORT_SYMBOL(in_dev_finish_destroy);
234 
235 static struct in_device *inetdev_init(struct net_device *dev)
236 {
237 	struct in_device *in_dev;
238 
239 	ASSERT_RTNL();
240 
241 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242 	if (!in_dev)
243 		goto out;
244 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245 			sizeof(in_dev->cnf));
246 	in_dev->cnf.sysctl = NULL;
247 	in_dev->dev = dev;
248 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249 	if (!in_dev->arp_parms)
250 		goto out_kfree;
251 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252 		dev_disable_lro(dev);
253 	/* Reference in_dev->dev */
254 	dev_hold(dev);
255 	/* Account for reference dev->ip_ptr (below) */
256 	in_dev_hold(in_dev);
257 
258 	devinet_sysctl_register(in_dev);
259 	ip_mc_init_dev(in_dev);
260 	if (dev->flags & IFF_UP)
261 		ip_mc_up(in_dev);
262 
263 	/* we can receive as soon as ip_ptr is set -- do this last */
264 	rcu_assign_pointer(dev->ip_ptr, in_dev);
265 out:
266 	return in_dev;
267 out_kfree:
268 	kfree(in_dev);
269 	in_dev = NULL;
270 	goto out;
271 }
272 
273 static void in_dev_rcu_put(struct rcu_head *head)
274 {
275 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
276 	in_dev_put(idev);
277 }
278 
279 static void inetdev_destroy(struct in_device *in_dev)
280 {
281 	struct in_ifaddr *ifa;
282 	struct net_device *dev;
283 
284 	ASSERT_RTNL();
285 
286 	dev = in_dev->dev;
287 
288 	in_dev->dead = 1;
289 
290 	ip_mc_destroy_dev(in_dev);
291 
292 	while ((ifa = in_dev->ifa_list) != NULL) {
293 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294 		inet_free_ifa(ifa);
295 	}
296 
297 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
298 
299 	devinet_sysctl_unregister(in_dev);
300 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301 	arp_ifdown(dev);
302 
303 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304 }
305 
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308 	rcu_read_lock();
309 	for_primary_ifa(in_dev) {
310 		if (inet_ifa_match(a, ifa)) {
311 			if (!b || inet_ifa_match(b, ifa)) {
312 				rcu_read_unlock();
313 				return 1;
314 			}
315 		}
316 	} endfor_ifa(in_dev);
317 	rcu_read_unlock();
318 	return 0;
319 }
320 
321 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322 			 int destroy, struct nlmsghdr *nlh, u32 portid)
323 {
324 	struct in_ifaddr *promote = NULL;
325 	struct in_ifaddr *ifa, *ifa1 = *ifap;
326 	struct in_ifaddr *last_prim = in_dev->ifa_list;
327 	struct in_ifaddr *prev_prom = NULL;
328 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
329 
330 	ASSERT_RTNL();
331 
332 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
333 	 * unless alias promotion is set.
334 	 */
335 
336 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338 
339 		while ((ifa = *ifap1) != NULL) {
340 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341 			    ifa1->ifa_scope <= ifa->ifa_scope)
342 				last_prim = ifa;
343 
344 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345 			    ifa1->ifa_mask != ifa->ifa_mask ||
346 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
347 				ifap1 = &ifa->ifa_next;
348 				prev_prom = ifa;
349 				continue;
350 			}
351 
352 			if (!do_promote) {
353 				inet_hash_remove(ifa);
354 				*ifap1 = ifa->ifa_next;
355 
356 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357 				blocking_notifier_call_chain(&inetaddr_chain,
358 						NETDEV_DOWN, ifa);
359 				inet_free_ifa(ifa);
360 			} else {
361 				promote = ifa;
362 				break;
363 			}
364 		}
365 	}
366 
367 	/* On promotion all secondaries from the subnet move to the new
368 	 * primary IP; we must remove all their routes silently and later
369 	 * add them back with the new prefsrc. Do this while all addresses
370 	 * are still on the device list.
371 	 */
372 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373 		if (ifa1->ifa_mask == ifa->ifa_mask &&
374 		    inet_ifa_match(ifa1->ifa_address, ifa))
375 			fib_del_ifaddr(ifa, ifa1);
376 	}
377 
378 	/* 2. Unlink it */
379 
380 	*ifap = ifa1->ifa_next;
381 	inet_hash_remove(ifa1);
382 
383 	/* 3. Announce address deletion */
384 
385 	/* Send the message first, then call the notifier.
386 	   At first sight the FIB update triggered by the notifier
387 	   will refer to an already deleted ifaddr, which could confuse
388 	   netlink listeners. It is not true: gated sees
389 	   that the route is deleted and, if it still thinks the ifaddr
390 	   is valid, it will try to restore the deleted routes... Grr.
391 	   So this order is correct.
392 	 */
393 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
395 
396 	if (promote) {
397 		struct in_ifaddr *next_sec = promote->ifa_next;
398 
399 		if (prev_prom) {
400 			prev_prom->ifa_next = promote->ifa_next;
401 			promote->ifa_next = last_prim->ifa_next;
402 			last_prim->ifa_next = promote;
403 		}
404 
405 		promote->ifa_flags &= ~IFA_F_SECONDARY;
406 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407 		blocking_notifier_call_chain(&inetaddr_chain,
408 				NETDEV_UP, promote);
409 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410 			if (ifa1->ifa_mask != ifa->ifa_mask ||
411 			    !inet_ifa_match(ifa1->ifa_address, ifa))
412 					continue;
413 			fib_add_ifaddr(ifa);
414 		}
415 
416 	}
417 	if (destroy)
418 		inet_free_ifa(ifa1);
419 }
420 
421 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
422 			 int destroy)
423 {
424 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
425 }
426 
427 static void check_lifetime(struct work_struct *work);
428 
429 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430 
431 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
432 			     u32 portid)
433 {
434 	struct in_device *in_dev = ifa->ifa_dev;
435 	struct in_ifaddr *ifa1, **ifap, **last_primary;
436 
437 	ASSERT_RTNL();
438 
439 	if (!ifa->ifa_local) {
440 		inet_free_ifa(ifa);
441 		return 0;
442 	}
443 
444 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
445 	last_primary = &in_dev->ifa_list;
446 
447 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448 	     ifap = &ifa1->ifa_next) {
449 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450 		    ifa->ifa_scope <= ifa1->ifa_scope)
451 			last_primary = &ifa1->ifa_next;
452 		if (ifa1->ifa_mask == ifa->ifa_mask &&
453 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
454 			if (ifa1->ifa_local == ifa->ifa_local) {
455 				inet_free_ifa(ifa);
456 				return -EEXIST;
457 			}
458 			if (ifa1->ifa_scope != ifa->ifa_scope) {
459 				inet_free_ifa(ifa);
460 				return -EINVAL;
461 			}
462 			ifa->ifa_flags |= IFA_F_SECONDARY;
463 		}
464 	}
465 
466 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467 		prandom_seed((__force u32) ifa->ifa_local);
468 		ifap = last_primary;
469 	}
470 
471 	ifa->ifa_next = *ifap;
472 	*ifap = ifa;
473 
474 	inet_hash_insert(dev_net(in_dev->dev), ifa);
475 
476 	cancel_delayed_work(&check_lifetime_work);
477 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
478 
479 	/* Send the message first, then call the notifier.
480 	   The notifier will trigger the FIB update, so that
481 	   netlink listeners will know about the new ifaddr. */
482 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
484 
485 	return 0;
486 }
487 
488 static int inet_insert_ifa(struct in_ifaddr *ifa)
489 {
490 	return __inet_insert_ifa(ifa, NULL, 0);
491 }
492 
493 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494 {
495 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
496 
497 	ASSERT_RTNL();
498 
499 	if (!in_dev) {
500 		inet_free_ifa(ifa);
501 		return -ENOBUFS;
502 	}
503 	ipv4_devconf_setall(in_dev);
504 	neigh_parms_data_state_setall(in_dev->arp_parms);
505 	if (ifa->ifa_dev != in_dev) {
506 		WARN_ON(ifa->ifa_dev);
507 		in_dev_hold(in_dev);
508 		ifa->ifa_dev = in_dev;
509 	}
510 	if (ipv4_is_loopback(ifa->ifa_local))
511 		ifa->ifa_scope = RT_SCOPE_HOST;
512 	return inet_insert_ifa(ifa);
513 }
514 
515 /* Caller must hold RCU or RTNL:
516  * we don't take a reference on the found in_device.
517  */
518 struct in_device *inetdev_by_index(struct net *net, int ifindex)
519 {
520 	struct net_device *dev;
521 	struct in_device *in_dev = NULL;
522 
523 	rcu_read_lock();
524 	dev = dev_get_by_index_rcu(net, ifindex);
525 	if (dev)
526 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
527 	rcu_read_unlock();
528 	return in_dev;
529 }
530 EXPORT_SYMBOL(inetdev_by_index);
531 
532 /* Called only from RTNL-locked context. No other locks taken. */
533 
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535 				    __be32 mask)
536 {
537 	ASSERT_RTNL();
538 
539 	for_primary_ifa(in_dev) {
540 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541 			return ifa;
542 	} endfor_ifa(in_dev);
543 	return NULL;
544 }
545 
546 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
547 {
548 	struct net *net = sock_net(skb->sk);
549 	struct nlattr *tb[IFA_MAX+1];
550 	struct in_device *in_dev;
551 	struct ifaddrmsg *ifm;
552 	struct in_ifaddr *ifa, **ifap;
553 	int err = -EINVAL;
554 
555 	ASSERT_RTNL();
556 
557 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
558 	if (err < 0)
559 		goto errout;
560 
561 	ifm = nlmsg_data(nlh);
562 	in_dev = inetdev_by_index(net, ifm->ifa_index);
563 	if (in_dev == NULL) {
564 		err = -ENODEV;
565 		goto errout;
566 	}
567 
568 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569 	     ifap = &ifa->ifa_next) {
570 		if (tb[IFA_LOCAL] &&
571 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572 			continue;
573 
574 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575 			continue;
576 
577 		if (tb[IFA_ADDRESS] &&
578 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580 			continue;
581 
582 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
583 		return 0;
584 	}
585 
586 	err = -EADDRNOTAVAIL;
587 errout:
588 	return err;
589 }
590 
591 #define INFINITY_LIFE_TIME	0xFFFFFFFF
592 
593 static void check_lifetime(struct work_struct *work)
594 {
595 	unsigned long now, next, next_sec, next_sched;
596 	struct in_ifaddr *ifa;
597 	struct hlist_node *n;
598 	int i;
599 
600 	now = jiffies;
601 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
602 
603 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604 		bool change_needed = false;
605 
606 		rcu_read_lock();
607 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608 			unsigned long age;
609 
610 			if (ifa->ifa_flags & IFA_F_PERMANENT)
611 				continue;
612 
613 			/* We try to batch several events at once. */
614 			age = (now - ifa->ifa_tstamp +
615 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
616 
617 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618 			    age >= ifa->ifa_valid_lft) {
619 				change_needed = true;
620 			} else if (ifa->ifa_preferred_lft ==
621 				   INFINITY_LIFE_TIME) {
622 				continue;
623 			} else if (age >= ifa->ifa_preferred_lft) {
624 				if (time_before(ifa->ifa_tstamp +
625 						ifa->ifa_valid_lft * HZ, next))
626 					next = ifa->ifa_tstamp +
627 					       ifa->ifa_valid_lft * HZ;
628 
629 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630 					change_needed = true;
631 			} else if (time_before(ifa->ifa_tstamp +
632 					       ifa->ifa_preferred_lft * HZ,
633 					       next)) {
634 				next = ifa->ifa_tstamp +
635 				       ifa->ifa_preferred_lft * HZ;
636 			}
637 		}
638 		rcu_read_unlock();
639 		if (!change_needed)
640 			continue;
641 		rtnl_lock();
642 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643 			unsigned long age;
644 
645 			if (ifa->ifa_flags & IFA_F_PERMANENT)
646 				continue;
647 
648 			/* We try to batch several events at once. */
649 			age = (now - ifa->ifa_tstamp +
650 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
651 
652 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653 			    age >= ifa->ifa_valid_lft) {
654 				struct in_ifaddr **ifap;
655 
656 				for (ifap = &ifa->ifa_dev->ifa_list;
657 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
658 					if (*ifap == ifa) {
659 						inet_del_ifa(ifa->ifa_dev,
660 							     ifap, 1);
661 						break;
662 					}
663 				}
664 			} else if (ifa->ifa_preferred_lft !=
665 				   INFINITY_LIFE_TIME &&
666 				   age >= ifa->ifa_preferred_lft &&
667 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668 				ifa->ifa_flags |= IFA_F_DEPRECATED;
669 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
670 			}
671 		}
672 		rtnl_unlock();
673 	}
674 
675 	next_sec = round_jiffies_up(next);
676 	next_sched = next;
677 
678 	/* If rounded timeout is accurate enough, accept it. */
679 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680 		next_sched = next_sec;
681 
682 	now = jiffies;
683 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
686 
687 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
688 			next_sched - now);
689 }
690 
691 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
692 			     __u32 prefered_lft)
693 {
694 	unsigned long timeout;
695 
696 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
697 
698 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
699 	if (addrconf_finite_timeout(timeout))
700 		ifa->ifa_valid_lft = timeout;
701 	else
702 		ifa->ifa_flags |= IFA_F_PERMANENT;
703 
704 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
705 	if (addrconf_finite_timeout(timeout)) {
706 		if (timeout == 0)
707 			ifa->ifa_flags |= IFA_F_DEPRECATED;
708 		ifa->ifa_preferred_lft = timeout;
709 	}
710 	ifa->ifa_tstamp = jiffies;
711 	if (!ifa->ifa_cstamp)
712 		ifa->ifa_cstamp = ifa->ifa_tstamp;
713 }
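
/* Worked examples (a sketch, not from the original source): lifetimes are
 * given in seconds and compared against the address age computed in
 * check_lifetime() above.
 *
 *	set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 *		-> IFA_F_PERMANENT is set; the address never expires.
 *
 *	set_ifa_lifetime(ifa, 3600, 0);
 *		-> IFA_F_DEPRECATED is set immediately (preferred lifetime 0)
 *		   and check_lifetime() deletes the address roughly 3600
 *		   seconds after ifa_tstamp.
 */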
714 
715 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
716 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
717 {
718 	struct nlattr *tb[IFA_MAX+1];
719 	struct in_ifaddr *ifa;
720 	struct ifaddrmsg *ifm;
721 	struct net_device *dev;
722 	struct in_device *in_dev;
723 	int err;
724 
725 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
726 	if (err < 0)
727 		goto errout;
728 
729 	ifm = nlmsg_data(nlh);
730 	err = -EINVAL;
731 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
732 		goto errout;
733 
734 	dev = __dev_get_by_index(net, ifm->ifa_index);
735 	err = -ENODEV;
736 	if (dev == NULL)
737 		goto errout;
738 
739 	in_dev = __in_dev_get_rtnl(dev);
740 	err = -ENOBUFS;
741 	if (in_dev == NULL)
742 		goto errout;
743 
744 	ifa = inet_alloc_ifa();
745 	if (ifa == NULL)
746 		/*
747 		 * A potential in_dev allocation can be left alive; it stays
748 		 * assigned to its device and is destroyed with it.
749 		 */
750 		goto errout;
751 
752 	ipv4_devconf_setall(in_dev);
753 	neigh_parms_data_state_setall(in_dev->arp_parms);
754 	in_dev_hold(in_dev);
755 
756 	if (tb[IFA_ADDRESS] == NULL)
757 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
758 
759 	INIT_HLIST_NODE(&ifa->hash);
760 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
761 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
762 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
763 					 ifm->ifa_flags;
764 	ifa->ifa_scope = ifm->ifa_scope;
765 	ifa->ifa_dev = in_dev;
766 
767 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
768 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
769 
770 	if (tb[IFA_BROADCAST])
771 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
772 
773 	if (tb[IFA_LABEL])
774 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
775 	else
776 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
777 
778 	if (tb[IFA_CACHEINFO]) {
779 		struct ifa_cacheinfo *ci;
780 
781 		ci = nla_data(tb[IFA_CACHEINFO]);
782 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
783 			err = -EINVAL;
784 			goto errout_free;
785 		}
786 		*pvalid_lft = ci->ifa_valid;
787 		*pprefered_lft = ci->ifa_prefered;
788 	}
789 
790 	return ifa;
791 
792 errout_free:
793 	inet_free_ifa(ifa);
794 errout:
795 	return ERR_PTR(err);
796 }
797 
798 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
799 {
800 	struct in_device *in_dev = ifa->ifa_dev;
801 	struct in_ifaddr *ifa1, **ifap;
802 
803 	if (!ifa->ifa_local)
804 		return NULL;
805 
806 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
807 	     ifap = &ifa1->ifa_next) {
808 		if (ifa1->ifa_mask == ifa->ifa_mask &&
809 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
810 		    ifa1->ifa_local == ifa->ifa_local)
811 			return ifa1;
812 	}
813 	return NULL;
814 }
815 
816 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
817 {
818 	struct net *net = sock_net(skb->sk);
819 	struct in_ifaddr *ifa;
820 	struct in_ifaddr *ifa_existing;
821 	__u32 valid_lft = INFINITY_LIFE_TIME;
822 	__u32 prefered_lft = INFINITY_LIFE_TIME;
823 
824 	ASSERT_RTNL();
825 
826 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
827 	if (IS_ERR(ifa))
828 		return PTR_ERR(ifa);
829 
830 	ifa_existing = find_matching_ifa(ifa);
831 	if (!ifa_existing) {
832 		/* It would be best to check for !NLM_F_CREATE here but
833 		 * userspace already relies on not having to provide this.
834 		 */
835 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
837 	} else {
838 		inet_free_ifa(ifa);
839 
840 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
841 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
842 			return -EEXIST;
843 		ifa = ifa_existing;
844 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
845 		cancel_delayed_work(&check_lifetime_work);
846 		queue_delayed_work(system_power_efficient_wq,
847 				&check_lifetime_work, 0);
848 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
849 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
850 	}
851 	return 0;
852 }
853 
854 /*
855  *	Determine a default network mask, based on the IP address.
856  */
857 
858 static int inet_abc_len(__be32 addr)
859 {
860 	int rc = -1;	/* Something else, probably a multicast. */
861 
862 	if (ipv4_is_zeronet(addr))
863 		rc = 0;
864 	else {
865 		__u32 haddr = ntohl(addr);
866 
867 		if (IN_CLASSA(haddr))
868 			rc = 8;
869 		else if (IN_CLASSB(haddr))
870 			rc = 16;
871 		else if (IN_CLASSC(haddr))
872 			rc = 24;
873 	}
874 
875 	return rc;
876 }
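
/* Worked examples (a sketch, not in the original source) of the classful
 * defaults returned above:
 *
 *	inet_abc_len(htonl(0x00000000))  ->  0   (zeronet)
 *	inet_abc_len(htonl(0x0A000001))  ->  8   (10.0.0.1, class A)
 *	inet_abc_len(htonl(0xAC100001))  -> 16   (172.16.0.1, class B)
 *	inet_abc_len(htonl(0xC0A80101))  -> 24   (192.168.1.1, class C)
 *	inet_abc_len(htonl(0xE0000001))  -> -1   (224.0.0.1, multicast)
 */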
877 
878 
879 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
880 {
881 	struct ifreq ifr;
882 	struct sockaddr_in sin_orig;
883 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
884 	struct in_device *in_dev;
885 	struct in_ifaddr **ifap = NULL;
886 	struct in_ifaddr *ifa = NULL;
887 	struct net_device *dev;
888 	char *colon;
889 	int ret = -EFAULT;
890 	int tryaddrmatch = 0;
891 
892 	/*
893 	 *	Fetch the caller's info block into kernel space
894 	 */
895 
896 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
897 		goto out;
898 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
899 
900 	/* save original address for comparison */
901 	memcpy(&sin_orig, sin, sizeof(*sin));
902 
903 	colon = strchr(ifr.ifr_name, ':');
904 	if (colon)
905 		*colon = 0;
906 
907 	dev_load(net, ifr.ifr_name);
908 
909 	switch (cmd) {
910 	case SIOCGIFADDR:	/* Get interface address */
911 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
912 	case SIOCGIFDSTADDR:	/* Get the destination address */
913 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
914 		/* Note that these ioctls will not sleep,
915 		   so we do not impose a lock.
916 		   One day we will be forced to put a shared lock here (I mean SMP).
917 		 */
918 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
919 		memset(sin, 0, sizeof(*sin));
920 		sin->sin_family = AF_INET;
921 		break;
922 
923 	case SIOCSIFFLAGS:
924 		ret = -EPERM;
925 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
926 			goto out;
927 		break;
928 	case SIOCSIFADDR:	/* Set interface address (and family) */
929 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
930 	case SIOCSIFDSTADDR:	/* Set the destination address */
931 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
932 		ret = -EPERM;
933 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
934 			goto out;
935 		ret = -EINVAL;
936 		if (sin->sin_family != AF_INET)
937 			goto out;
938 		break;
939 	default:
940 		ret = -EINVAL;
941 		goto out;
942 	}
943 
944 	rtnl_lock();
945 
946 	ret = -ENODEV;
947 	dev = __dev_get_by_name(net, ifr.ifr_name);
948 	if (!dev)
949 		goto done;
950 
951 	if (colon)
952 		*colon = ':';
953 
954 	in_dev = __in_dev_get_rtnl(dev);
955 	if (in_dev) {
956 		if (tryaddrmatch) {
957 			/* Matthias Andree */
958 			/* compare label and address (4.4BSD style) */
959 			/* note: we only do this for a limited set of ioctls
960 			   and only if the original address family was AF_INET.
961 			   This is checked above. */
962 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
963 			     ifap = &ifa->ifa_next) {
964 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
965 				    sin_orig.sin_addr.s_addr ==
966 							ifa->ifa_local) {
967 					break; /* found */
968 				}
969 			}
970 		}
971 		/* we didn't get a match, maybe the application is
972 		   4.3BSD-style and passed in junk so we fall back to
973 		   comparing just the label */
974 		if (!ifa) {
975 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
976 			     ifap = &ifa->ifa_next)
977 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
978 					break;
979 		}
980 	}
981 
982 	ret = -EADDRNOTAVAIL;
983 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
984 		goto done;
985 
986 	switch (cmd) {
987 	case SIOCGIFADDR:	/* Get interface address */
988 		sin->sin_addr.s_addr = ifa->ifa_local;
989 		goto rarok;
990 
991 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
992 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
993 		goto rarok;
994 
995 	case SIOCGIFDSTADDR:	/* Get the destination address */
996 		sin->sin_addr.s_addr = ifa->ifa_address;
997 		goto rarok;
998 
999 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1000 		sin->sin_addr.s_addr = ifa->ifa_mask;
1001 		goto rarok;
1002 
1003 	case SIOCSIFFLAGS:
1004 		if (colon) {
1005 			ret = -EADDRNOTAVAIL;
1006 			if (!ifa)
1007 				break;
1008 			ret = 0;
1009 			if (!(ifr.ifr_flags & IFF_UP))
1010 				inet_del_ifa(in_dev, ifap, 1);
1011 			break;
1012 		}
1013 		ret = dev_change_flags(dev, ifr.ifr_flags);
1014 		break;
1015 
1016 	case SIOCSIFADDR:	/* Set interface address (and family) */
1017 		ret = -EINVAL;
1018 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1019 			break;
1020 
1021 		if (!ifa) {
1022 			ret = -ENOBUFS;
1023 			ifa = inet_alloc_ifa();
1024 			if (!ifa)
1025 				break;
1026 			INIT_HLIST_NODE(&ifa->hash);
1027 			if (colon)
1028 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1029 			else
1030 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 		} else {
1032 			ret = 0;
1033 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1034 				break;
1035 			inet_del_ifa(in_dev, ifap, 0);
1036 			ifa->ifa_broadcast = 0;
1037 			ifa->ifa_scope = 0;
1038 		}
1039 
1040 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1041 
1042 		if (!(dev->flags & IFF_POINTOPOINT)) {
1043 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1044 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1045 			if ((dev->flags & IFF_BROADCAST) &&
1046 			    ifa->ifa_prefixlen < 31)
1047 				ifa->ifa_broadcast = ifa->ifa_address |
1048 						     ~ifa->ifa_mask;
1049 		} else {
1050 			ifa->ifa_prefixlen = 32;
1051 			ifa->ifa_mask = inet_make_mask(32);
1052 		}
1053 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1054 		ret = inet_set_ifa(dev, ifa);
1055 		break;
1056 
1057 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1058 		ret = 0;
1059 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1060 			inet_del_ifa(in_dev, ifap, 0);
1061 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1062 			inet_insert_ifa(ifa);
1063 		}
1064 		break;
1065 
1066 	case SIOCSIFDSTADDR:	/* Set the destination address */
1067 		ret = 0;
1068 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1069 			break;
1070 		ret = -EINVAL;
1071 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1072 			break;
1073 		ret = 0;
1074 		inet_del_ifa(in_dev, ifap, 0);
1075 		ifa->ifa_address = sin->sin_addr.s_addr;
1076 		inet_insert_ifa(ifa);
1077 		break;
1078 
1079 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1080 
1081 		/*
1082 		 *	The mask we set must be legal.
1083 		 */
1084 		ret = -EINVAL;
1085 		if (bad_mask(sin->sin_addr.s_addr, 0))
1086 			break;
1087 		ret = 0;
1088 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1089 			__be32 old_mask = ifa->ifa_mask;
1090 			inet_del_ifa(in_dev, ifap, 0);
1091 			ifa->ifa_mask = sin->sin_addr.s_addr;
1092 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1093 
1094 			/* See if current broadcast address matches
1095 			 * with current netmask, then recalculate
1096 			 * the broadcast address. Otherwise it's a
1097 			 * funny address, so don't touch it since
1098 			 * the user seems to know what (s)he's doing...
1099 			 */
1100 			if ((dev->flags & IFF_BROADCAST) &&
1101 			    (ifa->ifa_prefixlen < 31) &&
1102 			    (ifa->ifa_broadcast ==
1103 			     (ifa->ifa_local|~old_mask))) {
1104 				ifa->ifa_broadcast = (ifa->ifa_local |
1105 						      ~sin->sin_addr.s_addr);
1106 			}
1107 			inet_insert_ifa(ifa);
1108 		}
1109 		break;
1110 	}
1111 done:
1112 	rtnl_unlock();
1113 out:
1114 	return ret;
1115 rarok:
1116 	rtnl_unlock();
1117 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1118 	goto out;
1119 }
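
/* Userspace usage sketch (hypothetical, not part of this file): the classic
 * way to reach the SIOCGIFADDR branch above.  "eth0" is an assumed
 * interface name; error handling is omitted for brevity.
 *
 *	#include <arpa/inet.h>
 *	#include <net/if.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)
 *			printf("%s\n", inet_ntoa(((struct sockaddr_in *)
 *						  &ifr.ifr_addr)->sin_addr));
 *		close(fd);
 *		return 0;
 *	}
 */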
1120 
1121 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1122 {
1123 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1124 	struct in_ifaddr *ifa;
1125 	struct ifreq ifr;
1126 	int done = 0;
1127 
1128 	if (!in_dev)
1129 		goto out;
1130 
1131 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1132 		if (!buf) {
1133 			done += sizeof(ifr);
1134 			continue;
1135 		}
1136 		if (len < (int) sizeof(ifr))
1137 			break;
1138 		memset(&ifr, 0, sizeof(struct ifreq));
1139 		strcpy(ifr.ifr_name, ifa->ifa_label);
1140 
1141 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1142 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1143 								ifa->ifa_local;
1144 
1145 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1146 			done = -EFAULT;
1147 			break;
1148 		}
1149 		buf  += sizeof(struct ifreq);
1150 		len  -= sizeof(struct ifreq);
1151 		done += sizeof(struct ifreq);
1152 	}
1153 out:
1154 	return done;
1155 }
1156 
1157 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1158 {
1159 	__be32 addr = 0;
1160 	struct in_device *in_dev;
1161 	struct net *net = dev_net(dev);
1162 
1163 	rcu_read_lock();
1164 	in_dev = __in_dev_get_rcu(dev);
1165 	if (!in_dev)
1166 		goto no_in_dev;
1167 
1168 	for_primary_ifa(in_dev) {
1169 		if (ifa->ifa_scope > scope)
1170 			continue;
1171 		if (!dst || inet_ifa_match(dst, ifa)) {
1172 			addr = ifa->ifa_local;
1173 			break;
1174 		}
1175 		if (!addr)
1176 			addr = ifa->ifa_local;
1177 	} endfor_ifa(in_dev);
1178 
1179 	if (addr)
1180 		goto out_unlock;
1181 no_in_dev:
1182 
1183 	/* Non-loopback addresses on loopback should be preferred
1184 	   in this case. It is important that lo is the first interface
1185 	   in the dev_base list.
1186 	 */
1187 	for_each_netdev_rcu(net, dev) {
1188 		in_dev = __in_dev_get_rcu(dev);
1189 		if (!in_dev)
1190 			continue;
1191 
1192 		for_primary_ifa(in_dev) {
1193 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1194 			    ifa->ifa_scope <= scope) {
1195 				addr = ifa->ifa_local;
1196 				goto out_unlock;
1197 			}
1198 		} endfor_ifa(in_dev);
1199 	}
1200 out_unlock:
1201 	rcu_read_unlock();
1202 	return addr;
1203 }
1204 EXPORT_SYMBOL(inet_select_addr);
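
/* Usage sketch (hypothetical): pick a source address for traffic sent out
 * of "dev" towards "daddr", the way several callers in net/ipv4 do.  A zero
 * return means no suitable primary address was found, even after the
 * fallback to other devices' addresses of sufficient scope.
 *
 *	__be32 saddr = inet_select_addr(dev, daddr, RT_SCOPE_UNIVERSE);
 *	if (!saddr)
 *		return;		// nothing sensible to use as a source
 */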
1205 
1206 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1207 			      __be32 local, int scope)
1208 {
1209 	int same = 0;
1210 	__be32 addr = 0;
1211 
1212 	for_ifa(in_dev) {
1213 		if (!addr &&
1214 		    (local == ifa->ifa_local || !local) &&
1215 		    ifa->ifa_scope <= scope) {
1216 			addr = ifa->ifa_local;
1217 			if (same)
1218 				break;
1219 		}
1220 		if (!same) {
1221 			same = (!local || inet_ifa_match(local, ifa)) &&
1222 				(!dst || inet_ifa_match(dst, ifa));
1223 			if (same && addr) {
1224 				if (local || !dst)
1225 					break;
1226 				/* Is the selected addr in the dst subnet? */
1227 				if (inet_ifa_match(addr, ifa))
1228 					break;
1229 				/* No, then can we use new local src? */
1230 				if (ifa->ifa_scope <= scope) {
1231 					addr = ifa->ifa_local;
1232 					break;
1233 				}
1234 				/* search for large dst subnet for addr */
1235 				same = 0;
1236 			}
1237 		}
1238 	} endfor_ifa(in_dev);
1239 
1240 	return same ? addr : 0;
1241 }
1242 
1243 /*
1244  * Confirm that local IP address exists using wildcards:
1245  * - net: netns to check, cannot be NULL
1246  * - in_dev: only on this interface, NULL=any interface
1247  * - dst: only in the same subnet as dst, 0=any dst
1248  * - local: address, 0=autoselect the local address
1249  * - scope: maximum allowed scope value for the local address
1250  */
1251 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1252 			 __be32 dst, __be32 local, int scope)
1253 {
1254 	__be32 addr = 0;
1255 	struct net_device *dev;
1256 
1257 	if (in_dev != NULL)
1258 		return confirm_addr_indev(in_dev, dst, local, scope);
1259 
1260 	rcu_read_lock();
1261 	for_each_netdev_rcu(net, dev) {
1262 		in_dev = __in_dev_get_rcu(dev);
1263 		if (in_dev) {
1264 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1265 			if (addr)
1266 				break;
1267 		}
1268 	}
1269 	rcu_read_unlock();
1270 
1271 	return addr;
1272 }
1273 EXPORT_SYMBOL(inet_confirm_addr);
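
/* Usage sketch (hypothetical), following the wildcard rules documented
 * above: check whether "sip" exists as a local address in "net", on any
 * interface, in the same subnet as "tip".
 *
 *	if (inet_confirm_addr(net, NULL, tip, sip, RT_SCOPE_HOST))
 *		...;	// sip is one of our addresses matching tip's subnet
 */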
1274 
1275 /*
1276  *	Device notifier
1277  */
1278 
1279 int register_inetaddr_notifier(struct notifier_block *nb)
1280 {
1281 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1282 }
1283 EXPORT_SYMBOL(register_inetaddr_notifier);
1284 
1285 int unregister_inetaddr_notifier(struct notifier_block *nb)
1286 {
1287 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1288 }
1289 EXPORT_SYMBOL(unregister_inetaddr_notifier);
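
/* Usage sketch (hypothetical module code, not part of this file): the
 * notifier is called with the affected in_ifaddr, under RTNL, for the
 * NETDEV_UP/NETDEV_DOWN events raised elsewhere in this file.
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%pI4 added on %s\n", &ifa->ifa_local,
 *				ifa->ifa_dev->dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_inetaddr_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *
 *	// in module init/exit:
 *	register_inetaddr_notifier(&my_inetaddr_nb);
 *	unregister_inetaddr_notifier(&my_inetaddr_nb);
 */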
1290 
1291 /* Rename ifa_labels for a device name change. Make some effort to preserve
1292  * existing alias numbering and to create unique labels if possible.
1293 */
1294 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1295 {
1296 	struct in_ifaddr *ifa;
1297 	int named = 0;
1298 
1299 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1300 		char old[IFNAMSIZ], *dot;
1301 
1302 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1303 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1304 		if (named++ == 0)
1305 			goto skip;
1306 		dot = strchr(old, ':');
1307 		if (dot == NULL) {
1308 			sprintf(old, ":%d", named);
1309 			dot = old;
1310 		}
1311 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1312 			strcat(ifa->ifa_label, dot);
1313 		else
1314 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1315 skip:
1316 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1317 	}
1318 }
1319 
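/* 68 is the minimum IPv4 MTU from RFC 791: every host must be able to
 * handle a 68-octet datagram (up to 60 bytes of header plus an 8 byte
 * fragment).
 */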
1320 static bool inetdev_valid_mtu(unsigned int mtu)
1321 {
1322 	return mtu >= 68;
1323 }
1324 
1325 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1326 					struct in_device *in_dev)
1327 
1328 {
1329 	struct in_ifaddr *ifa;
1330 
1331 	for (ifa = in_dev->ifa_list; ifa;
1332 	     ifa = ifa->ifa_next) {
1333 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1334 			 ifa->ifa_local, dev,
1335 			 ifa->ifa_local, NULL,
1336 			 dev->dev_addr, NULL);
1337 	}
1338 }
1339 
1340 /* Called only under RTNL semaphore */
1341 
1342 static int inetdev_event(struct notifier_block *this, unsigned long event,
1343 			 void *ptr)
1344 {
1345 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1346 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1347 
1348 	ASSERT_RTNL();
1349 
1350 	if (!in_dev) {
1351 		if (event == NETDEV_REGISTER) {
1352 			in_dev = inetdev_init(dev);
1353 			if (!in_dev)
1354 				return notifier_from_errno(-ENOMEM);
1355 			if (dev->flags & IFF_LOOPBACK) {
1356 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1357 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1358 			}
1359 		} else if (event == NETDEV_CHANGEMTU) {
1360 			/* Re-enabling IP */
1361 			if (inetdev_valid_mtu(dev->mtu))
1362 				in_dev = inetdev_init(dev);
1363 		}
1364 		goto out;
1365 	}
1366 
1367 	switch (event) {
1368 	case NETDEV_REGISTER:
1369 		pr_debug("%s: bug\n", __func__);
1370 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1371 		break;
1372 	case NETDEV_UP:
1373 		if (!inetdev_valid_mtu(dev->mtu))
1374 			break;
1375 		if (dev->flags & IFF_LOOPBACK) {
1376 			struct in_ifaddr *ifa = inet_alloc_ifa();
1377 
1378 			if (ifa) {
1379 				INIT_HLIST_NODE(&ifa->hash);
1380 				ifa->ifa_local =
1381 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1382 				ifa->ifa_prefixlen = 8;
1383 				ifa->ifa_mask = inet_make_mask(8);
1384 				in_dev_hold(in_dev);
1385 				ifa->ifa_dev = in_dev;
1386 				ifa->ifa_scope = RT_SCOPE_HOST;
1387 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1388 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1389 						 INFINITY_LIFE_TIME);
1390 				ipv4_devconf_setall(in_dev);
1391 				neigh_parms_data_state_setall(in_dev->arp_parms);
1392 				inet_insert_ifa(ifa);
1393 			}
1394 		}
1395 		ip_mc_up(in_dev);
1396 		/* fall through */
1397 	case NETDEV_CHANGEADDR:
1398 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1399 			break;
1400 		/* fall through */
1401 	case NETDEV_NOTIFY_PEERS:
1402 		/* Send gratuitous ARP to notify of link change */
1403 		inetdev_send_gratuitous_arp(dev, in_dev);
1404 		break;
1405 	case NETDEV_DOWN:
1406 		ip_mc_down(in_dev);
1407 		break;
1408 	case NETDEV_PRE_TYPE_CHANGE:
1409 		ip_mc_unmap(in_dev);
1410 		break;
1411 	case NETDEV_POST_TYPE_CHANGE:
1412 		ip_mc_remap(in_dev);
1413 		break;
1414 	case NETDEV_CHANGEMTU:
1415 		if (inetdev_valid_mtu(dev->mtu))
1416 			break;
1417 		/* disable IP when MTU is not enough */
1418 	case NETDEV_UNREGISTER:
1419 		inetdev_destroy(in_dev);
1420 		break;
1421 	case NETDEV_CHANGENAME:
1422 		/* Do not notify about label change, this event is
1423 		 * not interesting to applications using netlink.
1424 		 */
1425 		inetdev_changename(dev, in_dev);
1426 
1427 		devinet_sysctl_unregister(in_dev);
1428 		devinet_sysctl_register(in_dev);
1429 		break;
1430 	}
1431 out:
1432 	return NOTIFY_DONE;
1433 }
1434 
1435 static struct notifier_block ip_netdev_notifier = {
1436 	.notifier_call = inetdev_event,
1437 };
1438 
1439 static size_t inet_nlmsg_size(void)
1440 {
1441 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1442 	       + nla_total_size(4) /* IFA_ADDRESS */
1443 	       + nla_total_size(4) /* IFA_LOCAL */
1444 	       + nla_total_size(4) /* IFA_BROADCAST */
1445 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1446 	       + nla_total_size(4)  /* IFA_FLAGS */
1447 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1448 }
1449 
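/* Convert a jiffies timestamp into hundredths of a second since boot
 * (INITIAL_JIFFIES is subtracted so the scale starts near zero), the unit
 * userspace expects in struct ifa_cacheinfo.
 */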
1450 static inline u32 cstamp_delta(unsigned long cstamp)
1451 {
1452 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1453 }
1454 
1455 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1456 			 unsigned long tstamp, u32 preferred, u32 valid)
1457 {
1458 	struct ifa_cacheinfo ci;
1459 
1460 	ci.cstamp = cstamp_delta(cstamp);
1461 	ci.tstamp = cstamp_delta(tstamp);
1462 	ci.ifa_prefered = preferred;
1463 	ci.ifa_valid = valid;
1464 
1465 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1466 }
1467 
1468 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1469 			    u32 portid, u32 seq, int event, unsigned int flags)
1470 {
1471 	struct ifaddrmsg *ifm;
1472 	struct nlmsghdr  *nlh;
1473 	u32 preferred, valid;
1474 
1475 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1476 	if (nlh == NULL)
1477 		return -EMSGSIZE;
1478 
1479 	ifm = nlmsg_data(nlh);
1480 	ifm->ifa_family = AF_INET;
1481 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1482 	ifm->ifa_flags = ifa->ifa_flags;
1483 	ifm->ifa_scope = ifa->ifa_scope;
1484 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1485 
1486 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1487 		preferred = ifa->ifa_preferred_lft;
1488 		valid = ifa->ifa_valid_lft;
1489 		if (preferred != INFINITY_LIFE_TIME) {
1490 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1491 
1492 			if (preferred > tval)
1493 				preferred -= tval;
1494 			else
1495 				preferred = 0;
1496 			if (valid != INFINITY_LIFE_TIME) {
1497 				if (valid > tval)
1498 					valid -= tval;
1499 				else
1500 					valid = 0;
1501 			}
1502 		}
1503 	} else {
1504 		preferred = INFINITY_LIFE_TIME;
1505 		valid = INFINITY_LIFE_TIME;
1506 	}
1507 	if ((ifa->ifa_address &&
1508 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1509 	    (ifa->ifa_local &&
1510 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1511 	    (ifa->ifa_broadcast &&
1512 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1513 	    (ifa->ifa_label[0] &&
1514 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1515 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1516 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1517 			  preferred, valid))
1518 		goto nla_put_failure;
1519 
1520 	return nlmsg_end(skb, nlh);
1521 
1522 nla_put_failure:
1523 	nlmsg_cancel(skb, nlh);
1524 	return -EMSGSIZE;
1525 }
1526 
1527 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1528 {
1529 	struct net *net = sock_net(skb->sk);
1530 	int h, s_h;
1531 	int idx, s_idx;
1532 	int ip_idx, s_ip_idx;
1533 	struct net_device *dev;
1534 	struct in_device *in_dev;
1535 	struct in_ifaddr *ifa;
1536 	struct hlist_head *head;
1537 
1538 	s_h = cb->args[0];
1539 	s_idx = idx = cb->args[1];
1540 	s_ip_idx = ip_idx = cb->args[2];
1541 
1542 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1543 		idx = 0;
1544 		head = &net->dev_index_head[h];
1545 		rcu_read_lock();
1546 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1547 			  net->dev_base_seq;
1548 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1549 			if (idx < s_idx)
1550 				goto cont;
1551 			if (h > s_h || idx > s_idx)
1552 				s_ip_idx = 0;
1553 			in_dev = __in_dev_get_rcu(dev);
1554 			if (!in_dev)
1555 				goto cont;
1556 
1557 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1558 			     ifa = ifa->ifa_next, ip_idx++) {
1559 				if (ip_idx < s_ip_idx)
1560 					continue;
1561 				if (inet_fill_ifaddr(skb, ifa,
1562 					     NETLINK_CB(cb->skb).portid,
1563 					     cb->nlh->nlmsg_seq,
1564 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1565 					rcu_read_unlock();
1566 					goto done;
1567 				}
1568 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1569 			}
1570 cont:
1571 			idx++;
1572 		}
1573 		rcu_read_unlock();
1574 	}
1575 
1576 done:
1577 	cb->args[0] = h;
1578 	cb->args[1] = idx;
1579 	cb->args[2] = ip_idx;
1580 
1581 	return skb->len;
1582 }
1583 
1584 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1585 		      u32 portid)
1586 {
1587 	struct sk_buff *skb;
1588 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1589 	int err = -ENOBUFS;
1590 	struct net *net;
1591 
1592 	net = dev_net(ifa->ifa_dev->dev);
1593 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1594 	if (skb == NULL)
1595 		goto errout;
1596 
1597 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1598 	if (err < 0) {
1599 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1600 		WARN_ON(err == -EMSGSIZE);
1601 		kfree_skb(skb);
1602 		goto errout;
1603 	}
1604 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1605 	return;
1606 errout:
1607 	if (err < 0)
1608 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1609 }
1610 
1611 static size_t inet_get_link_af_size(const struct net_device *dev)
1612 {
1613 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614 
1615 	if (!in_dev)
1616 		return 0;
1617 
1618 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1619 }
1620 
1621 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1622 {
1623 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1624 	struct nlattr *nla;
1625 	int i;
1626 
1627 	if (!in_dev)
1628 		return -ENODATA;
1629 
1630 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1631 	if (nla == NULL)
1632 		return -EMSGSIZE;
1633 
1634 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1635 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1636 
1637 	return 0;
1638 }
1639 
1640 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1641 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1642 };
1643 
1644 static int inet_validate_link_af(const struct net_device *dev,
1645 				 const struct nlattr *nla)
1646 {
1647 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1648 	int err, rem;
1649 
1650 	if (dev && !__in_dev_get_rtnl(dev))
1651 		return -EAFNOSUPPORT;
1652 
1653 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1654 	if (err < 0)
1655 		return err;
1656 
1657 	if (tb[IFLA_INET_CONF]) {
1658 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1659 			int cfgid = nla_type(a);
1660 
1661 			if (nla_len(a) < 4)
1662 				return -EINVAL;
1663 
1664 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1665 				return -EINVAL;
1666 		}
1667 	}
1668 
1669 	return 0;
1670 }
1671 
1672 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1673 {
1674 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1675 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1676 	int rem;
1677 
1678 	if (!in_dev)
1679 		return -EAFNOSUPPORT;
1680 
1681 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1682 		BUG();
1683 
1684 	if (tb[IFLA_INET_CONF]) {
1685 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1686 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1687 	}
1688 
1689 	return 0;
1690 }
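
/* Attribute layout sketch (assumed, following the generic IFLA_AF_SPEC
 * handling in rtnetlink): the "nla" passed to inet_validate_link_af() and
 * inet_set_link_af() is the AF_INET entry nested inside IFLA_AF_SPEC, and
 * each attribute nested in IFLA_INET_CONF is keyed by the devconf index:
 *
 *	IFLA_AF_SPEC
 *	  AF_INET
 *	    IFLA_INET_CONF
 *	      [IPV4_DEVCONF_FORWARDING] = 1	(u32)
 *	      [IPV4_DEVCONF_RP_FILTER]  = 0	(u32)
 */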
1691 
1692 static int inet_netconf_msgsize_devconf(int type)
1693 {
1694 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1695 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1696 
1697 	/* type -1 is used for ALL */
1698 	if (type == -1 || type == NETCONFA_FORWARDING)
1699 		size += nla_total_size(4);
1700 	if (type == -1 || type == NETCONFA_RP_FILTER)
1701 		size += nla_total_size(4);
1702 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1703 		size += nla_total_size(4);
1704 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1705 		size += nla_total_size(4);
1706 
1707 	return size;
1708 }
1709 
1710 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1711 				     struct ipv4_devconf *devconf, u32 portid,
1712 				     u32 seq, int event, unsigned int flags,
1713 				     int type)
1714 {
1715 	struct nlmsghdr  *nlh;
1716 	struct netconfmsg *ncm;
1717 
1718 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1719 			flags);
1720 	if (nlh == NULL)
1721 		return -EMSGSIZE;
1722 
1723 	ncm = nlmsg_data(nlh);
1724 	ncm->ncm_family = AF_INET;
1725 
1726 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1727 		goto nla_put_failure;
1728 
1729 	/* type -1 is used for ALL */
1730 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1731 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1732 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1733 		goto nla_put_failure;
1734 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1735 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1736 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1737 		goto nla_put_failure;
1738 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1739 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1740 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1741 		goto nla_put_failure;
1742 	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1743 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1744 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1745 		goto nla_put_failure;
1746 
1747 	return nlmsg_end(skb, nlh);
1748 
1749 nla_put_failure:
1750 	nlmsg_cancel(skb, nlh);
1751 	return -EMSGSIZE;
1752 }
1753 
1754 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1755 				 struct ipv4_devconf *devconf)
1756 {
1757 	struct sk_buff *skb;
1758 	int err = -ENOBUFS;
1759 
1760 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1761 	if (skb == NULL)
1762 		goto errout;
1763 
1764 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1765 					RTM_NEWNETCONF, 0, type);
1766 	if (err < 0) {
1767 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1768 		WARN_ON(err == -EMSGSIZE);
1769 		kfree_skb(skb);
1770 		goto errout;
1771 	}
1772 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1773 	return;
1774 errout:
1775 	if (err < 0)
1776 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1777 }
1778 
1779 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1780 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1781 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1782 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1783 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1784 };
1785 
1786 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1787 				    struct nlmsghdr *nlh)
1788 {
1789 	struct net *net = sock_net(in_skb->sk);
1790 	struct nlattr *tb[NETCONFA_MAX+1];
1791 	struct netconfmsg *ncm;
1792 	struct sk_buff *skb;
1793 	struct ipv4_devconf *devconf;
1794 	struct in_device *in_dev;
1795 	struct net_device *dev;
1796 	int ifindex;
1797 	int err;
1798 
1799 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1800 			  devconf_ipv4_policy);
1801 	if (err < 0)
1802 		goto errout;
1803 
1804 	err = -EINVAL;
1805 	if (!tb[NETCONFA_IFINDEX])
1806 		goto errout;
1807 
1808 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1809 	switch (ifindex) {
1810 	case NETCONFA_IFINDEX_ALL:
1811 		devconf = net->ipv4.devconf_all;
1812 		break;
1813 	case NETCONFA_IFINDEX_DEFAULT:
1814 		devconf = net->ipv4.devconf_dflt;
1815 		break;
1816 	default:
1817 		dev = __dev_get_by_index(net, ifindex);
1818 		if (dev == NULL)
1819 			goto errout;
1820 		in_dev = __in_dev_get_rtnl(dev);
1821 		if (in_dev == NULL)
1822 			goto errout;
1823 		devconf = &in_dev->cnf;
1824 		break;
1825 	}
1826 
1827 	err = -ENOBUFS;
1828 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1829 	if (skb == NULL)
1830 		goto errout;
1831 
1832 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1833 					NETLINK_CB(in_skb).portid,
1834 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1835 					-1);
1836 	if (err < 0) {
1837 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1838 		WARN_ON(err == -EMSGSIZE);
1839 		kfree_skb(skb);
1840 		goto errout;
1841 	}
1842 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1843 errout:
1844 	return err;
1845 }
1846 
1847 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1848 				     struct netlink_callback *cb)
1849 {
1850 	struct net *net = sock_net(skb->sk);
1851 	int h, s_h;
1852 	int idx, s_idx;
1853 	struct net_device *dev;
1854 	struct in_device *in_dev;
1855 	struct hlist_head *head;
1856 
1857 	s_h = cb->args[0];
1858 	s_idx = idx = cb->args[1];
1859 
1860 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1861 		idx = 0;
1862 		head = &net->dev_index_head[h];
1863 		rcu_read_lock();
1864 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1865 			  net->dev_base_seq;
1866 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1867 			if (idx < s_idx)
1868 				goto cont;
1869 			in_dev = __in_dev_get_rcu(dev);
1870 			if (!in_dev)
1871 				goto cont;
1872 
1873 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1874 						      &in_dev->cnf,
1875 						      NETLINK_CB(cb->skb).portid,
1876 						      cb->nlh->nlmsg_seq,
1877 						      RTM_NEWNETCONF,
1878 						      NLM_F_MULTI,
1879 						      -1) <= 0) {
1880 				rcu_read_unlock();
1881 				goto done;
1882 			}
1883 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1884 cont:
1885 			idx++;
1886 		}
1887 		rcu_read_unlock();
1888 	}
1889 	if (h == NETDEV_HASHENTRIES) {
1890 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1891 					      net->ipv4.devconf_all,
1892 					      NETLINK_CB(cb->skb).portid,
1893 					      cb->nlh->nlmsg_seq,
1894 					      RTM_NEWNETCONF, NLM_F_MULTI,
1895 					      -1) <= 0)
1896 			goto done;
1897 		else
1898 			h++;
1899 	}
1900 	if (h == NETDEV_HASHENTRIES + 1) {
1901 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1902 					      net->ipv4.devconf_dflt,
1903 					      NETLINK_CB(cb->skb).portid,
1904 					      cb->nlh->nlmsg_seq,
1905 					      RTM_NEWNETCONF, NLM_F_MULTI,
1906 					      -1) <= 0)
1907 			goto done;
1908 		else
1909 			h++;
1910 	}
1911 done:
1912 	cb->args[0] = h;
1913 	cb->args[1] = idx;
1914 
1915 	return skb->len;
1916 }
1917 
1918 #ifdef CONFIG_SYSCTL
1919 
1920 static void devinet_copy_dflt_conf(struct net *net, int i)
1921 {
1922 	struct net_device *dev;
1923 
1924 	rcu_read_lock();
1925 	for_each_netdev_rcu(net, dev) {
1926 		struct in_device *in_dev;
1927 
1928 		in_dev = __in_dev_get_rcu(dev);
1929 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1930 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1931 	}
1932 	rcu_read_unlock();
1933 }
1934 
1935 /* called with RTNL locked */
1936 static void inet_forward_change(struct net *net)
1937 {
1938 	struct net_device *dev;
1939 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1940 
1941 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1942 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1943 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944 				    NETCONFA_IFINDEX_ALL,
1945 				    net->ipv4.devconf_all);
1946 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1947 				    NETCONFA_IFINDEX_DEFAULT,
1948 				    net->ipv4.devconf_dflt);
1949 
1950 	for_each_netdev(net, dev) {
1951 		struct in_device *in_dev;
1952 		if (on)
1953 			dev_disable_lro(dev);
1954 		rcu_read_lock();
1955 		in_dev = __in_dev_get_rcu(dev);
1956 		if (in_dev) {
1957 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1958 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1959 						    dev->ifindex, &in_dev->cnf);
1960 		}
1961 		rcu_read_unlock();
1962 	}
1963 }
1964 
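/* Map a devconf block back to the ifindex used in netconf notifications:
 * the shared "default" and "all" blocks get their reserved pseudo-indices,
 * anything else is embedded in an in_device and reports its device's index.
 */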
1965 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1966 {
1967 	if (cnf == net->ipv4.devconf_dflt)
1968 		return NETCONFA_IFINDEX_DEFAULT;
1969 	else if (cnf == net->ipv4.devconf_all)
1970 		return NETCONFA_IFINDEX_ALL;
1971 	else {
1972 		struct in_device *idev
1973 			= container_of(cnf, struct in_device, cnf);
1974 		return idev->dev->ifindex;
1975 	}
1976 }
1977 
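/*
 * Generic handler for the per-interface sysctls.  On a write it marks
 * the entry as explicitly set, copies a new "default" value to devices
 * that have not overridden it, flushes the route cache when accept_local
 * or route_localnet is turned off, and emits netconf notifications when
 * rp_filter or proxy_arp changes.
 */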
1978 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1979 			     void __user *buffer,
1980 			     size_t *lenp, loff_t *ppos)
1981 {
1982 	int old_value = *(int *)ctl->data;
1983 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1984 	int new_value = *(int *)ctl->data;
1985 
1986 	if (write) {
1987 		struct ipv4_devconf *cnf = ctl->extra1;
1988 		struct net *net = ctl->extra2;
1989 		int i = (int *)ctl->data - cnf->data;
1990 		int ifindex;
1991 
1992 		set_bit(i, cnf->state);
1993 
1994 		if (cnf == net->ipv4.devconf_dflt)
1995 			devinet_copy_dflt_conf(net, i);
1996 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1997 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1998 			if ((new_value == 0) && (old_value != 0))
1999 				rt_cache_flush(net);
2000 
2001 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2002 		    new_value != old_value) {
2003 			ifindex = devinet_conf_ifindex(net, cnf);
2004 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2005 						    ifindex, cnf);
2006 		}
2007 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2008 		    new_value != old_value) {
2009 			ifindex = devinet_conf_ifindex(net, cnf);
2010 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2011 						    ifindex, cnf);
2012 		}
2013 	}
2014 
2015 	return ret;
2016 }
2017 
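/*
 * Handler for the "forwarding" entries and /proc/sys/net/ipv4/ip_forward.
 * Changing anything other than the "default" value requires the RTNL
 * lock; if it cannot be taken the written value and file position are
 * rolled back and the syscall is restarted.  Enabling forwarding on a
 * single device also disables LRO on it, and any change under RTNL ends
 * with a netconf notification and a route cache flush.
 */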
2018 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2019 				  void __user *buffer,
2020 				  size_t *lenp, loff_t *ppos)
2021 {
2022 	int *valp = ctl->data;
2023 	int val = *valp;
2024 	loff_t pos = *ppos;
2025 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2026 
2027 	if (write && *valp != val) {
2028 		struct net *net = ctl->extra2;
2029 
2030 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2031 			if (!rtnl_trylock()) {
2032 				/* Restore the original values before restarting */
2033 				*valp = val;
2034 				*ppos = pos;
2035 				return restart_syscall();
2036 			}
2037 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2038 				inet_forward_change(net);
2039 			} else {
2040 				struct ipv4_devconf *cnf = ctl->extra1;
2041 				struct in_device *idev =
2042 					container_of(cnf, struct in_device, cnf);
2043 				if (*valp)
2044 					dev_disable_lro(idev->dev);
2045 				inet_netconf_notify_devconf(net,
2046 							    NETCONFA_FORWARDING,
2047 							    idev->dev->ifindex,
2048 							    cnf);
2049 			}
2050 			rtnl_unlock();
2051 			rt_cache_flush(net);
2052 		} else
2053 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2054 						    NETCONFA_IFINDEX_DEFAULT,
2055 						    net->ipv4.devconf_dflt);
2056 	}
2057 
2058 	return ret;
2059 }
2060 
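/* Plain integer handler that additionally flushes the routing cache
 * whenever the written value differs from the previous one.
 */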
2061 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2062 				void __user *buffer,
2063 				size_t *lenp, loff_t *ppos)
2064 {
2065 	int *valp = ctl->data;
2066 	int val = *valp;
2067 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2068 	struct net *net = ctl->extra2;
2069 
2070 	if (write && *valp != val)
2071 		rt_cache_flush(net);
2072 
2073 	return ret;
2074 }
2075 
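/*
 * Template ctl_table used for every net/ipv4/conf/* directory.  The
 * static entries point into the global ipv4_devconf; at registration
 * time __devinet_sysctl_register() duplicates the table and redirects
 * .data, .extra1 and .extra2 at the devconf block and namespace being
 * registered.
 */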
2076 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2077 	{ \
2078 		.procname	= name, \
2079 		.data		= ipv4_devconf.data + \
2080 				  IPV4_DEVCONF_ ## attr - 1, \
2081 		.maxlen		= sizeof(int), \
2082 		.mode		= mval, \
2083 		.proc_handler	= proc, \
2084 		.extra1		= &ipv4_devconf, \
2085 	}
2086 
2087 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2088 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2089 
2090 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2091 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2092 
2093 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2094 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2095 
2096 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2097 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2098 
2099 static struct devinet_sysctl_table {
2100 	struct ctl_table_header *sysctl_header;
2101 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2102 } devinet_sysctl = {
2103 	.devinet_vars = {
2104 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2105 					     devinet_sysctl_forward),
2106 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2107 
2108 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2109 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2110 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2111 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2112 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2113 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2114 					"accept_source_route"),
2115 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2116 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2117 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2118 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2119 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2120 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2121 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2122 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2123 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2124 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2125 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2126 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2127 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2128 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2129 					"force_igmp_version"),
2130 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2131 					"igmpv2_unsolicited_report_interval"),
2132 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2133 					"igmpv3_unsolicited_report_interval"),
2134 
2135 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2136 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2137 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2138 					      "promote_secondaries"),
2139 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2140 					      "route_localnet"),
2141 	},
2142 };
2143 
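/*
 * Duplicate the template table, re-point each entry at the given devconf
 * block and namespace, and register the result under
 * net/ipv4/conf/<dev_name>.  The loop skips the final, all-zero slot,
 * which serves as the table terminator.
 */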
2144 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2145 					struct ipv4_devconf *p)
2146 {
2147 	int i;
2148 	struct devinet_sysctl_table *t;
2149 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2150 
2151 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2152 	if (!t)
2153 		goto out;
2154 
2155 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2156 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2157 		t->devinet_vars[i].extra1 = p;
2158 		t->devinet_vars[i].extra2 = net;
2159 	}
2160 
2161 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2162 
2163 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2164 	if (!t->sysctl_header)
2165 		goto free;
2166 
2167 	p->sysctl = t;
2168 	return 0;
2169 
2170 free:
2171 	kfree(t);
2172 out:
2173 	return -ENOBUFS;
2174 }
2175 
2176 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2177 {
2178 	struct devinet_sysctl_table *t = cnf->sysctl;
2179 
2180 	if (t == NULL)
2181 		return;
2182 
2183 	cnf->sysctl = NULL;
2184 	unregister_net_sysctl_table(t->sysctl_header);
2185 	kfree(t);
2186 }
2187 
2188 static void devinet_sysctl_register(struct in_device *idev)
2189 {
2190 	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2191 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2192 					&idev->cnf);
2193 }
2194 
2195 static void devinet_sysctl_unregister(struct in_device *idev)
2196 {
2197 	__devinet_sysctl_unregister(&idev->cnf);
2198 	neigh_sysctl_unregister(idev->arp_parms);
2199 }
2200 
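/*
 * /proc/sys/net/ipv4/ip_forward: an alias for the "all" forwarding value.
 * The static table serves init_net; other namespaces get a copy whose
 * .data, .extra1 and .extra2 are re-pointed in devinet_init_net().
 */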
2201 static struct ctl_table ctl_forward_entry[] = {
2202 	{
2203 		.procname	= "ip_forward",
2204 		.data		= &ipv4_devconf.data[
2205 					IPV4_DEVCONF_FORWARDING - 1],
2206 		.maxlen		= sizeof(int),
2207 		.mode		= 0644,
2208 		.proc_handler	= devinet_sysctl_forward,
2209 		.extra1		= &ipv4_devconf,
2210 		.extra2		= &init_net,
2211 	},
2212 	{ },
2213 };
2214 #endif
2215 
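/*
 * Per-namespace setup: init_net uses the static ipv4_devconf templates
 * directly, every other namespace gets kmemdup'd copies (and, with
 * CONFIG_SYSCTL, its own ip_forward table).  The "all" and "default"
 * sysctl directories and the net/ipv4 ip_forward entry are then
 * registered, with full unwinding on failure.
 */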
2216 static __net_init int devinet_init_net(struct net *net)
2217 {
2218 	int err;
2219 	struct ipv4_devconf *all, *dflt;
2220 #ifdef CONFIG_SYSCTL
2221 	struct ctl_table *tbl = ctl_forward_entry;
2222 	struct ctl_table_header *forw_hdr;
2223 #endif
2224 
2225 	err = -ENOMEM;
2226 	all = &ipv4_devconf;
2227 	dflt = &ipv4_devconf_dflt;
2228 
2229 	if (!net_eq(net, &init_net)) {
2230 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2231 		if (all == NULL)
2232 			goto err_alloc_all;
2233 
2234 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2235 		if (dflt == NULL)
2236 			goto err_alloc_dflt;
2237 
2238 #ifdef CONFIG_SYSCTL
2239 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2240 		if (tbl == NULL)
2241 			goto err_alloc_ctl;
2242 
2243 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2244 		tbl[0].extra1 = all;
2245 		tbl[0].extra2 = net;
2246 #endif
2247 	}
2248 
2249 #ifdef CONFIG_SYSCTL
2250 	err = __devinet_sysctl_register(net, "all", all);
2251 	if (err < 0)
2252 		goto err_reg_all;
2253 
2254 	err = __devinet_sysctl_register(net, "default", dflt);
2255 	if (err < 0)
2256 		goto err_reg_dflt;
2257 
2258 	err = -ENOMEM;
2259 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2260 	if (forw_hdr == NULL)
2261 		goto err_reg_ctl;
2262 	net->ipv4.forw_hdr = forw_hdr;
2263 #endif
2264 
2265 	net->ipv4.devconf_all = all;
2266 	net->ipv4.devconf_dflt = dflt;
2267 	return 0;
2268 
2269 #ifdef CONFIG_SYSCTL
2270 err_reg_ctl:
2271 	__devinet_sysctl_unregister(dflt);
2272 err_reg_dflt:
2273 	__devinet_sysctl_unregister(all);
2274 err_reg_all:
2275 	if (tbl != ctl_forward_entry)
2276 		kfree(tbl);
2277 err_alloc_ctl:
2278 #endif
2279 	if (dflt != &ipv4_devconf_dflt)
2280 		kfree(dflt);
2281 err_alloc_dflt:
2282 	if (all != &ipv4_devconf)
2283 		kfree(all);
2284 err_alloc_all:
2285 	return err;
2286 }
2287 
2288 static __net_exit void devinet_exit_net(struct net *net)
2289 {
2290 #ifdef CONFIG_SYSCTL
2291 	struct ctl_table *tbl;
2292 
2293 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2294 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2295 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2296 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2297 	kfree(tbl);
2298 #endif
2299 	kfree(net->ipv4.devconf_dflt);
2300 	kfree(net->ipv4.devconf_all);
2301 }
2302 
2303 static __net_initdata struct pernet_operations devinet_ops = {
2304 	.init = devinet_init_net,
2305 	.exit = devinet_exit_net,
2306 };
2307 
2308 static struct rtnl_af_ops inet_af_ops = {
2309 	.family		  = AF_INET,
2310 	.fill_link_af	  = inet_fill_link_af,
2311 	.get_link_af_size = inet_get_link_af_size,
2312 	.validate_link_af = inet_validate_link_af,
2313 	.set_link_af	  = inet_set_link_af,
2314 };
2315 
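/*
 * Subsystem init: set up the inet address hash table, the per-namespace
 * operations, the SIOCGIFCONF helper and netdevice notifier, the
 * periodic address-lifetime worker, the AF_INET link address-family ops,
 * and the rtnetlink handlers for address and netconf messages.
 */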
2316 void __init devinet_init(void)
2317 {
2318 	int i;
2319 
2320 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2321 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2322 
2323 	register_pernet_subsys(&devinet_ops);
2324 
2325 	register_gifconf(PF_INET, inet_gifconf);
2326 	register_netdevice_notifier(&ip_netdev_notifier);
2327 
2328 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2329 
2330 	rtnl_af_register(&inet_af_ops);
2331 
2332 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2333 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2334 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2335 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2336 		      inet_netconf_dump_devconf, NULL);
2337 }
2338 
2339