xref: /openbmc/linux/net/ipv4/devinet.c (revision 634676c2)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
/* Built-in per-device IPv4 configuration: redirects accepted/sent,
 * secure redirects, shared media, and IGMP unsolicited report
 * intervals (milliseconds).  All other knobs default to 0.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
80 
/* Default template: same as ipv4_devconf, plus source routing
 * accepted by default.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
92 
/* Access a per-netns *default* devconf attribute. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
/* Netlink attribute validation policy for RTM_NEWADDR/RTM_DELADDR. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
110 
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113 	u32 val = (__force u32) addr ^ net_hash_mix(net);
114 
115 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117 
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 
122 	spin_lock(&inet_addr_hash_lock);
123 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 	spin_unlock(&inet_addr_hash_lock);
125 }
126 
/* Unlink @ifa from the address hash; safe wrt. concurrent RCU readers. */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
133 
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU, or RTNL
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144 	u32 hash = inet_addr_hash(net, addr);
145 	struct net_device *result = NULL;
146 	struct in_ifaddr *ifa;
147 
148 	rcu_read_lock();
149 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150 		if (ifa->ifa_local == addr) {
151 			struct net_device *dev = ifa->ifa_dev->dev;
152 
153 			if (!net_eq(dev_net(dev), net))
154 				continue;
155 			result = dev;
156 			break;
157 		}
158 	}
159 	if (!result) {
160 		struct flowi4 fl4 = { .daddr = addr };
161 		struct fib_result res = { 0 };
162 		struct fib_table *local;
163 
164 		/* Fallback to FIB local table so that communication
165 		 * over loopback subnets work.
166 		 */
167 		local = fib_get_table(net, RT_TABLE_LOCAL);
168 		if (local &&
169 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 		    res.type == RTN_LOCAL)
171 			result = FIB_RES_DEV(res);
172 	}
173 	if (result && devref)
174 		dev_hold(result);
175 	rcu_read_unlock();
176 	return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179 
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181 
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184 			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Sysctl support compiled out: registration becomes a no-op. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
196 
197 /* Locks all the inet devices. */
198 
199 static struct in_ifaddr *inet_alloc_ifa(void)
200 {
201 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 }
203 
/* RCU callback: drop the in_device reference held by the ifaddr
 * (if any), then free it.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
211 
/* Free @ifa after an RCU grace period; readers may still hold it. */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
216 
/* Final teardown once the last reference on @idev is gone: release
 * the underlying net_device reference and free the structure.
 * Warns if address/multicast lists are not empty, and refuses to
 * free an in_device that was never marked dead.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
234 
/* Allocate and attach an in_device to @dev.  Called under RTNL.
 * Returns the new in_device, or NULL on allocation failure.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	/* Start from the per-netns default configuration. */
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	devinet_sysctl_register(in_dev);
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev;
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
272 
/* RCU callback: drop one reference on the in_device. */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
278 
/* Detach and tear down @in_dev: delete all addresses and multicast
 * state, unhook dev->ip_ptr, release sysctl/ARP resources, then drop
 * the final reference after an RCU grace period.  Called under RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* Mark dead first so concurrent users stop touching it. */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
305 
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308 	rcu_read_lock();
309 	for_primary_ifa(in_dev) {
310 		if (inet_ifa_match(a, ifa)) {
311 			if (!b || inet_ifa_match(b, ifa)) {
312 				rcu_read_unlock();
313 				return 1;
314 			}
315 		}
316 	} endfor_ifa(in_dev);
317 	rcu_read_unlock();
318 	return 0;
319 }
320 
/* Delete the address *@ifap from @in_dev.  Deleting a primary also
 * removes its secondaries, unless secondary promotion is enabled, in
 * which case the first matching secondary is promoted to primary and
 * the rest are re-homed under it.  @nlh/@portid carry the originating
 * netlink request (if any) for notifications; @destroy frees the
 * removed ifaddr.  Called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary of compatible scope so a
			 * promoted secondary can be linked after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted address out of the secondary run and
		 * splice it in right after the last compatible primary.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries with the
		 * new primary as preferred source.
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
420 
/* Convenience wrapper for __inet_del_ifa() without netlink context. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
426 
427 static void check_lifetime(struct work_struct *work);
428 
429 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430 
/* Link @ifa into its device's address list, classifying it as primary
 * or secondary, then announce it via netlink and the notifier chain.
 * Frees @ifa when it has no local address or is rejected (-EEXIST for
 * a duplicate, -EINVAL for a scope conflict within the same subnet).
 * Called under RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Remember where the run of primaries of compatible
		 * scope ends; new primaries are inserted there.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			/* Same subnet as an existing address: secondary. */
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-run lifetime expiry immediately for the new address. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
487 
/* Insert @ifa without netlink request context. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
492 
/* Bind @ifa to @dev's in_device (taking a reference) and insert it.
 * Frees @ifa and returns -ENOBUFS when the device has no in_device.
 * Loopback addresses are forced to host scope.  Called under RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
514 
/* Look up the in_device for interface index @ifindex in @net.
 * Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
531 
532 /* Called only from RTNL semaphored context. No locks. */
533 
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535 				    __be32 mask)
536 {
537 	ASSERT_RTNL();
538 
539 	for_primary_ifa(in_dev) {
540 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541 			return ifa;
542 	} endfor_ifa(in_dev);
543 	return NULL;
544 }
545 
/* RTM_DELADDR handler: locate the address matching the request's
 * IFA_LOCAL / IFA_LABEL / IFA_ADDRESS attributes on the given
 * interface and delete it.  Attributes that are absent act as
 * wildcards.  Called under RTNL.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (in_dev == NULL) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
			continue;

		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
590 
591 #define INFINITY_LIFE_TIME	0xFFFFFFFF
592 
/* Delayed work: delete addresses whose valid lifetime has expired and
 * flag addresses past their preferred lifetime as IFA_F_DEPRECATED.
 * Each hash bucket is first scanned lock-free under RCU to see whether
 * anything needs changing; only then is RTNL taken for the mutating
 * pass.  Reschedules itself for the next interesting timestamp.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1 (RCU, read-only): detect expired/deprecable
		 * entries and track the earliest future deadline.
		 */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2 (RTNL): actually delete/deprecate entries. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* Find the list slot so inet_del_ifa()
				 * can unlink it.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
690 
/* Apply valid/preferred lifetimes (seconds; INFINITY_LIFE_TIME means
 * "forever") to @ifa and refresh its timestamps.  An infinite valid
 * lifetime makes the address IFA_F_PERMANENT; a preferred lifetime of
 * zero marks it IFA_F_DEPRECATED immediately.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	/* Creation stamp is set once, on first configuration. */
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
714 
/* Build an in_ifaddr from an RTM_NEWADDR netlink message.  On success
 * returns the new ifaddr (holding a reference on the in_device) and,
 * if IFA_CACHEINFO was supplied, fills *@pvalid_lft/*@pprefered_lft;
 * on failure returns an ERR_PTR.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (dev == NULL)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (in_dev == NULL)
		goto errout;

	ifa = inet_alloc_ifa();
	if (ifa == NULL)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	in_dev_hold(in_dev);

	/* Without IFA_ADDRESS, the local address doubles as the peer. */
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (u32) supersedes the narrower ifa_flags header field. */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Preferred lifetime may not exceed valid lifetime. */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
797 
798 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
799 {
800 	struct in_device *in_dev = ifa->ifa_dev;
801 	struct in_ifaddr *ifa1, **ifap;
802 
803 	if (!ifa->ifa_local)
804 		return NULL;
805 
806 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
807 	     ifap = &ifa1->ifa_next) {
808 		if (ifa1->ifa_mask == ifa->ifa_mask &&
809 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
810 		    ifa1->ifa_local == ifa->ifa_local)
811 			return ifa1;
812 	}
813 	return NULL;
814 }
815 
/* RTM_NEWADDR handler: add a new address, or - with NLM_F_REPLACE -
 * refresh the lifetimes of an existing identical address.  Called
 * under RTNL.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		/* Re-run expiry so the new lifetimes take effect now. */
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
	}
	return 0;
}
853 
854 /*
855  *	Determine a default network mask, based on the IP address.
856  */
857 
858 static int inet_abc_len(__be32 addr)
859 {
860 	int rc = -1;	/* Something else, probably a multicast. */
861 
862 	if (ipv4_is_zeronet(addr))
863 		rc = 0;
864 	else {
865 		__u32 haddr = ntohl(addr);
866 
867 		if (IN_CLASSA(haddr))
868 			rc = 8;
869 		else if (IN_CLASSB(haddr))
870 			rc = 16;
871 		else if (IN_CLASSC(haddr))
872 			rc = 24;
873 	}
874 
875 	return rc;
876 }
877 
878 
/* Legacy SIOC[GS]IF* ioctl handler for IPv4 addresses.  Gets run
 * without locks; set operations require CAP_NET_ADMIN and take RTNL.
 * Supports "dev:alias" labels; matching tries label+address first
 * (4.4BSD style), then label alone.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Split off an alias suffix ("eth0:1" -> "eth0"). */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for per-alias matching below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias form: clearing IFF_UP deletes the alias. */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Remove and re-insert so FIB/notifiers see it. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive a classful prefix/mask/broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1120 
1121 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1122 {
1123 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1124 	struct in_ifaddr *ifa;
1125 	struct ifreq ifr;
1126 	int done = 0;
1127 
1128 	if (!in_dev)
1129 		goto out;
1130 
1131 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1132 		if (!buf) {
1133 			done += sizeof(ifr);
1134 			continue;
1135 		}
1136 		if (len < (int) sizeof(ifr))
1137 			break;
1138 		memset(&ifr, 0, sizeof(struct ifreq));
1139 		strcpy(ifr.ifr_name, ifa->ifa_label);
1140 
1141 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1142 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1143 								ifa->ifa_local;
1144 
1145 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1146 			done = -EFAULT;
1147 			break;
1148 		}
1149 		buf  += sizeof(struct ifreq);
1150 		len  -= sizeof(struct ifreq);
1151 		done += sizeof(struct ifreq);
1152 	}
1153 out:
1154 	return done;
1155 }
1156 
/* Choose a source address on @dev for reaching @dst: prefer a primary
 * address whose subnet contains @dst and whose scope is no wider than
 * @scope; otherwise fall back to scanning every device in the
 * namespace.  Returns 0 if no suitable address exists.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1205 
/* Check whether @in_dev carries a local address usable for the
 * (@dst, @local, @scope) wildcard query of inet_confirm_addr().
 * "addr" tracks the best in-scope candidate address seen so far;
 * "same" records whether some address's subnet matched both @local
 * and @dst.  The candidate is only returned once a subnet match has
 * also been found; otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1242 
1243 /*
1244  * Confirm that local IP address exists using wildcards:
1245  * - net: netns to check, cannot be NULL
1246  * - in_dev: only on this interface, NULL=any interface
1247  * - dst: only in the same subnet as dst, 0=any dst
1248  * - local: address, 0=autoselect the local address
1249  * - scope: maximum allowed scope value for the local address
1250  */
1251 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1252 			 __be32 dst, __be32 local, int scope)
1253 {
1254 	__be32 addr = 0;
1255 	struct net_device *dev;
1256 
1257 	if (in_dev != NULL)
1258 		return confirm_addr_indev(in_dev, dst, local, scope);
1259 
1260 	rcu_read_lock();
1261 	for_each_netdev_rcu(net, dev) {
1262 		in_dev = __in_dev_get_rcu(dev);
1263 		if (in_dev) {
1264 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1265 			if (addr)
1266 				break;
1267 		}
1268 	}
1269 	rcu_read_unlock();
1270 
1271 	return addr;
1272 }
1273 EXPORT_SYMBOL(inet_confirm_addr);
1274 
1275 /*
1276  *	Device notifier
1277  */
1278 
/* Subscribe @nb to the IPv4 address change notifier chain. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1284 
/* Remove @nb from the IPv4 address change notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1290 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
*/
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		if (named++ == 0)
			goto skip;	/* first address keeps the bare name */
		dot = strchr(old, ':');
		if (dot == NULL) {
			/* old label had no ":alias" suffix: synthesize one
			 * from the running count (reuses the old[] buffer).
			 */
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* no room: overwrite the tail so the suffix survives */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1319 
/* An IPv4 host must accept datagrams of at least 68 bytes (RFC 791:
 * 60-byte maximum header plus an 8-byte minimum fragment), so any
 * smaller device MTU cannot carry IP.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1324 
/* Broadcast one gratuitous ARP request per address on @dev so peers
 * refresh their ARP caches after a link or address change.
 */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	struct in_ifaddr *ifa;

	for (ifa = in_dev->ifa_list; ifa;
	     ifa = ifa->ifa_next) {
		/* sender and target IP are both ifa_local: gratuitous ARP */
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}
1339 
/* Called only under RTNL semaphore */

/* Netdevice notifier: create, maintain and tear down the per-device
 * IPv4 state (in_device) as devices register, come up/down, change
 * and unregister.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				/* loopback traffic needs no xfrm processing */
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* an in_device must not already exist at registration */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* auto-configure 127.0.0.1/8 on loopback bring-up */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1434 
/* Hooks inetdev_event() into the netdevice notifier chain. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1438 
1439 static size_t inet_nlmsg_size(void)
1440 {
1441 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1442 	       + nla_total_size(4) /* IFA_ADDRESS */
1443 	       + nla_total_size(4) /* IFA_LOCAL */
1444 	       + nla_total_size(4) /* IFA_BROADCAST */
1445 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1446 	       + nla_total_size(4);  /* IFA_FLAGS */
1447 }
1448 
/* Convert a jiffies timestamp to hundredths of a second since boot
 * (INITIAL_JIFFIES is the jiffies value at boot) — the unit used in
 * struct ifa_cacheinfo timestamps.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1453 
1454 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1455 			 unsigned long tstamp, u32 preferred, u32 valid)
1456 {
1457 	struct ifa_cacheinfo ci;
1458 
1459 	ci.cstamp = cstamp_delta(cstamp);
1460 	ci.tstamp = cstamp_delta(tstamp);
1461 	ci.ifa_prefered = preferred;
1462 	ci.ifa_valid = valid;
1463 
1464 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1465 }
1466 
/* Build one address message (@event, e.g. RTM_NEWADDR) describing
 * @ifa into @skb.  Returns a negative error (-EMSGSIZE if @skb lacks
 * room); on success returns the result of nlmsg_end().
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		/* Report *remaining* lifetimes: subtract the seconds
		 * elapsed since the last timestamp update, clamping at 0.
		 */
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		/* permanent addresses never expire */
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* zero-valued address attributes are omitted entirely */
	if ((ifa->ifa_address &&
	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1525 
/* RTM_GETADDR dump: walk every device hash bucket and every address,
 * emitting one RTM_NEWADDR per address.  Resume state across calls is
 * kept in cb->args[]: [0]=hash bucket, [1]=device index within the
 * bucket, [2]=address index within the device.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* seed dump-consistency check with the address generation */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					/* skb full: save position and stop */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1582 
/* Multicast an address change (@event: RTM_NEWADDR/RTM_DELADDR) for
 * @ifa to RTNLGRP_IPV4_IFADDR listeners.  @nlh/@portid identify the
 * originating request, if any, so the requester gets an echo.  On
 * failure the error is recorded on the group via rtnl_set_sk_err().
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1609 
1610 static size_t inet_get_link_af_size(const struct net_device *dev)
1611 {
1612 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1613 
1614 	if (!in_dev)
1615 		return 0;
1616 
1617 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1618 }
1619 
1620 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1621 {
1622 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1623 	struct nlattr *nla;
1624 	int i;
1625 
1626 	if (!in_dev)
1627 		return -ENODATA;
1628 
1629 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1630 	if (nla == NULL)
1631 		return -EMSGSIZE;
1632 
1633 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1634 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1635 
1636 	return 0;
1637 }
1638 
/* Attribute policy for the IPv4 per-link (IFLA_INET_*) attributes. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1642 
1643 static int inet_validate_link_af(const struct net_device *dev,
1644 				 const struct nlattr *nla)
1645 {
1646 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1647 	int err, rem;
1648 
1649 	if (dev && !__in_dev_get_rtnl(dev))
1650 		return -EAFNOSUPPORT;
1651 
1652 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1653 	if (err < 0)
1654 		return err;
1655 
1656 	if (tb[IFLA_INET_CONF]) {
1657 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1658 			int cfgid = nla_type(a);
1659 
1660 			if (nla_len(a) < 4)
1661 				return -EINVAL;
1662 
1663 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1664 				return -EINVAL;
1665 		}
1666 	}
1667 
1668 	return 0;
1669 }
1670 
/* Apply the IPv4 IFLA_INET_CONF settings in @nla to @dev.  The
 * attributes are presumably already checked by inet_validate_link_af()
 * before this is called, which is why a parse failure here is treated
 * as a kernel bug (BUG()) rather than a user error — TODO confirm the
 * validate/set ordering against the rtnl_af_ops caller.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1690 
1691 static int inet_netconf_msgsize_devconf(int type)
1692 {
1693 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1694 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1695 
1696 	/* type -1 is used for ALL */
1697 	if (type == -1 || type == NETCONFA_FORWARDING)
1698 		size += nla_total_size(4);
1699 	if (type == -1 || type == NETCONFA_RP_FILTER)
1700 		size += nla_total_size(4);
1701 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1702 		size += nla_total_size(4);
1703 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1704 		size += nla_total_size(4);
1705 
1706 	return size;
1707 }
1708 
/* Build one RTM_NEWNETCONF message for @devconf into @skb.  @ifindex
 * may be a real device index or one of the NETCONFA_IFINDEX_ALL /
 * _DEFAULT pseudo indexes.  @type selects a single attribute, or -1
 * for all of them.  Returns nlmsg_end()'s result, or -EMSGSIZE.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	/* type -1 is used for ALL */
	if ((type == -1 || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH is backed by the PROXY_ARP devconf slot */
	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1752 
1753 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1754 				 struct ipv4_devconf *devconf)
1755 {
1756 	struct sk_buff *skb;
1757 	int err = -ENOBUFS;
1758 
1759 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1760 	if (skb == NULL)
1761 		goto errout;
1762 
1763 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1764 					RTM_NEWNETCONF, 0, type);
1765 	if (err < 0) {
1766 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1767 		WARN_ON(err == -EMSGSIZE);
1768 		kfree_skb(skb);
1769 		goto errout;
1770 	}
1771 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1772 	return;
1773 errout:
1774 	if (err < 0)
1775 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1776 }
1777 
/* Attribute policy for NETCONFA_* requests (RTM_GETNETCONF). */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
};
1784 
1785 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1786 				    struct nlmsghdr *nlh)
1787 {
1788 	struct net *net = sock_net(in_skb->sk);
1789 	struct nlattr *tb[NETCONFA_MAX+1];
1790 	struct netconfmsg *ncm;
1791 	struct sk_buff *skb;
1792 	struct ipv4_devconf *devconf;
1793 	struct in_device *in_dev;
1794 	struct net_device *dev;
1795 	int ifindex;
1796 	int err;
1797 
1798 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1799 			  devconf_ipv4_policy);
1800 	if (err < 0)
1801 		goto errout;
1802 
1803 	err = EINVAL;
1804 	if (!tb[NETCONFA_IFINDEX])
1805 		goto errout;
1806 
1807 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1808 	switch (ifindex) {
1809 	case NETCONFA_IFINDEX_ALL:
1810 		devconf = net->ipv4.devconf_all;
1811 		break;
1812 	case NETCONFA_IFINDEX_DEFAULT:
1813 		devconf = net->ipv4.devconf_dflt;
1814 		break;
1815 	default:
1816 		dev = __dev_get_by_index(net, ifindex);
1817 		if (dev == NULL)
1818 			goto errout;
1819 		in_dev = __in_dev_get_rtnl(dev);
1820 		if (in_dev == NULL)
1821 			goto errout;
1822 		devconf = &in_dev->cnf;
1823 		break;
1824 	}
1825 
1826 	err = -ENOBUFS;
1827 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1828 	if (skb == NULL)
1829 		goto errout;
1830 
1831 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1832 					NETLINK_CB(in_skb).portid,
1833 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1834 					-1);
1835 	if (err < 0) {
1836 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1837 		WARN_ON(err == -EMSGSIZE);
1838 		kfree_skb(skb);
1839 		goto errout;
1840 	}
1841 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1842 errout:
1843 	return err;
1844 }
1845 
/* RTM_GETNETCONF dump: one RTM_NEWNETCONF per device, followed by
 * two pseudo entries for the "all" and "default" devconfs.  Resume
 * state lives in cb->args[]: [0]=hash bucket (NETDEV_HASHENTRIES and
 * +1 mark the two pseudo entries), [1]=device index within bucket.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* seed dump-consistency check with the address generation */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      -1) <= 0) {
				/* skb full: save position and stop */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1916 
1917 #ifdef CONFIG_SYSCTL
1918 
1919 static void devinet_copy_dflt_conf(struct net *net, int i)
1920 {
1921 	struct net_device *dev;
1922 
1923 	rcu_read_lock();
1924 	for_each_netdev_rcu(net, dev) {
1925 		struct in_device *in_dev;
1926 
1927 		in_dev = __in_dev_get_rcu(dev);
1928 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1929 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1930 	}
1931 	rcu_read_unlock();
1932 }
1933 
/* called with RTNL locked */

/* Propagate a change of the "all" forwarding setting: mirror it into
 * the default devconf and every device's devconf, sending a netconf
 * notification for each.  LRO is disabled on devices when forwarding
 * is turned on.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* routers don't accept redirects */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;
		if (on)
			dev_disable_lro(dev);
		rcu_read_lock();
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
		rcu_read_unlock();
	}
}
1963 
1964 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1965 {
1966 	if (cnf == net->ipv4.devconf_dflt)
1967 		return NETCONFA_IFINDEX_DEFAULT;
1968 	else if (cnf == net->ipv4.devconf_all)
1969 		return NETCONFA_IFINDEX_ALL;
1970 	else {
1971 		struct in_device *idev
1972 			= container_of(cnf, struct in_device, cnf);
1973 		return idev->dev->ifindex;
1974 	}
1975 }
1976 
/* Generic proc handler for devconf sysctl entries.  On writes it
 * marks the slot as explicitly set (so later changes of the default
 * no longer override it via devinet_copy_dflt_conf()), propagates
 * default-table writes, flushes the route cache for slots that affect
 * routing decisions, and sends netconf notifications for slots that
 * are exported via netlink.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* index of this entry within the devconf data[] array */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
	}

	return ret;
}
2016 
/* Proc handler for the "forwarding" sysctls.  Changing forwarding
 * must propagate to other devconfs under RTNL; since we cannot sleep
 * on RTNL here, a contended lock restores the old value and restarts
 * the syscall instead of deadlocking.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				/* "all" toggles every device's setting */
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* default-table change needs no propagation */
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2059 
2060 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2061 				void __user *buffer,
2062 				size_t *lenp, loff_t *ppos)
2063 {
2064 	int *valp = ctl->data;
2065 	int val = *valp;
2066 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2067 	struct net *net = ctl->extra2;
2068 
2069 	if (write && *valp != val)
2070 		rt_cache_flush(net);
2071 
2072 	return ret;
2073 }
2074 
/* Template for one devconf sysctl entry backed by slot
 * IPV4_DEVCONF_<attr> of ipv4_devconf.data[].  .data and .extra1
 * point at the global template; __devinet_sysctl_register() rebases
 * them onto each per-device/per-netns copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* read-write entry with the generic devconf handler */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* read-only entry (value maintained by the kernel) */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* read-write entry with a custom proc handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* read-write entry whose change flushes the route cache */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2097 
/* Template sysctl table: one entry per devconf knob.  A rebased copy
 * is registered for each devconf instance (see
 * __devinet_sysctl_register()).
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2142 
2143 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2144 					struct ipv4_devconf *p)
2145 {
2146 	int i;
2147 	struct devinet_sysctl_table *t;
2148 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2149 
2150 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2151 	if (!t)
2152 		goto out;
2153 
2154 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2155 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2156 		t->devinet_vars[i].extra1 = p;
2157 		t->devinet_vars[i].extra2 = net;
2158 	}
2159 
2160 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2161 
2162 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2163 	if (!t->sysctl_header)
2164 		goto free;
2165 
2166 	p->sysctl = t;
2167 	return 0;
2168 
2169 free:
2170 	kfree(t);
2171 out:
2172 	return -ENOBUFS;
2173 }
2174 
2175 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2176 {
2177 	struct devinet_sysctl_table *t = cnf->sysctl;
2178 
2179 	if (t == NULL)
2180 		return;
2181 
2182 	cnf->sysctl = NULL;
2183 	unregister_net_sysctl_table(t->sysctl_header);
2184 	kfree(t);
2185 }
2186 
2187 static void devinet_sysctl_register(struct in_device *idev)
2188 {
2189 	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2190 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2191 					&idev->cnf);
2192 }
2193 
/*
 * Tear down the per-device sysctl state in the reverse order of
 * devinet_sysctl_register(): devinet tables first, then neigh (ARP).
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
2199 
/*
 * Template for the legacy /proc/sys/net/ipv4/ip_forward knob.  It
 * aliases the FORWARDING slot of the "all" devconf; for non-initial
 * namespaces devinet_init_net() kmemdup()s this table and repoints
 * .data/.extra1/.extra2 at the per-netns copy.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,	/* devconf for the handler */
		.extra2		= &init_net,		/* owning namespace */
	},
	{ },	/* sentinel */
};
2213 #endif
2214 
/*
 * Per-namespace initialization: set up the "all" and "default"
 * ipv4_devconf instances and (under CONFIG_SYSCTL) the corresponding
 * sysctl trees plus the ip_forward entry.  init_net uses the global
 * static instances directly; every other namespace gets private
 * kmemdup()'d copies so sysctl writes cannot leak across namespaces.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * allocated/registered so far is unwound.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial netns: clone the global templates. */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Re-point ip_forward at this namespace's "all" devconf. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwind, in reverse order of setup.  The pointer
	 * comparisons ensure only cloned (non-init_net) objects are
	 * freed, never the global statics.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2286 
/*
 * Per-namespace teardown, strictly in the reverse order of
 * devinet_init_net(): drop the ip_forward header, then the "default"
 * and "all" sysctl trees, then free the per-netns devconf copies.
 * NOTE(review): the unconditional kfree()s imply this is only run for
 * namespaces whose devconf was kmemdup()'d (i.e. not init_net) —
 * confirm against the pernet_operations exit semantics.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2301 
/* Pernet hooks: run devinet_init_net()/devinet_exit_net() for each
 * network namespace as it is created/destroyed. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2306 
/* rtnetlink address-family ops for AF_INET: fill, size, validate and
 * apply the per-link IPv4 attributes carried in IFLA_AF_SPEC.
 * (Handlers are defined earlier in this file.) */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2314 
/*
 * One-time boot initialization for IPv4 device support: set up the
 * address hash table, pernet operations, gifconf/netdev hooks, the
 * address-lifetime worker, and the rtnetlink message handlers.
 */
void __init devinet_init(void)
{
	int i;

	/* Empty every bucket of the inet address hash table. */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off the delayed work that checks address lifetimes
	 * (presumably expiring addresses with finite valid_lft —
	 * defined elsewhere in this file). */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	/* Netlink handlers: address add/delete/dump and netconf query/dump. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, NULL);
}
2337 
2338