xref: /openbmc/linux/net/ipv4/devinet.c (revision c819e2cf)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
/*
 * Expands to IPV4_DEVCONF() applied to the devconf_dflt table of namespace
 * @net for attribute @attr.  @net is parenthesised so that an arbitrary
 * pointer-valued expression (a cast, a ternary, pointer arithmetic, ...)
 * binds as a unit before "->" is applied.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127 	ASSERT_RTNL();
128 	hlist_del_init_rcu(&ifa->hash);
129 }
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	u32 hash = inet_addr_hash(net, addr);
142 	struct net_device *result = NULL;
143 	struct in_ifaddr *ifa;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 		if (ifa->ifa_local == addr) {
148 			struct net_device *dev = ifa->ifa_dev->dev;
149 
150 			if (!net_eq(dev_net(dev), net))
151 				continue;
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176 
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 			 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188 	return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194 
195 /* Locks all the inet devices. */
196 
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201 
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 	if (ifa->ifa_dev)
206 		in_dev_put(ifa->ifa_dev);
207 	kfree(ifa);
208 }
209 
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214 
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217 	struct net_device *dev = idev->dev;
218 
219 	WARN_ON(idev->ifa_list);
220 	WARN_ON(idev->mc_list);
221 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 	dev_put(dev);
226 	if (!idev->dead)
227 		pr_err("Freeing alive in_device %p\n", idev);
228 	else
229 		kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232 
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235 	struct in_device *in_dev;
236 	int err = -ENOMEM;
237 
238 	ASSERT_RTNL();
239 
240 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 	if (!in_dev)
242 		goto out;
243 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 			sizeof(in_dev->cnf));
245 	in_dev->cnf.sysctl = NULL;
246 	in_dev->dev = dev;
247 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 	if (!in_dev->arp_parms)
249 		goto out_kfree;
250 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 		dev_disable_lro(dev);
252 	/* Reference in_dev->dev */
253 	dev_hold(dev);
254 	/* Account for reference dev->ip_ptr (below) */
255 	in_dev_hold(in_dev);
256 
257 	err = devinet_sysctl_register(in_dev);
258 	if (err) {
259 		in_dev->dead = 1;
260 		in_dev_put(in_dev);
261 		in_dev = NULL;
262 		goto out;
263 	}
264 	ip_mc_init_dev(in_dev);
265 	if (dev->flags & IFF_UP)
266 		ip_mc_up(in_dev);
267 
268 	/* we can receive as soon as ip_ptr is set -- do this last */
269 	rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 	return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 	kfree(in_dev);
274 	in_dev = NULL;
275 	goto out;
276 }
277 
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281 	in_dev_put(idev);
282 }
283 
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286 	struct in_ifaddr *ifa;
287 	struct net_device *dev;
288 
289 	ASSERT_RTNL();
290 
291 	dev = in_dev->dev;
292 
293 	in_dev->dead = 1;
294 
295 	ip_mc_destroy_dev(in_dev);
296 
297 	while ((ifa = in_dev->ifa_list) != NULL) {
298 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299 		inet_free_ifa(ifa);
300 	}
301 
302 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
303 
304 	devinet_sysctl_unregister(in_dev);
305 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306 	arp_ifdown(dev);
307 
308 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310 
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313 	rcu_read_lock();
314 	for_primary_ifa(in_dev) {
315 		if (inet_ifa_match(a, ifa)) {
316 			if (!b || inet_ifa_match(b, ifa)) {
317 				rcu_read_unlock();
318 				return 1;
319 			}
320 		}
321 	} endfor_ifa(in_dev);
322 	rcu_read_unlock();
323 	return 0;
324 }
325 
/*
 * Remove the address *@ifap from @in_dev's address list and from the
 * address hash table, and announce the removal.
 *
 * @in_dev:  device state that owns the address list
 * @ifap:    link (in_dev->ifa_list or some ->ifa_next) pointing at the
 *           address to delete
 * @destroy: when non-zero the deleted address is also freed with
 *           inet_free_ifa(); otherwise the caller keeps the object
 * @nlh, @portid: forwarded unchanged to rtmsg_ifa()
 *
 * When the deleted address is a primary one, the secondaries sharing its
 * mask and subnet are either deleted with it or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary.  Asserts that RTNL is held.
 */
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 			 int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
	/* promote:   first matching secondary, set only when promotion is on
	 * ifa1:      the address being deleted
	 * last_prim: last primary seen whose scope is >= ifa1's scope
	 * prev_prom: last list entry that was skipped before 'promote'
	 */
329 	struct in_ifaddr *promote = NULL;
330 	struct in_ifaddr *ifa, *ifa1 = *ifap;
331 	struct in_ifaddr *last_prim = in_dev->ifa_list;
332 	struct in_ifaddr *prev_prom = NULL;
333 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334 
335 	ASSERT_RTNL();
336 
337 	/* 1. Deleting primary ifaddr forces deletion all secondaries
338 	 * unless alias promotion is set
339 	 **/
340 
341 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343 
344 		while ((ifa = *ifap1) != NULL) {
345 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346 			    ifa1->ifa_scope <= ifa->ifa_scope)
347 				last_prim = ifa;
348 
			/* Skip entries that are not secondaries of ifa1's subnet. */
349 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350 			    ifa1->ifa_mask != ifa->ifa_mask ||
351 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
352 				ifap1 = &ifa->ifa_next;
353 				prev_prom = ifa;
354 				continue;
355 			}
356 
357 			if (!do_promote) {
				/* No promotion: unlink, announce and free this secondary. */
358 				inet_hash_remove(ifa);
359 				*ifap1 = ifa->ifa_next;
360 
361 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362 				blocking_notifier_call_chain(&inetaddr_chain,
363 						NETDEV_DOWN, ifa);
364 				inet_free_ifa(ifa);
365 			} else {
				/* Promotion: remember the first candidate and stop scanning. */
366 				promote = ifa;
367 				break;
368 			}
369 		}
370 	}
371 
372 	/* On promotion all secondaries from subnet are changing
373 	 * the primary IP, we must remove all their routes silently
374 	 * and later to add them back with new prefsrc. Do this
375 	 * while all addresses are on the device list.
376 	 */
377 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378 		if (ifa1->ifa_mask == ifa->ifa_mask &&
379 		    inet_ifa_match(ifa1->ifa_address, ifa))
380 			fib_del_ifaddr(ifa, ifa1);
381 	}
382 
383 	/* 2. Unlink it */
384 
385 	*ifap = ifa1->ifa_next;
386 	inet_hash_remove(ifa1);
387 
388 	/* 3. Announce address deletion */
389 
390 	/* Send message first, then call notifier.
391 	   At first sight, FIB update triggered by notifier
392 	   will refer to already deleted ifaddr, that could confuse
393 	   netlink listeners. It is not true: look, gated sees
394 	   that route deleted and if it still thinks that ifaddr
395 	   is valid, it will try to restore deleted routes... Grr.
396 	   So that, this order is correct.
397 	 */
398 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400 
401 	if (promote) {
		/* Saved before the splice below rewrites promote->ifa_next. */
402 		struct in_ifaddr *next_sec = promote->ifa_next;
403 
		/* Move 'promote' out of its old slot to right after last_prim. */
404 		if (prev_prom) {
405 			prev_prom->ifa_next = promote->ifa_next;
406 			promote->ifa_next = last_prim->ifa_next;
407 			last_prim->ifa_next = promote;
408 		}
409 
410 		promote->ifa_flags &= ~IFA_F_SECONDARY;
411 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412 		blocking_notifier_call_chain(&inetaddr_chain,
413 				NETDEV_UP, promote);
		/* Re-add the addresses after 'promote' that share ifa1's subnet. */
414 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415 			if (ifa1->ifa_mask != ifa->ifa_mask ||
416 			    !inet_ifa_match(ifa1->ifa_address, ifa))
417 					continue;
418 			fib_add_ifaddr(ifa);
419 		}
420 
421 	}
422 	if (destroy)
423 		inet_free_ifa(ifa1);
424 }
425 
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427 			 int destroy)
428 {
429 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431 
432 static void check_lifetime(struct work_struct *work);
433 
434 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435 
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437 			     u32 portid)
438 {
439 	struct in_device *in_dev = ifa->ifa_dev;
440 	struct in_ifaddr *ifa1, **ifap, **last_primary;
441 
442 	ASSERT_RTNL();
443 
444 	if (!ifa->ifa_local) {
445 		inet_free_ifa(ifa);
446 		return 0;
447 	}
448 
449 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
450 	last_primary = &in_dev->ifa_list;
451 
452 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453 	     ifap = &ifa1->ifa_next) {
454 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455 		    ifa->ifa_scope <= ifa1->ifa_scope)
456 			last_primary = &ifa1->ifa_next;
457 		if (ifa1->ifa_mask == ifa->ifa_mask &&
458 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
459 			if (ifa1->ifa_local == ifa->ifa_local) {
460 				inet_free_ifa(ifa);
461 				return -EEXIST;
462 			}
463 			if (ifa1->ifa_scope != ifa->ifa_scope) {
464 				inet_free_ifa(ifa);
465 				return -EINVAL;
466 			}
467 			ifa->ifa_flags |= IFA_F_SECONDARY;
468 		}
469 	}
470 
471 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472 		prandom_seed((__force u32) ifa->ifa_local);
473 		ifap = last_primary;
474 	}
475 
476 	ifa->ifa_next = *ifap;
477 	*ifap = ifa;
478 
479 	inet_hash_insert(dev_net(in_dev->dev), ifa);
480 
481 	cancel_delayed_work(&check_lifetime_work);
482 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483 
484 	/* Send message first, then call notifier.
485 	   Notifier will trigger FIB update, so that
486 	   listeners of netlink will know about new ifaddr */
487 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489 
490 	return 0;
491 }
492 
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495 	return __inet_insert_ifa(ifa, NULL, 0);
496 }
497 
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
501 
502 	ASSERT_RTNL();
503 
504 	if (!in_dev) {
505 		inet_free_ifa(ifa);
506 		return -ENOBUFS;
507 	}
508 	ipv4_devconf_setall(in_dev);
509 	neigh_parms_data_state_setall(in_dev->arp_parms);
510 	if (ifa->ifa_dev != in_dev) {
511 		WARN_ON(ifa->ifa_dev);
512 		in_dev_hold(in_dev);
513 		ifa->ifa_dev = in_dev;
514 	}
515 	if (ipv4_is_loopback(ifa->ifa_local))
516 		ifa->ifa_scope = RT_SCOPE_HOST;
517 	return inet_insert_ifa(ifa);
518 }
519 
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525 	struct net_device *dev;
526 	struct in_device *in_dev = NULL;
527 
528 	rcu_read_lock();
529 	dev = dev_get_by_index_rcu(net, ifindex);
530 	if (dev)
531 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532 	rcu_read_unlock();
533 	return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536 
537 /* Called only from RTNL semaphored context. No locks. */
538 
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540 				    __be32 mask)
541 {
542 	ASSERT_RTNL();
543 
544 	for_primary_ifa(in_dev) {
545 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546 			return ifa;
547 	} endfor_ifa(in_dev);
548 	return NULL;
549 }
550 
551 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
552 {
553 	struct net *net = sock_net(skb->sk);
554 	struct nlattr *tb[IFA_MAX+1];
555 	struct in_device *in_dev;
556 	struct ifaddrmsg *ifm;
557 	struct in_ifaddr *ifa, **ifap;
558 	int err = -EINVAL;
559 
560 	ASSERT_RTNL();
561 
562 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
563 	if (err < 0)
564 		goto errout;
565 
566 	ifm = nlmsg_data(nlh);
567 	in_dev = inetdev_by_index(net, ifm->ifa_index);
568 	if (in_dev == NULL) {
569 		err = -ENODEV;
570 		goto errout;
571 	}
572 
573 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574 	     ifap = &ifa->ifa_next) {
575 		if (tb[IFA_LOCAL] &&
576 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
577 			continue;
578 
579 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
580 			continue;
581 
582 		if (tb[IFA_ADDRESS] &&
583 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
585 			continue;
586 
587 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
588 		return 0;
589 	}
590 
591 	err = -EADDRNOTAVAIL;
592 errout:
593 	return err;
594 }
595 
/* Lifetime value that means "never expires" for valid/preferred lifetimes. */
596 #define INFINITY_LIFE_TIME	0xFFFFFFFF
597 
/*
 * Work function behind check_lifetime_work.
 *
 * Walks every bucket of the address hash table.  Each bucket is first
 * scanned under rcu_read_lock() only, to find out whether any
 * non-permanent address has to be deleted or newly marked deprecated and
 * to track the earliest upcoming expiry in 'next'.  Only when a change is
 * needed is RTNL taken and the bucket walked a second time to delete
 * expired addresses or set IFA_F_DEPRECATED.  Finally the work re-queues
 * itself for the computed time.
 *
 * @work: unused by the body.
 */
598 static void check_lifetime(struct work_struct *work)
599 {
600 	unsigned long now, next, next_sec, next_sched;
601 	struct in_ifaddr *ifa;
602 	struct hlist_node *n;
603 	int i;
604 
605 	now = jiffies;
606 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
607 
608 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
609 		bool change_needed = false;
610 
		/* Pass 1: read-only scan, RCU protection only. */
611 		rcu_read_lock();
612 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
613 			unsigned long age;
614 
615 			if (ifa->ifa_flags & IFA_F_PERMANENT)
616 				continue;
617 
618 			/* We try to batch several events at once. */
619 			age = (now - ifa->ifa_tstamp +
620 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
621 
622 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
623 			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime is over: the address must go. */
624 				change_needed = true;
625 			} else if (ifa->ifa_preferred_lft ==
626 				   INFINITY_LIFE_TIME) {
627 				continue;
628 			} else if (age >= ifa->ifa_preferred_lft) {
				/* Preferred lifetime is over: the next event for
				 * this address is the end of its valid lifetime.
				 */
629 				if (time_before(ifa->ifa_tstamp +
630 						ifa->ifa_valid_lft * HZ, next))
631 					next = ifa->ifa_tstamp +
632 					       ifa->ifa_valid_lft * HZ;
633 
634 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
635 					change_needed = true;
636 			} else if (time_before(ifa->ifa_tstamp +
637 					       ifa->ifa_preferred_lft * HZ,
638 					       next)) {
				/* Still preferred: wake up when that ends, if sooner. */
639 				next = ifa->ifa_tstamp +
640 				       ifa->ifa_preferred_lft * HZ;
641 			}
642 		}
643 		rcu_read_unlock();
644 		if (!change_needed)
645 			continue;
		/* Pass 2: apply the changes with RTNL held; the _safe iterator
		 * is used because entries are removed while walking.
		 */
646 		rtnl_lock();
647 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
648 			unsigned long age;
649 
650 			if (ifa->ifa_flags & IFA_F_PERMANENT)
651 				continue;
652 
653 			/* We try to batch several events at once. */
654 			age = (now - ifa->ifa_tstamp +
655 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
656 
657 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
658 			    age >= ifa->ifa_valid_lft) {
659 				struct in_ifaddr **ifap;
660 
				/* inet_del_ifa() needs the link pointing at ifa,
				 * so search the owning device's list for it.
				 */
661 				for (ifap = &ifa->ifa_dev->ifa_list;
662 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
663 					if (*ifap == ifa) {
664 						inet_del_ifa(ifa->ifa_dev,
665 							     ifap, 1);
666 						break;
667 					}
668 				}
669 			} else if (ifa->ifa_preferred_lft !=
670 				   INFINITY_LIFE_TIME &&
671 				   age >= ifa->ifa_preferred_lft &&
672 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
673 				ifa->ifa_flags |= IFA_F_DEPRECATED;
674 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
675 			}
676 		}
677 		rtnl_unlock();
678 	}
679 
680 	next_sec = round_jiffies_up(next);
681 	next_sched = next;
682 
683 	/* If rounded timeout is accurate enough, accept it. */
684 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
685 		next_sched = next_sec;
686 
687 	now = jiffies;
688 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
689 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
690 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
691 
692 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
693 			next_sched - now);
694 }
695 
696 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
697 			     __u32 prefered_lft)
698 {
699 	unsigned long timeout;
700 
701 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
702 
703 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
704 	if (addrconf_finite_timeout(timeout))
705 		ifa->ifa_valid_lft = timeout;
706 	else
707 		ifa->ifa_flags |= IFA_F_PERMANENT;
708 
709 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710 	if (addrconf_finite_timeout(timeout)) {
711 		if (timeout == 0)
712 			ifa->ifa_flags |= IFA_F_DEPRECATED;
713 		ifa->ifa_preferred_lft = timeout;
714 	}
715 	ifa->ifa_tstamp = jiffies;
716 	if (!ifa->ifa_cstamp)
717 		ifa->ifa_cstamp = ifa->ifa_tstamp;
718 }
719 
720 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
721 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
722 {
723 	struct nlattr *tb[IFA_MAX+1];
724 	struct in_ifaddr *ifa;
725 	struct ifaddrmsg *ifm;
726 	struct net_device *dev;
727 	struct in_device *in_dev;
728 	int err;
729 
730 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
731 	if (err < 0)
732 		goto errout;
733 
734 	ifm = nlmsg_data(nlh);
735 	err = -EINVAL;
736 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
737 		goto errout;
738 
739 	dev = __dev_get_by_index(net, ifm->ifa_index);
740 	err = -ENODEV;
741 	if (dev == NULL)
742 		goto errout;
743 
744 	in_dev = __in_dev_get_rtnl(dev);
745 	err = -ENOBUFS;
746 	if (in_dev == NULL)
747 		goto errout;
748 
749 	ifa = inet_alloc_ifa();
750 	if (ifa == NULL)
751 		/*
752 		 * A potential indev allocation can be left alive, it stays
753 		 * assigned to its device and is destroy with it.
754 		 */
755 		goto errout;
756 
757 	ipv4_devconf_setall(in_dev);
758 	neigh_parms_data_state_setall(in_dev->arp_parms);
759 	in_dev_hold(in_dev);
760 
761 	if (tb[IFA_ADDRESS] == NULL)
762 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
763 
764 	INIT_HLIST_NODE(&ifa->hash);
765 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
766 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
767 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
768 					 ifm->ifa_flags;
769 	ifa->ifa_scope = ifm->ifa_scope;
770 	ifa->ifa_dev = in_dev;
771 
772 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
773 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
774 
775 	if (tb[IFA_BROADCAST])
776 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
777 
778 	if (tb[IFA_LABEL])
779 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
780 	else
781 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
782 
783 	if (tb[IFA_CACHEINFO]) {
784 		struct ifa_cacheinfo *ci;
785 
786 		ci = nla_data(tb[IFA_CACHEINFO]);
787 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
788 			err = -EINVAL;
789 			goto errout_free;
790 		}
791 		*pvalid_lft = ci->ifa_valid;
792 		*pprefered_lft = ci->ifa_prefered;
793 	}
794 
795 	return ifa;
796 
797 errout_free:
798 	inet_free_ifa(ifa);
799 errout:
800 	return ERR_PTR(err);
801 }
802 
803 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
804 {
805 	struct in_device *in_dev = ifa->ifa_dev;
806 	struct in_ifaddr *ifa1, **ifap;
807 
808 	if (!ifa->ifa_local)
809 		return NULL;
810 
811 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812 	     ifap = &ifa1->ifa_next) {
813 		if (ifa1->ifa_mask == ifa->ifa_mask &&
814 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
815 		    ifa1->ifa_local == ifa->ifa_local)
816 			return ifa1;
817 	}
818 	return NULL;
819 }
820 
821 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
822 {
823 	struct net *net = sock_net(skb->sk);
824 	struct in_ifaddr *ifa;
825 	struct in_ifaddr *ifa_existing;
826 	__u32 valid_lft = INFINITY_LIFE_TIME;
827 	__u32 prefered_lft = INFINITY_LIFE_TIME;
828 
829 	ASSERT_RTNL();
830 
831 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
832 	if (IS_ERR(ifa))
833 		return PTR_ERR(ifa);
834 
835 	ifa_existing = find_matching_ifa(ifa);
836 	if (!ifa_existing) {
837 		/* It would be best to check for !NLM_F_CREATE here but
838 		 * userspace already relies on not having to provide this.
839 		 */
840 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
842 	} else {
843 		inet_free_ifa(ifa);
844 
845 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
846 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
847 			return -EEXIST;
848 		ifa = ifa_existing;
849 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850 		cancel_delayed_work(&check_lifetime_work);
851 		queue_delayed_work(system_power_efficient_wq,
852 				&check_lifetime_work, 0);
853 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
855 	}
856 	return 0;
857 }
858 
859 /*
860  *	Determine a default network mask, based on the IP address.
861  */
862 
863 static int inet_abc_len(__be32 addr)
864 {
865 	int rc = -1;	/* Something else, probably a multicast. */
866 
867 	if (ipv4_is_zeronet(addr))
868 		rc = 0;
869 	else {
870 		__u32 haddr = ntohl(addr);
871 
872 		if (IN_CLASSA(haddr))
873 			rc = 8;
874 		else if (IN_CLASSB(haddr))
875 			rc = 16;
876 		else if (IN_CLASSC(haddr))
877 			rc = 24;
878 	}
879 
880 	return rc;
881 }
882 
883 
/*
 * Handle the IPv4 interface-address ioctls: SIOCGIF{ADDR,BRDADDR,DSTADDR,
 * NETMASK}, SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS.
 *
 * @net: namespace in which the interface name is looked up
 * @cmd: ioctl command; any other command yields -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the "get"
 *       commands, written back with the result
 *
 * The "set" commands and SIOCSIFFLAGS require CAP_NET_ADMIN in the
 * namespace's user namespace.  A name of the form "dev:alias" selects the
 * address whose label equals the full name.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM, -EINVAL, -ENODEV (no such device), -EADDRNOTAVAIL (no matching
 * address), -ENOBUFS, or the result of dev_change_flags()/inet_set_ifa().
 */
884 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
885 {
886 	struct ifreq ifr;
887 	struct sockaddr_in sin_orig;
888 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
889 	struct in_device *in_dev;
890 	struct in_ifaddr **ifap = NULL;
891 	struct in_ifaddr *ifa = NULL;
892 	struct net_device *dev;
893 	char *colon;
894 	int ret = -EFAULT;
895 	int tryaddrmatch = 0;
896 
897 	/*
898 	 *	Fetch the caller's info block into kernel space
899 	 */
900 
901 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
902 		goto out;
	/* Force NUL termination of the user-supplied name. */
903 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
904 
905 	/* save original address for comparison */
906 	memcpy(&sin_orig, sin, sizeof(*sin));
907 
	/* Cut an alias suffix off temporarily so that the device lookups
	 * below see the bare device name; it is restored after the lookup.
	 */
908 	colon = strchr(ifr.ifr_name, ':');
909 	if (colon)
910 		*colon = 0;
911 
912 	dev_load(net, ifr.ifr_name);
913 
	/* First switch: per-command validation and permission checks. */
914 	switch (cmd) {
915 	case SIOCGIFADDR:	/* Get interface address */
916 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
917 	case SIOCGIFDSTADDR:	/* Get the destination address */
918 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
919 		/* Note that these ioctls will not sleep,
920 		   so that we do not impose a lock.
921 		   One day we will be forced to put shlock here (I mean SMP)
922 		 */
923 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
924 		memset(sin, 0, sizeof(*sin));
925 		sin->sin_family = AF_INET;
926 		break;
927 
928 	case SIOCSIFFLAGS:
929 		ret = -EPERM;
930 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931 			goto out;
932 		break;
933 	case SIOCSIFADDR:	/* Set interface address (and family) */
934 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
935 	case SIOCSIFDSTADDR:	/* Set the destination address */
936 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
937 		ret = -EPERM;
938 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
939 			goto out;
940 		ret = -EINVAL;
941 		if (sin->sin_family != AF_INET)
942 			goto out;
943 		break;
944 	default:
945 		ret = -EINVAL;
946 		goto out;
947 	}
948 
	/* From here on every exit must go through 'done' or 'rarok', both
	 * of which drop RTNL.
	 */
949 	rtnl_lock();
950 
951 	ret = -ENODEV;
952 	dev = __dev_get_by_name(net, ifr.ifr_name);
953 	if (!dev)
954 		goto done;
955 
	/* Restore the full "dev:alias" name for the label comparisons. */
956 	if (colon)
957 		*colon = ':';
958 
959 	in_dev = __in_dev_get_rtnl(dev);
960 	if (in_dev) {
961 		if (tryaddrmatch) {
962 			/* Matthias Andree */
963 			/* compare label and address (4.4BSD style) */
964 			/* note: we only do this for a limited set of ioctls
965 			   and only if the original address family was AF_INET.
966 			   This is checked above. */
967 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
968 			     ifap = &ifa->ifa_next) {
969 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
970 				    sin_orig.sin_addr.s_addr ==
971 							ifa->ifa_local) {
972 					break; /* found */
973 				}
974 			}
975 		}
976 		/* we didn't get a match, maybe the application is
977 		   4.3BSD-style and passed in junk so we fall back to
978 		   comparing just the label */
979 		if (!ifa) {
980 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
981 			     ifap = &ifa->ifa_next)
982 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
983 					break;
984 		}
985 	}
986 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an existing
	 * address; all other commands dereference ifa below.
	 */
987 	ret = -EADDRNOTAVAIL;
988 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
989 		goto done;
990 
	/* Second switch: perform the command. */
991 	switch (cmd) {
992 	case SIOCGIFADDR:	/* Get interface address */
993 		sin->sin_addr.s_addr = ifa->ifa_local;
994 		goto rarok;
995 
996 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
997 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
998 		goto rarok;
999 
1000 	case SIOCGIFDSTADDR:	/* Get the destination address */
1001 		sin->sin_addr.s_addr = ifa->ifa_address;
1002 		goto rarok;
1003 
1004 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1005 		sin->sin_addr.s_addr = ifa->ifa_mask;
1006 		goto rarok;
1007 
1008 	case SIOCSIFFLAGS:
		/* On an alias name, clearing IFF_UP deletes the alias address;
		 * otherwise the flags are applied to the device itself.
		 */
1009 		if (colon) {
1010 			ret = -EADDRNOTAVAIL;
1011 			if (!ifa)
1012 				break;
1013 			ret = 0;
1014 			if (!(ifr.ifr_flags & IFF_UP))
1015 				inet_del_ifa(in_dev, ifap, 1);
1016 			break;
1017 		}
1018 		ret = dev_change_flags(dev, ifr.ifr_flags);
1019 		break;
1020 
1021 	case SIOCSIFADDR:	/* Set interface address (and family) */
1022 		ret = -EINVAL;
1023 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1024 			break;
1025 
1026 		if (!ifa) {
			/* No address with this label yet: create a fresh one. */
1027 			ret = -ENOBUFS;
1028 			ifa = inet_alloc_ifa();
1029 			if (!ifa)
1030 				break;
1031 			INIT_HLIST_NODE(&ifa->hash);
1032 			if (colon)
1033 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1034 			else
1035 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1036 		} else {
			/* Reuse the existing object: unlink it without freeing
			 * (destroy == 0) so it can be re-inserted below.
			 */
1037 			ret = 0;
1038 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1039 				break;
1040 			inet_del_ifa(in_dev, ifap, 0);
1041 			ifa->ifa_broadcast = 0;
1042 			ifa->ifa_scope = 0;
1043 		}
1044 
1045 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1046 
		/* Non point-to-point devices get the classful default mask;
		 * point-to-point devices get a /32.
		 */
1047 		if (!(dev->flags & IFF_POINTOPOINT)) {
1048 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1049 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1050 			if ((dev->flags & IFF_BROADCAST) &&
1051 			    ifa->ifa_prefixlen < 31)
1052 				ifa->ifa_broadcast = ifa->ifa_address |
1053 						     ~ifa->ifa_mask;
1054 		} else {
1055 			ifa->ifa_prefixlen = 32;
1056 			ifa->ifa_mask = inet_make_mask(32);
1057 		}
1058 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1059 		ret = inet_set_ifa(dev, ifa);
1060 		break;
1061 
1062 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		/* Changes are applied as unlink (no free), modify, re-insert. */
1063 		ret = 0;
1064 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1065 			inet_del_ifa(in_dev, ifap, 0);
1066 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1067 			inet_insert_ifa(ifa);
1068 		}
1069 		break;
1070 
1071 	case SIOCSIFDSTADDR:	/* Set the destination address */
1072 		ret = 0;
1073 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1074 			break;
1075 		ret = -EINVAL;
1076 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1077 			break;
1078 		ret = 0;
1079 		inet_del_ifa(in_dev, ifap, 0);
1080 		ifa->ifa_address = sin->sin_addr.s_addr;
1081 		inet_insert_ifa(ifa);
1082 		break;
1083 
1084 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1085 
1086 		/*
1087 		 *	The mask we set must be legal.
1088 		 */
1089 		ret = -EINVAL;
1090 		if (bad_mask(sin->sin_addr.s_addr, 0))
1091 			break;
1092 		ret = 0;
1093 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1094 			__be32 old_mask = ifa->ifa_mask;
1095 			inet_del_ifa(in_dev, ifap, 0);
1096 			ifa->ifa_mask = sin->sin_addr.s_addr;
1097 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1098 
1099 			/* See if current broadcast address matches
1100 			 * with current netmask, then recalculate
1101 			 * the broadcast address. Otherwise it's a
1102 			 * funny address, so don't touch it since
1103 			 * the user seems to know what (s)he's doing...
1104 			 */
1105 			if ((dev->flags & IFF_BROADCAST) &&
1106 			    (ifa->ifa_prefixlen < 31) &&
1107 			    (ifa->ifa_broadcast ==
1108 			     (ifa->ifa_local|~old_mask))) {
1109 				ifa->ifa_broadcast = (ifa->ifa_local |
1110 						      ~sin->sin_addr.s_addr);
1111 			}
1112 			inet_insert_ifa(ifa);
1113 		}
1114 		break;
1115 	}
	/* done:  common exit for paths that hold RTNL and return 'ret' as is.
	 * out:   exit for paths that never took (or already dropped) RTNL.
	 * rarok: exit of the "get" commands -- drop RTNL, then copy the
	 *        filled-in ifreq back to user space.
	 */
1116 done:
1117 	rtnl_unlock();
1118 out:
1119 	return ret;
1120 rarok:
1121 	rtnl_unlock();
1122 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1123 	goto out;
1124 }
1125 
1126 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1127 {
1128 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129 	struct in_ifaddr *ifa;
1130 	struct ifreq ifr;
1131 	int done = 0;
1132 
1133 	if (!in_dev)
1134 		goto out;
1135 
1136 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1137 		if (!buf) {
1138 			done += sizeof(ifr);
1139 			continue;
1140 		}
1141 		if (len < (int) sizeof(ifr))
1142 			break;
1143 		memset(&ifr, 0, sizeof(struct ifreq));
1144 		strcpy(ifr.ifr_name, ifa->ifa_label);
1145 
1146 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1148 								ifa->ifa_local;
1149 
1150 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1151 			done = -EFAULT;
1152 			break;
1153 		}
1154 		buf  += sizeof(struct ifreq);
1155 		len  -= sizeof(struct ifreq);
1156 		done += sizeof(struct ifreq);
1157 	}
1158 out:
1159 	return done;
1160 }
1161 
/*
 * Pick a local IPv4 address to use on @dev.
 *
 * - dev:   device to look at first
 * - dst:   destination to match against the address subnets, 0 = any
 * - scope: largest ifa_scope value that is acceptable
 *
 * Returns the chosen address, or 0 if nothing suitable was found.
 * The whole lookup runs inside an RCU read-side critical section.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* for_primary_ifa() (defined elsewhere) provides the cursor "ifa". */
	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		/* No destination given, or dst matches this address: done. */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Otherwise remember the first in-scope address as fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	/* Note: from here on "dev" is reused as the iteration cursor. */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		/* Take the first address that is not link scoped and is
		 * within the requested scope.
		 */
		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1210 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *  - addr: the first address whose scope is <= @scope and which equals
 *          @local (or any such address when @local is 0);
 *  - same: whether some address has a subnet matching @local (if set)
 *          and @dst (if set).
 *
 * Returns addr when a matching subnet was found ("same" is set),
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	/* for_ifa() (defined elsewhere) provides the cursor "ifa". */
	for_ifa(in_dev) {
		/* Latch the first acceptable candidate address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet was already confirmed earlier: finished. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1247 
1248 /*
1249  * Confirm that local IP address exists using wildcards:
1250  * - net: netns to check, cannot be NULL
1251  * - in_dev: only on this interface, NULL=any interface
1252  * - dst: only in the same subnet as dst, 0=any dst
1253  * - local: address, 0=autoselect the local address
1254  * - scope: maximum allowed scope value for the local address
1255  */
1256 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257 			 __be32 dst, __be32 local, int scope)
1258 {
1259 	__be32 addr = 0;
1260 	struct net_device *dev;
1261 
1262 	if (in_dev != NULL)
1263 		return confirm_addr_indev(in_dev, dst, local, scope);
1264 
1265 	rcu_read_lock();
1266 	for_each_netdev_rcu(net, dev) {
1267 		in_dev = __in_dev_get_rcu(dev);
1268 		if (in_dev) {
1269 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1270 			if (addr)
1271 				break;
1272 		}
1273 	}
1274 	rcu_read_unlock();
1275 
1276 	return addr;
1277 }
1278 EXPORT_SYMBOL(inet_confirm_addr);
1279 
1280 /*
1281  *	Device notifier
1282  */
1283 
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1289 
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1295 
1296 /* Rename ifa_labels for a device name change. Make some effort to preserve
1297  * existing alias numbering and to create unique labels if possible.
1298 */
1299 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1300 {
1301 	struct in_ifaddr *ifa;
1302 	int named = 0;
1303 
1304 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305 		char old[IFNAMSIZ], *dot;
1306 
1307 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1309 		if (named++ == 0)
1310 			goto skip;
1311 		dot = strchr(old, ':');
1312 		if (dot == NULL) {
1313 			sprintf(old, ":%d", named);
1314 			dot = old;
1315 		}
1316 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317 			strcat(ifa->ifa_label, dot);
1318 		else
1319 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1320 skip:
1321 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1322 	}
1323 }
1324 
/* Tell whether @mtu is large enough for IPv4 to stay enabled on a device;
 * the threshold is 68 bytes.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1329 
1330 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331 					struct in_device *in_dev)
1332 
1333 {
1334 	struct in_ifaddr *ifa;
1335 
1336 	for (ifa = in_dev->ifa_list; ifa;
1337 	     ifa = ifa->ifa_next) {
1338 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339 			 ifa->ifa_local, dev,
1340 			 ifa->ifa_local, NULL,
1341 			 dev->dev_addr, NULL);
1342 	}
1343 }
1344 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * @ptr carries the notifier info from which the net_device is obtained.
 * Returns NOTIFY_DONE, except when inetdev_init() fails on
 * NETDEV_REGISTER, in which case the error is passed back through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* NOTE(review): the inetdev_init() result is not
			 * checked for an error here.
			 */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device is
		 * unexpected; the pointer is simply cleared.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback devices get 127.0.0.1/8 (host scope) on "up". */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* Gratuitous ARP is sent only when arp_notify is enabled. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* (deliberate fall through into the unregister handling) */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does send RTM_NEWADDR
		 * via rtmsg_ifa() for each address, so the comment above
		 * looks out of date - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-create the sysctl entries under the new device name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1439 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1443 
1444 static size_t inet_nlmsg_size(void)
1445 {
1446 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447 	       + nla_total_size(4) /* IFA_ADDRESS */
1448 	       + nla_total_size(4) /* IFA_LOCAL */
1449 	       + nla_total_size(4) /* IFA_BROADCAST */
1450 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451 	       + nla_total_size(4)  /* IFA_FLAGS */
1452 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1453 }
1454 
1455 static inline u32 cstamp_delta(unsigned long cstamp)
1456 {
1457 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1458 }
1459 
1460 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461 			 unsigned long tstamp, u32 preferred, u32 valid)
1462 {
1463 	struct ifa_cacheinfo ci;
1464 
1465 	ci.cstamp = cstamp_delta(cstamp);
1466 	ci.tstamp = cstamp_delta(tstamp);
1467 	ci.ifa_prefered = preferred;
1468 	ci.ifa_valid = valid;
1469 
1470 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1471 }
1472 
1473 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474 			    u32 portid, u32 seq, int event, unsigned int flags)
1475 {
1476 	struct ifaddrmsg *ifm;
1477 	struct nlmsghdr  *nlh;
1478 	u32 preferred, valid;
1479 
1480 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1481 	if (nlh == NULL)
1482 		return -EMSGSIZE;
1483 
1484 	ifm = nlmsg_data(nlh);
1485 	ifm->ifa_family = AF_INET;
1486 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487 	ifm->ifa_flags = ifa->ifa_flags;
1488 	ifm->ifa_scope = ifa->ifa_scope;
1489 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1490 
1491 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492 		preferred = ifa->ifa_preferred_lft;
1493 		valid = ifa->ifa_valid_lft;
1494 		if (preferred != INFINITY_LIFE_TIME) {
1495 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1496 
1497 			if (preferred > tval)
1498 				preferred -= tval;
1499 			else
1500 				preferred = 0;
1501 			if (valid != INFINITY_LIFE_TIME) {
1502 				if (valid > tval)
1503 					valid -= tval;
1504 				else
1505 					valid = 0;
1506 			}
1507 		}
1508 	} else {
1509 		preferred = INFINITY_LIFE_TIME;
1510 		valid = INFINITY_LIFE_TIME;
1511 	}
1512 	if ((ifa->ifa_address &&
1513 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1514 	    (ifa->ifa_local &&
1515 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516 	    (ifa->ifa_broadcast &&
1517 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518 	    (ifa->ifa_label[0] &&
1519 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1522 			  preferred, valid))
1523 		goto nla_put_failure;
1524 
1525 	return nlmsg_end(skb, nlh);
1526 
1527 nla_put_failure:
1528 	nlmsg_cancel(skb, nlh);
1529 	return -EMSGSIZE;
1530 }
1531 
/*
 * Netlink dump callback: emit one RTM_NEWADDR message per IPv4 address
 * of every device found through net->dev_index_head.
 *
 * The position is kept in cb->args[] so that a later call can resume:
 *   args[0] - hash bucket index
 *   args[1] - device index within that bucket
 *   args[2] - address index within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Restore the position saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start from its first
			 * address again.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* A non-positive result ends this pass; the
				 * current position is saved below.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Remember where to continue on the next call. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1588 
1589 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1590 		      u32 portid)
1591 {
1592 	struct sk_buff *skb;
1593 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1594 	int err = -ENOBUFS;
1595 	struct net *net;
1596 
1597 	net = dev_net(ifa->ifa_dev->dev);
1598 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1599 	if (skb == NULL)
1600 		goto errout;
1601 
1602 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1603 	if (err < 0) {
1604 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1605 		WARN_ON(err == -EMSGSIZE);
1606 		kfree_skb(skb);
1607 		goto errout;
1608 	}
1609 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1610 	return;
1611 errout:
1612 	if (err < 0)
1613 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1614 }
1615 
1616 static size_t inet_get_link_af_size(const struct net_device *dev)
1617 {
1618 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1619 
1620 	if (!in_dev)
1621 		return 0;
1622 
1623 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1624 }
1625 
1626 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1627 {
1628 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1629 	struct nlattr *nla;
1630 	int i;
1631 
1632 	if (!in_dev)
1633 		return -ENODATA;
1634 
1635 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1636 	if (nla == NULL)
1637 		return -EMSGSIZE;
1638 
1639 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1640 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1641 
1642 	return 0;
1643 }
1644 
/* Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must
 * be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1648 
1649 static int inet_validate_link_af(const struct net_device *dev,
1650 				 const struct nlattr *nla)
1651 {
1652 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1653 	int err, rem;
1654 
1655 	if (dev && !__in_dev_get_rtnl(dev))
1656 		return -EAFNOSUPPORT;
1657 
1658 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1659 	if (err < 0)
1660 		return err;
1661 
1662 	if (tb[IFLA_INET_CONF]) {
1663 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1664 			int cfgid = nla_type(a);
1665 
1666 			if (nla_len(a) < 4)
1667 				return -EINVAL;
1668 
1669 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1670 				return -EINVAL;
1671 		}
1672 	}
1673 
1674 	return 0;
1675 }
1676 
1677 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1678 {
1679 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1680 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1681 	int rem;
1682 
1683 	if (!in_dev)
1684 		return -EAFNOSUPPORT;
1685 
1686 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1687 		BUG();
1688 
1689 	if (tb[IFLA_INET_CONF]) {
1690 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1691 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1692 	}
1693 
1694 	return 0;
1695 }
1696 
1697 static int inet_netconf_msgsize_devconf(int type)
1698 {
1699 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1700 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1701 
1702 	/* type -1 is used for ALL */
1703 	if (type == -1 || type == NETCONFA_FORWARDING)
1704 		size += nla_total_size(4);
1705 	if (type == -1 || type == NETCONFA_RP_FILTER)
1706 		size += nla_total_size(4);
1707 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1708 		size += nla_total_size(4);
1709 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1710 		size += nla_total_size(4);
1711 
1712 	return size;
1713 }
1714 
1715 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1716 				     struct ipv4_devconf *devconf, u32 portid,
1717 				     u32 seq, int event, unsigned int flags,
1718 				     int type)
1719 {
1720 	struct nlmsghdr  *nlh;
1721 	struct netconfmsg *ncm;
1722 
1723 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1724 			flags);
1725 	if (nlh == NULL)
1726 		return -EMSGSIZE;
1727 
1728 	ncm = nlmsg_data(nlh);
1729 	ncm->ncm_family = AF_INET;
1730 
1731 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1732 		goto nla_put_failure;
1733 
1734 	/* type -1 is used for ALL */
1735 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1736 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1737 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1738 		goto nla_put_failure;
1739 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1740 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1741 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1742 		goto nla_put_failure;
1743 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1744 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1745 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1746 		goto nla_put_failure;
1747 	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1748 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1749 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1750 		goto nla_put_failure;
1751 
1752 	return nlmsg_end(skb, nlh);
1753 
1754 nla_put_failure:
1755 	nlmsg_cancel(skb, nlh);
1756 	return -EMSGSIZE;
1757 }
1758 
1759 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1760 				 struct ipv4_devconf *devconf)
1761 {
1762 	struct sk_buff *skb;
1763 	int err = -ENOBUFS;
1764 
1765 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1766 	if (skb == NULL)
1767 		goto errout;
1768 
1769 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1770 					RTM_NEWNETCONF, 0, type);
1771 	if (err < 0) {
1772 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1773 		WARN_ON(err == -EMSGSIZE);
1774 		kfree_skb(skb);
1775 		goto errout;
1776 	}
1777 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1778 	return;
1779 errout:
1780 	if (err < 0)
1781 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1782 }
1783 
/* Attribute policy for netconf requests parsed in
 * inet_netconf_get_devconf(); each listed attribute is an int-sized value.
 * NOTE(review): there is no entry for NETCONFA_MC_FORWARDING although
 * inet_netconf_fill_devconf() emits that attribute - presumably fine since
 * only NETCONFA_IFINDEX is read from requests; confirm.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
};
1790 
1791 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1792 				    struct nlmsghdr *nlh)
1793 {
1794 	struct net *net = sock_net(in_skb->sk);
1795 	struct nlattr *tb[NETCONFA_MAX+1];
1796 	struct netconfmsg *ncm;
1797 	struct sk_buff *skb;
1798 	struct ipv4_devconf *devconf;
1799 	struct in_device *in_dev;
1800 	struct net_device *dev;
1801 	int ifindex;
1802 	int err;
1803 
1804 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1805 			  devconf_ipv4_policy);
1806 	if (err < 0)
1807 		goto errout;
1808 
1809 	err = EINVAL;
1810 	if (!tb[NETCONFA_IFINDEX])
1811 		goto errout;
1812 
1813 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1814 	switch (ifindex) {
1815 	case NETCONFA_IFINDEX_ALL:
1816 		devconf = net->ipv4.devconf_all;
1817 		break;
1818 	case NETCONFA_IFINDEX_DEFAULT:
1819 		devconf = net->ipv4.devconf_dflt;
1820 		break;
1821 	default:
1822 		dev = __dev_get_by_index(net, ifindex);
1823 		if (dev == NULL)
1824 			goto errout;
1825 		in_dev = __in_dev_get_rtnl(dev);
1826 		if (in_dev == NULL)
1827 			goto errout;
1828 		devconf = &in_dev->cnf;
1829 		break;
1830 	}
1831 
1832 	err = -ENOBUFS;
1833 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1834 	if (skb == NULL)
1835 		goto errout;
1836 
1837 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1838 					NETLINK_CB(in_skb).portid,
1839 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1840 					-1);
1841 	if (err < 0) {
1842 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1843 		WARN_ON(err == -EMSGSIZE);
1844 		kfree_skb(skb);
1845 		goto errout;
1846 	}
1847 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1848 errout:
1849 	return err;
1850 }
1851 
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message per device that
 * has IPv4 state, followed by one for the "all" and one for the "default"
 * configuration.
 *
 * The position is kept in cb->args[]: args[0] is the hash bucket (or
 * NETDEV_HASHENTRIES / NETDEV_HASHENTRIES + 1 for the two trailing
 * pseudo entries) and args[1] the device index within the bucket.
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	/* Restore the position saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx only applies to the first bucket visited. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped. */
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			/* A non-positive result ends this pass; the current
			 * position is saved below.
			 */
			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      -1) <= 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets done: emit the "all" entry, advancing h on success
	 * so that it is not repeated.
	 */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" entry, handled the same way. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
done:
	/* Remember where to continue on the next call. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1922 
1923 #ifdef CONFIG_SYSCTL
1924 
1925 static void devinet_copy_dflt_conf(struct net *net, int i)
1926 {
1927 	struct net_device *dev;
1928 
1929 	rcu_read_lock();
1930 	for_each_netdev_rcu(net, dev) {
1931 		struct in_device *in_dev;
1932 
1933 		in_dev = __in_dev_get_rcu(dev);
1934 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1935 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1936 	}
1937 	rcu_read_unlock();
1938 }
1939 
1940 /* called with RTNL locked */
1941 static void inet_forward_change(struct net *net)
1942 {
1943 	struct net_device *dev;
1944 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1945 
1946 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1947 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1948 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1949 				    NETCONFA_IFINDEX_ALL,
1950 				    net->ipv4.devconf_all);
1951 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1952 				    NETCONFA_IFINDEX_DEFAULT,
1953 				    net->ipv4.devconf_dflt);
1954 
1955 	for_each_netdev(net, dev) {
1956 		struct in_device *in_dev;
1957 		if (on)
1958 			dev_disable_lro(dev);
1959 		rcu_read_lock();
1960 		in_dev = __in_dev_get_rcu(dev);
1961 		if (in_dev) {
1962 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1963 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1964 						    dev->ifindex, &in_dev->cnf);
1965 		}
1966 		rcu_read_unlock();
1967 	}
1968 }
1969 
1970 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1971 {
1972 	if (cnf == net->ipv4.devconf_dflt)
1973 		return NETCONFA_IFINDEX_DEFAULT;
1974 	else if (cnf == net->ipv4.devconf_all)
1975 		return NETCONFA_IFINDEX_ALL;
1976 	else {
1977 		struct in_device *idev
1978 			= container_of(cnf, struct in_device, cnf);
1979 		return idev->dev->ifindex;
1980 	}
1981 }
1982 
1983 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1984 			     void __user *buffer,
1985 			     size_t *lenp, loff_t *ppos)
1986 {
1987 	int old_value = *(int *)ctl->data;
1988 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1989 	int new_value = *(int *)ctl->data;
1990 
1991 	if (write) {
1992 		struct ipv4_devconf *cnf = ctl->extra1;
1993 		struct net *net = ctl->extra2;
1994 		int i = (int *)ctl->data - cnf->data;
1995 		int ifindex;
1996 
1997 		set_bit(i, cnf->state);
1998 
1999 		if (cnf == net->ipv4.devconf_dflt)
2000 			devinet_copy_dflt_conf(net, i);
2001 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2002 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2003 			if ((new_value == 0) && (old_value != 0))
2004 				rt_cache_flush(net);
2005 
2006 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2007 		    new_value != old_value) {
2008 			ifindex = devinet_conf_ifindex(net, cnf);
2009 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2010 						    ifindex, cnf);
2011 		}
2012 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2013 		    new_value != old_value) {
2014 			ifindex = devinet_conf_ifindex(net, cnf);
2015 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2016 						    ifindex, cnf);
2017 		}
2018 	}
2019 
2020 	return ret;
2021 }
2022 
/*
 * Sysctl handler for the "forwarding" entries.
 *
 * The value is read or written by proc_dointvec().  When a write changed
 * it:
 *  - for the default block, only a netconf notification is sent;
 *  - for the "all" block, inet_forward_change() propagates the value;
 *  - for a single device, LRO is disabled if forwarding is now on and a
 *    netconf notification is sent.
 * The latter two run under RTNL and are followed by a route cache flush.
 *
 * Returns the result of proc_dointvec(), or that of restart_syscall()
 * when RTNL could not be taken.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	/* Saved so both can be rolled back if the call must be restarted. */
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			/* Try-lock rather than block on RTNL; on failure the
			 * write is undone and the syscall restarted.
			 */
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: cnf is embedded in the
				 * device's in_device.
				 */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2065 
2066 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2067 				void __user *buffer,
2068 				size_t *lenp, loff_t *ppos)
2069 {
2070 	int *valp = ctl->data;
2071 	int val = *valp;
2072 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2073 	struct net *net = ctl->extra2;
2074 
2075 	if (write && *valp != val)
2076 		rt_cache_flush(net);
2077 
2078 	return ret;
2079 }
2080 
/*
 * Build one ctl_table template entry for IPv4 device setting @attr.
 *
 * @name is the procfs file name, @mval the file mode and @proc the
 * handler.  .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself; both are re-targeted
 * at a specific configuration block by __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2103 
/*
 * Template of the IPv4 per-device sysctl table.
 *
 * __devinet_sysctl_register() duplicates this structure for every
 * configuration block it registers and stores the registration handle in
 * sysctl_header.  devinet_vars has __IPV4_DEVCONF_MAX slots; the
 * registration loop skips the last one - presumably the zeroed table
 * terminator (TODO confirm).
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* read-only entry */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read-write entries handled by devinet_conf_proc() */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2148 
2149 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2150 					struct ipv4_devconf *p)
2151 {
2152 	int i;
2153 	struct devinet_sysctl_table *t;
2154 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2155 
2156 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2157 	if (!t)
2158 		goto out;
2159 
2160 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2161 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2162 		t->devinet_vars[i].extra1 = p;
2163 		t->devinet_vars[i].extra2 = net;
2164 	}
2165 
2166 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2167 
2168 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2169 	if (!t->sysctl_header)
2170 		goto free;
2171 
2172 	p->sysctl = t;
2173 	return 0;
2174 
2175 free:
2176 	kfree(t);
2177 out:
2178 	return -ENOBUFS;
2179 }
2180 
2181 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2182 {
2183 	struct devinet_sysctl_table *t = cnf->sysctl;
2184 
2185 	if (t == NULL)
2186 		return;
2187 
2188 	cnf->sysctl = NULL;
2189 	unregister_net_sysctl_table(t->sysctl_header);
2190 	kfree(t);
2191 }
2192 
2193 static int devinet_sysctl_register(struct in_device *idev)
2194 {
2195 	int err;
2196 
2197 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2198 		return -EINVAL;
2199 
2200 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2201 	if (err)
2202 		return err;
2203 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2204 					&idev->cnf);
2205 	if (err)
2206 		neigh_sysctl_unregister(idev->arp_parms);
2207 	return err;
2208 }
2209 
/*
 * Remove the sysctl entries of @idev.  The two unregistrations run in the
 * reverse order of the registrations in devinet_sysctl_register(): the
 * IPv4 configuration table first, then the neighbour parameters.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
2215 
2216 static struct ctl_table ctl_forward_entry[] = {
2217 	{
2218 		.procname	= "ip_forward",
2219 		.data		= &ipv4_devconf.data[
2220 					IPV4_DEVCONF_FORWARDING - 1],
2221 		.maxlen		= sizeof(int),
2222 		.mode		= 0644,
2223 		.proc_handler	= devinet_sysctl_forward,
2224 		.extra1		= &ipv4_devconf,
2225 		.extra2		= &init_net,
2226 	},
2227 	{ },
2228 };
2229 #endif
2230 
2231 static __net_init int devinet_init_net(struct net *net)
2232 {
2233 	int err;
2234 	struct ipv4_devconf *all, *dflt;
2235 #ifdef CONFIG_SYSCTL
2236 	struct ctl_table *tbl = ctl_forward_entry;
2237 	struct ctl_table_header *forw_hdr;
2238 #endif
2239 
2240 	err = -ENOMEM;
2241 	all = &ipv4_devconf;
2242 	dflt = &ipv4_devconf_dflt;
2243 
2244 	if (!net_eq(net, &init_net)) {
2245 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2246 		if (all == NULL)
2247 			goto err_alloc_all;
2248 
2249 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2250 		if (dflt == NULL)
2251 			goto err_alloc_dflt;
2252 
2253 #ifdef CONFIG_SYSCTL
2254 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2255 		if (tbl == NULL)
2256 			goto err_alloc_ctl;
2257 
2258 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2259 		tbl[0].extra1 = all;
2260 		tbl[0].extra2 = net;
2261 #endif
2262 	}
2263 
2264 #ifdef CONFIG_SYSCTL
2265 	err = __devinet_sysctl_register(net, "all", all);
2266 	if (err < 0)
2267 		goto err_reg_all;
2268 
2269 	err = __devinet_sysctl_register(net, "default", dflt);
2270 	if (err < 0)
2271 		goto err_reg_dflt;
2272 
2273 	err = -ENOMEM;
2274 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2275 	if (forw_hdr == NULL)
2276 		goto err_reg_ctl;
2277 	net->ipv4.forw_hdr = forw_hdr;
2278 #endif
2279 
2280 	net->ipv4.devconf_all = all;
2281 	net->ipv4.devconf_dflt = dflt;
2282 	return 0;
2283 
2284 #ifdef CONFIG_SYSCTL
2285 err_reg_ctl:
2286 	__devinet_sysctl_unregister(dflt);
2287 err_reg_dflt:
2288 	__devinet_sysctl_unregister(all);
2289 err_reg_all:
2290 	if (tbl != ctl_forward_entry)
2291 		kfree(tbl);
2292 err_alloc_ctl:
2293 #endif
2294 	if (dflt != &ipv4_devconf_dflt)
2295 		kfree(dflt);
2296 err_alloc_dflt:
2297 	if (all != &ipv4_devconf)
2298 		kfree(all);
2299 err_alloc_all:
2300 	return err;
2301 }
2302 
2303 static __net_exit void devinet_exit_net(struct net *net)
2304 {
2305 #ifdef CONFIG_SYSCTL
2306 	struct ctl_table *tbl;
2307 
2308 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2309 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2310 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2311 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2312 	kfree(tbl);
2313 #endif
2314 	kfree(net->ipv4.devconf_dflt);
2315 	kfree(net->ipv4.devconf_all);
2316 }
2317 
2318 static __net_initdata struct pernet_operations devinet_ops = {
2319 	.init = devinet_init_net,
2320 	.exit = devinet_exit_net,
2321 };
2322 
2323 static struct rtnl_af_ops inet_af_ops = {
2324 	.family		  = AF_INET,
2325 	.fill_link_af	  = inet_fill_link_af,
2326 	.get_link_af_size = inet_get_link_af_size,
2327 	.validate_link_af = inet_validate_link_af,
2328 	.set_link_af	  = inet_set_link_af,
2329 };
2330 
2331 void __init devinet_init(void)
2332 {
2333 	int i;
2334 
2335 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2336 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2337 
2338 	register_pernet_subsys(&devinet_ops);
2339 
2340 	register_gifconf(PF_INET, inet_gifconf);
2341 	register_netdevice_notifier(&ip_netdev_notifier);
2342 
2343 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2344 
2345 	rtnl_af_register(&inet_af_ops);
2346 
2347 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2348 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2349 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2350 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2351 		      inet_netconf_dump_devconf, NULL);
2352 }
2353 
2354